1
1
Fork 0
mirror of https://github.com/pbatard/rufus.git synced 2024-08-14 23:57:05 +00:00

[checksum] additional cleanup and optimization

* Use the conventional function names for SHA-256 (S0/S1 for the upper-case Sigma functions, s0/s1 for the lower-case sigma functions)
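* Use the same xxx_write structure for all three algorithms (MD5, SHA-1, SHA-256), and avoid an unwarranted memcpy by transforming full 64-byte blocks directly from the input buffer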
* Remove the bitcount field from SUM_CONTEXT (the bit count is now derived from bytecount in the xxx_final functions)
* Use nested ROR() calls in the SHA-256 macros as a speedup
This commit is contained in:
Pete Batard 2016-03-05 21:20:09 +00:00
parent 71520baf31
commit 5afffd1018
3 changed files with 110 additions and 103 deletions

View file

@ -97,7 +97,6 @@ typedef struct ALIGNED(64) {
unsigned char buf[64]; unsigned char buf[64];
uint32_t state[8]; uint32_t state[8];
uint64_t bytecount; uint64_t bytecount;
uint64_t bitcount;
} SUM_CONTEXT; } SUM_CONTEXT;
static void md5_init(SUM_CONTEXT *ctx) static void md5_init(SUM_CONTEXT *ctx)
@ -267,7 +266,7 @@ static void sha1_transform(SUM_CONTEXT *ctx, const unsigned char *data)
} }
/* Transform the message X which consists of 16 32-bit-words (SHA-256) */ /* Transform the message X which consists of 16 32-bit-words (SHA-256) */
static void sha256_transform(SUM_CONTEXT *ctx, const unsigned char *data) static __inline void sha256_transform(SUM_CONTEXT *ctx, const unsigned char *data)
{ {
uint32_t a, b, c, d, e, f, g, h, j, x[16]; uint32_t a, b, c, d, e, f, g, h, j, x[16];
@ -282,16 +281,17 @@ static void sha256_transform(SUM_CONTEXT *ctx, const unsigned char *data)
#define CH(x,y,z) ((z) ^ ((x) & ((y) ^ (z)))) #define CH(x,y,z) ((z) ^ ((x) & ((y) ^ (z))))
#define MAJ(x,y,z) (((x) & (y)) | ((z) & ((x) | (y)))) #define MAJ(x,y,z) (((x) & (y)) | ((z) & ((x) | (y))))
#define EP0(x) (ROR(x,2) ^ ROR(x,13) ^ ROR(x,22)) // Nesting the ROR allows for single register compiler optimizations
#define EP1(x) (ROR(x,6) ^ ROR(x,11) ^ ROR(x,25)) #define S0(x) (ROR(ROR(ROR(x,9)^(x),11)^(x),2))
#define SIG0(x) (ROR(x,7) ^ ROR(x,18) ^ ((x) >> 3)) #define S1(x) (ROR(ROR(ROR(x,14)^(x),5)^(x),6))
#define SIG1(x) (ROR(x,17) ^ ROR(x,19) ^ ((x) >> 10)) #define s0(x) (ROR(ROR(x,11)^(x),7)^((x)>>3))
#define s1(x) (ROR(ROR(x,2)^(x),17)^((x)>>10))
#define BLK0(i) (x[i]) #define BLK0(i) (x[i])
#define BLK2(i) (x[i] += SIG1(x[((i)-2)&15]) + x[((i)-7)&15] + SIG0(x[((i)-15)&15])) #define BLK2(i) (x[i] += s1(x[((i)-2)&15]) + x[((i)-7)&15] + s0(x[((i)-15)&15]))
#define R(a,b,c,d,e,f,g,h, i) \ #define R(a,b,c,d,e,f,g,h,i) \
h += EP1(e) + CH(e,f,g) + K[(i)+(j)] + (j ? BLK2(i) : BLK0(i)); \ h += S1(e) + CH(e,f,g) + K[(i)+(j)] + (j ? BLK2(i) : BLK0(i)); \
d += h; \ d += h; \
h += EP0(a) + MAJ(a, b, c) h += S0(a) + MAJ(a, b, c)
#define RX_8(i) \ #define RX_8(i) \
R(a,b,c,d,e,f,g,h, i); \ R(a,b,c,d,e,f,g,h, i); \
R(h,a,b,c,d,e,f,g, i+1); \ R(h,a,b,c,d,e,f,g, i+1); \
@ -322,10 +322,10 @@ static void sha256_transform(SUM_CONTEXT *ctx, const unsigned char *data)
RX_8(8); RX_8(8);
} }
#undef EP0 #undef S0
#undef EP1 #undef S1
#undef SIG0 #undef s0
#undef SIG1 #undef s1
ctx->state[0] += a; ctx->state[0] += a;
ctx->state[1] += b; ctx->state[1] += b;
@ -453,60 +453,74 @@ static void md5_transform(SUM_CONTEXT *ctx, const unsigned char *data)
/* Update the message digest with the contents of the buffer (SHA-1) */ /* Update the message digest with the contents of the buffer (SHA-1) */
static void sha1_write(SUM_CONTEXT *ctx, const unsigned char *buf, size_t len) static void sha1_write(SUM_CONTEXT *ctx, const unsigned char *buf, size_t len)
{ {
if (ctx->bytecount == 64) { /* flush the buffer */ size_t t;
sha1_transform(ctx, ctx->buf);
ctx->bytecount = 0; /* Update bytecount */
ctx->bitcount += 64 * 8; ctx->bytecount += len;
t = ctx->bytecount & 0x3f;
/* Handle any leading odd-sized chunks */
if (t) {
unsigned char *p = ctx->buf + t;
t = 64 - t;
if (len < t) {
memcpy(p, buf, len);
return;
} }
if (!buf) memcpy(p, buf, t);
return; sha1_transform(ctx, ctx->buf);
if (ctx->bytecount) { buf += t;
for (; len && ctx->bytecount < 64; len--) len -= t;
ctx->buf[ctx->bytecount++] = *buf++;
sha1_write(ctx, NULL, 0);
if (!len)
return;
} }
/* Process data in 64-byte chunks */
while (len >= 64) { while (len >= 64) {
PREFETCH64(buf + 64); PREFETCH64(buf + 64);
sha1_transform(ctx, buf); sha1_transform(ctx, buf);
ctx->bytecount = 0;
ctx->bitcount += 64 * 8;
len -= 64;
buf += 64; buf += 64;
len -= 64;
} }
for (; len && ctx->bytecount < 64; len--)
ctx->buf[ctx->bytecount++] = *buf++; /* Handle any remaining bytes of data. */
memcpy(ctx->buf, buf, len);
} }
/* Update the message digest with the contents of the buffer (SHA-256) */ /* Update the message digest with the contents of the buffer (SHA-256) */
static void sha256_write(SUM_CONTEXT *ctx, const unsigned char *buf, size_t len) static void sha256_write(SUM_CONTEXT *ctx, const unsigned char *buf, size_t len)
{ {
size_t num, pos = ctx->bytecount & 0x3F; size_t t;
/* Update bytecount */
ctx->bytecount += len; ctx->bytecount += len;
num = 64 - pos; t = ctx->bytecount & 0x3f;
if (num > len) {
memcpy(ctx->buf + pos, buf, len); /* Handle any leading odd-sized chunks */
if (t) {
unsigned char *p = ctx->buf + t;
t = 64 - t;
if (len < t) {
memcpy(p, buf, len);
return; return;
} }
len -= num; memcpy(p, buf, t);
memcpy(ctx->buf + pos, buf, num);
buf += num;
for (;;) {
PREFETCH64(buf + 64);
sha256_transform(ctx, ctx->buf); sha256_transform(ctx, ctx->buf);
if (len < 64) buf += t;
break; len -= t;
len -= 64;
memcpy(ctx->buf, buf, 64);
buf += 64;
} }
if (len != 0) /* Process data in 64-byte chunks */
while (len >= 64) {
PREFETCH64(buf + 64);
sha256_transform(ctx, buf);
buf += 64;
len -= 64;
}
/* Handle any remaining bytes of data. */
memcpy(ctx->buf, buf, len); memcpy(ctx->buf, buf, len);
} }
@ -515,10 +529,10 @@ static void md5_write(SUM_CONTEXT *ctx, const unsigned char *buf, size_t len)
{ {
size_t t; size_t t;
/* Update bitcount */ /* Update bytecount */
ctx->bitcount += (len << 3); ctx->bytecount += len;
t = (ctx->bitcount >> 3) & 0x3f; t = ctx->bytecount & 0x3f;
/* Handle any leading odd-sized chunks */ /* Handle any leading odd-sized chunks */
if (t) { if (t) {
@ -538,8 +552,7 @@ static void md5_write(SUM_CONTEXT *ctx, const unsigned char *buf, size_t len)
/* Process data in 64-byte chunks */ /* Process data in 64-byte chunks */
while (len >= 64) { while (len >= 64) {
PREFETCH64(buf + 64); PREFETCH64(buf + 64);
memcpy(ctx->buf, buf, 64); md5_transform(ctx, buf);
md5_transform(ctx, ctx->buf);
buf += 64; buf += 64;
len -= 64; len -= 64;
} }
@ -551,31 +564,29 @@ static void md5_write(SUM_CONTEXT *ctx, const unsigned char *buf, size_t len)
/* Finalize the computation and write the digest in ctx->state[] (SHA-1) */ /* Finalize the computation and write the digest in ctx->state[] (SHA-1) */
static void sha1_final(SUM_CONTEXT *ctx) static void sha1_final(SUM_CONTEXT *ctx)
{ {
uint64_t bitcount = ctx->bytecount << 3;
size_t pos = ((size_t)ctx->bytecount) & 0x3F;
unsigned char *p; unsigned char *p;
sha1_write(ctx, NULL, 0); /* flush */; ctx->buf[pos++] = 0x80;
if (ctx->bytecount < 56) { /* enough room */ /* Pad whatever data is left in the buffer */
ctx->buf[ctx->bytecount++] = 0x80; /* pad */ while (pos != (64 - 8)) {
while (ctx->bytecount < 56) pos &= 0x3F;
ctx->buf[ctx->bytecount++] = 0; /* pad */ if (pos == 0)
} else { /* need one extra block */ sha1_transform(ctx, ctx->buf);
ctx->buf[ctx->bytecount++] = 0x80; /* pad character */ ctx->buf[pos++] = 0;
while (ctx->bytecount < 64)
ctx->buf[ctx->bytecount++] = 0;
sha1_write(ctx, NULL, 0); /* flush */;
memset(ctx->buf, 0, 56); /* fill next block with zeroes */
} }
/* append the 64 bit count (big-endian) */ /* Append to the padding the total message's length in bits and transform */
ctx->buf[56] = (unsigned char) (ctx->bitcount >> 56); ctx->buf[63] = (unsigned char) bitcount;
ctx->buf[57] = (unsigned char) (ctx->bitcount >> 48); ctx->buf[62] = (unsigned char) (bitcount >> 8);
ctx->buf[58] = (unsigned char) (ctx->bitcount >> 40); ctx->buf[61] = (unsigned char) (bitcount >> 16);
ctx->buf[59] = (unsigned char) (ctx->bitcount >> 32); ctx->buf[60] = (unsigned char) (bitcount >> 24);
ctx->buf[60] = (unsigned char) (ctx->bitcount >> 24); ctx->buf[59] = (unsigned char) (bitcount >> 32);
ctx->buf[61] = (unsigned char) (ctx->bitcount >> 16); ctx->buf[58] = (unsigned char) (bitcount >> 40);
ctx->buf[62] = (unsigned char) (ctx->bitcount >> 8); ctx->buf[57] = (unsigned char) (bitcount >> 48);
ctx->buf[63] = (unsigned char) ctx->bitcount; ctx->buf[56] = (unsigned char) (bitcount >> 56);
sha1_transform(ctx, ctx->buf); sha1_transform(ctx, ctx->buf);
@ -596,7 +607,8 @@ static void sha1_final(SUM_CONTEXT *ctx)
/* Finalize the computation and write the digest in ctx->state[] (SHA-256) */ /* Finalize the computation and write the digest in ctx->state[] (SHA-256) */
static void sha256_final(SUM_CONTEXT *ctx) static void sha256_final(SUM_CONTEXT *ctx)
{ {
size_t pos = ctx->bytecount & 0x3F; uint64_t bitcount = ctx->bytecount << 3;
size_t pos = ((size_t)ctx->bytecount) & 0x3F;
unsigned char *p; unsigned char *p;
ctx->buf[pos++] = 0x80; ctx->buf[pos++] = 0x80;
@ -610,15 +622,14 @@ static void sha256_final(SUM_CONTEXT *ctx)
} }
/* Append to the padding the total message's length in bits and transform */ /* Append to the padding the total message's length in bits and transform */
ctx->bitcount = ctx->bytecount << 3; ctx->buf[63] = (unsigned char) bitcount;
ctx->buf[63] = (unsigned char) (ctx->bitcount); ctx->buf[62] = (unsigned char) (bitcount >> 8);
ctx->buf[62] = (unsigned char) (ctx->bitcount >> 8); ctx->buf[61] = (unsigned char) (bitcount >> 16);
ctx->buf[61] = (unsigned char) (ctx->bitcount >> 16); ctx->buf[60] = (unsigned char) (bitcount >> 24);
ctx->buf[60] = (unsigned char) (ctx->bitcount >> 24); ctx->buf[59] = (unsigned char) (bitcount >> 32);
ctx->buf[59] = (unsigned char) (ctx->bitcount >> 32); ctx->buf[58] = (unsigned char) (bitcount >> 40);
ctx->buf[58] = (unsigned char) (ctx->bitcount >> 40); ctx->buf[57] = (unsigned char) (bitcount >> 48);
ctx->buf[57] = (unsigned char) (ctx->bitcount >> 48); ctx->buf[56] = (unsigned char) (bitcount >> 56);
ctx->buf[56] = (unsigned char) (ctx->bitcount >> 56);
sha256_transform(ctx, ctx->buf); sha256_transform(ctx, ctx->buf);
@ -642,12 +653,10 @@ static void sha256_final(SUM_CONTEXT *ctx)
/* Finalize the computation and write the digest in ctx->state[] (MD5) */ /* Finalize the computation and write the digest in ctx->state[] (MD5) */
static void md5_final(SUM_CONTEXT *ctx) static void md5_final(SUM_CONTEXT *ctx)
{ {
size_t count; size_t count = ((size_t)ctx->bytecount) & 0x3F;
uint64_t bitcount = ctx->bytecount << 3;
unsigned char *p; unsigned char *p;
/* Compute number of bytes mod 64 */
count = (ctx->bitcount >> 3) & 0x3F;
/* Set the first char of padding to 0x80. /* Set the first char of padding to 0x80.
* This is safe since there is always at least one byte free * This is safe since there is always at least one byte free
*/ */
@ -671,14 +680,14 @@ static void md5_final(SUM_CONTEXT *ctx)
} }
/* append the 64 bit count (little endian) */ /* append the 64 bit count (little endian) */
ctx->buf[56] = (unsigned char) ctx->bitcount; ctx->buf[56] = (unsigned char) bitcount;
ctx->buf[57] = (unsigned char) (ctx->bitcount >> 8); ctx->buf[57] = (unsigned char) (bitcount >> 8);
ctx->buf[58] = (unsigned char) (ctx->bitcount >> 16); ctx->buf[58] = (unsigned char) (bitcount >> 16);
ctx->buf[59] = (unsigned char) (ctx->bitcount >> 24); ctx->buf[59] = (unsigned char) (bitcount >> 24);
ctx->buf[60] = (unsigned char) (ctx->bitcount >> 32); ctx->buf[60] = (unsigned char) (bitcount >> 32);
ctx->buf[61] = (unsigned char) (ctx->bitcount >> 40); ctx->buf[61] = (unsigned char) (bitcount >> 40);
ctx->buf[62] = (unsigned char) (ctx->bitcount >> 48); ctx->buf[62] = (unsigned char) (bitcount >> 48);
ctx->buf[63] = (unsigned char) (ctx->bitcount >> 56); ctx->buf[63] = (unsigned char) (bitcount >> 56);
md5_transform(ctx, ctx->buf); md5_transform(ctx, ctx->buf);
@ -695,7 +704,6 @@ static void md5_final(SUM_CONTEXT *ctx)
#undef X #undef X
} }
//#define NULL_TEST //#define NULL_TEST
#ifdef NULL_TEST #ifdef NULL_TEST
// These 'null' calls are useful for testing load balancing and individual algorithm speed // These 'null' calls are useful for testing load balancing and individual algorithm speed

View file

@ -33,7 +33,7 @@ LANGUAGE LANG_NEUTRAL, SUBLANG_NEUTRAL
IDD_DIALOG DIALOGEX 12, 12, 242, 376 IDD_DIALOG DIALOGEX 12, 12, 242, 376
STYLE DS_SETFONT | DS_MODALFRAME | DS_CENTER | WS_MINIMIZEBOX | WS_POPUP | WS_CAPTION | WS_SYSMENU STYLE DS_SETFONT | DS_MODALFRAME | DS_CENTER | WS_MINIMIZEBOX | WS_POPUP | WS_CAPTION | WS_SYSMENU
EXSTYLE WS_EX_ACCEPTFILES EXSTYLE WS_EX_ACCEPTFILES
CAPTION "Rufus 2.8.878" CAPTION "Rufus 2.8.879"
FONT 8, "Segoe UI Symbol", 400, 0, 0x0 FONT 8, "Segoe UI Symbol", 400, 0, 0x0
BEGIN BEGIN
LTEXT "Device",IDS_DEVICE_TXT,9,6,200,8 LTEXT "Device",IDS_DEVICE_TXT,9,6,200,8
@ -320,8 +320,8 @@ END
// //
VS_VERSION_INFO VERSIONINFO VS_VERSION_INFO VERSIONINFO
FILEVERSION 2,8,878,0 FILEVERSION 2,8,879,0
PRODUCTVERSION 2,8,878,0 PRODUCTVERSION 2,8,879,0
FILEFLAGSMASK 0x3fL FILEFLAGSMASK 0x3fL
#ifdef _DEBUG #ifdef _DEBUG
FILEFLAGS 0x1L FILEFLAGS 0x1L
@ -338,13 +338,13 @@ BEGIN
BEGIN BEGIN
VALUE "CompanyName", "Akeo Consulting (http://akeo.ie)" VALUE "CompanyName", "Akeo Consulting (http://akeo.ie)"
VALUE "FileDescription", "Rufus" VALUE "FileDescription", "Rufus"
VALUE "FileVersion", "2.8.878" VALUE "FileVersion", "2.8.879"
VALUE "InternalName", "Rufus" VALUE "InternalName", "Rufus"
VALUE "LegalCopyright", "© 2011-2016 Pete Batard (GPL v3)" VALUE "LegalCopyright", "© 2011-2016 Pete Batard (GPL v3)"
VALUE "LegalTrademarks", "http://www.gnu.org/copyleft/gpl.html" VALUE "LegalTrademarks", "http://www.gnu.org/copyleft/gpl.html"
VALUE "OriginalFilename", "rufus.exe" VALUE "OriginalFilename", "rufus.exe"
VALUE "ProductName", "Rufus" VALUE "ProductName", "Rufus"
VALUE "ProductVersion", "2.8.878" VALUE "ProductVersion", "2.8.879"
END END
END END
BLOCK "VarFileInfo" BLOCK "VarFileInfo"

View file

@ -820,7 +820,6 @@ BOOL SetLGP(BOOL bRestore, BOOL* bExistingKey, const char* szPath, const char* s
*/ */
BOOL SetThreadAffinity(DWORD_PTR* thread_affinity, size_t num_threads) BOOL SetThreadAffinity(DWORD_PTR* thread_affinity, size_t num_threads)
{ {
size_t i, j, pc; size_t i, j, pc;
DWORD_PTR affinity, dummy; DWORD_PTR affinity, dummy;