mirror of
				https://github.com/pbatard/rufus.git
				synced 2024-08-14 23:57:05 +00:00 
			
		
		
		
	[checksum] more cleanup and optimization
* Why... can't I stop... trying to optimize... this thing?!?
This commit is contained in:
		
							parent
							
								
									b89beceedf
								
							
						
					
					
						commit
						07114edc6f
					
				
					 3 changed files with 45 additions and 45 deletions
				
			
		|  | @ -68,36 +68,15 @@ char sum_str[NUM_CHECKSUMS][65]; | ||||||
| int bufnum, sum_count[NUM_CHECKSUMS] = { 16, 20, 32 }; | int bufnum, sum_count[NUM_CHECKSUMS] = { 16, 20, 32 }; | ||||||
| HANDLE data_ready[NUM_CHECKSUMS], thread_ready[NUM_CHECKSUMS]; | HANDLE data_ready[NUM_CHECKSUMS], thread_ready[NUM_CHECKSUMS]; | ||||||
| DWORD rSize[2]; | DWORD rSize[2]; | ||||||
| char buffer[2][BUFFER_SIZE]; | char ALIGNED(64) buffer[2][BUFFER_SIZE]; | ||||||
| 
 | 
 | ||||||
| #if defined(__GNUC__) | /*
 | ||||||
| #define ALIGNED(m) __attribute__ ((__aligned__(m))) |  * Rotate 32 bit integers by n bits. | ||||||
| #elif defined(_MSC_VER) |  * Don't bother trying to hand-optimize those, as the | ||||||
| #define ALIGNED(m) __declspec(align(m)) |  * compiler usually does a pretty good job at that. | ||||||
| #endif |  */ | ||||||
| 
 | #define ROL(a,b) (((a) << (b)) | ((a) >> (32-(b)))) | ||||||
| /* Rotate a 32 bit integer by n bits */ | #define ROR(a,b) (((a) >> (b)) | ((a) << (32-(b)))) | ||||||
| #if defined(__GNUC__) && defined(__i386__) |  | ||||||
| static inline uint32_t rol(uint32_t x, int n) |  | ||||||
| { |  | ||||||
| 	__asm__("roll %%cl,%0" |  | ||||||
| 		:"=r" (x) |  | ||||||
| 		:"0" (x),"c" (n)); |  | ||||||
| 	return x; |  | ||||||
| } |  | ||||||
| #elif defined(_MSC_VER) && (_M_IX86 >= 300) |  | ||||||
| static __inline uint32_t rol(uint32_t x, int n) |  | ||||||
| { |  | ||||||
| 	__asm { |  | ||||||
| 		mov eax, x |  | ||||||
| 		mov ecx, n |  | ||||||
| 		rol eax, cl |  | ||||||
| 	} |  | ||||||
| 	/* returns with result in EAX */ |  | ||||||
| } |  | ||||||
| #else |  | ||||||
| #define rol(x,n) ( ((x) << (n)) | ((x) >> (32-(n))) ) |  | ||||||
| #endif |  | ||||||
| 
 | 
 | ||||||
| // For SHA-256
 | // For SHA-256
 | ||||||
| static const uint32_t k[64] = { | static const uint32_t k[64] = { | ||||||
|  | @ -154,7 +133,6 @@ static void sha256_init(SUM_CONTEXT *ctx) | ||||||
| 	ctx->state[7] = 0x5be0cd19; | 	ctx->state[7] = 0x5be0cd19; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| 
 |  | ||||||
| /* Transform the message X which consists of 16 32-bit-words (SHA-1) */ | /* Transform the message X which consists of 16 32-bit-words (SHA-1) */ | ||||||
| static void sha1_transform(SUM_CONTEXT *ctx, const unsigned char *data) | static void sha1_transform(SUM_CONTEXT *ctx, const unsigned char *data) | ||||||
| { | { | ||||||
|  | @ -192,10 +170,10 @@ static void sha1_transform(SUM_CONTEXT *ctx, const unsigned char *data) | ||||||
| #define F3(x,y,z)   ( ( x & y ) | ( z & ( x | y ) ) ) | #define F3(x,y,z)   ( ( x & y ) | ( z & ( x | y ) ) ) | ||||||
| #define F4(x,y,z)   ( x ^ y ^ z ) | #define F4(x,y,z)   ( x ^ y ^ z ) | ||||||
| 
 | 
 | ||||||
| #define M(i) ( tm = x[i&0x0f] ^ x[(i-14)&0x0f] ^ x[(i-8)&0x0f] ^ x[(i-3)&0x0f], (x[i&0x0f] = rol(tm,1)) ) | #define M(i) ( tm = x[i&0x0f] ^ x[(i-14)&0x0f] ^ x[(i-8)&0x0f] ^ x[(i-3)&0x0f], (x[i&0x0f] = ROL(tm,1)) ) | ||||||
| 
 | 
 | ||||||
| #define SHA1STEP(a,b,c,d,e,f,k,m) do { e += rol(a, 5) + f(b, c, d) + k + m; \ | #define SHA1STEP(a,b,c,d,e,f,k,m) do { e += ROL(a, 5) + f(b, c, d) + k + m; \ | ||||||
|                                        b = rol(b, 30); } while(0) |                                        b = ROL(b, 30); } while(0) | ||||||
| 	SHA1STEP(a, b, c, d, e, F1, K1, x[0]); | 	SHA1STEP(a, b, c, d, e, F1, K1, x[0]); | ||||||
| 	SHA1STEP(e, a, b, c, d, F1, K1, x[1]); | 	SHA1STEP(e, a, b, c, d, F1, K1, x[1]); | ||||||
| 	SHA1STEP(d, e, a, b, c, F1, K1, x[2]); | 	SHA1STEP(d, e, a, b, c, F1, K1, x[2]); | ||||||
|  | @ -303,15 +281,12 @@ static void sha256_transform(SUM_CONTEXT *ctx, const unsigned char *data) | ||||||
| 	g = ctx->state[6]; | 	g = ctx->state[6]; | ||||||
| 	h = ctx->state[7]; | 	h = ctx->state[7]; | ||||||
| 
 | 
 | ||||||
| #define ROTLEFT(a,b) (((a) << (b)) | ((a) >> (32-(b)))) |  | ||||||
| #define ROTRIGHT(a,b) (((a) >> (b)) | ((a) << (32-(b)))) |  | ||||||
| 
 |  | ||||||
| #define CH(x,y,z) (((x) & (y)) ^ (~(x) & (z))) | #define CH(x,y,z) (((x) & (y)) ^ (~(x) & (z))) | ||||||
| #define MAJ(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) | #define MAJ(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) | ||||||
| #define EP0(x) (ROTRIGHT(x,2) ^ ROTRIGHT(x,13) ^ ROTRIGHT(x,22)) | #define EP0(x) (ROR(x,2) ^ ROR(x,13) ^ ROR(x,22)) | ||||||
| #define EP1(x) (ROTRIGHT(x,6) ^ ROTRIGHT(x,11) ^ ROTRIGHT(x,25)) | #define EP1(x) (ROR(x,6) ^ ROR(x,11) ^ ROR(x,25)) | ||||||
| #define SIG0(x) (ROTRIGHT(x,7) ^ ROTRIGHT(x,18) ^ ((x) >> 3)) | #define SIG0(x) (ROR(x,7) ^ ROR(x,18) ^ ((x) >> 3)) | ||||||
| #define SIG1(x) (ROTRIGHT(x,17) ^ ROTRIGHT(x,19) ^ ((x) >> 10)) | #define SIG1(x) (ROR(x,17) ^ ROR(x,19) ^ ((x) >> 10)) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| #ifdef BIG_ENDIAN_HOST | #ifdef BIG_ENDIAN_HOST | ||||||
|  | @ -487,6 +462,7 @@ static void sha1_write(SUM_CONTEXT *ctx, const unsigned char *buf, size_t len) | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	while (len >= 64) { | 	while (len >= 64) { | ||||||
|  | 		PREFETCH64(&buf[64]); | ||||||
| 		sha1_transform(ctx, buf); | 		sha1_transform(ctx, buf); | ||||||
| 		ctx->bytecount = 0; | 		ctx->bytecount = 0; | ||||||
| 		ctx->bitcount += 64 * 8; | 		ctx->bitcount += 64 * 8; | ||||||
|  | @ -505,6 +481,7 @@ static void sha256_write(SUM_CONTEXT *ctx, const unsigned char *buf, size_t len) | ||||||
| 		ctx->buf[ctx->bytecount] = buf[i]; | 		ctx->buf[ctx->bytecount] = buf[i]; | ||||||
| 		ctx->bytecount++; | 		ctx->bytecount++; | ||||||
| 		if (ctx->bytecount == 64) { | 		if (ctx->bytecount == 64) { | ||||||
|  | 			PREFETCH64(&buf[i + 64]); | ||||||
| 			sha256_transform(ctx, ctx->buf); | 			sha256_transform(ctx, ctx->buf); | ||||||
| 			ctx->bitcount += 64 * 8; | 			ctx->bitcount += 64 * 8; | ||||||
| 			ctx->bytecount = 0; | 			ctx->bytecount = 0; | ||||||
|  | @ -539,6 +516,7 @@ static void md5_write(SUM_CONTEXT *ctx, const unsigned char *buf, size_t len) | ||||||
| 
 | 
 | ||||||
| 	/* Process data in 64-byte chunks */ | 	/* Process data in 64-byte chunks */ | ||||||
| 	while (len >= 64) { | 	while (len >= 64) { | ||||||
|  | 		PREFETCH64(&buf[64]); | ||||||
| 		memcpy(ctx->buf, buf, 64); | 		memcpy(ctx->buf, buf, 64); | ||||||
| 		md5_transform(ctx, ctx->buf); | 		md5_transform(ctx, ctx->buf); | ||||||
| 		buf += 64; | 		buf += 64; | ||||||
|  | @ -704,6 +682,11 @@ static void md5_final(SUM_CONTEXT *ctx) | ||||||
| #undef X | #undef X | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | // These 'null' calls are useful for testing load balancing and individual algorithm speed
 | ||||||
|  | static void null_init(SUM_CONTEXT *ctx) { memset(ctx, 0, sizeof(*ctx)); } | ||||||
|  | static void null_write(SUM_CONTEXT *ctx, const unsigned char *buf, size_t len) { } | ||||||
|  | static void null_final(SUM_CONTEXT *ctx) { } | ||||||
|  | 
 | ||||||
| typedef void sum_init_t(SUM_CONTEXT *ctx); | typedef void sum_init_t(SUM_CONTEXT *ctx); | ||||||
| typedef void sum_write_t(SUM_CONTEXT *ctx, const unsigned char *buf, size_t len); | typedef void sum_write_t(SUM_CONTEXT *ctx, const unsigned char *buf, size_t len); | ||||||
| typedef void sum_final_t(SUM_CONTEXT *ctx); | typedef void sum_final_t(SUM_CONTEXT *ctx); | ||||||
|  |  | ||||||
|  | @ -35,6 +35,23 @@ | ||||||
| #define MIN(a,b) (((a) < (b)) ? (a) : (b)) | #define MIN(a,b) (((a) < (b)) ? (a) : (b)) | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
|  | #if defined(__GNUC__) | ||||||
|  | #define ALIGNED(m) __attribute__ ((__aligned__(m))) | ||||||
|  | #elif defined(_MSC_VER) | ||||||
|  | #define ALIGNED(m) __declspec(align(m)) | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * Prefetch 64 bytes at address m, for read-only operation. | ||||||
|  |  * We rely on these built-in calls being harmless no-ops if the | ||||||
|  |  * line has already been fetched, or if the address is invalid. | ||||||
|  |  */ | ||||||
|  | #if defined(__GNUC__) | ||||||
|  | #define PREFETCH64(m) do { __builtin_prefetch(m, 0, 0); __builtin_prefetch(m+32, 0, 0); } while(0) | ||||||
|  | #elif defined(_MSC_VER) | ||||||
|  | #define PREFETCH64(m) do { _m_prefetch(m); _m_prefetch(m+32); } while(0) | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
| #if defined(_MSC_VER) | #if defined(_MSC_VER) | ||||||
| #define bswap_uint64 _byteswap_uint64 | #define bswap_uint64 _byteswap_uint64 | ||||||
| #define bswap_uint32 _byteswap_ulong | #define bswap_uint32 _byteswap_ulong | ||||||
|  |  | ||||||
							
								
								
									
										10
									
								
								src/rufus.rc
									
										
									
									
									
								
							
							
						
						
									
										10
									
								
								src/rufus.rc
									
										
									
									
									
								
							|  | @ -33,7 +33,7 @@ LANGUAGE LANG_NEUTRAL, SUBLANG_NEUTRAL | ||||||
| IDD_DIALOG DIALOGEX 12, 12, 242, 376 | IDD_DIALOG DIALOGEX 12, 12, 242, 376 | ||||||
| STYLE DS_SETFONT | DS_MODALFRAME | DS_CENTER | WS_MINIMIZEBOX | WS_POPUP | WS_CAPTION | WS_SYSMENU | STYLE DS_SETFONT | DS_MODALFRAME | DS_CENTER | WS_MINIMIZEBOX | WS_POPUP | WS_CAPTION | WS_SYSMENU | ||||||
| EXSTYLE WS_EX_ACCEPTFILES | EXSTYLE WS_EX_ACCEPTFILES | ||||||
| CAPTION "Rufus 2.8.873" | CAPTION "Rufus 2.8.874" | ||||||
| FONT 8, "Segoe UI Symbol", 400, 0, 0x0 | FONT 8, "Segoe UI Symbol", 400, 0, 0x0 | ||||||
| BEGIN | BEGIN | ||||||
|     LTEXT           "Device",IDS_DEVICE_TXT,9,6,200,8 |     LTEXT           "Device",IDS_DEVICE_TXT,9,6,200,8 | ||||||
|  | @ -320,8 +320,8 @@ END | ||||||
| // | // | ||||||
| 
 | 
 | ||||||
| VS_VERSION_INFO VERSIONINFO | VS_VERSION_INFO VERSIONINFO | ||||||
|  FILEVERSION 2,8,873,0 |  FILEVERSION 2,8,874,0 | ||||||
|  PRODUCTVERSION 2,8,873,0 |  PRODUCTVERSION 2,8,874,0 | ||||||
|  FILEFLAGSMASK 0x3fL |  FILEFLAGSMASK 0x3fL | ||||||
| #ifdef _DEBUG | #ifdef _DEBUG | ||||||
|  FILEFLAGS 0x1L |  FILEFLAGS 0x1L | ||||||
|  | @ -338,13 +338,13 @@ BEGIN | ||||||
|         BEGIN |         BEGIN | ||||||
|             VALUE "CompanyName", "Akeo Consulting (http://akeo.ie)" |             VALUE "CompanyName", "Akeo Consulting (http://akeo.ie)" | ||||||
|             VALUE "FileDescription", "Rufus" |             VALUE "FileDescription", "Rufus" | ||||||
|             VALUE "FileVersion", "2.8.873" |             VALUE "FileVersion", "2.8.874" | ||||||
|             VALUE "InternalName", "Rufus" |             VALUE "InternalName", "Rufus" | ||||||
|             VALUE "LegalCopyright", "© 2011-2016 Pete Batard (GPL v3)" |             VALUE "LegalCopyright", "© 2011-2016 Pete Batard (GPL v3)" | ||||||
|             VALUE "LegalTrademarks", "http://www.gnu.org/copyleft/gpl.html" |             VALUE "LegalTrademarks", "http://www.gnu.org/copyleft/gpl.html" | ||||||
|             VALUE "OriginalFilename", "rufus.exe" |             VALUE "OriginalFilename", "rufus.exe" | ||||||
|             VALUE "ProductName", "Rufus" |             VALUE "ProductName", "Rufus" | ||||||
|             VALUE "ProductVersion", "2.8.873" |             VALUE "ProductVersion", "2.8.874" | ||||||
|         END |         END | ||||||
|     END |     END | ||||||
|     BLOCK "VarFileInfo" |     BLOCK "VarFileInfo" | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue