mirror of
				https://github.com/pbatard/rufus.git
				synced 2024-08-14 23:57:05 +00:00 
			
		
		
		
	[checksum] more load balancing optimizations
* SetChecksumAffinity() now spreads the affinity evenly between cores
* Also increase the read buffer size to help with performance
* At this stage, the only limiting factor regarding performance seems to be the speed of the SHA-256 algorithm...
This commit is contained in:
		
							parent
							
								
									e6d3653cac
								
							
						
					
					
						commit
						e1c7c9670b
					
				
					 5 changed files with 77 additions and 46 deletions
				
			
		|  | @ -60,7 +60,7 @@ | ||||||
| 
 | 
 | ||||||
| #undef BIG_ENDIAN_HOST | #undef BIG_ENDIAN_HOST | ||||||
| 
 | 
 | ||||||
| #define BUFFER_SIZE     4096 | #define BUFFER_SIZE     (64*KB) | ||||||
| #define WAIT_TIME       5000 | #define WAIT_TIME       5000 | ||||||
| 
 | 
 | ||||||
| /* Globals */ | /* Globals */ | ||||||
|  | @ -704,6 +704,13 @@ static void md5_final(SUM_CONTEXT *ctx) | ||||||
| #undef X | #undef X | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | typedef void sum_init_t(SUM_CONTEXT *ctx); | ||||||
|  | typedef void sum_write_t(SUM_CONTEXT *ctx, const unsigned char *buf, size_t len); | ||||||
|  | typedef void sum_final_t(SUM_CONTEXT *ctx); | ||||||
|  | sum_init_t *sum_init[NUM_CHECKSUMS] = { md5_init, sha1_init , sha256_init }; | ||||||
|  | sum_write_t *sum_write[NUM_CHECKSUMS] = { md5_write, sha1_write , sha256_write }; | ||||||
|  | sum_final_t *sum_final[NUM_CHECKSUMS] = { md5_final, sha1_final , sha256_final }; | ||||||
|  | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Checksum dialog callback |  * Checksum dialog callback | ||||||
|  */ |  */ | ||||||
|  | @ -771,53 +778,47 @@ INT_PTR CALLBACK ChecksumCallback(HWND hDlg, UINT message, WPARAM wParam, LPARAM | ||||||
| 	return (INT_PTR)FALSE; | 	return (INT_PTR)FALSE; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| typedef void sum_init_t(SUM_CONTEXT *ctx); |  | ||||||
| typedef void sum_write_t(SUM_CONTEXT *ctx, const unsigned char *buf, size_t len); |  | ||||||
| typedef void sum_final_t(SUM_CONTEXT *ctx); |  | ||||||
| sum_init_t *sum_init[NUM_CHECKSUMS] = { md5_init, sha1_init , sha256_init }; |  | ||||||
| sum_write_t *sum_write[NUM_CHECKSUMS] = { md5_write, sha1_write , sha256_write }; |  | ||||||
| sum_final_t *sum_final[NUM_CHECKSUMS] = { md5_final, sha1_final , sha256_final }; |  | ||||||
| 
 |  | ||||||
| /*
 | /*
 | ||||||
|  * We want the maximum speed we can get out of the checksum computation, |  * We want the maximum speed we can get out of the checksum computation, | ||||||
|  * so, if we have a multiprocessor/multithreaded machine, we'll assign of |  * so, if we have a multiprocessor/multithreaded machine, we try to assign | ||||||
|  * each of the individual checksum threads to a specific virtual core, and |  * each of the individual checksum threads to a different core. | ||||||
|  * assign the read thread to one of the remainder virtual cores. |  | ||||||
|  * To do just that, we need the following function call. |  * To do just that, we need the following function call. | ||||||
|  * Oh, and BOY is this thing sensitive to whether the first sum affinity |  | ||||||
|  * is on an even or odd virtual core! |  | ||||||
|  */ |  */ | ||||||
| BOOL SetChecksumAffinity(CHECKSUM_AFFINITY* checksum_affinity) | extern BOOL usb_debug;	// For uuprintf
 | ||||||
|  | BOOL SetChecksumAffinity(DWORD_PTR* thread_affinity) | ||||||
| { | { | ||||||
| 	int i, pc; | 	int i, j, pc; | ||||||
| 	DWORD_PTR affinity, dummy; | 	DWORD_PTR affinity, dummy; | ||||||
| 
 | 
 | ||||||
| 	memset(checksum_affinity, 0, sizeof(CHECKSUM_AFFINITY)); | 	memset(thread_affinity, 0, 4 * sizeof(DWORD_PTR)); | ||||||
| 	if (!GetProcessAffinityMask(GetCurrentProcess(), &affinity, &dummy)) | 	if (!GetProcessAffinityMask(GetCurrentProcess(), &affinity, &dummy)) | ||||||
| 		return FALSE; | 		return FALSE; | ||||||
|  | 	uuprintf("\r\nChecksum affinities:"); | ||||||
|  | 	uuprintf("global:\t%s", printbitslz(affinity)); | ||||||
| 
 | 
 | ||||||
| 	// If we don't have enough virtual cores to evenly spread our load forget it
 | 	// If we don't have enough virtual cores to evenly spread our load forget it
 | ||||||
| 	pc = popcnt64(affinity); | 	pc = popcnt64(affinity); | ||||||
| 	if (pc < NUM_CHECKSUMS + 1) | 	if (pc < NUM_CHECKSUMS + 1) | ||||||
| 		return FALSE; | 		return FALSE; | ||||||
| 
 | 
 | ||||||
| 	// We'll use the NUM_CHECKSUMS least significant set bits in our mask for
 | 	// Spread the affinity as evenly as we can
 | ||||||
| 	// the individual checksum threads, and the remainder for the read thread.
 | 	thread_affinity[NUM_CHECKSUMS] = affinity; | ||||||
| 	// From an empirical perspective, this looks like the best "one-size-fits-all"
 |  | ||||||
| 	// to spread the load.
 |  | ||||||
| 	checksum_affinity->read_thread = affinity; |  | ||||||
| 	for (i = 0; i < NUM_CHECKSUMS; i++) { | 	for (i = 0; i < NUM_CHECKSUMS; i++) { | ||||||
| 		checksum_affinity->sum_thread[i] = affinity & (-1LL * affinity); | 		for (j = 0; j < pc / (NUM_CHECKSUMS + 1); j++) { | ||||||
| 		affinity ^= checksum_affinity->sum_thread[i]; | 			thread_affinity[i] |= affinity & (-1LL * affinity); | ||||||
| 		checksum_affinity->read_thread ^= checksum_affinity->sum_thread[i]; | 			affinity ^= affinity & (-1LL * affinity); | ||||||
| 		} | 		} | ||||||
|  | 		uuprintf("sum%d:\t%s", i, printbitslz(thread_affinity[i])); | ||||||
|  | 		thread_affinity[NUM_CHECKSUMS] ^= thread_affinity[i]; | ||||||
|  | 	} | ||||||
|  | 	uuprintf("sum%d:\t%s", i, printbitslz(thread_affinity[i])); | ||||||
| 	return TRUE; | 	return TRUE; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| // Individual thread that computes one of MD5, SHA1 or SHA256 in parallel
 | // Individual thread that computes one of MD5, SHA1 or SHA256 in parallel
 | ||||||
| DWORD WINAPI IndividualSumThread(void* param) | DWORD WINAPI IndividualSumThread(void* param) | ||||||
| { | { | ||||||
| 	SUM_CONTEXT sum_ctx; | 	SUM_CONTEXT sum_ctx = { 0 }; // There's a memset in sum_init, but static analyzers still bug us
 | ||||||
| 	int i = (int)(uintptr_t)param, j; | 	int i = (int)(uintptr_t)param, j; | ||||||
| 
 | 
 | ||||||
| 	sum_init[i](&sum_ctx); | 	sum_init[i](&sum_ctx); | ||||||
|  | @ -850,24 +851,28 @@ error: | ||||||
| 
 | 
 | ||||||
| DWORD WINAPI SumThread(void* param) | DWORD WINAPI SumThread(void* param) | ||||||
| { | { | ||||||
| 	CHECKSUM_AFFINITY* checksum_affinity = (CHECKSUM_AFFINITY*)param; | 	DWORD_PTR* thread_affinity = (DWORD_PTR*)param; | ||||||
| 	HANDLE sum_thread[NUM_CHECKSUMS] = { NULL, NULL, NULL }; | 	HANDLE sum_thread[NUM_CHECKSUMS] = { NULL, NULL, NULL }; | ||||||
| 	HANDLE h = INVALID_HANDLE_VALUE; | 	HANDLE h = INVALID_HANDLE_VALUE; | ||||||
| 	uint64_t rb, LastRefresh = 0; | 	uint64_t rb, LastRefresh = 0; | ||||||
| 	int i, _bufnum, r = -1; | 	int i, _bufnum, r = -1; | ||||||
| 	float format_percent = 0.0f; | 	float format_percent = 0.0f; | ||||||
| 
 | 
 | ||||||
| 	if ((image_path == NULL) || (checksum_affinity == NULL)) | 	if ((image_path == NULL) || (thread_affinity == NULL)) | ||||||
| 		goto out; | 		goto out; | ||||||
| 
 | 
 | ||||||
| 	uprintf("\r\nComputing checksum for '%s'...", image_path); | 	uprintf("\r\nComputing checksum for '%s'...", image_path); | ||||||
| 
 | 
 | ||||||
| 	if (checksum_affinity->read_thread != 0) | 	if (thread_affinity[0] != 0) | ||||||
| 		SetThreadAffinityMask(GetCurrentThread(), checksum_affinity->read_thread); | 		// Use the first affinity mask, as our read thread is the least
 | ||||||
|  | 		// CPU intensive (mostly waits on disk I/O or on the other threads)
 | ||||||
|  | 		// whereas the OS is likely to requisition the first Core, which
 | ||||||
|  | 		// is usually in this first mask, for other tasks.
 | ||||||
|  | 		SetThreadAffinityMask(GetCurrentThread(), thread_affinity[0]); | ||||||
| 
 | 
 | ||||||
| 	for (i = 0; i < NUM_CHECKSUMS; i++) { | 	for (i = 0; i < NUM_CHECKSUMS; i++) { | ||||||
| 		// NB: Can't use a single manual-reset event for data_ready as we
 | 		// NB: Can't use a single manual-reset event for data_ready as we
 | ||||||
| 		// wouldn't be able to ensure the event is reset before the threa
 | 		// wouldn't be able to ensure the event is reset before the thread
 | ||||||
| 		// gets into its next wait loop
 | 		// gets into its next wait loop
 | ||||||
| 		data_ready[i] = CreateEvent(NULL, FALSE, FALSE, NULL); | 		data_ready[i] = CreateEvent(NULL, FALSE, FALSE, NULL); | ||||||
| 		thread_ready[i] = CreateEvent(NULL, FALSE, FALSE, NULL); | 		thread_ready[i] = CreateEvent(NULL, FALSE, FALSE, NULL); | ||||||
|  | @ -880,8 +885,8 @@ DWORD WINAPI SumThread(void* param) | ||||||
| 			uprintf("Unable to start checksum thread #%d", i); | 			uprintf("Unable to start checksum thread #%d", i); | ||||||
| 			goto out; | 			goto out; | ||||||
| 		} | 		} | ||||||
| 		if (checksum_affinity->sum_thread[i] != 0) | 		if (thread_affinity[i+1] != 0) | ||||||
| 			SetThreadAffinityMask(sum_thread[i], checksum_affinity->sum_thread[i]); | 			SetThreadAffinityMask(sum_thread[i], thread_affinity[i+1]); | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	h = CreateFileU(image_path, GENERIC_READ, FILE_SHARE_READ, NULL, | 	h = CreateFileU(image_path, GENERIC_READ, FILE_SHARE_READ, NULL, | ||||||
|  |  | ||||||
|  | @ -2045,7 +2045,7 @@ static INT_PTR CALLBACK MainCallback(HWND hDlg, UINT message, WPARAM wParam, LPA | ||||||
| 	static ULONG ulRegister = 0; | 	static ULONG ulRegister = 0; | ||||||
| 	static LPITEMIDLIST pidlDesktop = NULL; | 	static LPITEMIDLIST pidlDesktop = NULL; | ||||||
| 	static MY_SHChangeNotifyEntry NotifyEntry; | 	static MY_SHChangeNotifyEntry NotifyEntry; | ||||||
| 	static CHECKSUM_AFFINITY checksum_affinity; | 	static DWORD_PTR sumthread_affinity[4]; | ||||||
| 	DRAWITEMSTRUCT* pDI; | 	DRAWITEMSTRUCT* pDI; | ||||||
| 	HDROP droppedFileInfo; | 	HDROP droppedFileInfo; | ||||||
| 	POINT Point; | 	POINT Point; | ||||||
|  | @ -2530,8 +2530,8 @@ static INT_PTR CALLBACK MainCallback(HWND hDlg, UINT message, WPARAM wParam, LPA | ||||||
| 				// Disable all controls except cancel
 | 				// Disable all controls except cancel
 | ||||||
| 				EnableControls(FALSE); | 				EnableControls(FALSE); | ||||||
| 				InitProgress(FALSE); | 				InitProgress(FALSE); | ||||||
| 				SetChecksumAffinity(&checksum_affinity); | 				SetChecksumAffinity(sumthread_affinity); | ||||||
| 				format_thid = CreateThread(NULL, 0, SumThread, (LPVOID)&checksum_affinity, 0, NULL); | 				format_thid = CreateThread(NULL, 0, SumThread, (LPVOID)sumthread_affinity, 0, NULL); | ||||||
| 				if (format_thid != NULL) { | 				if (format_thid != NULL) { | ||||||
| 					PrintInfo(0, -1); | 					PrintInfo(0, -1); | ||||||
| 					timer = 0; | 					timer = 0; | ||||||
|  |  | ||||||
							
								
								
									
										10
									
								
								src/rufus.h
									
										
									
									
									
								
							
							
						
						
									
										10
									
								
								src/rufus.h
									
										
									
									
									
								
							|  | @ -291,11 +291,6 @@ typedef struct { | ||||||
| 	char* path; | 	char* path; | ||||||
| } VHD_SAVE; | } VHD_SAVE; | ||||||
| 
 | 
 | ||||||
| typedef struct { |  | ||||||
| 	DWORD_PTR read_thread; |  | ||||||
| 	DWORD_PTR sum_thread[NUM_CHECKSUMS]; |  | ||||||
| } CHECKSUM_AFFINITY; |  | ||||||
| 
 |  | ||||||
| /*
 | /*
 | ||||||
|  * Structure and macros used for the extensions specification of FileDialog() |  * Structure and macros used for the extensions specification of FileDialog() | ||||||
|  * You can use: |  * You can use: | ||||||
|  | @ -446,7 +441,10 @@ extern LONG ValidateSignature(HWND hDlg, const char* path); | ||||||
| extern BOOL IsFontAvailable(const char* font_name); | extern BOOL IsFontAvailable(const char* font_name); | ||||||
| extern BOOL WriteFileWithRetry(HANDLE hFile, LPCVOID lpBuffer, DWORD nNumberOfBytesToWrite, | extern BOOL WriteFileWithRetry(HANDLE hFile, LPCVOID lpBuffer, DWORD nNumberOfBytesToWrite, | ||||||
| 	LPDWORD lpNumberOfBytesWritten, DWORD nNumRetries); | 	LPDWORD lpNumberOfBytesWritten, DWORD nNumRetries); | ||||||
| extern BOOL SetChecksumAffinity(CHECKSUM_AFFINITY* checksum_affinity); | extern BOOL SetChecksumAffinity(DWORD_PTR* thread_affinity); | ||||||
|  | #define printbits(x) _printbits(sizeof(x), &x, 0) | ||||||
|  | #define printbitslz(x) _printbits(sizeof(x), &x, 1) | ||||||
|  | extern char* _printbits(size_t const size, void const * const ptr, int leading_zeroes); | ||||||
| 
 | 
 | ||||||
| DWORD WINAPI FormatThread(void* param); | DWORD WINAPI FormatThread(void* param); | ||||||
| DWORD WINAPI SaveImageThread(void* param); | DWORD WINAPI SaveImageThread(void* param); | ||||||
|  |  | ||||||
							
								
								
									
										10
									
								
								src/rufus.rc
									
										
									
									
									
								
							
							
						
						
									
										10
									
								
								src/rufus.rc
									
										
									
									
									
								
							|  | @ -33,7 +33,7 @@ LANGUAGE LANG_NEUTRAL, SUBLANG_NEUTRAL | ||||||
| IDD_DIALOG DIALOGEX 12, 12, 242, 376 | IDD_DIALOG DIALOGEX 12, 12, 242, 376 | ||||||
| STYLE DS_SETFONT | DS_MODALFRAME | DS_CENTER | WS_MINIMIZEBOX | WS_POPUP | WS_CAPTION | WS_SYSMENU | STYLE DS_SETFONT | DS_MODALFRAME | DS_CENTER | WS_MINIMIZEBOX | WS_POPUP | WS_CAPTION | WS_SYSMENU | ||||||
| EXSTYLE WS_EX_ACCEPTFILES | EXSTYLE WS_EX_ACCEPTFILES | ||||||
| CAPTION "Rufus 2.8.871" | CAPTION "Rufus 2.8.872" | ||||||
| FONT 8, "Segoe UI Symbol", 400, 0, 0x0 | FONT 8, "Segoe UI Symbol", 400, 0, 0x0 | ||||||
| BEGIN | BEGIN | ||||||
|     LTEXT           "Device",IDS_DEVICE_TXT,9,6,200,8 |     LTEXT           "Device",IDS_DEVICE_TXT,9,6,200,8 | ||||||
|  | @ -320,8 +320,8 @@ END | ||||||
| // | // | ||||||
| 
 | 
 | ||||||
| VS_VERSION_INFO VERSIONINFO | VS_VERSION_INFO VERSIONINFO | ||||||
|  FILEVERSION 2,8,871,0 |  FILEVERSION 2,8,872,0 | ||||||
|  PRODUCTVERSION 2,8,871,0 |  PRODUCTVERSION 2,8,872,0 | ||||||
|  FILEFLAGSMASK 0x3fL |  FILEFLAGSMASK 0x3fL | ||||||
| #ifdef _DEBUG | #ifdef _DEBUG | ||||||
|  FILEFLAGS 0x1L |  FILEFLAGS 0x1L | ||||||
|  | @ -338,13 +338,13 @@ BEGIN | ||||||
|         BEGIN |         BEGIN | ||||||
|             VALUE "CompanyName", "Akeo Consulting (http://akeo.ie)" |             VALUE "CompanyName", "Akeo Consulting (http://akeo.ie)" | ||||||
|             VALUE "FileDescription", "Rufus" |             VALUE "FileDescription", "Rufus" | ||||||
|             VALUE "FileVersion", "2.8.871" |             VALUE "FileVersion", "2.8.872" | ||||||
|             VALUE "InternalName", "Rufus" |             VALUE "InternalName", "Rufus" | ||||||
|             VALUE "LegalCopyright", "© 2011-2016 Pete Batard (GPL v3)" |             VALUE "LegalCopyright", "© 2011-2016 Pete Batard (GPL v3)" | ||||||
|             VALUE "LegalTrademarks", "http://www.gnu.org/copyleft/gpl.html" |             VALUE "LegalTrademarks", "http://www.gnu.org/copyleft/gpl.html" | ||||||
|             VALUE "OriginalFilename", "rufus.exe" |             VALUE "OriginalFilename", "rufus.exe" | ||||||
|             VALUE "ProductName", "Rufus" |             VALUE "ProductName", "Rufus" | ||||||
|             VALUE "ProductVersion", "2.8.871" |             VALUE "ProductVersion", "2.8.872" | ||||||
|         END |         END | ||||||
|     END |     END | ||||||
|     BLOCK "VarFileInfo" |     BLOCK "VarFileInfo" | ||||||
|  |  | ||||||
							
								
								
									
										28
									
								
								src/stdio.c
									
										
									
									
									
								
							
							
						
						
									
										28
									
								
								src/stdio.c
									
										
									
									
									
								
							|  | @ -74,6 +74,34 @@ void _uprintf(const char *format, ...) | ||||||
| } | } | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
|  | // Prints a bitstring of a number of any size, with or without leading zeroes.
 | ||||||
|  | // See also the printbits() and printbitslz() helper macros in rufus.h
 | ||||||
|  | char *_printbits(size_t const size, void const * const ptr, int leading_zeroes) | ||||||
|  | { | ||||||
|  | 	// sizeof(uintmax_t) so that we have enough space to store whatever is thrown at us
 | ||||||
|  | 	static char str[sizeof(uintmax_t) * 8 + 3]; | ||||||
|  | 	size_t i; | ||||||
|  | 	uint8_t* b = (uint8_t*)ptr; | ||||||
|  | 	uintmax_t mask, lzmask = 0, val = 0; | ||||||
|  | 
 | ||||||
|  | 	// Little endian, the SCOURGE of any rational computing
 | ||||||
|  | 	for (i = 0; i < size; i++) | ||||||
|  | 		val |= ((uintmax_t)b[i]) << (8 * i); | ||||||
|  | 
 | ||||||
|  | 	str[0] = '0'; | ||||||
|  | 	str[1] = 'b'; | ||||||
|  | 	if (leading_zeroes) | ||||||
|  | 		lzmask = 1ULL << (size * 8 - 1); | ||||||
|  | 	for (i = 2, mask = 1ULL << (sizeof(uintmax_t) * 8 - 1); mask != 0; mask >>= 1) { | ||||||
|  | 		if ((i > 2) || (lzmask & mask)) | ||||||
|  | 			str[i++] = (val & mask) ? '1' : '0'; | ||||||
|  | 		else if (val & mask) | ||||||
|  | 			str[i++] = '1'; | ||||||
|  | 	} | ||||||
|  | 	str[i] = '\0'; | ||||||
|  | 	return str; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| void DumpBufferHex(void *buf, size_t size) | void DumpBufferHex(void *buf, size_t size) | ||||||
| { | { | ||||||
| 	unsigned char* buffer = (unsigned char*)buf; | 	unsigned char* buffer = (unsigned char*)buf; | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue