mirror of
https://github.com/pbatard/rufus.git
synced 2024-08-14 23:57:05 +00:00
[checksum] use multiple threads and double buffering
* Can reduce the duration of checksum computations by about 1/3rd, if you have a quad core CPU or better.
This commit is contained in:
parent
bab3453f4d
commit
e6d3653cac
5 changed files with 185 additions and 33 deletions
181
src/checksum.c
181
src/checksum.c
|
@ -53,14 +53,22 @@
|
|||
#include <windowsx.h>
|
||||
|
||||
#include "rufus.h"
|
||||
#include "missing.h"
|
||||
#include "resource.h"
|
||||
#include "msapi_utf8.h"
|
||||
#include "localization.h"
|
||||
|
||||
#undef BIG_ENDIAN_HOST
|
||||
|
||||
#define BUFFER_SIZE 4096
|
||||
#define WAIT_TIME 5000
|
||||
|
||||
/* Globals */
|
||||
char sum_str[3][65];
|
||||
char sum_str[NUM_CHECKSUMS][65];
|
||||
int bufnum, sum_count[NUM_CHECKSUMS] = { 16, 20, 32 };
|
||||
HANDLE data_ready[NUM_CHECKSUMS], thread_ready[NUM_CHECKSUMS];
|
||||
DWORD rSize[2];
|
||||
char buffer[2][BUFFER_SIZE];
|
||||
|
||||
#if defined(__GNUC__)
|
||||
#define ALIGNED(m) __attribute__ ((__aligned__(m)))
|
||||
|
@ -766,25 +774,116 @@ INT_PTR CALLBACK ChecksumCallback(HWND hDlg, UINT message, WPARAM wParam, LPARAM
|
|||
typedef void sum_init_t(SUM_CONTEXT *ctx);
|
||||
typedef void sum_write_t(SUM_CONTEXT *ctx, const unsigned char *buf, size_t len);
|
||||
typedef void sum_final_t(SUM_CONTEXT *ctx);
|
||||
sum_init_t *sum_init[3] = { md5_init, sha1_init , sha256_init };
|
||||
sum_write_t *sum_write[3] = { md5_write, sha1_write , sha256_write };
|
||||
sum_final_t *sum_final[3] = { md5_final, sha1_final , sha256_final };
|
||||
int sum_count[3] = { 16, 20, 32 };
|
||||
sum_init_t *sum_init[NUM_CHECKSUMS] = { md5_init, sha1_init , sha256_init };
|
||||
sum_write_t *sum_write[NUM_CHECKSUMS] = { md5_write, sha1_write , sha256_write };
|
||||
sum_final_t *sum_final[NUM_CHECKSUMS] = { md5_final, sha1_final , sha256_final };
|
||||
|
||||
/*
|
||||
* We want the maximum speed we can get out of the checksum computation,
|
||||
* so, if we have a multiprocessor/multithreaded machine, we'll assign
|
||||
* each of the individual checksum threads to a specific virtual core, and
|
||||
* assign the read thread to one of the remaining virtual cores.
|
||||
* To do just that, we need the following function call.
|
||||
* Oh, and BOY is this thing sensitive to whether the first sum affinity
|
||||
* is on an even or odd virtual core!
|
||||
*/
|
||||
/*
 * Fill in the affinity masks used by the read thread and by each of the
 * individual checksum threads, from the affinity mask of the current process.
 *
 * checksum_affinity: structure that receives the masks. It is zeroed first, so
 *                    on failure every mask is left at 0.
 * Returns TRUE if the masks were assigned, FALSE if the pointer is NULL, if the
 * process affinity mask could not be retrieved, or if fewer than
 * NUM_CHECKSUMS + 1 virtual cores are available to the process.
 */
BOOL SetChecksumAffinity(CHECKSUM_AFFINITY* checksum_affinity)
{
	int i;
	DWORD_PTR affinity, dummy;

	// Nothing we can fill if we weren't given a destination
	if (checksum_affinity == NULL)
		return FALSE;

	memset(checksum_affinity, 0, sizeof(CHECKSUM_AFFINITY));
	if (!GetProcessAffinityMask(GetCurrentProcess(), &affinity, &dummy))
		return FALSE;

	// If we don't have enough virtual cores to evenly spread our load forget it
	if (popcnt64(affinity) < NUM_CHECKSUMS + 1)
		return FALSE;

	// We'll use the NUM_CHECKSUMS least significant set bits in our mask for
	// the individual checksum threads, and the remainder for the read thread.
	// From an empirical perspective, this looks like the best "one-size-fits-all"
	// to spread the load.
	checksum_affinity->read_thread = affinity;
	for (i = 0; i < NUM_CHECKSUMS; i++) {
		// Isolate the least significant set bit (x & -x), using unsigned
		// arithmetic only so that no signed/unsigned conversion is involved
		checksum_affinity->sum_thread[i] = affinity & (~affinity + 1);
		// Remove that bit from the pool of remaining cores...
		affinity ^= checksum_affinity->sum_thread[i];
		// ...and from the cores the read thread is allowed to run on
		checksum_affinity->read_thread ^= checksum_affinity->sum_thread[i];
	}
	return TRUE;
}
|
||||
|
||||
// Individual thread that computes one of MD5, SHA1 or SHA256 in parallel
|
||||
/*
 * Worker thread computing a single checksum over the blocks published by the
 * read thread.
 *
 * param: index of the checksum to compute, passed as a pointer-sized integer.
 *        It selects the entries used in sum_init/sum_write/sum_final/sum_count,
 *        in sum_str[] and in the data_ready[]/thread_ready[] event arrays.
 * Returns 0 once the checksum has been finalized and stored, as an hexadecimal
 * string, in sum_str[], or 1 if an event could not be waited on or set.
 */
DWORD WINAPI IndividualSumThread(void* param)
{
	SUM_CONTEXT ctx;
	int idx = (int)(uintptr_t)param;
	int n;

	sum_init[idx](&ctx);

	// Tell the read thread that we are ready to service requests
	if (!SetEvent(thread_ready[idx]))
		goto error;

	for (;;) {
		// Wait for the next request (gives up after WAIT_TIME milliseconds)
		if (WaitForSingleObject(data_ready[idx], WAIT_TIME) != WAIT_OBJECT_0) {
			uprintf("Failed to wait for event for checksum thread #%d: %s", idx, WindowsErrorString());
			return 1;
		}

		if (rSize[bufnum] == 0) {
			// A zero sized block is the request to finalize: compute the
			// digest and convert it to its hexadecimal representation
			sum_final[idx](&ctx);
			memset(&sum_str[idx], 0, ARRAYSIZE(sum_str[idx]));
			for (n = 0; n < sum_count[idx]; n++)
				safe_sprintf(&sum_str[idx][2 * n], ARRAYSIZE(sum_str[idx]) - 2 * n, "%02x", ctx.buf[n]);
			return 0;
		}

		// Process the current block, then signal that we can accept another one
		sum_write[idx](&ctx, buffer[bufnum], (size_t)rSize[bufnum]);
		if (!SetEvent(thread_ready[idx]))
			goto error;
	}

error:
	uprintf("Failed to set event for checksum thread #%d: %s", idx, WindowsErrorString());
	return 1;
}
|
||||
|
||||
DWORD WINAPI SumThread(void* param)
|
||||
{
|
||||
CHECKSUM_AFFINITY* checksum_affinity = (CHECKSUM_AFFINITY*)param;
|
||||
HANDLE sum_thread[NUM_CHECKSUMS] = { NULL, NULL, NULL };
|
||||
HANDLE h = INVALID_HANDLE_VALUE;
|
||||
DWORD rSize = 0;
|
||||
uint64_t rb, LastRefresh = 0;
|
||||
char buffer[4096];
|
||||
SUM_CONTEXT sum_ctx[3];
|
||||
int i, j, r = -1;
|
||||
int i, _bufnum, r = -1;
|
||||
float format_percent = 0.0f;
|
||||
|
||||
if (image_path == NULL)
|
||||
if ((image_path == NULL) || (checksum_affinity == NULL))
|
||||
goto out;
|
||||
|
||||
uprintf("\r\nComputing checksum for '%s'...", image_path);
|
||||
|
||||
if (checksum_affinity->read_thread != 0)
|
||||
SetThreadAffinityMask(GetCurrentThread(), checksum_affinity->read_thread);
|
||||
|
||||
for (i = 0; i < NUM_CHECKSUMS; i++) {
|
||||
// NB: Can't use a single manual-reset event for data_ready as we
|
||||
// wouldn't be able to ensure the event is reset before the thread
|
||||
// gets into its next wait loop
|
||||
data_ready[i] = CreateEvent(NULL, FALSE, FALSE, NULL);
|
||||
thread_ready[i] = CreateEvent(NULL, FALSE, FALSE, NULL);
|
||||
if ((data_ready == NULL) || (thread_ready[i] == NULL)) {
|
||||
uprintf("Unable to create checksum thread event: %s", WindowsErrorString());
|
||||
goto out;
|
||||
}
|
||||
sum_thread[i] = CreateThread(NULL, 0, IndividualSumThread, (LPVOID)(uintptr_t)i, 0, NULL);
|
||||
if (sum_thread[i] == NULL) {
|
||||
uprintf("Unable to start checksum thread #%d", i);
|
||||
goto out;
|
||||
}
|
||||
if (checksum_affinity->sum_thread[i] != 0)
|
||||
SetThreadAffinityMask(sum_thread[i], checksum_affinity->sum_thread[i]);
|
||||
}
|
||||
|
||||
h = CreateFileU(image_path, GENERIC_READ, FILE_SHARE_READ, NULL,
|
||||
OPEN_EXISTING, FILE_FLAG_SEQUENTIAL_SCAN, NULL);
|
||||
if (h == INVALID_HANDLE_VALUE) {
|
||||
|
@ -793,42 +892,70 @@ DWORD WINAPI SumThread(void* param)
|
|||
goto out;
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAYSIZE(sum_init); i++)
|
||||
sum_init[i](&sum_ctx[i]);
|
||||
|
||||
for (rb = 0; ; rb += rSize) {
|
||||
bufnum = 0;
|
||||
_bufnum = 0;
|
||||
rSize[0] = 1; // Don't trigger the first loop break
|
||||
for (rb = 0; ;rb += rSize[_bufnum]) {
|
||||
// Update the progress and check for cancel
|
||||
if (_GetTickCount64() > LastRefresh + 25) {
|
||||
LastRefresh = _GetTickCount64();
|
||||
format_percent = (100.0f*rb) / (1.0f*img_report.projected_size);
|
||||
PrintInfo(0, MSG_271, format_percent);
|
||||
SendMessage(hProgress, PBM_SETPOS, (WPARAM)((format_percent/100.0f)*MAX_PROGRESS), 0);
|
||||
SendMessage(hProgress, PBM_SETPOS, (WPARAM)((format_percent / 100.0f)*MAX_PROGRESS), 0);
|
||||
SetTaskbarProgressValue(rb, img_report.projected_size);
|
||||
}
|
||||
CHECK_FOR_USER_CANCEL;
|
||||
if (!ReadFile(h, buffer, sizeof(buffer), &rSize, NULL)) {
|
||||
|
||||
// Signal the threads that we have data to process
|
||||
if (rb != 0) {
|
||||
bufnum = _bufnum;
|
||||
// Toggle the read buffer
|
||||
_bufnum = (bufnum + 1) % 2;
|
||||
// Signal the waiting threads
|
||||
for (i = 0; i < NUM_CHECKSUMS; i++) {
|
||||
if (!SetEvent(data_ready[i])) {
|
||||
uprintf("Could not signal checksum thread %d: %s", i, WindowsErrorString());
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Break the loop when data has been exhausted
|
||||
if (rSize[bufnum] == 0)
|
||||
break;
|
||||
|
||||
// Read data (double buffered)
|
||||
if (!ReadFile(h, buffer[_bufnum], BUFFER_SIZE, &rSize[_bufnum], NULL)) {
|
||||
FormatStatus = ERROR_SEVERITY_ERROR | FAC(FACILITY_STORAGE) | ERROR_READ_FAULT;
|
||||
uprintf(" Read error: %s", WindowsErrorString());
|
||||
uprintf("Read error: %s", WindowsErrorString());
|
||||
goto out;
|
||||
}
|
||||
|
||||
// Wait for the threads to signal they are ready to process data
|
||||
if (WaitForMultipleObjects(NUM_CHECKSUMS, thread_ready, TRUE, WAIT_TIME) != WAIT_OBJECT_0) {
|
||||
uprintf("Checksum threads failed to signal: %s", WindowsErrorString());
|
||||
goto out;
|
||||
}
|
||||
if (rSize == 0)
|
||||
break;
|
||||
for (i = 0; i < ARRAYSIZE(sum_init); i++)
|
||||
sum_write[i](&sum_ctx[i], buffer, (size_t)rSize);
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAYSIZE(sum_init); i++) {
|
||||
memset(&sum_str[i], 0, ARRAYSIZE(sum_str[i]));
|
||||
sum_final[i](&sum_ctx[i]);
|
||||
for (j = 0; j < sum_count[i]; j++)
|
||||
safe_sprintf(&sum_str[i][2 * j], ARRAYSIZE(sum_str[i]) - 2 * j, "%02x", sum_ctx[i].buf[j]);
|
||||
// Our last event with rSize=0 signaled the threads to exit - wait for that to happen
|
||||
if (WaitForMultipleObjects(NUM_CHECKSUMS, sum_thread, TRUE, WAIT_TIME) != WAIT_OBJECT_0) {
|
||||
uprintf("Checksum threads did not finalize: %s", WindowsErrorString());
|
||||
goto out;
|
||||
}
|
||||
|
||||
uprintf(" MD5:\t %s", sum_str[0]);
|
||||
uprintf(" SHA1:\t %s", sum_str[1]);
|
||||
uprintf(" SHA256: %s", sum_str[2]);
|
||||
|
||||
r = 0;
|
||||
|
||||
out:
|
||||
for (i = 0; i < NUM_CHECKSUMS; i++) {
|
||||
if (sum_thread[i] != NULL)
|
||||
TerminateThread(sum_thread[i], 1);
|
||||
CloseHandle(data_ready[i]);
|
||||
CloseHandle(thread_ready[i]);
|
||||
}
|
||||
safe_closehandle(h);
|
||||
PostMessage(hMainDialog, UM_FORMAT_COMPLETED, (WPARAM)FALSE, 0);
|
||||
if (r == 0)
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
#include <commctrl.h>
|
||||
#include <shlobj.h>
|
||||
#include <wininet.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#pragma once
|
||||
|
||||
|
@ -44,6 +45,21 @@
|
|||
#define bswap_uint16 __builtin_bswap16
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Nibbled from https://github.com/hanji/popcnt/blob/master/populationcount.cpp
|
||||
* Since MSVC x86_32 does not have intrinsic popcount64 and I don't have all day
|
||||
*/
|
||||
/*
 * Count the number of bits set in a 64-bit value.
 *
 * u: the value to inspect.
 * Returns the number of set bits, in the range [0, 64].
 *
 * Each step adds adjacent bit fields in parallel, doubling the width of the
 * fields (1, 2, 4, 8, 16 and 32 bits) until a single 64-bit sum remains.
 */
static __inline int popcnt64(uint64_t u)
{
	u = (u & 0x5555555555555555ULL) + ((u >> 1) & 0x5555555555555555ULL);
	u = (u & 0x3333333333333333ULL) + ((u >> 2) & 0x3333333333333333ULL);
	u = (u & 0x0f0f0f0f0f0f0f0fULL) + ((u >> 4) & 0x0f0f0f0f0f0f0f0fULL);
	u = (u & 0x00ff00ff00ff00ffULL) + ((u >> 8) & 0x00ff00ff00ff00ffULL);
	u = (u & 0x0000ffff0000ffffULL) + ((u >> 16) & 0x0000ffff0000ffffULL);
	u = (u & 0x00000000ffffffffULL) + ((u >> 32) & 0x00000000ffffffffULL);
	return (int)u;
}
|
||||
|
||||
static __inline void *_reallocf(void *ptr, size_t size) {
|
||||
void *ret = realloc(ptr, size);
|
||||
if (!ret)
|
||||
|
|
|
@ -2045,6 +2045,7 @@ static INT_PTR CALLBACK MainCallback(HWND hDlg, UINT message, WPARAM wParam, LPA
|
|||
static ULONG ulRegister = 0;
|
||||
static LPITEMIDLIST pidlDesktop = NULL;
|
||||
static MY_SHChangeNotifyEntry NotifyEntry;
|
||||
static CHECKSUM_AFFINITY checksum_affinity;
|
||||
DRAWITEMSTRUCT* pDI;
|
||||
HDROP droppedFileInfo;
|
||||
POINT Point;
|
||||
|
@ -2529,7 +2530,8 @@ static INT_PTR CALLBACK MainCallback(HWND hDlg, UINT message, WPARAM wParam, LPA
|
|||
// Disable all controls except cancel
|
||||
EnableControls(FALSE);
|
||||
InitProgress(FALSE);
|
||||
format_thid = CreateThread(NULL, 0, SumThread, NULL, 0, NULL);
|
||||
SetChecksumAffinity(&checksum_affinity);
|
||||
format_thid = CreateThread(NULL, 0, SumThread, (LPVOID)&checksum_affinity, 0, NULL);
|
||||
if (format_thid != NULL) {
|
||||
PrintInfo(0, -1);
|
||||
timer = 0;
|
||||
|
|
|
@ -60,6 +60,7 @@
|
|||
#define MAX_SECTORS_TO_CLEAR 128 // nb sectors to zap when clearing the MBR/GPT (must be >34)
|
||||
#define MBR_UEFI_MARKER 0x49464555 // 'U', 'E', 'F', 'I', as a 32 bit little endian longword
|
||||
#define WRITE_RETRIES 3
|
||||
#define NUM_CHECKSUMS 3 // Number of checksum algorithms we support (MD5, SHA1, SHA256)
|
||||
#define FS_DEFAULT FS_FAT32
|
||||
#define SINGLE_CLUSTERSIZE_DEFAULT 0x00000100
|
||||
#define BADBLOCK_PATTERNS {0xaa, 0x55, 0xff, 0x00}
|
||||
|
@ -290,6 +291,11 @@ typedef struct {
|
|||
char* path;
|
||||
} VHD_SAVE;
|
||||
|
||||
/*
 * Thread affinity masks used for the checksum computation.
 * The structure is zeroed and then filled by SetChecksumAffinity(); a mask
 * that is left at 0 is not applied by SumThread().
 */
typedef struct {
|
||||
/* Affinity mask applied to the thread that reads the image data */
DWORD_PTR read_thread;
|
||||
/* Affinity mask applied to each individual checksum thread */
DWORD_PTR sum_thread[NUM_CHECKSUMS];
|
||||
} CHECKSUM_AFFINITY;
|
||||
|
||||
/*
|
||||
* Structure and macros used for the extensions specification of FileDialog()
|
||||
* You can use:
|
||||
|
@ -440,6 +446,7 @@ extern LONG ValidateSignature(HWND hDlg, const char* path);
|
|||
extern BOOL IsFontAvailable(const char* font_name);
|
||||
extern BOOL WriteFileWithRetry(HANDLE hFile, LPCVOID lpBuffer, DWORD nNumberOfBytesToWrite,
|
||||
LPDWORD lpNumberOfBytesWritten, DWORD nNumRetries);
|
||||
extern BOOL SetChecksumAffinity(CHECKSUM_AFFINITY* checksum_affinity);
|
||||
|
||||
DWORD WINAPI FormatThread(void* param);
|
||||
DWORD WINAPI SaveImageThread(void* param);
|
||||
|
|
10
src/rufus.rc
10
src/rufus.rc
|
@ -33,7 +33,7 @@ LANGUAGE LANG_NEUTRAL, SUBLANG_NEUTRAL
|
|||
IDD_DIALOG DIALOGEX 12, 12, 242, 376
|
||||
STYLE DS_SETFONT | DS_MODALFRAME | DS_CENTER | WS_MINIMIZEBOX | WS_POPUP | WS_CAPTION | WS_SYSMENU
|
||||
EXSTYLE WS_EX_ACCEPTFILES
|
||||
CAPTION "Rufus 2.8.870"
|
||||
CAPTION "Rufus 2.8.871"
|
||||
FONT 8, "Segoe UI Symbol", 400, 0, 0x0
|
||||
BEGIN
|
||||
LTEXT "Device",IDS_DEVICE_TXT,9,6,200,8
|
||||
|
@ -320,8 +320,8 @@ END
|
|||
//
|
||||
|
||||
VS_VERSION_INFO VERSIONINFO
|
||||
FILEVERSION 2,8,870,0
|
||||
PRODUCTVERSION 2,8,870,0
|
||||
FILEVERSION 2,8,871,0
|
||||
PRODUCTVERSION 2,8,871,0
|
||||
FILEFLAGSMASK 0x3fL
|
||||
#ifdef _DEBUG
|
||||
FILEFLAGS 0x1L
|
||||
|
@ -338,13 +338,13 @@ BEGIN
|
|||
BEGIN
|
||||
VALUE "CompanyName", "Akeo Consulting (http://akeo.ie)"
|
||||
VALUE "FileDescription", "Rufus"
|
||||
VALUE "FileVersion", "2.8.870"
|
||||
VALUE "FileVersion", "2.8.871"
|
||||
VALUE "InternalName", "Rufus"
|
||||
VALUE "LegalCopyright", "© 2011-2016 Pete Batard (GPL v3)"
|
||||
VALUE "LegalTrademarks", "http://www.gnu.org/copyleft/gpl.html"
|
||||
VALUE "OriginalFilename", "rufus.exe"
|
||||
VALUE "ProductName", "Rufus"
|
||||
VALUE "ProductVersion", "2.8.870"
|
||||
VALUE "ProductVersion", "2.8.871"
|
||||
END
|
||||
END
|
||||
BLOCK "VarFileInfo"
|
||||
|
|
Loading…
Reference in a new issue