[checksum] improve performance by switching to async I/O

Yes!!! We are finally *much* faster than 7-zip for SHA-256, even though
we are also computing MD5 and SHA-1 in parallel. Here are some averaged
comparative results, against the 5.71 GB Win10_20H2_EnglishInternational_x64.iso
(SHA-256 = 08535b6dd0a4311f562e301c3c344b4aefd2e69a82168426b9971d6f8cab35e1):
* Windows' PowerShell Get-FileHash: 48s
* 7-zip's SHA-256                 : 31s
* Rufus (64-bit release version)  : 23s
This commit is contained in:
Pete Batard 2021-02-03 11:49:57 +00:00
parent 53b014781d
commit d4db16a9ca
No known key found for this signature in database
GPG Key ID: 38E0CF5E69EDD671
5 changed files with 219 additions and 47 deletions

View File

@ -388,6 +388,7 @@
<ClInclude Include="..\src\dev.h" />
<ClInclude Include="..\src\ui.h" />
<ClInclude Include="..\src\ui_data.h" />
<ClInclude Include="..\src\winio.h" />
</ItemGroup>
<ItemGroup>
<Manifest Include="..\src\rufus.manifest" />

View File

@ -167,6 +167,9 @@
<ClInclude Include="..\src\gpt_types.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\src\winio.h">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<None Include="..\res\rufus.ico">

View File

@ -5,7 +5,7 @@
* Copyright © 2004-2019 Tom St Denis
* Copyright © 2004 g10 Code GmbH
* Copyright © 2002-2015 Wei Dai & Igor Pavlov
* Copyright © 2015-2020 Pete Batard <pete@akeo.ie>
* Copyright © 2015-2021 Pete Batard <pete@akeo.ie>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@ -62,6 +62,7 @@
#include "db.h"
#include "rufus.h"
#include "winio.h"
#include "missing.h"
#include "resource.h"
#include "msapi_utf8.h"
@ -86,13 +87,17 @@
#define SHA512_HASHSIZE 64
#define MAX_HASHSIZE SHA512_HASHSIZE
/* Number of buffers we work with */
#define NUM_BUFFERS 3 // 2 + 1 as a mere double buffered async I/O
// would modify the buffer being processed.
/* Globals */
char sum_str[CHECKSUM_MAX][150];
uint32_t bufnum, sum_count[CHECKSUM_MAX] = { MD5_HASHSIZE, SHA1_HASHSIZE, SHA256_HASHSIZE, SHA512_HASHSIZE };
uint32_t proc_bufnum, sum_count[CHECKSUM_MAX] = { MD5_HASHSIZE, SHA1_HASHSIZE, SHA256_HASHSIZE, SHA512_HASHSIZE };
HANDLE data_ready[CHECKSUM_MAX] = { 0 }, thread_ready[CHECKSUM_MAX] = { 0 };
DWORD read_size[2];
DWORD read_size[NUM_BUFFERS];
BOOL enable_extra_hashes = FALSE;
uint8_t ALIGNED(64) buffer[2][BUFFER_SIZE];
uint8_t ALIGNED(64) buffer[NUM_BUFFERS][BUFFER_SIZE];
extern int default_thread_priority;
/*
@ -1095,8 +1100,8 @@ DWORD WINAPI IndividualSumThread(void* param)
uprintf("Failed to wait for event for checksum thread #%d: %s", i, WindowsErrorString());
return 1;
}
if (read_size[bufnum] != 0) {
sum_write[i](&sum_ctx, buffer[bufnum], (size_t)read_size[bufnum]);
if (read_size[proc_bufnum] != 0) {
sum_write[i](&sum_ctx, buffer[proc_bufnum], (size_t)read_size[proc_bufnum]);
if (!SetEvent(thread_ready[i]))
goto error;
} else {
@ -1121,9 +1126,10 @@ DWORD WINAPI SumThread(void* param)
{
DWORD_PTR* thread_affinity = (DWORD_PTR*)param;
HANDLE sum_thread[CHECKSUM_MAX] = { NULL, NULL, NULL, NULL };
HANDLE h = INVALID_HANDLE_VALUE;
uint64_t rb;
int i, _bufnum, r = -1;
DWORD wr;
VOID* fd = NULL;
uint64_t processed_bytes;
int i, read_bufnum, r = -1;
int num_checksums = CHECKSUM_MAX - (enable_extra_hashes ? 0 : 1);
if ((image_path == NULL) || (thread_affinity == NULL))
@ -1158,53 +1164,60 @@ DWORD WINAPI SumThread(void* param)
SetThreadAffinityMask(sum_thread[i], thread_affinity[i+1]);
}
h = CreateFileU(image_path, GENERIC_READ, FILE_SHARE_READ, NULL,
OPEN_EXISTING, FILE_FLAG_SEQUENTIAL_SCAN, NULL);
if (h == INVALID_HANDLE_VALUE) {
fd = CreateFileAsync(image_path, GENERIC_READ, FILE_SHARE_READ, OPEN_EXISTING, FILE_FLAG_SEQUENTIAL_SCAN);
if (fd == NULL) {
uprintf("Could not open file: %s", WindowsErrorString());
FormatStatus = ERROR_SEVERITY_ERROR | FAC(FACILITY_STORAGE) | ERROR_OPEN_FAILED;
goto out;
}
bufnum = 0;
_bufnum = 0;
read_size[0] = 1; // Don't trigger the first loop break
read_bufnum = 0;
proc_bufnum = 1;
read_size[proc_bufnum] = 1; // To avoid early loop exit
UpdateProgressWithInfoInit(hMainDialog, FALSE);
for (rb = 0; ;rb += read_size[_bufnum]) {
// Update the progress and check for cancel
UpdateProgressWithInfo(OP_NOOP_WITH_TASKBAR, MSG_271, rb, img_report.image_size);
// Start the initial read
ReadFileAsync(fd, buffer[read_bufnum], BUFFER_SIZE);
for (processed_bytes = 0; read_size[proc_bufnum] != 0; processed_bytes += read_size[proc_bufnum]) {
// 0. Update the progress and check for cancel
UpdateProgressWithInfo(OP_NOOP_WITH_TASKBAR, MSG_271, processed_bytes, img_report.image_size);
CHECK_FOR_USER_CANCEL;
// Signal the threads that we have data to process
if (rb != 0) {
bufnum = _bufnum;
// Toggle the read buffer
_bufnum = (bufnum + 1) % 2;
// Signal the waiting threads
for (i = 0; i < num_checksums; i++) {
if (!SetEvent(data_ready[i])) {
uprintf("Could not signal checksum thread %d: %s", i, WindowsErrorString());
goto out;
}
}
}
// Break the loop when data has been exhausted
if (read_size[bufnum] == 0)
break;
// Read data (double buffered)
if (!ReadFile(h, buffer[_bufnum], BUFFER_SIZE, &read_size[_bufnum], NULL)) {
FormatStatus = ERROR_SEVERITY_ERROR | FAC(FACILITY_STORAGE) | ERROR_READ_FAULT;
// 1. Wait for the current read operation to complete (and update the read size)
if ((!WaitFileAsync(fd, DRIVE_ACCESS_TIMEOUT)) ||
(!GetSizeAsync(fd, &read_size[read_bufnum]))) {
uprintf("Read error: %s", WindowsErrorString());
FormatStatus = ERROR_SEVERITY_ERROR | FAC(FACILITY_STORAGE) | ERROR_READ_FAULT;
goto out;
}
// Wait for the thread to signal they are ready to process data
if (WaitForMultipleObjects(num_checksums, thread_ready, TRUE, WAIT_TIME) != WAIT_OBJECT_0) {
// 2. Switch to the next reading buffer
read_bufnum = (read_bufnum + 1) % NUM_BUFFERS;
// 3. Launch the next asynchronous read operation
ReadFileAsync(fd, buffer[read_bufnum], BUFFER_SIZE);
// 4. Wait for all the sum threads to indicate that they are ready to process data
wr = WaitForMultipleObjects(num_checksums, thread_ready, TRUE, WAIT_TIME);
if (wr != WAIT_OBJECT_0) {
if (wr == STATUS_TIMEOUT)
SetLastError(ERROR_TIMEOUT);
uprintf("Checksum threads failed to signal: %s", WindowsErrorString());
goto out;
}
// 5. Set the target buffer we want to process to the buffer we just read data into
// Note that this variable should only be updated AFTER all the threads have signalled.
proc_bufnum = (read_bufnum + NUM_BUFFERS - 1) % NUM_BUFFERS;
// 6. Signal the waiting threads that there is data available
for (i = 0; i < num_checksums; i++) {
if (!SetEvent(data_ready[i])) {
uprintf("Could not signal checksum thread %d: %s", i, WindowsErrorString());
goto out;
}
}
}
// Our last event with read_size=0 signaled the threads to exit - wait for that to happen
@ -1232,7 +1245,7 @@ out:
safe_closehandle(data_ready[i]);
safe_closehandle(thread_ready[i]);
}
safe_closehandle(h);
CloseFileAsync(fd);
PostMessage(hMainDialog, UM_FORMAT_COMPLETED, (WPARAM)FALSE, 0);
if (r == 0)
MyDialogBox(hMainInstance, IDD_CHECKSUM, hMainDialog, ChecksumCallback);

View File

@ -35,7 +35,7 @@ LANGUAGE LANG_NEUTRAL, SUBLANG_NEUTRAL
IDD_DIALOG DIALOGEX 12, 12, 232, 326
STYLE DS_SETFONT | DS_MODALFRAME | DS_CENTER | WS_MINIMIZEBOX | WS_POPUP | WS_CAPTION | WS_SYSMENU
EXSTYLE WS_EX_ACCEPTFILES
CAPTION "Rufus 3.14.1735"
CAPTION "Rufus 3.14.1736"
FONT 9, "Segoe UI Symbol", 400, 0, 0x0
BEGIN
LTEXT "Drive Properties",IDS_DRIVE_PROPERTIES_TXT,8,6,53,12,NOT WS_GROUP
@ -397,8 +397,8 @@ END
//
VS_VERSION_INFO VERSIONINFO
FILEVERSION 3,14,1735,0
PRODUCTVERSION 3,14,1735,0
FILEVERSION 3,14,1736,0
PRODUCTVERSION 3,14,1736,0
FILEFLAGSMASK 0x3fL
#ifdef _DEBUG
FILEFLAGS 0x1L
@ -416,13 +416,13 @@ BEGIN
VALUE "Comments", "https://rufus.ie"
VALUE "CompanyName", "Akeo Consulting"
VALUE "FileDescription", "Rufus"
VALUE "FileVersion", "3.14.1735"
VALUE "FileVersion", "3.14.1736"
VALUE "InternalName", "Rufus"
VALUE "LegalCopyright", "© 2011-2021 Pete Batard (GPL v3)"
VALUE "LegalTrademarks", "https://www.gnu.org/licenses/gpl-3.0.html"
VALUE "OriginalFilename", "rufus-3.14.exe"
VALUE "ProductName", "Rufus"
VALUE "ProductVersion", "3.14.1735"
VALUE "ProductVersion", "3.14.1736"
END
END
BLOCK "VarFileInfo"

155
src/winio.h Normal file
View File

@ -0,0 +1,155 @@
/*
* Rufus: The Reliable USB Formatting Utility
* Windows I/O redefinitions, that would be totally unnecessary had
* Microsoft done a proper job with their asynchronous APIs.
* Copyright © 2021 Pete Batard <pete@akeo.ie>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <windows.h>
#include "msapi_utf8.h"
#pragma once
// https://docs.microsoft.com/en-us/windows/win32/api/minwinbase/ns-minwinbase-overlapped
// See Microsoft? It's not THAT hard to define an OVERLAPPED struct in a manner that
// doesn't qualify as an example of "Crimes against humanity" for the Geneva convention.
// Replacement for the native OVERLAPPED structure. Instances of this type are cast
// to (OVERLAPPED*) when handed to ReadFile() and GetOverlappedResult(), so the members
// up to and including hEvent must not be reordered or resized.
typedef struct {
// NOTE(review): presumably overlays the Internal/InternalHigh members of the native
// OVERLAPPED, which are reserved for system use - confirm against the SDK definition.
ULONG_PTR Internal[2];
// 64-bit file position used for the next read. Advanced by GetSizeAsync() by the
// number of bytes that the completed read returned.
ULONG64 Offset;
// Event created by CreateFileAsync(), waited on by WaitFileAsync() and closed
// by CloseFileAsync().
HANDLE hEvent;
// Extra bookkeeping member placed after the OVERLAPPED-shaped part: cleared by
// ReadFileAsync() and set by GetSizeAsync() once Offset has been advanced, so that
// the size of a given read is only accounted for once.
BOOL bOffsetUpdated;
} NOW_THATS_WHAT_I_CALL_AN_OVERLAPPED;
// File Descriptor for asynchronous accesses.
// The status field is a three-state value reflecting the result
// of the current asynchronous read operation:
//  1: Read was successful and completed synchronously
// -1: Read is pending asynchronously
//  0: Read Error
// Note that 0 is also the value the status field holds before any read has
// been issued, since CreateFileAsync() allocates the descriptor with calloc().
typedef struct {
// Handle of the file, opened by CreateFileAsync() with FILE_FLAG_OVERLAPPED added
HANDLE hFile;
// Outcome of the last ReadFileAsync() call (see the values listed above)
INT iStatus;
// Single overlapped state (offset + wait event) reused by every read issued on
// this descriptor.
// NOTE(review): this implies only one read should be outstanding at a time - confirm with callers.
NOW_THATS_WHAT_I_CALL_AN_OVERLAPPED Overlapped;
} ASYNC_FD;
/// <summary>
/// Open a file for asynchronous access. The values for the flags are the same as the ones
/// for the native CreateFile() call. Note that FILE_FLAG_OVERLAPPED will always be added
/// to dwFlagsAndAttributes before the file is instantiated, and that an internal
/// OVERLAPPED structure with its associated wait event is also created.
/// The returned descriptor must be released with CloseFileAsync().
/// </summary>
/// <param name="lpFileName">The name of the file or device to be created or opened</param>
/// <param name="dwDesiredAccess">The requested access to the file or device</param>
/// <param name="dwShareMode">The requested sharing mode of the file or device</param>
/// <param name="dwCreationDisposition">Action to take on a file or device that exists or does not exist</param>
/// <param name="dwFlagsAndAttributes">The file or device attributes and flags</param>
/// <returns>Non NULL on success. NULL on error, in which case GetLastError() reports the
/// code of the step that failed (allocation, event creation or file open).</returns>
static __inline VOID* CreateFileAsync(LPCSTR lpFileName, DWORD dwDesiredAccess,
	DWORD dwShareMode, DWORD dwCreationDisposition, DWORD dwFlagsAndAttributes)
{
	DWORD dwError;
	// Zero-initialized, so iStatus starts at 0 and the file offset starts at 0
	ASYNC_FD* fd = calloc(1, sizeof(ASYNC_FD));
	if (fd == NULL) {
		SetLastError(ERROR_NOT_ENOUGH_MEMORY);
		return NULL;
	}
	// Manual-reset event, initially non-signaled, that WaitFileAsync() waits on
	fd->Overlapped.hEvent = CreateEventA(NULL, TRUE, FALSE, NULL);
	if (fd->Overlapped.hEvent == NULL) {
		// Without an event, WaitFileAsync() could never succeed on a pending read,
		// so report the failure now rather than on the first read.
		dwError = GetLastError();
		free(fd);
		SetLastError(dwError);
		return NULL;
	}
	fd->hFile = CreateFileU(lpFileName, dwDesiredAccess, dwShareMode, NULL,
		dwCreationDisposition, FILE_FLAG_OVERLAPPED | dwFlagsAndAttributes, NULL);
	if (fd->hFile == INVALID_HANDLE_VALUE) {
		// Callers report the failure through GetLastError(), so make sure that
		// the cleanup calls below cannot replace the error from the open call.
		dwError = GetLastError();
		CloseHandle(fd->Overlapped.hEvent);
		free(fd);
		SetLastError(dwError);
		return NULL;
	}
	return fd;
}
/// <summary>
/// Close a previously opened asynchronous file and release its descriptor.
/// If a read that was reported as pending has not had its result collected yet,
/// it is cancelled and waited for before any resource is released, because the
/// system may otherwise still write to the descriptor after it has been freed.
/// </summary>
/// <param name="fd">The file descriptor (may be NULL, in which case nothing is done)</param>
static __inline VOID CloseFileAsync(VOID* fd)
{
	ASYNC_FD* _fd = (ASYNC_FD*)fd;
	DWORD dwIgnored = 0;
	if (_fd == NULL)
		return;
	// iStatus < 0 with bOffsetUpdated unset means that ReadFileAsync() reported a
	// pending read and that GetSizeAsync() has not successfully collected it.
	if ((_fd->iStatus < 0) && (!_fd->Overlapped.bOffsetUpdated)) {
		// Best effort: CancelIo() only cancels requests issued by the calling thread.
		// Whether or not anything was cancelled, the blocking GetOverlappedResult()
		// call below returns once the request is no longer in flight (and returns
		// immediately if it already completed).
		CancelIo(_fd->hFile);
		GetOverlappedResult(_fd->hFile, (OVERLAPPED*)&_fd->Overlapped, &dwIgnored, TRUE);
	}
	CloseHandle(_fd->hFile);
	CloseHandle(_fd->Overlapped.hEvent);
	free(_fd);
}
/// <summary>
/// Start an asynchronous read at the descriptor's current offset and record
/// how the request was accepted in the descriptor's status field
/// (1: completed synchronously, -1: pending, 0: error).
/// </summary>
/// <param name="fd">The file descriptor</param>
/// <param name="lpBuffer">The buffer that receives the data</param>
/// <param name="nNumberOfBytesToRead">Number of bytes requested</param>
/// <returns>TRUE if the read completed or is pending, FALSE on error</returns>
static __inline BOOL ReadFileAsync(VOID* fd, LPVOID lpBuffer, DWORD nNumberOfBytesToRead)
{
	ASYNC_FD* pAsync = (ASYNC_FD*)fd;
	OVERLAPPED* pOverlapped = (OVERLAPPED*)&pAsync->Overlapped;

	// A new request means that its size has not been accounted for yet
	pAsync->Overlapped.bOffsetUpdated = FALSE;

	if (ReadFile(pAsync->hFile, lpBuffer, nNumberOfBytesToRead, NULL, pOverlapped)) {
		pAsync->iStatus = 1;
	} else if (GetLastError() == ERROR_IO_PENDING) {
		pAsync->iStatus = -1;
	} else {
		// TODO: Is it possible to get ERROR_HANDLE_EOF here?
		pAsync->iStatus = 0;
	}
	return (pAsync->iStatus != 0);
}
/// <summary>
/// Wait for an asynchronous operation to complete, with timeout.
/// This function also succeeds if the I/O already completed synchronously,
/// and fails immediately if the last ReadFileAsync() call reported an error
/// (or if no read was ever issued), since there is then nothing to wait for.
/// </summary>
/// <param name="fd">The file descriptor</param>
/// <param name="dwTimeout">A timeout value, in ms</param>
/// <returns>TRUE on success, FALSE on error. On timeout, the last error is
/// set to ERROR_TIMEOUT.</returns>
static __inline BOOL WaitFileAsync(VOID* fd, DWORD dwTimeout)
{
	ASYNC_FD* _fd = (ASYNC_FD*)fd;
	DWORD dwWait;
	if (_fd->iStatus > 0)	// Read completed synchronously
		return TRUE;
	if (_fd->iStatus == 0)	// No I/O in flight: don't block for the whole timeout
		return FALSE;
	dwWait = WaitForSingleObject(_fd->Overlapped.hEvent, dwTimeout);
	// A timed out wait does not update the last error by itself, so set one for
	// the benefit of callers that report failures through GetLastError().
	if (dwWait == WAIT_TIMEOUT)
		SetLastError(ERROR_TIMEOUT);
	return (dwWait == WAIT_OBJECT_0);
}
/// <summary>
/// Return the number of bytes read and keep track/update the current offset
/// for an asynchronous read operation. This should be called once per read,
/// after WaitFileAsync() has reported completion.
/// </summary>
/// <param name="fd">The file descriptor</param>
/// <param name="lpNumberOfBytesRead">A pointer that receives the number of bytes read.
/// Reaching the end of the file is reported as success with a size of 0.</param>
/// <returns>TRUE on success, FALSE on error. If the size of the current read was
/// already retrieved, FALSE is returned with the last error set to
/// ERROR_NO_MORE_ITEMS and the size is left untouched.</returns>
static __inline BOOL GetSizeAsync(VOID* fd, LPDWORD lpNumberOfBytesRead)
{
	ASYNC_FD* _fd = (ASYNC_FD*)fd;
	// Previous call to ReadFileAsync() failed
	if (_fd->iStatus == 0) {
		*lpNumberOfBytesRead = 0;
		return FALSE;
	}
	// Detect if we already read the size and updated the offset
	if (_fd->Overlapped.bOffsetUpdated) {
		SetLastError(ERROR_NO_MORE_ITEMS);
		return FALSE;
	}
	// The end-of-file case below returns TRUE from a *failed* GetOverlappedResult()
	// call, and nothing guarantees that the size is written on failure. Clear it
	// first so that callers that use a size of 0 as their end-of-data condition
	// never see a stale value.
	*lpNumberOfBytesRead = 0;
	// TODO: Use a timeout and call GetOverlappedResultEx() on Windows 8 and later
	// Only block for completion if the read was reported as pending
	if (!GetOverlappedResult(_fd->hFile, (OVERLAPPED*)&_fd->Overlapped,
		lpNumberOfBytesRead, (_fd->iStatus < 0)))
		return (GetLastError() == ERROR_HANDLE_EOF);
	// Advance the position that the next ReadFileAsync() call will read from
	_fd->Overlapped.Offset += *lpNumberOfBytesRead;
	_fd->Overlapped.bOffsetUpdated = TRUE;
	return TRUE;
}