diff --git a/.vs/rufus.vcxproj b/.vs/rufus.vcxproj index 37441ae6..2d2601a9 100644 --- a/.vs/rufus.vcxproj +++ b/.vs/rufus.vcxproj @@ -349,6 +349,7 @@ + @@ -380,6 +381,7 @@ + diff --git a/.vs/rufus.vcxproj.filters b/.vs/rufus.vcxproj.filters index 2e10c9d3..5a82626c 100644 --- a/.vs/rufus.vcxproj.filters +++ b/.vs/rufus.vcxproj.filters @@ -93,6 +93,9 @@ Source Files + + Source Files + @@ -191,6 +194,9 @@ Header Files + + Header Files + diff --git a/src/Makefile.am b/src/Makefile.am index a97bdb7b..68f0c10e 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -14,8 +14,8 @@ AM_V_WINDRES = $(AM_V_WINDRES_$(V)) %_rc.o: %.rc ../res/loc/embedded.loc $(AM_V_WINDRES) $(AM_RCFLAGS) -i $< -o $@ -rufus_SOURCES = badblocks.c checksum.c dev.c dos.c dos_locale.c drive.c format.c format_ext.c format_fat32.c icon.c iso.c localization.c \ - net.c parser.c pki.c process.c re.c rufus.c smart.c stdfn.c stdio.c stdlg.c syslinux.c ui.c vhd.c wue.c +rufus_SOURCES = badblocks.c checksum.c cpu.c dev.c dos.c dos_locale.c drive.c format.c format_ext.c format_fat32.c icon.c iso.c \ + localization.c net.c parser.c pki.c process.c re.c rufus.c smart.c stdfn.c stdio.c stdlg.c syslinux.c ui.c vhd.c wue.c rufus_CFLAGS = -I$(srcdir)/ms-sys/inc -I$(srcdir)/syslinux/libfat -I$(srcdir)/syslinux/libinstaller -I$(srcdir)/syslinux/win -I$(srcdir)/libcdio $(AM_CFLAGS) \ -DEXT2_FLAT_INCLUDES=0 -DSOLUTION=rufus rufus_LDFLAGS = $(AM_LDFLAGS) -mwindows -L ../.mingw diff --git a/src/Makefile.in b/src/Makefile.in index 32421112..2465d1af 100644 --- a/src/Makefile.in +++ b/src/Makefile.in @@ -88,7 +88,7 @@ CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = PROGRAMS = $(noinst_PROGRAMS) am_rufus_OBJECTS = rufus-badblocks.$(OBJEXT) rufus-checksum.$(OBJEXT) \ - rufus-dev.$(OBJEXT) rufus-dos.$(OBJEXT) \ + rufus-cpu.$(OBJEXT) rufus-dev.$(OBJEXT) rufus-dos.$(OBJEXT) \ rufus-dos_locale.$(OBJEXT) rufus-drive.$(OBJEXT) \ rufus-format.$(OBJEXT) rufus-format_ext.$(OBJEXT) \ rufus-format_fat32.$(OBJEXT) rufus-icon.$(OBJEXT) \ @@ -281,8 +281,8 @@ AM_V_WINDRES_0 = @echo " RC $@";$(WINDRES) AM_V_WINDRES_1 = $(WINDRES) AM_V_WINDRES_ = $(AM_V_WINDRES_$(AM_DEFAULT_VERBOSITY)) AM_V_WINDRES = $(AM_V_WINDRES_$(V)) -rufus_SOURCES = badblocks.c checksum.c dev.c dos.c dos_locale.c drive.c format.c format_ext.c format_fat32.c icon.c iso.c localization.c \ - net.c parser.c pki.c process.c re.c rufus.c smart.c stdfn.c stdio.c stdlg.c syslinux.c ui.c vhd.c wue.c +rufus_SOURCES = badblocks.c checksum.c cpu.c dev.c dos.c dos_locale.c drive.c format.c format_ext.c format_fat32.c icon.c iso.c \ + localization.c net.c parser.c pki.c process.c re.c rufus.c smart.c stdfn.c stdio.c stdlg.c syslinux.c ui.c vhd.c wue.c rufus_CFLAGS = -I$(srcdir)/ms-sys/inc -I$(srcdir)/syslinux/libfat -I$(srcdir)/syslinux/libinstaller -I$(srcdir)/syslinux/win -I$(srcdir)/libcdio $(AM_CFLAGS) \ -DEXT2_FLAT_INCLUDES=0 -DSOLUTION=rufus @@ -357,6 +357,12 @@ rufus-checksum.o: checksum.c rufus-checksum.obj: checksum.c $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rufus_CFLAGS) $(CFLAGS) -c -o rufus-checksum.obj `if test -f 'checksum.c'; then $(CYGPATH_W) 'checksum.c'; else $(CYGPATH_W) '$(srcdir)/checksum.c'; fi` +rufus-cpu.o: cpu.c + $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rufus_CFLAGS) $(CFLAGS) -c -o rufus-cpu.o `test -f 'cpu.c' || echo '$(srcdir)/'`cpu.c + +rufus-cpu.obj: cpu.c + $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rufus_CFLAGS) $(CFLAGS) -c -o rufus-cpu.obj `if test -f 'cpu.c'; then $(CYGPATH_W) 'cpu.c'; else $(CYGPATH_W) '$(srcdir)/cpu.c'; fi` + rufus-dev.o: dev.c $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rufus_CFLAGS) $(CFLAGS) -c -o rufus-dev.o `test -f 'dev.c' || echo '$(srcdir)/'`dev.c diff --git a/src/checksum.c b/src/checksum.c index abca0332..fb7cac3e 100644 --- a/src/checksum.c +++ b/src/checksum.c @@ -6,6 +6,7 @@ * Copyright © 2004 g10 Code GmbH * Copyright © 2002-2015 Wei Dai & Igor Pavlov * Copyright © 2015-2021 Pete Batard + * Copyright © 2022 Jeffrey Walton * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -28,6 +29,8 @@ * * SHA-512 modified from LibTomCrypt - Public Domain * + * CPU accelerated SHA code taken from SHA-Intrinsics - Public Domain + * * MD5 code from various public domain sources sharing the following * copyright declaration: * @@ -61,6 +64,7 @@ #include #include "db.h" +#include "cpu.h" #include "rufus.h" #include "winio.h" #include "missing.h" @@ -68,6 +72,22 @@ #include "msapi_utf8.h" #include "localization.h" +/* Includes for SHA-1 and SHA-256 intrinsics */ +#if defined(CPU_X86_SHA1_ACCELERATION) || defined(CPU_X86_SHA256_ACCELERATION) +#if defined(_MSC_VER) +#include +#elif defined(__GNUC__) +#include +#include +#endif +#endif + +#if defined(_MSC_VER) +#define RUFUS_ENABLE_GCC_ARCH(arch) +#else +#define RUFUS_ENABLE_GCC_ARCH(arch) __attribute__ ((target (arch))) +#endif + #undef BIG_ENDIAN_HOST #define BUFFER_SIZE (64*KB) @@ -208,7 +228,7 @@ static void sha512_init(SUM_CONTEXT* ctx) } /* Transform the message X which consists of 16 32-bit-words (SHA-1) */ -static void sha1_transform(SUM_CONTEXT *ctx, const uint8_t *data) +static void sha1_transform_cc(SUM_CONTEXT *ctx, const uint8_t *data) { uint32_t a, b, c, d, e, tm, x[16]; @@ -341,8 +361,235 @@ static void sha1_transform(SUM_CONTEXT *ctx, const uint8_t *data) ctx->state[4] += e; } +#ifdef CPU_X86_SHA1_ACCELERATION +/* + * Transform the message X which consists of 16 32-bit-words (SHA-1) + * The code is public domain taken from https://github.com/noloader/SHA-Intrinsics. + */ +RUFUS_ENABLE_GCC_ARCH("ssse3,sse4.1,sha") +static void sha1_transform_x86(uint64_t state64[5], const uint8_t *data, size_t length) +{ + __m128i ABCD, E0, E1; + __m128i MSG0, MSG1, MSG2, MSG3; + const __m128i MASK = _mm_set_epi64x(0x0001020304050607ULL, 0x08090a0b0c0d0e0fULL); + + /* Rufus uses uint64_t for the state array. Pack it into uint32_t. */ + uint32_t state[5] = { + (uint32_t)state64[0], + (uint32_t)state64[1], + (uint32_t)state64[2], + (uint32_t)state64[3], + (uint32_t)state64[4] + }; + + /* Load initial values */ + ABCD = _mm_loadu_si128((const __m128i*) state); + E0 = _mm_set_epi32(state[4], 0, 0, 0); + ABCD = _mm_shuffle_epi32(ABCD, 0x1B); + + while (length >= SHA1_BLOCKSIZE) + { + /* Save current state */ + const __m128i ABCD_SAVE = ABCD; + const __m128i E0_SAVE = E0; + + /* Rounds 0-3 */ + MSG0 = _mm_loadu_si128((const __m128i*)(data + 0)); + MSG0 = _mm_shuffle_epi8(MSG0, MASK); + E0 = _mm_add_epi32(E0, MSG0); + E1 = ABCD; + ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 0); + + /* Rounds 4-7 */ + MSG1 = _mm_loadu_si128((const __m128i*)(data + 16)); + MSG1 = _mm_shuffle_epi8(MSG1, MASK); + E1 = _mm_sha1nexte_epu32(E1, MSG1); + E0 = ABCD; + ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 0); + MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1); + + /* Rounds 8-11 */ + MSG2 = _mm_loadu_si128((const __m128i*)(data + 32)); + MSG2 = _mm_shuffle_epi8(MSG2, MASK); + E0 = _mm_sha1nexte_epu32(E0, MSG2); + E1 = ABCD; + ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 0); + MSG1 = _mm_sha1msg1_epu32(MSG1, MSG2); + MSG0 = _mm_xor_si128(MSG0, MSG2); + + /* Rounds 12-15 */ + MSG3 = _mm_loadu_si128((const __m128i*)(data + 48)); + MSG3 = _mm_shuffle_epi8(MSG3, MASK); + E1 = _mm_sha1nexte_epu32(E1, MSG3); + E0 = ABCD; + MSG0 = _mm_sha1msg2_epu32(MSG0, MSG3); + ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 0); + MSG2 = _mm_sha1msg1_epu32(MSG2, MSG3); + MSG1 = _mm_xor_si128(MSG1, MSG3); + + /* Rounds 16-19 */ + E0 = _mm_sha1nexte_epu32(E0, MSG0); + E1 = ABCD; + MSG1 = _mm_sha1msg2_epu32(MSG1, MSG0); + ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 0); + MSG3 = _mm_sha1msg1_epu32(MSG3, MSG0); + MSG2 = _mm_xor_si128(MSG2, MSG0); + + /* Rounds 20-23 */ + E1 = _mm_sha1nexte_epu32(E1, MSG1); + E0 = ABCD; + MSG2 = _mm_sha1msg2_epu32(MSG2, MSG1); + ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 1); + MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1); + MSG3 = _mm_xor_si128(MSG3, MSG1); + + /* Rounds 24-27 */ + E0 = _mm_sha1nexte_epu32(E0, MSG2); + E1 = ABCD; + MSG3 = _mm_sha1msg2_epu32(MSG3, MSG2); + ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 1); + MSG1 = _mm_sha1msg1_epu32(MSG1, MSG2); + MSG0 = _mm_xor_si128(MSG0, MSG2); + + /* Rounds 28-31 */ + E1 = _mm_sha1nexte_epu32(E1, MSG3); + E0 = ABCD; + MSG0 = _mm_sha1msg2_epu32(MSG0, MSG3); + ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 1); + MSG2 = _mm_sha1msg1_epu32(MSG2, MSG3); + MSG1 = _mm_xor_si128(MSG1, MSG3); + + /* Rounds 32-35 */ + E0 = _mm_sha1nexte_epu32(E0, MSG0); + E1 = ABCD; + MSG1 = _mm_sha1msg2_epu32(MSG1, MSG0); + ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 1); + MSG3 = _mm_sha1msg1_epu32(MSG3, MSG0); + MSG2 = _mm_xor_si128(MSG2, MSG0); + + /* Rounds 36-39 */ + E1 = _mm_sha1nexte_epu32(E1, MSG1); + E0 = ABCD; + MSG2 = _mm_sha1msg2_epu32(MSG2, MSG1); + ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 1); + MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1); + MSG3 = _mm_xor_si128(MSG3, MSG1); + + /* Rounds 40-43 */ + E0 = _mm_sha1nexte_epu32(E0, MSG2); + E1 = ABCD; + MSG3 = _mm_sha1msg2_epu32(MSG3, MSG2); + ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 2); + MSG1 = _mm_sha1msg1_epu32(MSG1, MSG2); + MSG0 = _mm_xor_si128(MSG0, MSG2); + + /* Rounds 44-47 */ + E1 = _mm_sha1nexte_epu32(E1, MSG3); + E0 = ABCD; + MSG0 = _mm_sha1msg2_epu32(MSG0, MSG3); + ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 2); + MSG2 = _mm_sha1msg1_epu32(MSG2, MSG3); + MSG1 = _mm_xor_si128(MSG1, MSG3); + + /* Rounds 48-51 */ + E0 = _mm_sha1nexte_epu32(E0, MSG0); + E1 = ABCD; + MSG1 = _mm_sha1msg2_epu32(MSG1, MSG0); + ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 2); + MSG3 = _mm_sha1msg1_epu32(MSG3, MSG0); + MSG2 = _mm_xor_si128(MSG2, MSG0); + + /* Rounds 52-55 */ + E1 = _mm_sha1nexte_epu32(E1, MSG1); + E0 = ABCD; + MSG2 = _mm_sha1msg2_epu32(MSG2, MSG1); + ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 2); + MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1); + MSG3 = _mm_xor_si128(MSG3, MSG1); + + /* Rounds 56-59 */ + E0 = _mm_sha1nexte_epu32(E0, MSG2); + E1 = ABCD; + MSG3 = _mm_sha1msg2_epu32(MSG3, MSG2); + ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 2); + MSG1 = _mm_sha1msg1_epu32(MSG1, MSG2); + MSG0 = _mm_xor_si128(MSG0, MSG2); + + /* Rounds 60-63 */ + E1 = _mm_sha1nexte_epu32(E1, MSG3); + E0 = ABCD; + MSG0 = _mm_sha1msg2_epu32(MSG0, MSG3); + ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 3); + MSG2 = _mm_sha1msg1_epu32(MSG2, MSG3); + MSG1 = _mm_xor_si128(MSG1, MSG3); + + /* Rounds 64-67 */ + E0 = _mm_sha1nexte_epu32(E0, MSG0); + E1 = ABCD; + MSG1 = _mm_sha1msg2_epu32(MSG1, MSG0); + ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 3); + MSG3 = _mm_sha1msg1_epu32(MSG3, MSG0); + MSG2 = _mm_xor_si128(MSG2, MSG0); + + /* Rounds 68-71 */ + E1 = _mm_sha1nexte_epu32(E1, MSG1); + E0 = ABCD; + MSG2 = _mm_sha1msg2_epu32(MSG2, MSG1); + ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 3); + MSG3 = _mm_xor_si128(MSG3, MSG1); + + /* Rounds 72-75 */ + E0 = _mm_sha1nexte_epu32(E0, MSG2); + E1 = ABCD; + MSG3 = _mm_sha1msg2_epu32(MSG3, MSG2); + ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 3); + + /* Rounds 76-79 */ + E1 = _mm_sha1nexte_epu32(E1, MSG3); + E0 = ABCD; + ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 3); + + /* Combine state */ + E0 = _mm_sha1nexte_epu32(E0, E0_SAVE); + ABCD = _mm_add_epi32(ABCD, ABCD_SAVE); + + data += 64; + length -= 64; + } + + /* Save state */ + ABCD = _mm_shuffle_epi32(ABCD, 0x1B); + _mm_storeu_si128((__m128i*) state, ABCD); + state[4] = _mm_extract_epi32(E0, 3); + + /* Repack into uint64_t. */ + state64[0] = state[0]; + state64[1] = state[1]; + state64[2] = state[2]; + state64[3] = state[3]; + state64[4] = state[4]; +} +#endif /* CPU_X86_SHA1_ACCELERATION */ + +/* Transform the message X which consists of 16 32-bit-words (SHA-1) */ +static void sha1_transform(SUM_CONTEXT *ctx, const uint8_t *data) +{ +#ifdef CPU_X86_SHA1_ACCELERATION + if (cpu_has_sha1_accel) + { + /* SHA-1 acceleration using intrinsics */ + sha1_transform_x86(ctx->state, data, SHA1_BLOCKSIZE); + } + else +#endif + { + /* Portable C/C++ implementation */ + sha1_transform_cc(ctx, data); + } +} + /* Transform the message X which consists of 16 32-bit-words (SHA-256) */ -static __inline void sha256_transform(SUM_CONTEXT *ctx, const uint8_t *data) +static __inline void sha256_transform_cc(SUM_CONTEXT *ctx, const uint8_t *data) { uint32_t a, b, c, d, e, f, g, h, j, x[16]; @@ -415,6 +662,243 @@ static __inline void sha256_transform(SUM_CONTEXT *ctx, const uint8_t *data) ctx->state[7] += h; } +#ifdef CPU_X86_SHA256_ACCELERATION +/* + * Transform the message X which consists of 16 32-bit-words (SHA-256) + * The code is public domain taken from https://github.com/noloader/SHA-Intrinsics. + */ +RUFUS_ENABLE_GCC_ARCH("ssse3,sse4.1,sha") +static __inline void sha256_transform_x86(uint64_t state64[8], const uint8_t *data, size_t length) +{ + __m128i STATE0, STATE1; + __m128i MSG, TMP; + __m128i MSG0, MSG1, MSG2, MSG3; + const __m128i MASK = _mm_set_epi64x(0x0c0d0e0f08090a0bULL, 0x0405060700010203ULL); + + /* Rufus uses uint64_t for the state array. Pack it into uint32_t. */ + uint32_t state[8] = { + (uint32_t)state64[0], + (uint32_t)state64[1], + (uint32_t)state64[2], + (uint32_t)state64[3], + (uint32_t)state64[4], + (uint32_t)state64[5], + (uint32_t)state64[6], + (uint32_t)state64[7] + }; + + /* Load initial values */ + TMP = _mm_loadu_si128((const __m128i*) (state+0)); + STATE1 = _mm_loadu_si128((const __m128i*) (state+4)); + + TMP = _mm_shuffle_epi32(TMP, 0xB1); /* CDAB */ + STATE1 = _mm_shuffle_epi32(STATE1, 0x1B); /* EFGH */ + STATE0 = _mm_alignr_epi8(TMP, STATE1, 8); /* ABEF */ + STATE1 = _mm_blend_epi16(STATE1, TMP, 0xF0); /* CDGH */ + + while (length >= SHA256_BLOCKSIZE) + { + /* Save current state */ + const __m128i ABEF_SAVE = STATE0; + const __m128i CDGH_SAVE = STATE1; + + /* Rounds 0-3 */ + MSG = _mm_loadu_si128((const __m128i*) (data+0)); + MSG0 = _mm_shuffle_epi8(MSG, MASK); + MSG = _mm_add_epi32(MSG0, _mm_set_epi64x(0xE9B5DBA5B5C0FBCFULL, 0x71374491428A2F98ULL)); + STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); + MSG = _mm_shuffle_epi32(MSG, 0x0E); + STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); + + /* Rounds 4-7 */ + MSG1 = _mm_loadu_si128((const __m128i*) (data+16)); + MSG1 = _mm_shuffle_epi8(MSG1, MASK); + MSG = _mm_add_epi32(MSG1, _mm_set_epi64x(0xAB1C5ED5923F82A4ULL, 0x59F111F13956C25BULL)); + STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); + MSG = _mm_shuffle_epi32(MSG, 0x0E); + STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); + MSG0 = _mm_sha256msg1_epu32(MSG0, MSG1); + + /* Rounds 8-11 */ + MSG2 = _mm_loadu_si128((const __m128i*) (data+32)); + MSG2 = _mm_shuffle_epi8(MSG2, MASK); + MSG = _mm_add_epi32(MSG2, _mm_set_epi64x(0x550C7DC3243185BEULL, 0x12835B01D807AA98ULL)); + STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); + MSG = _mm_shuffle_epi32(MSG, 0x0E); + STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); + MSG1 = _mm_sha256msg1_epu32(MSG1, MSG2); + + /* Rounds 12-15 */ + MSG3 = _mm_loadu_si128((const __m128i*) (data+48)); + MSG3 = _mm_shuffle_epi8(MSG3, MASK); + MSG = _mm_add_epi32(MSG3, _mm_set_epi64x(0xC19BF1749BDC06A7ULL, 0x80DEB1FE72BE5D74ULL)); + STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); + TMP = _mm_alignr_epi8(MSG3, MSG2, 4); + MSG0 = _mm_add_epi32(MSG0, TMP); + MSG0 = _mm_sha256msg2_epu32(MSG0, MSG3); + MSG = _mm_shuffle_epi32(MSG, 0x0E); + STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); + MSG2 = _mm_sha256msg1_epu32(MSG2, MSG3); + + /* Rounds 16-19 */ + MSG = _mm_add_epi32(MSG0, _mm_set_epi64x(0x240CA1CC0FC19DC6ULL, 0xEFBE4786E49B69C1ULL)); + STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); + TMP = _mm_alignr_epi8(MSG0, MSG3, 4); + MSG1 = _mm_add_epi32(MSG1, TMP); + MSG1 = _mm_sha256msg2_epu32(MSG1, MSG0); + MSG = _mm_shuffle_epi32(MSG, 0x0E); + STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); + MSG3 = _mm_sha256msg1_epu32(MSG3, MSG0); + + /* Rounds 20-23 */ + MSG = _mm_add_epi32(MSG1, _mm_set_epi64x(0x76F988DA5CB0A9DCULL, 0x4A7484AA2DE92C6FULL)); + STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); + TMP = _mm_alignr_epi8(MSG1, MSG0, 4); + MSG2 = _mm_add_epi32(MSG2, TMP); + MSG2 = _mm_sha256msg2_epu32(MSG2, MSG1); + MSG = _mm_shuffle_epi32(MSG, 0x0E); + STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); + MSG0 = _mm_sha256msg1_epu32(MSG0, MSG1); + + /* Rounds 24-27 */ + MSG = _mm_add_epi32(MSG2, _mm_set_epi64x(0xBF597FC7B00327C8ULL, 0xA831C66D983E5152ULL)); + STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); + TMP = _mm_alignr_epi8(MSG2, MSG1, 4); + MSG3 = _mm_add_epi32(MSG3, TMP); + MSG3 = _mm_sha256msg2_epu32(MSG3, MSG2); + MSG = _mm_shuffle_epi32(MSG, 0x0E); + STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); + MSG1 = _mm_sha256msg1_epu32(MSG1, MSG2); + + /* Rounds 28-31 */ + MSG = _mm_add_epi32(MSG3, _mm_set_epi64x(0x1429296706CA6351ULL, 0xD5A79147C6E00BF3ULL)); + STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); + TMP = _mm_alignr_epi8(MSG3, MSG2, 4); + MSG0 = _mm_add_epi32(MSG0, TMP); + MSG0 = _mm_sha256msg2_epu32(MSG0, MSG3); + MSG = _mm_shuffle_epi32(MSG, 0x0E); + STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); + MSG2 = _mm_sha256msg1_epu32(MSG2, MSG3); + + /* Rounds 32-35 */ + MSG = _mm_add_epi32(MSG0, _mm_set_epi64x(0x53380D134D2C6DFCULL, 0x2E1B213827B70A85ULL)); + STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); + TMP = _mm_alignr_epi8(MSG0, MSG3, 4); + MSG1 = _mm_add_epi32(MSG1, TMP); + MSG1 = _mm_sha256msg2_epu32(MSG1, MSG0); + MSG = _mm_shuffle_epi32(MSG, 0x0E); + STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); + MSG3 = _mm_sha256msg1_epu32(MSG3, MSG0); + + /* Rounds 36-39 */ + MSG = _mm_add_epi32(MSG1, _mm_set_epi64x(0x92722C8581C2C92EULL, 0x766A0ABB650A7354ULL)); + STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); + TMP = _mm_alignr_epi8(MSG1, MSG0, 4); + MSG2 = _mm_add_epi32(MSG2, TMP); + MSG2 = _mm_sha256msg2_epu32(MSG2, MSG1); + MSG = _mm_shuffle_epi32(MSG, 0x0E); + STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); + MSG0 = _mm_sha256msg1_epu32(MSG0, MSG1); + + /* Rounds 40-43 */ + MSG = _mm_add_epi32(MSG2, _mm_set_epi64x(0xC76C51A3C24B8B70ULL, 0xA81A664BA2BFE8A1ULL)); + STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); + TMP = _mm_alignr_epi8(MSG2, MSG1, 4); + MSG3 = _mm_add_epi32(MSG3, TMP); + MSG3 = _mm_sha256msg2_epu32(MSG3, MSG2); + MSG = _mm_shuffle_epi32(MSG, 0x0E); + STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); + MSG1 = _mm_sha256msg1_epu32(MSG1, MSG2); + + /* Rounds 44-47 */ + MSG = _mm_add_epi32(MSG3, _mm_set_epi64x(0x106AA070F40E3585ULL, 0xD6990624D192E819ULL)); + STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); + TMP = _mm_alignr_epi8(MSG3, MSG2, 4); + MSG0 = _mm_add_epi32(MSG0, TMP); + MSG0 = _mm_sha256msg2_epu32(MSG0, MSG3); + MSG = _mm_shuffle_epi32(MSG, 0x0E); + STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); + MSG2 = _mm_sha256msg1_epu32(MSG2, MSG3); + + /* Rounds 48-51 */ + MSG = _mm_add_epi32(MSG0, _mm_set_epi64x(0x34B0BCB52748774CULL, 0x1E376C0819A4C116ULL)); + STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); + TMP = _mm_alignr_epi8(MSG0, MSG3, 4); + MSG1 = _mm_add_epi32(MSG1, TMP); + MSG1 = _mm_sha256msg2_epu32(MSG1, MSG0); + MSG = _mm_shuffle_epi32(MSG, 0x0E); + STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); + MSG3 = _mm_sha256msg1_epu32(MSG3, MSG0); + + /* Rounds 52-55 */ + MSG = _mm_add_epi32(MSG1, _mm_set_epi64x(0x682E6FF35B9CCA4FULL, 0x4ED8AA4A391C0CB3ULL)); + STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); + TMP = _mm_alignr_epi8(MSG1, MSG0, 4); + MSG2 = _mm_add_epi32(MSG2, TMP); + MSG2 = _mm_sha256msg2_epu32(MSG2, MSG1); + MSG = _mm_shuffle_epi32(MSG, 0x0E); + STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); + + /* Rounds 56-59 */ + MSG = _mm_add_epi32(MSG2, _mm_set_epi64x(0x8CC7020884C87814ULL, 0x78A5636F748F82EEULL)); + STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); + TMP = _mm_alignr_epi8(MSG2, MSG1, 4); + MSG3 = _mm_add_epi32(MSG3, TMP); + MSG3 = _mm_sha256msg2_epu32(MSG3, MSG2); + MSG = _mm_shuffle_epi32(MSG, 0x0E); + STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); + + /* Rounds 60-63 */ + MSG = _mm_add_epi32(MSG3, _mm_set_epi64x(0xC67178F2BEF9A3F7ULL, 0xA4506CEB90BEFFFAULL)); + STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); + MSG = _mm_shuffle_epi32(MSG, 0x0E); + STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); + + /* Combine state */ + STATE0 = _mm_add_epi32(STATE0, ABEF_SAVE); + STATE1 = _mm_add_epi32(STATE1, CDGH_SAVE); + + data += 64; + length -= 64; + } + + TMP = _mm_shuffle_epi32(STATE0, 0x1B); /* FEBA */ + STATE1 = _mm_shuffle_epi32(STATE1, 0xB1); /* DCHG */ + STATE0 = _mm_blend_epi16(TMP, STATE1, 0xF0); /* DCBA */ + STATE1 = _mm_alignr_epi8(STATE1, TMP, 8); /* ABEF */ + + /* Save state */ + _mm_storeu_si128((__m128i*) (state+0), STATE0); + _mm_storeu_si128((__m128i*) (state+4), STATE1); + + /* Repack into uint64_t. */ + state64[0] = state[0]; + state64[1] = state[1]; + state64[2] = state[2]; + state64[3] = state[3]; + state64[4] = state[4]; + state64[5] = state[5]; + state64[6] = state[6]; + state64[7] = state[7]; +} +#endif /* CPU_X86_SHA256_ACCELERATION */ + +static __inline void sha256_transform(SUM_CONTEXT *ctx, const uint8_t *data) +{ +#ifdef CPU_X86_SHA256_ACCELERATION + if (cpu_has_sha256_accel) + { + /* SHA-256 acceleration using intrinsics */ + sha256_transform_x86(ctx->state, data, SHA256_BLOCKSIZE); + } + else +#endif + { + /* Portable C/C++ implementation */ + sha256_transform_cc(ctx, data); + } +} + /* * Transform the message X which consists of 16 64-bit-words (SHA-512) * This is an algorithm that *REALLY* benefits from being executed as 64-bit @@ -618,12 +1102,29 @@ static void sha1_write(SUM_CONTEXT *ctx, const uint8_t *buf, size_t len) len -= num; } - /* Process data in blocksize chunks */ - while (len >= SHA1_BLOCKSIZE) { - PREFETCH64(buf + SHA1_BLOCKSIZE); - sha1_transform(ctx, buf); - buf += SHA1_BLOCKSIZE; - len -= SHA1_BLOCKSIZE; +#ifdef CPU_X86_SHA1_ACCELERATION + if (cpu_has_sha1_accel) + { + /* Process all full blocks at once */ + if (len >= SHA1_BLOCKSIZE) { + /* Calculate full blocks, in bytes */ + num = (len / SHA1_BLOCKSIZE) * SHA1_BLOCKSIZE; + /* SHA-1 acceleration using intrinsics */ + sha1_transform_x86(ctx->state, buf, num); + buf += num; + len -= num; + } + } + else +#endif + { + /* Process data in blocksize chunks */ + while (len >= SHA1_BLOCKSIZE) { + PREFETCH64(buf + SHA1_BLOCKSIZE); + sha1_transform(ctx, buf); + buf += SHA1_BLOCKSIZE; + len -= SHA1_BLOCKSIZE; + } } /* Handle any remaining bytes of data. */ @@ -653,12 +1154,29 @@ static void sha256_write(SUM_CONTEXT *ctx, const uint8_t *buf, size_t len) len -= num; } - /* Process data in blocksize chunks */ - while (len >= SHA256_BLOCKSIZE) { - PREFETCH64(buf + SHA256_BLOCKSIZE); - sha256_transform(ctx, buf); - buf += SHA256_BLOCKSIZE; - len -= SHA256_BLOCKSIZE; +#ifdef CPU_X86_SHA256_ACCELERATION + if (cpu_has_sha256_accel) + { + /* Process all full blocks at once */ + if (len >= SHA256_BLOCKSIZE) { + /* Calculate full blocks, in bytes */ + num = (len / SHA256_BLOCKSIZE) * SHA256_BLOCKSIZE; + /* SHA-256 acceleration using intrinsics */ + sha256_transform_x86(ctx->state, buf, num); + buf += num; + len -= num; + } + } + else +#endif + { + /* Process data in blocksize chunks */ + while (len >= SHA256_BLOCKSIZE) { + PREFETCH64(buf + SHA256_BLOCKSIZE); + sha256_transform(ctx, buf); + buf += SHA256_BLOCKSIZE; + len -= SHA256_BLOCKSIZE; + } } /* Handle any remaining bytes of data. */ @@ -1280,7 +1798,7 @@ BOOL IsFileInDB(const char* path) return FALSE; } -#if defined(_DEBUG) +#if defined(_DEBUG) || defined(TEST) || defined(ALPHA) /* Convert a lowercase hex string to binary. Returned value must be freed */ uint8_t* to_bin(const char* str) { @@ -1303,7 +1821,7 @@ uint8_t* to_bin(const char* str) return ret; } -const char* test_msg = "Did you ever hear the tragedy of Darth Plagueis The Wise? " +const char test_msg[] = "Did you ever hear the tragedy of Darth Plagueis The Wise? " "I thought not. It's not a story the Jedi would tell you. It's a Sith legend. " "Darth Plagueis was a Dark Lord of the Sith, so powerful and so wise he could " "use the Force to influence the midichlorians to create life... He had such a " @@ -1356,6 +1874,10 @@ int TestChecksum(void) if (msg == NULL) return -1; + /* Display accelerations available */ + uprintf("SHA1 acceleration: %s", (cpu_has_sha1_accel ? "TRUE" : "FALSE")); + uprintf("SHA256 acceleration: %s", (cpu_has_sha256_accel ? "TRUE" : "FALSE")); + for (j = 0; j < CHECKSUM_MAX; j++) { size_t copy_msg_len[4]; copy_msg_len[0] = 0; diff --git a/src/cpu.c b/src/cpu.c new file mode 100644 index 00000000..11f84915 --- /dev/null +++ b/src/cpu.c @@ -0,0 +1,123 @@ +/* + * Rufus: The Reliable USB Formatting Utility + * CPU features detection + * Copyright © 2022 Pete Batard + * Copyright © 2022 Jeffrey Walton + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include "cpu.h" + +#if (defined(CPU_X86_SHA1_ACCELERATION) || defined(CPU_X86_SHA256_ACCELERATION)) +#if defined(RUFUS_MSC_VERSION) +#include +#elif (defined(RUFUS_GCC_VERSION) || defined(RUFUS_CLANG_VERSION)) +#include +#elif defined(RUFUS_INTEL_VERSION) +#include +#endif +#endif + +BOOL cpu_has_sha1_accel = FALSE; +BOOL cpu_has_sha256_accel = FALSE; + +/* + * Three elements must be in place to make a meaningful call to the + * DetectSHA###Acceleration() calls. First, the compiler must support + * the underlying intrinsics. Second, the platform must provide a + * cpuid() function. And third, the cpu must actually support the SHA-1 + * and SHA-256 instructions. + * + * If any of the conditions are not met, then DetectSHA###Acceleration() + * returns FALSE. + */ + +/* + * Detect if the processor supports SHA-1 acceleration. We only check for + * the three ISAs we need - SSSE3, SSE4.1 and SHA. We don't check for OS + * support or XSAVE because that's been enabled since Windows 2000. + */ +BOOL DetectSHA1Acceleration(void) +{ +#if defined(CPU_X86_SHA1_ACCELERATION) +#if defined(_MSC_VER) + uint32_t regs0[4] = {0,0,0,0}, regs1[4] = {0,0,0,0}, regs7[4] = {0,0,0,0}; + const uint32_t SSSE3_BIT = 1u << 9; /* Function 1, Bit 9 of ECX */ + const uint32_t SSE41_BIT = 1u << 19; /* Function 1, Bit 19 of ECX */ + const uint32_t SHA_BIT = 1u << 29; /* Function 7, Bit 29 of EBX */ + + __cpuid(regs0, 0); + const uint32_t highest = regs0[0]; /*EAX*/ + + if (highest >= 0x01) { + __cpuidex(regs1, 1, 0); + } + if (highest >= 0x07) { + __cpuidex(regs7, 7, 0); + } + + return (regs1[2] /*ECX*/ & SSSE3_BIT) && (regs1[2] /*ECX*/ & SSE41_BIT) && (regs7[1] /*EBX*/ & SHA_BIT) ? TRUE : FALSE; +#elif defined(__GNUC__) || defined(__clang__) + /* __builtin_cpu_supports available in GCC 4.8.1 and above */ + return __builtin_cpu_supports("ssse3") && __builtin_cpu_supports("sse4.1") && __builtin_cpu_supports("sha") ? TRUE : FALSE; +#elif defined(__INTEL_COMPILER) + /* https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_may_i_use_cpu_feature */ + return _may_i_use_cpu_feature(_FEATURE_SSSE3|_FEATURE_SSE4_1|_FEATURE_SHA) ? TRUE : FALSE; +#else + return FALSE; +#endif +#else + return FALSE; +#endif +} + +/* + * Detect if the processor supports SHA-256 acceleration. We only check for + * the three ISAs we need - SSSE3, SSE4.1 and SHA. We don't check for OS + * support or XSAVE because that's been enabled since Windows 2000. + */ +BOOL DetectSHA256Acceleration(void) +{ +#if defined(CPU_X86_SHA256_ACCELERATION) +#if defined(_MSC_VER) + uint32_t regs0[4] = {0,0,0,0}, regs1[4] = {0,0,0,0}, regs7[4] = {0,0,0,0}; + const uint32_t SSSE3_BIT = 1u << 9; /* Function 1, Bit 9 of ECX */ + const uint32_t SSE41_BIT = 1u << 19; /* Function 1, Bit 19 of ECX */ + const uint32_t SHA_BIT = 1u << 29; /* Function 7, Bit 29 of EBX */ + + __cpuid(regs0, 0); + const uint32_t highest = regs0[0]; /*EAX*/ + + if (highest >= 0x01) { + __cpuidex(regs1, 1, 0); + } + if (highest >= 0x07) { + __cpuidex(regs7, 7, 0); + } + + return (regs1[2] /*ECX*/ & SSSE3_BIT) && (regs1[2] /*ECX*/ & SSE41_BIT) && (regs7[1] /*EBX*/ & SHA_BIT) ? TRUE : FALSE; +#elif defined(__GNUC__) || defined(__clang__) + /* __builtin_cpu_supports available in GCC 4.8.1 and above */ + return __builtin_cpu_supports("ssse3") && __builtin_cpu_supports("sse4.1") && __builtin_cpu_supports("sha") ? TRUE : FALSE; +#elif defined(__INTEL_COMPILER) + /* https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_may_i_use_cpu_feature */ + return _may_i_use_cpu_feature(_FEATURE_SSSE3|_FEATURE_SSE4_1|_FEATURE_SHA) ? TRUE : FALSE; +#else + return FALSE; +#endif +#else + return FALSE; +#endif +} diff --git a/src/cpu.h b/src/cpu.h new file mode 100644 index 00000000..077f19fd --- /dev/null +++ b/src/cpu.h @@ -0,0 +1,79 @@ +/* + * Rufus: The Reliable USB Formatting Utility + * CPU features detection + * Copyright © 2022 Pete Batard + * Copyright © 2022 Jeffrey Walton + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +/* + * Primarily added to support SHA instructions on x86 machines. + * SHA acceleration is becoming as ubiquitous as AES acceleration. + * SHA support was introduced in Intel Goldmont architecture, like + * Celeron J3455 and Pentium J4205. The instructions are now present + * in AMD Ryzen 3 (Zen architecture) and above, and Intel Core + * 10th-gen processors (Ice Lake), 11th-gen processors (Rocket Lake) + * and above. + * + * Typical benchmarks for x86 SHA acceleration is about a 6x to 10x + * speedup over a C/C++ implementation. The rough measurements are + * 1.0 to 1.8 cpb for SHA-1, and 1.5 to 2.5 cpb for SHA-256. On a + * Celeron J3455, that's 1.1 GB/s for SHA-1 and 800 MB/s for SHA-256. + * On a 10th-gen Core i5, that's about 1.65 GB/s for SHA-1 and about + * 1.3 GB/s for SHA-256. + */ + +#include "rufus.h" + +#pragma once + +#ifdef _MSC_VER +#define RUFUS_MSC_VERSION (_MSC_VER) +#if (RUFUS_MSC_VERSION < 1900) +#error "Your compiler is too old to build this application" +#endif +#endif + +#if defined(__GNUC__) +#define RUFUS_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) +#if (RUFUS_GCC_VERSION < 40900) +#error "Your compiler is too old to build this application" +#endif +#endif + +#ifdef __INTEL_COMPILER +#define RUFUS_INTEL_VERSION (__INTEL_COMPILER) +#if (RUFUS_INTEL_VERSION < 1600) +#error "Your compiler is too old to build this application" +#endif +#endif + +#if defined(__clang__) +#define RUFUS_CLANG_VERSION (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__) +#if (RUFUS_CLANG_VERSION < 30400) +#error "Your compiler is too old to build this application" +#endif +#endif + +#if (defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__i386) || \ + defined(_X86_) || defined(__I86__) || defined(__x86_64__)) +#define CPU_X86_SHA1_ACCELERATION 1 +#define CPU_X86_SHA256_ACCELERATION 1 +#endif + +extern BOOL cpu_has_sha1_accel, cpu_has_sha256_accel; + +extern BOOL DetectSHA1Acceleration(void); +extern BOOL DetectSHA256Acceleration(void); diff --git a/src/rufus.c b/src/rufus.c index 0f9acad6..8dfa4a2a 100755 --- a/src/rufus.c +++ b/src/rufus.c @@ -47,6 +47,7 @@ #include "ui.h" #include "re.h" +#include "cpu.h" #include "vhd.h" #include "wue.h" #include "drive.h" @@ -3688,6 +3689,10 @@ skip_args_processing: uprintf("Failed to enable AutoMount"); } + // Detect CPU acceleration for SHA-1/SHA-256 + cpu_has_sha1_accel = DetectSHA1Acceleration(); + cpu_has_sha256_accel = DetectSHA256Acceleration(); + relaunch: ubprintf("Localization set to '%s'", selected_locale->txt[0]); right_to_left_mode = ((selected_locale->ctrl_id) & LOC_RIGHT_TO_LEFT); @@ -3774,7 +3779,8 @@ relaunch: SendMessage(hMainDialog, WM_COMMAND, IDC_LOG, 0); continue; } -#if defined(_DEBUG) || defined(TEST) +#if defined(_DEBUG) || defined(TEST) || defined(ALPHA) +extern int TestChecksum(void); // Ctrl-T => Alternate Test mode that doesn't require a full rebuild if ((ctrl_without_focus || ((GetKeyState(VK_CONTROL) & 0x8000) && (msg.message == WM_KEYDOWN))) && (msg.wParam == 'T')) { diff --git a/src/rufus.rc b/src/rufus.rc index 823ddac0..024049a8 100644 --- a/src/rufus.rc +++ b/src/rufus.rc @@ -33,7 +33,7 @@ LANGUAGE LANG_NEUTRAL, SUBLANG_NEUTRAL IDD_DIALOG DIALOGEX 12, 12, 232, 326 STYLE DS_SETFONT | DS_MODALFRAME | DS_CENTER | WS_MINIMIZEBOX | WS_POPUP | WS_CAPTION | WS_SYSMENU EXSTYLE WS_EX_ACCEPTFILES -CAPTION "Rufus 3.21.1949" +CAPTION "Rufus 3.21.1950" FONT 9, "Segoe UI Symbol", 400, 0, 0x0 BEGIN LTEXT "Drive Properties",IDS_DRIVE_PROPERTIES_TXT,8,6,53,12,NOT WS_GROUP @@ -396,8 +396,8 @@ END // VS_VERSION_INFO VERSIONINFO - FILEVERSION 3,21,1949,0 - PRODUCTVERSION 3,21,1949,0 + FILEVERSION 3,21,1950,0 + PRODUCTVERSION 3,21,1950,0 FILEFLAGSMASK 0x3fL #ifdef _DEBUG FILEFLAGS 0x1L @@ -415,13 +415,13 @@ BEGIN VALUE "Comments", "https://rufus.ie" VALUE "CompanyName", "Akeo Consulting" VALUE "FileDescription", "Rufus" - VALUE "FileVersion", "3.21.1949" + VALUE "FileVersion", "3.21.1950" VALUE "InternalName", "Rufus" VALUE "LegalCopyright", "© 2011-2022 Pete Batard (GPL v3)" VALUE "LegalTrademarks", "https://www.gnu.org/licenses/gpl-3.0.html" VALUE "OriginalFilename", "rufus-3.21.exe" VALUE "ProductName", "Rufus" - VALUE "ProductVersion", "3.21.1949" + VALUE "ProductVersion", "3.21.1950" END END BLOCK "VarFileInfo"