1
1
Fork 0
mirror of https://github.com/pbatard/rufus.git synced 2024-08-14 23:57:05 +00:00

Compare commits

...

3 commits

Author SHA1 Message Date
Rose
fab095c043
[misc] prefer strtol/strtoull over atol/atoll
* This means we don't need to worry about conversion issues regarding signedess. In addition,
  the behavior will no longer be undefined if for some reason the conversion cannot happen.
* Closes #2104.
2022-12-08 11:21:39 +00:00
dependabot[bot]
2bc1428975
[misc] bump dessant/lock-threads from 3 to 4
* Bumps [dessant/lock-threads](https://github.com/dessant/lock-threads) from 3 to 4.
  - [Release notes](https://github.com/dessant/lock-threads/releases)
  - [Changelog](https://github.com/dessant/lock-threads/blob/master/CHANGELOG.md)
  - [Commits](https://github.com/dessant/lock-threads/compare/v3...v4)
* Closes #2102
* Also bump Rufus version to Rufus next
2022-12-08 11:18:47 +00:00
Jeffrey Walton
36f4716afd
[checksum] enable x86 acceleration if the CPU supports it
* Newer Intel and AMD CPUs have SSE extensions for SHA-1 and SHA-256 acceleration.
* Add new cpu.c/cpu.h sources to detect the extensions, and use them in checksum.c
  if available.
* Acceleration code is taken from https://github.com/noloader/SHA-Intrinsics.
2022-12-08 11:15:48 +00:00
13 changed files with 787 additions and 43 deletions

View file

@ -9,7 +9,7 @@ jobs:
lock: lock:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- uses: dessant/lock-threads@v3 - uses: dessant/lock-threads@v4
with: with:
github-token: ${{ github.token }} github-token: ${{ github.token }}
issue-inactive-days: '90' issue-inactive-days: '90'

View file

@ -349,6 +349,7 @@
</ItemDefinitionGroup> </ItemDefinitionGroup>
<ItemGroup> <ItemGroup>
<ClCompile Include="..\src\badblocks.c" /> <ClCompile Include="..\src\badblocks.c" />
<ClCompile Include="..\src\cpu.c" />
<ClCompile Include="..\src\dos_locale.c" /> <ClCompile Include="..\src\dos_locale.c" />
<ClCompile Include="..\src\drive.c" /> <ClCompile Include="..\src\drive.c" />
<ClCompile Include="..\src\format.c" /> <ClCompile Include="..\src\format.c" />
@ -380,6 +381,7 @@
<ClInclude Include="..\res\grub\grub_version.h" /> <ClInclude Include="..\res\grub\grub_version.h" />
<ClInclude Include="..\src\badblocks.h" /> <ClInclude Include="..\src\badblocks.h" />
<ClInclude Include="..\src\bled\bled.h" /> <ClInclude Include="..\src\bled\bled.h" />
<ClInclude Include="..\src\cpu.h" />
<ClInclude Include="..\src\drive.h" /> <ClInclude Include="..\src\drive.h" />
<ClInclude Include="..\src\format.h" /> <ClInclude Include="..\src\format.h" />
<ClInclude Include="..\src\gpt_types.h" /> <ClInclude Include="..\src\gpt_types.h" />

View file

@ -93,6 +93,9 @@
<ClCompile Include="..\src\wue.c"> <ClCompile Include="..\src\wue.c">
<Filter>Source Files</Filter> <Filter>Source Files</Filter>
</ClCompile> </ClCompile>
<ClCompile Include="..\src\cpu.c">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ClInclude Include="..\src\rufus.h"> <ClInclude Include="..\src\rufus.h">
@ -191,6 +194,9 @@
<ClInclude Include="..\src\vhd.h"> <ClInclude Include="..\src\vhd.h">
<Filter>Header Files</Filter> <Filter>Header Files</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="..\src\cpu.h">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<None Include="..\res\rufus.ico"> <None Include="..\res\rufus.ico">

20
configure vendored
View file

@ -1,6 +1,6 @@
#! /bin/sh #! /bin/sh
# Guess values for system-dependent variables and create Makefiles. # Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.71 for rufus 3.21. # Generated by GNU Autoconf 2.71 for rufus 3.22.
# #
# Report bugs to <https://github.com/pbatard/rufus/issues>. # Report bugs to <https://github.com/pbatard/rufus/issues>.
# #
@ -611,8 +611,8 @@ MAKEFLAGS=
# Identity of this package. # Identity of this package.
PACKAGE_NAME='rufus' PACKAGE_NAME='rufus'
PACKAGE_TARNAME='rufus' PACKAGE_TARNAME='rufus'
PACKAGE_VERSION='3.21' PACKAGE_VERSION='3.22'
PACKAGE_STRING='rufus 3.21' PACKAGE_STRING='rufus 3.22'
PACKAGE_BUGREPORT='https://github.com/pbatard/rufus/issues' PACKAGE_BUGREPORT='https://github.com/pbatard/rufus/issues'
PACKAGE_URL='https://rufus.ie' PACKAGE_URL='https://rufus.ie'
@ -1269,7 +1269,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing. # Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh. # This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF cat <<_ACEOF
\`configure' configures rufus 3.21 to adapt to many kinds of systems. \`configure' configures rufus 3.22 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]... Usage: $0 [OPTION]... [VAR=VALUE]...
@ -1336,7 +1336,7 @@ fi
if test -n "$ac_init_help"; then if test -n "$ac_init_help"; then
case $ac_init_help in case $ac_init_help in
short | recursive ) echo "Configuration of rufus 3.21:";; short | recursive ) echo "Configuration of rufus 3.22:";;
esac esac
cat <<\_ACEOF cat <<\_ACEOF
@ -1428,7 +1428,7 @@ fi
test -n "$ac_init_help" && exit $ac_status test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then if $ac_init_version; then
cat <<\_ACEOF cat <<\_ACEOF
rufus configure 3.21 rufus configure 3.22
generated by GNU Autoconf 2.71 generated by GNU Autoconf 2.71
Copyright (C) 2021 Free Software Foundation, Inc. Copyright (C) 2021 Free Software Foundation, Inc.
@ -1504,7 +1504,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake. running configure, to aid debugging if configure makes a mistake.
It was created by rufus $as_me 3.21, which was It was created by rufus $as_me 3.22, which was
generated by GNU Autoconf 2.71. Invocation command line was generated by GNU Autoconf 2.71. Invocation command line was
$ $0$ac_configure_args_raw $ $0$ac_configure_args_raw
@ -2768,7 +2768,7 @@ fi
# Define the identity of the package. # Define the identity of the package.
PACKAGE='rufus' PACKAGE='rufus'
VERSION='3.21' VERSION='3.22'
printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h
@ -5310,7 +5310,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their # report actual input values of CONFIG_FILES etc. instead of their
# values after options handling. # values after options handling.
ac_log=" ac_log="
This file was extended by rufus $as_me 3.21, which was This file was extended by rufus $as_me 3.22, which was
generated by GNU Autoconf 2.71. Invocation command line was generated by GNU Autoconf 2.71. Invocation command line was
CONFIG_FILES = $CONFIG_FILES CONFIG_FILES = $CONFIG_FILES
@ -5366,7 +5366,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config='$ac_cs_config_escaped' ac_cs_config='$ac_cs_config_escaped'
ac_cs_version="\\ ac_cs_version="\\
rufus config.status 3.21 rufus config.status 3.22
configured by $0, generated by GNU Autoconf 2.71, configured by $0, generated by GNU Autoconf 2.71,
with options \\"\$ac_cs_config\\" with options \\"\$ac_cs_config\\"

View file

@ -1,4 +1,4 @@
AC_INIT([rufus], [3.21], [https://github.com/pbatard/rufus/issues], [rufus], [https://rufus.ie]) AC_INIT([rufus], [3.22], [https://github.com/pbatard/rufus/issues], [rufus], [https://rufus.ie])
AM_INIT_AUTOMAKE([-Wno-portability foreign no-dist no-dependencies]) AM_INIT_AUTOMAKE([-Wno-portability foreign no-dist no-dependencies])
AC_CONFIG_SRCDIR([src/rufus.c]) AC_CONFIG_SRCDIR([src/rufus.c])
AC_CONFIG_MACRO_DIR([m4]) AC_CONFIG_MACRO_DIR([m4])

View file

@ -14,8 +14,8 @@ AM_V_WINDRES = $(AM_V_WINDRES_$(V))
%_rc.o: %.rc ../res/loc/embedded.loc %_rc.o: %.rc ../res/loc/embedded.loc
$(AM_V_WINDRES) $(AM_RCFLAGS) -i $< -o $@ $(AM_V_WINDRES) $(AM_RCFLAGS) -i $< -o $@
rufus_SOURCES = badblocks.c checksum.c dev.c dos.c dos_locale.c drive.c format.c format_ext.c format_fat32.c icon.c iso.c localization.c \ rufus_SOURCES = badblocks.c checksum.c cpu.c dev.c dos.c dos_locale.c drive.c format.c format_ext.c format_fat32.c icon.c iso.c \
net.c parser.c pki.c process.c re.c rufus.c smart.c stdfn.c stdio.c stdlg.c syslinux.c ui.c vhd.c wue.c localization.c net.c parser.c pki.c process.c re.c rufus.c smart.c stdfn.c stdio.c stdlg.c syslinux.c ui.c vhd.c wue.c
rufus_CFLAGS = -I$(srcdir)/ms-sys/inc -I$(srcdir)/syslinux/libfat -I$(srcdir)/syslinux/libinstaller -I$(srcdir)/syslinux/win -I$(srcdir)/libcdio $(AM_CFLAGS) \ rufus_CFLAGS = -I$(srcdir)/ms-sys/inc -I$(srcdir)/syslinux/libfat -I$(srcdir)/syslinux/libinstaller -I$(srcdir)/syslinux/win -I$(srcdir)/libcdio $(AM_CFLAGS) \
-DEXT2_FLAT_INCLUDES=0 -DSOLUTION=rufus -DEXT2_FLAT_INCLUDES=0 -DSOLUTION=rufus
rufus_LDFLAGS = $(AM_LDFLAGS) -mwindows -L ../.mingw rufus_LDFLAGS = $(AM_LDFLAGS) -mwindows -L ../.mingw

View file

@ -88,7 +88,7 @@ CONFIG_CLEAN_FILES =
CONFIG_CLEAN_VPATH_FILES = CONFIG_CLEAN_VPATH_FILES =
PROGRAMS = $(noinst_PROGRAMS) PROGRAMS = $(noinst_PROGRAMS)
am_rufus_OBJECTS = rufus-badblocks.$(OBJEXT) rufus-checksum.$(OBJEXT) \ am_rufus_OBJECTS = rufus-badblocks.$(OBJEXT) rufus-checksum.$(OBJEXT) \
rufus-dev.$(OBJEXT) rufus-dos.$(OBJEXT) \ rufus-cpu.$(OBJEXT) rufus-dev.$(OBJEXT) rufus-dos.$(OBJEXT) \
rufus-dos_locale.$(OBJEXT) rufus-drive.$(OBJEXT) \ rufus-dos_locale.$(OBJEXT) rufus-drive.$(OBJEXT) \
rufus-format.$(OBJEXT) rufus-format_ext.$(OBJEXT) \ rufus-format.$(OBJEXT) rufus-format_ext.$(OBJEXT) \
rufus-format_fat32.$(OBJEXT) rufus-icon.$(OBJEXT) \ rufus-format_fat32.$(OBJEXT) rufus-icon.$(OBJEXT) \
@ -281,8 +281,8 @@ AM_V_WINDRES_0 = @echo " RC $@";$(WINDRES)
AM_V_WINDRES_1 = $(WINDRES) AM_V_WINDRES_1 = $(WINDRES)
AM_V_WINDRES_ = $(AM_V_WINDRES_$(AM_DEFAULT_VERBOSITY)) AM_V_WINDRES_ = $(AM_V_WINDRES_$(AM_DEFAULT_VERBOSITY))
AM_V_WINDRES = $(AM_V_WINDRES_$(V)) AM_V_WINDRES = $(AM_V_WINDRES_$(V))
rufus_SOURCES = badblocks.c checksum.c dev.c dos.c dos_locale.c drive.c format.c format_ext.c format_fat32.c icon.c iso.c localization.c \ rufus_SOURCES = badblocks.c checksum.c cpu.c dev.c dos.c dos_locale.c drive.c format.c format_ext.c format_fat32.c icon.c iso.c \
net.c parser.c pki.c process.c re.c rufus.c smart.c stdfn.c stdio.c stdlg.c syslinux.c ui.c vhd.c wue.c localization.c net.c parser.c pki.c process.c re.c rufus.c smart.c stdfn.c stdio.c stdlg.c syslinux.c ui.c vhd.c wue.c
rufus_CFLAGS = -I$(srcdir)/ms-sys/inc -I$(srcdir)/syslinux/libfat -I$(srcdir)/syslinux/libinstaller -I$(srcdir)/syslinux/win -I$(srcdir)/libcdio $(AM_CFLAGS) \ rufus_CFLAGS = -I$(srcdir)/ms-sys/inc -I$(srcdir)/syslinux/libfat -I$(srcdir)/syslinux/libinstaller -I$(srcdir)/syslinux/win -I$(srcdir)/libcdio $(AM_CFLAGS) \
-DEXT2_FLAT_INCLUDES=0 -DSOLUTION=rufus -DEXT2_FLAT_INCLUDES=0 -DSOLUTION=rufus
@ -357,6 +357,12 @@ rufus-checksum.o: checksum.c
rufus-checksum.obj: checksum.c rufus-checksum.obj: checksum.c
$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rufus_CFLAGS) $(CFLAGS) -c -o rufus-checksum.obj `if test -f 'checksum.c'; then $(CYGPATH_W) 'checksum.c'; else $(CYGPATH_W) '$(srcdir)/checksum.c'; fi` $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rufus_CFLAGS) $(CFLAGS) -c -o rufus-checksum.obj `if test -f 'checksum.c'; then $(CYGPATH_W) 'checksum.c'; else $(CYGPATH_W) '$(srcdir)/checksum.c'; fi`
rufus-cpu.o: cpu.c
$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rufus_CFLAGS) $(CFLAGS) -c -o rufus-cpu.o `test -f 'cpu.c' || echo '$(srcdir)/'`cpu.c
rufus-cpu.obj: cpu.c
$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rufus_CFLAGS) $(CFLAGS) -c -o rufus-cpu.obj `if test -f 'cpu.c'; then $(CYGPATH_W) 'cpu.c'; else $(CYGPATH_W) '$(srcdir)/cpu.c'; fi`
rufus-dev.o: dev.c rufus-dev.o: dev.c
$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rufus_CFLAGS) $(CFLAGS) -c -o rufus-dev.o `test -f 'dev.c' || echo '$(srcdir)/'`dev.c $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rufus_CFLAGS) $(CFLAGS) -c -o rufus-dev.o `test -f 'dev.c' || echo '$(srcdir)/'`dev.c

View file

@ -6,6 +6,7 @@
* Copyright © 2004 g10 Code GmbH * Copyright © 2004 g10 Code GmbH
* Copyright © 2002-2015 Wei Dai & Igor Pavlov * Copyright © 2002-2015 Wei Dai & Igor Pavlov
* Copyright © 2015-2021 Pete Batard <pete@akeo.ie> * Copyright © 2015-2021 Pete Batard <pete@akeo.ie>
* Copyright © 2022 Jeffrey Walton <noloader@gmail.com>
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@ -28,6 +29,8 @@
* *
* SHA-512 modified from LibTomCrypt - Public Domain * SHA-512 modified from LibTomCrypt - Public Domain
* *
* CPU accelerated SHA code taken from SHA-Intrinsics - Public Domain
*
* MD5 code from various public domain sources sharing the following * MD5 code from various public domain sources sharing the following
* copyright declaration: * copyright declaration:
* *
@ -61,6 +64,7 @@
#include <windowsx.h> #include <windowsx.h>
#include "db.h" #include "db.h"
#include "cpu.h"
#include "rufus.h" #include "rufus.h"
#include "winio.h" #include "winio.h"
#include "missing.h" #include "missing.h"
@ -68,6 +72,22 @@
#include "msapi_utf8.h" #include "msapi_utf8.h"
#include "localization.h" #include "localization.h"
/* Includes for SHA-1 and SHA-256 intrinsics */
#if defined(CPU_X86_SHA1_ACCELERATION) || defined(CPU_X86_SHA256_ACCELERATION)
#if defined(_MSC_VER)
#include <immintrin.h>
#elif defined(__GNUC__)
#include <stdint.h>
#include <x86intrin.h>
#endif
#endif
#if defined(_MSC_VER)
#define RUFUS_ENABLE_GCC_ARCH(arch)
#else
#define RUFUS_ENABLE_GCC_ARCH(arch) __attribute__ ((target (arch)))
#endif
#undef BIG_ENDIAN_HOST #undef BIG_ENDIAN_HOST
#define BUFFER_SIZE (64*KB) #define BUFFER_SIZE (64*KB)
@ -208,7 +228,7 @@ static void sha512_init(SUM_CONTEXT* ctx)
} }
/* Transform the message X which consists of 16 32-bit-words (SHA-1) */ /* Transform the message X which consists of 16 32-bit-words (SHA-1) */
static void sha1_transform(SUM_CONTEXT *ctx, const uint8_t *data) static void sha1_transform_cc(SUM_CONTEXT *ctx, const uint8_t *data)
{ {
uint32_t a, b, c, d, e, tm, x[16]; uint32_t a, b, c, d, e, tm, x[16];
@ -341,8 +361,235 @@ static void sha1_transform(SUM_CONTEXT *ctx, const uint8_t *data)
ctx->state[4] += e; ctx->state[4] += e;
} }
#ifdef CPU_X86_SHA1_ACCELERATION
/*
* Transform the message X which consists of 16 32-bit-words (SHA-1)
* The code is public domain taken from https://github.com/noloader/SHA-Intrinsics.
*/
RUFUS_ENABLE_GCC_ARCH("ssse3,sse4.1,sha")
static void sha1_transform_x86(uint64_t state64[5], const uint8_t *data, size_t length)
{
__m128i ABCD, E0, E1;
__m128i MSG0, MSG1, MSG2, MSG3;
const __m128i MASK = _mm_set_epi64x(0x0001020304050607ULL, 0x08090a0b0c0d0e0fULL);
/* Rufus uses uint64_t for the state array. Pack it into uint32_t. */
uint32_t state[5] = {
(uint32_t)state64[0],
(uint32_t)state64[1],
(uint32_t)state64[2],
(uint32_t)state64[3],
(uint32_t)state64[4]
};
/* Load initial values */
ABCD = _mm_loadu_si128((const __m128i*) state);
E0 = _mm_set_epi32(state[4], 0, 0, 0);
ABCD = _mm_shuffle_epi32(ABCD, 0x1B);
while (length >= SHA1_BLOCKSIZE)
{
/* Save current state */
const __m128i ABCD_SAVE = ABCD;
const __m128i E0_SAVE = E0;
/* Rounds 0-3 */
MSG0 = _mm_loadu_si128((const __m128i*)(data + 0));
MSG0 = _mm_shuffle_epi8(MSG0, MASK);
E0 = _mm_add_epi32(E0, MSG0);
E1 = ABCD;
ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 0);
/* Rounds 4-7 */
MSG1 = _mm_loadu_si128((const __m128i*)(data + 16));
MSG1 = _mm_shuffle_epi8(MSG1, MASK);
E1 = _mm_sha1nexte_epu32(E1, MSG1);
E0 = ABCD;
ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 0);
MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1);
/* Rounds 8-11 */
MSG2 = _mm_loadu_si128((const __m128i*)(data + 32));
MSG2 = _mm_shuffle_epi8(MSG2, MASK);
E0 = _mm_sha1nexte_epu32(E0, MSG2);
E1 = ABCD;
ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 0);
MSG1 = _mm_sha1msg1_epu32(MSG1, MSG2);
MSG0 = _mm_xor_si128(MSG0, MSG2);
/* Rounds 12-15 */
MSG3 = _mm_loadu_si128((const __m128i*)(data + 48));
MSG3 = _mm_shuffle_epi8(MSG3, MASK);
E1 = _mm_sha1nexte_epu32(E1, MSG3);
E0 = ABCD;
MSG0 = _mm_sha1msg2_epu32(MSG0, MSG3);
ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 0);
MSG2 = _mm_sha1msg1_epu32(MSG2, MSG3);
MSG1 = _mm_xor_si128(MSG1, MSG3);
/* Rounds 16-19 */
E0 = _mm_sha1nexte_epu32(E0, MSG0);
E1 = ABCD;
MSG1 = _mm_sha1msg2_epu32(MSG1, MSG0);
ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 0);
MSG3 = _mm_sha1msg1_epu32(MSG3, MSG0);
MSG2 = _mm_xor_si128(MSG2, MSG0);
/* Rounds 20-23 */
E1 = _mm_sha1nexte_epu32(E1, MSG1);
E0 = ABCD;
MSG2 = _mm_sha1msg2_epu32(MSG2, MSG1);
ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 1);
MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1);
MSG3 = _mm_xor_si128(MSG3, MSG1);
/* Rounds 24-27 */
E0 = _mm_sha1nexte_epu32(E0, MSG2);
E1 = ABCD;
MSG3 = _mm_sha1msg2_epu32(MSG3, MSG2);
ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 1);
MSG1 = _mm_sha1msg1_epu32(MSG1, MSG2);
MSG0 = _mm_xor_si128(MSG0, MSG2);
/* Rounds 28-31 */
E1 = _mm_sha1nexte_epu32(E1, MSG3);
E0 = ABCD;
MSG0 = _mm_sha1msg2_epu32(MSG0, MSG3);
ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 1);
MSG2 = _mm_sha1msg1_epu32(MSG2, MSG3);
MSG1 = _mm_xor_si128(MSG1, MSG3);
/* Rounds 32-35 */
E0 = _mm_sha1nexte_epu32(E0, MSG0);
E1 = ABCD;
MSG1 = _mm_sha1msg2_epu32(MSG1, MSG0);
ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 1);
MSG3 = _mm_sha1msg1_epu32(MSG3, MSG0);
MSG2 = _mm_xor_si128(MSG2, MSG0);
/* Rounds 36-39 */
E1 = _mm_sha1nexte_epu32(E1, MSG1);
E0 = ABCD;
MSG2 = _mm_sha1msg2_epu32(MSG2, MSG1);
ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 1);
MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1);
MSG3 = _mm_xor_si128(MSG3, MSG1);
/* Rounds 40-43 */
E0 = _mm_sha1nexte_epu32(E0, MSG2);
E1 = ABCD;
MSG3 = _mm_sha1msg2_epu32(MSG3, MSG2);
ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 2);
MSG1 = _mm_sha1msg1_epu32(MSG1, MSG2);
MSG0 = _mm_xor_si128(MSG0, MSG2);
/* Rounds 44-47 */
E1 = _mm_sha1nexte_epu32(E1, MSG3);
E0 = ABCD;
MSG0 = _mm_sha1msg2_epu32(MSG0, MSG3);
ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 2);
MSG2 = _mm_sha1msg1_epu32(MSG2, MSG3);
MSG1 = _mm_xor_si128(MSG1, MSG3);
/* Rounds 48-51 */
E0 = _mm_sha1nexte_epu32(E0, MSG0);
E1 = ABCD;
MSG1 = _mm_sha1msg2_epu32(MSG1, MSG0);
ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 2);
MSG3 = _mm_sha1msg1_epu32(MSG3, MSG0);
MSG2 = _mm_xor_si128(MSG2, MSG0);
/* Rounds 52-55 */
E1 = _mm_sha1nexte_epu32(E1, MSG1);
E0 = ABCD;
MSG2 = _mm_sha1msg2_epu32(MSG2, MSG1);
ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 2);
MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1);
MSG3 = _mm_xor_si128(MSG3, MSG1);
/* Rounds 56-59 */
E0 = _mm_sha1nexte_epu32(E0, MSG2);
E1 = ABCD;
MSG3 = _mm_sha1msg2_epu32(MSG3, MSG2);
ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 2);
MSG1 = _mm_sha1msg1_epu32(MSG1, MSG2);
MSG0 = _mm_xor_si128(MSG0, MSG2);
/* Rounds 60-63 */
E1 = _mm_sha1nexte_epu32(E1, MSG3);
E0 = ABCD;
MSG0 = _mm_sha1msg2_epu32(MSG0, MSG3);
ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 3);
MSG2 = _mm_sha1msg1_epu32(MSG2, MSG3);
MSG1 = _mm_xor_si128(MSG1, MSG3);
/* Rounds 64-67 */
E0 = _mm_sha1nexte_epu32(E0, MSG0);
E1 = ABCD;
MSG1 = _mm_sha1msg2_epu32(MSG1, MSG0);
ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 3);
MSG3 = _mm_sha1msg1_epu32(MSG3, MSG0);
MSG2 = _mm_xor_si128(MSG2, MSG0);
/* Rounds 68-71 */
E1 = _mm_sha1nexte_epu32(E1, MSG1);
E0 = ABCD;
MSG2 = _mm_sha1msg2_epu32(MSG2, MSG1);
ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 3);
MSG3 = _mm_xor_si128(MSG3, MSG1);
/* Rounds 72-75 */
E0 = _mm_sha1nexte_epu32(E0, MSG2);
E1 = ABCD;
MSG3 = _mm_sha1msg2_epu32(MSG3, MSG2);
ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 3);
/* Rounds 76-79 */
E1 = _mm_sha1nexte_epu32(E1, MSG3);
E0 = ABCD;
ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 3);
/* Combine state */
E0 = _mm_sha1nexte_epu32(E0, E0_SAVE);
ABCD = _mm_add_epi32(ABCD, ABCD_SAVE);
data += 64;
length -= 64;
}
/* Save state */
ABCD = _mm_shuffle_epi32(ABCD, 0x1B);
_mm_storeu_si128((__m128i*) state, ABCD);
state[4] = _mm_extract_epi32(E0, 3);
/* Repack into uint64_t. */
state64[0] = state[0];
state64[1] = state[1];
state64[2] = state[2];
state64[3] = state[3];
state64[4] = state[4];
}
#endif /* CPU_X86_SHA1_ACCELERATION */
/* Transform the message X which consists of 16 32-bit-words (SHA-1) */
static void sha1_transform(SUM_CONTEXT *ctx, const uint8_t *data)
{
#ifdef CPU_X86_SHA1_ACCELERATION
if (cpu_has_sha1_accel)
{
/* SHA-1 acceleration using intrinsics */
sha1_transform_x86(ctx->state, data, SHA1_BLOCKSIZE);
}
else
#endif
{
/* Portable C/C++ implementation */
sha1_transform_cc(ctx, data);
}
}
/* Transform the message X which consists of 16 32-bit-words (SHA-256) */ /* Transform the message X which consists of 16 32-bit-words (SHA-256) */
static __inline void sha256_transform(SUM_CONTEXT *ctx, const uint8_t *data) static __inline void sha256_transform_cc(SUM_CONTEXT *ctx, const uint8_t *data)
{ {
uint32_t a, b, c, d, e, f, g, h, j, x[16]; uint32_t a, b, c, d, e, f, g, h, j, x[16];
@ -415,6 +662,243 @@ static __inline void sha256_transform(SUM_CONTEXT *ctx, const uint8_t *data)
ctx->state[7] += h; ctx->state[7] += h;
} }
#ifdef CPU_X86_SHA256_ACCELERATION
/*
* Transform the message X which consists of 16 32-bit-words (SHA-256)
* The code is public domain taken from https://github.com/noloader/SHA-Intrinsics.
*/
RUFUS_ENABLE_GCC_ARCH("ssse3,sse4.1,sha")
static __inline void sha256_transform_x86(uint64_t state64[8], const uint8_t *data, size_t length)
{
__m128i STATE0, STATE1;
__m128i MSG, TMP;
__m128i MSG0, MSG1, MSG2, MSG3;
const __m128i MASK = _mm_set_epi64x(0x0c0d0e0f08090a0bULL, 0x0405060700010203ULL);
/* Rufus uses uint64_t for the state array. Pack it into uint32_t. */
uint32_t state[8] = {
(uint32_t)state64[0],
(uint32_t)state64[1],
(uint32_t)state64[2],
(uint32_t)state64[3],
(uint32_t)state64[4],
(uint32_t)state64[5],
(uint32_t)state64[6],
(uint32_t)state64[7]
};
/* Load initial values */
TMP = _mm_loadu_si128((const __m128i*) (state+0));
STATE1 = _mm_loadu_si128((const __m128i*) (state+4));
TMP = _mm_shuffle_epi32(TMP, 0xB1); /* CDAB */
STATE1 = _mm_shuffle_epi32(STATE1, 0x1B); /* EFGH */
STATE0 = _mm_alignr_epi8(TMP, STATE1, 8); /* ABEF */
STATE1 = _mm_blend_epi16(STATE1, TMP, 0xF0); /* CDGH */
while (length >= SHA256_BLOCKSIZE)
{
/* Save current state */
const __m128i ABEF_SAVE = STATE0;
const __m128i CDGH_SAVE = STATE1;
/* Rounds 0-3 */
MSG = _mm_loadu_si128((const __m128i*) (data+0));
MSG0 = _mm_shuffle_epi8(MSG, MASK);
MSG = _mm_add_epi32(MSG0, _mm_set_epi64x(0xE9B5DBA5B5C0FBCFULL, 0x71374491428A2F98ULL));
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
MSG = _mm_shuffle_epi32(MSG, 0x0E);
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
/* Rounds 4-7 */
MSG1 = _mm_loadu_si128((const __m128i*) (data+16));
MSG1 = _mm_shuffle_epi8(MSG1, MASK);
MSG = _mm_add_epi32(MSG1, _mm_set_epi64x(0xAB1C5ED5923F82A4ULL, 0x59F111F13956C25BULL));
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
MSG = _mm_shuffle_epi32(MSG, 0x0E);
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
MSG0 = _mm_sha256msg1_epu32(MSG0, MSG1);
/* Rounds 8-11 */
MSG2 = _mm_loadu_si128((const __m128i*) (data+32));
MSG2 = _mm_shuffle_epi8(MSG2, MASK);
MSG = _mm_add_epi32(MSG2, _mm_set_epi64x(0x550C7DC3243185BEULL, 0x12835B01D807AA98ULL));
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
MSG = _mm_shuffle_epi32(MSG, 0x0E);
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
MSG1 = _mm_sha256msg1_epu32(MSG1, MSG2);
/* Rounds 12-15 */
MSG3 = _mm_loadu_si128((const __m128i*) (data+48));
MSG3 = _mm_shuffle_epi8(MSG3, MASK);
MSG = _mm_add_epi32(MSG3, _mm_set_epi64x(0xC19BF1749BDC06A7ULL, 0x80DEB1FE72BE5D74ULL));
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
TMP = _mm_alignr_epi8(MSG3, MSG2, 4);
MSG0 = _mm_add_epi32(MSG0, TMP);
MSG0 = _mm_sha256msg2_epu32(MSG0, MSG3);
MSG = _mm_shuffle_epi32(MSG, 0x0E);
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
MSG2 = _mm_sha256msg1_epu32(MSG2, MSG3);
/* Rounds 16-19 */
MSG = _mm_add_epi32(MSG0, _mm_set_epi64x(0x240CA1CC0FC19DC6ULL, 0xEFBE4786E49B69C1ULL));
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
TMP = _mm_alignr_epi8(MSG0, MSG3, 4);
MSG1 = _mm_add_epi32(MSG1, TMP);
MSG1 = _mm_sha256msg2_epu32(MSG1, MSG0);
MSG = _mm_shuffle_epi32(MSG, 0x0E);
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
MSG3 = _mm_sha256msg1_epu32(MSG3, MSG0);
/* Rounds 20-23 */
MSG = _mm_add_epi32(MSG1, _mm_set_epi64x(0x76F988DA5CB0A9DCULL, 0x4A7484AA2DE92C6FULL));
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
TMP = _mm_alignr_epi8(MSG1, MSG0, 4);
MSG2 = _mm_add_epi32(MSG2, TMP);
MSG2 = _mm_sha256msg2_epu32(MSG2, MSG1);
MSG = _mm_shuffle_epi32(MSG, 0x0E);
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
MSG0 = _mm_sha256msg1_epu32(MSG0, MSG1);
/* Rounds 24-27 */
MSG = _mm_add_epi32(MSG2, _mm_set_epi64x(0xBF597FC7B00327C8ULL, 0xA831C66D983E5152ULL));
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
TMP = _mm_alignr_epi8(MSG2, MSG1, 4);
MSG3 = _mm_add_epi32(MSG3, TMP);
MSG3 = _mm_sha256msg2_epu32(MSG3, MSG2);
MSG = _mm_shuffle_epi32(MSG, 0x0E);
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
MSG1 = _mm_sha256msg1_epu32(MSG1, MSG2);
/* Rounds 28-31 */
MSG = _mm_add_epi32(MSG3, _mm_set_epi64x(0x1429296706CA6351ULL, 0xD5A79147C6E00BF3ULL));
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
TMP = _mm_alignr_epi8(MSG3, MSG2, 4);
MSG0 = _mm_add_epi32(MSG0, TMP);
MSG0 = _mm_sha256msg2_epu32(MSG0, MSG3);
MSG = _mm_shuffle_epi32(MSG, 0x0E);
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
MSG2 = _mm_sha256msg1_epu32(MSG2, MSG3);
/* Rounds 32-35 */
MSG = _mm_add_epi32(MSG0, _mm_set_epi64x(0x53380D134D2C6DFCULL, 0x2E1B213827B70A85ULL));
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
TMP = _mm_alignr_epi8(MSG0, MSG3, 4);
MSG1 = _mm_add_epi32(MSG1, TMP);
MSG1 = _mm_sha256msg2_epu32(MSG1, MSG0);
MSG = _mm_shuffle_epi32(MSG, 0x0E);
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
MSG3 = _mm_sha256msg1_epu32(MSG3, MSG0);
/* Rounds 36-39 */
MSG = _mm_add_epi32(MSG1, _mm_set_epi64x(0x92722C8581C2C92EULL, 0x766A0ABB650A7354ULL));
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
TMP = _mm_alignr_epi8(MSG1, MSG0, 4);
MSG2 = _mm_add_epi32(MSG2, TMP);
MSG2 = _mm_sha256msg2_epu32(MSG2, MSG1);
MSG = _mm_shuffle_epi32(MSG, 0x0E);
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
MSG0 = _mm_sha256msg1_epu32(MSG0, MSG1);
/* Rounds 40-43 */
MSG = _mm_add_epi32(MSG2, _mm_set_epi64x(0xC76C51A3C24B8B70ULL, 0xA81A664BA2BFE8A1ULL));
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
TMP = _mm_alignr_epi8(MSG2, MSG1, 4);
MSG3 = _mm_add_epi32(MSG3, TMP);
MSG3 = _mm_sha256msg2_epu32(MSG3, MSG2);
MSG = _mm_shuffle_epi32(MSG, 0x0E);
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
MSG1 = _mm_sha256msg1_epu32(MSG1, MSG2);
/* Rounds 44-47 */
MSG = _mm_add_epi32(MSG3, _mm_set_epi64x(0x106AA070F40E3585ULL, 0xD6990624D192E819ULL));
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
TMP = _mm_alignr_epi8(MSG3, MSG2, 4);
MSG0 = _mm_add_epi32(MSG0, TMP);
MSG0 = _mm_sha256msg2_epu32(MSG0, MSG3);
MSG = _mm_shuffle_epi32(MSG, 0x0E);
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
MSG2 = _mm_sha256msg1_epu32(MSG2, MSG3);
/* Rounds 48-51 */
MSG = _mm_add_epi32(MSG0, _mm_set_epi64x(0x34B0BCB52748774CULL, 0x1E376C0819A4C116ULL));
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
TMP = _mm_alignr_epi8(MSG0, MSG3, 4);
MSG1 = _mm_add_epi32(MSG1, TMP);
MSG1 = _mm_sha256msg2_epu32(MSG1, MSG0);
MSG = _mm_shuffle_epi32(MSG, 0x0E);
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
MSG3 = _mm_sha256msg1_epu32(MSG3, MSG0);
/* Rounds 52-55 */
MSG = _mm_add_epi32(MSG1, _mm_set_epi64x(0x682E6FF35B9CCA4FULL, 0x4ED8AA4A391C0CB3ULL));
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
TMP = _mm_alignr_epi8(MSG1, MSG0, 4);
MSG2 = _mm_add_epi32(MSG2, TMP);
MSG2 = _mm_sha256msg2_epu32(MSG2, MSG1);
MSG = _mm_shuffle_epi32(MSG, 0x0E);
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
/* Rounds 56-59 */
MSG = _mm_add_epi32(MSG2, _mm_set_epi64x(0x8CC7020884C87814ULL, 0x78A5636F748F82EEULL));
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
TMP = _mm_alignr_epi8(MSG2, MSG1, 4);
MSG3 = _mm_add_epi32(MSG3, TMP);
MSG3 = _mm_sha256msg2_epu32(MSG3, MSG2);
MSG = _mm_shuffle_epi32(MSG, 0x0E);
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
/* Rounds 60-63 */
MSG = _mm_add_epi32(MSG3, _mm_set_epi64x(0xC67178F2BEF9A3F7ULL, 0xA4506CEB90BEFFFAULL));
STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG);
MSG = _mm_shuffle_epi32(MSG, 0x0E);
STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG);
/* Combine state */
STATE0 = _mm_add_epi32(STATE0, ABEF_SAVE);
STATE1 = _mm_add_epi32(STATE1, CDGH_SAVE);
data += 64;
length -= 64;
}
TMP = _mm_shuffle_epi32(STATE0, 0x1B); /* FEBA */
STATE1 = _mm_shuffle_epi32(STATE1, 0xB1); /* DCHG */
STATE0 = _mm_blend_epi16(TMP, STATE1, 0xF0); /* DCBA */
STATE1 = _mm_alignr_epi8(STATE1, TMP, 8); /* ABEF */
/* Save state */
_mm_storeu_si128((__m128i*) (state+0), STATE0);
_mm_storeu_si128((__m128i*) (state+4), STATE1);
/* Repack into uint64_t. */
state64[0] = state[0];
state64[1] = state[1];
state64[2] = state[2];
state64[3] = state[3];
state64[4] = state[4];
state64[5] = state[5];
state64[6] = state[6];
state64[7] = state[7];
}
#endif /* CPU_X86_SHA256_ACCELERATION */
static __inline void sha256_transform(SUM_CONTEXT *ctx, const uint8_t *data)
{
#ifdef CPU_X86_SHA256_ACCELERATION
if (cpu_has_sha256_accel)
{
/* SHA-256 acceleration using intrinsics */
sha256_transform_x86(ctx->state, data, SHA256_BLOCKSIZE);
}
else
#endif
{
/* Portable C/C++ implementation */
sha256_transform_cc(ctx, data);
}
}
/* /*
* Transform the message X which consists of 16 64-bit-words (SHA-512) * Transform the message X which consists of 16 64-bit-words (SHA-512)
* This is an algorithm that *REALLY* benefits from being executed as 64-bit * This is an algorithm that *REALLY* benefits from being executed as 64-bit
@ -618,12 +1102,29 @@ static void sha1_write(SUM_CONTEXT *ctx, const uint8_t *buf, size_t len)
len -= num; len -= num;
} }
/* Process data in blocksize chunks */ #ifdef CPU_X86_SHA1_ACCELERATION
while (len >= SHA1_BLOCKSIZE) { if (cpu_has_sha1_accel)
PREFETCH64(buf + SHA1_BLOCKSIZE); {
sha1_transform(ctx, buf); /* Process all full blocks at once */
buf += SHA1_BLOCKSIZE; if (len >= SHA1_BLOCKSIZE) {
len -= SHA1_BLOCKSIZE; /* Calculate full blocks, in bytes */
num = (len / SHA1_BLOCKSIZE) * SHA1_BLOCKSIZE;
/* SHA-1 acceleration using intrinsics */
sha1_transform_x86(ctx->state, buf, num);
buf += num;
len -= num;
}
}
else
#endif
{
/* Process data in blocksize chunks */
while (len >= SHA1_BLOCKSIZE) {
PREFETCH64(buf + SHA1_BLOCKSIZE);
sha1_transform(ctx, buf);
buf += SHA1_BLOCKSIZE;
len -= SHA1_BLOCKSIZE;
}
} }
/* Handle any remaining bytes of data. */ /* Handle any remaining bytes of data. */
@ -653,12 +1154,29 @@ static void sha256_write(SUM_CONTEXT *ctx, const uint8_t *buf, size_t len)
len -= num; len -= num;
} }
/* Process data in blocksize chunks */ #ifdef CPU_X86_SHA256_ACCELERATION
while (len >= SHA256_BLOCKSIZE) { if (cpu_has_sha256_accel)
PREFETCH64(buf + SHA256_BLOCKSIZE); {
sha256_transform(ctx, buf); /* Process all full blocks at once */
buf += SHA256_BLOCKSIZE; if (len >= SHA256_BLOCKSIZE) {
len -= SHA256_BLOCKSIZE; /* Calculate full blocks, in bytes */
num = (len / SHA256_BLOCKSIZE) * SHA256_BLOCKSIZE;
/* SHA-256 acceleration using intrinsics */
sha256_transform_x86(ctx->state, buf, num);
buf += num;
len -= num;
}
}
else
#endif
{
/* Process data in blocksize chunks */
while (len >= SHA256_BLOCKSIZE) {
PREFETCH64(buf + SHA256_BLOCKSIZE);
sha256_transform(ctx, buf);
buf += SHA256_BLOCKSIZE;
len -= SHA256_BLOCKSIZE;
}
} }
/* Handle any remaining bytes of data. */ /* Handle any remaining bytes of data. */
@ -1280,7 +1798,7 @@ BOOL IsFileInDB(const char* path)
return FALSE; return FALSE;
} }
#if defined(_DEBUG) #if defined(_DEBUG) || defined(TEST) || defined(ALPHA)
/* Convert a lowercase hex string to binary. Returned value must be freed */ /* Convert a lowercase hex string to binary. Returned value must be freed */
uint8_t* to_bin(const char* str) uint8_t* to_bin(const char* str)
{ {
@ -1303,7 +1821,7 @@ uint8_t* to_bin(const char* str)
return ret; return ret;
} }
const char* test_msg = "Did you ever hear the tragedy of Darth Plagueis The Wise? " const char test_msg[] = "Did you ever hear the tragedy of Darth Plagueis The Wise? "
"I thought not. It's not a story the Jedi would tell you. It's a Sith legend. " "I thought not. It's not a story the Jedi would tell you. It's a Sith legend. "
"Darth Plagueis was a Dark Lord of the Sith, so powerful and so wise he could " "Darth Plagueis was a Dark Lord of the Sith, so powerful and so wise he could "
"use the Force to influence the midichlorians to create life... He had such a " "use the Force to influence the midichlorians to create life... He had such a "
@ -1356,6 +1874,10 @@ int TestChecksum(void)
if (msg == NULL) if (msg == NULL)
return -1; return -1;
/* Display accelerations available */
uprintf("SHA1 acceleration: %s", (cpu_has_sha1_accel ? "TRUE" : "FALSE"));
uprintf("SHA256 acceleration: %s", (cpu_has_sha256_accel ? "TRUE" : "FALSE"));
for (j = 0; j < CHECKSUM_MAX; j++) { for (j = 0; j < CHECKSUM_MAX; j++) {
size_t copy_msg_len[4]; size_t copy_msg_len[4];
copy_msg_len[0] = 0; copy_msg_len[0] = 0;

123
src/cpu.c Normal file
View file

@ -0,0 +1,123 @@
/*
* Rufus: The Reliable USB Formatting Utility
* CPU features detection
* Copyright © 2022 Pete Batard <pete@akeo.ie>
* Copyright © 2022 Jeffrey Walton <noloader@gmail.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "cpu.h"
#if (defined(CPU_X86_SHA1_ACCELERATION) || defined(CPU_X86_SHA256_ACCELERATION))
#if defined(RUFUS_MSC_VERSION)
#include <intrin.h>
#elif (defined(RUFUS_GCC_VERSION) || defined(RUFUS_CLANG_VERSION))
#include <x86Intrin.h>
#elif defined(RUFUS_INTEL_VERSION)
#include <immintrin.h>
#endif
#endif
BOOL cpu_has_sha1_accel = FALSE;
BOOL cpu_has_sha256_accel = FALSE;
/*
* Three elements must be in place to make a meaningful call to the
* DetectSHA###Acceleration() calls. First, the compiler must support
* the underlying intrinsics. Second, the platform must provide a
* cpuid() function. And third, the cpu must actually support the SHA-1
* and SHA-256 instructions.
*
* If any of the conditions are not met, then DetectSHA###Acceleration()
* returns FALSE.
*/
/*
* Detect if the processor supports SHA-1 acceleration. We only check for
* the three ISAs we need - SSSE3, SSE4.1 and SHA. We don't check for OS
* support or XSAVE because that's been enabled since Windows 2000.
*/
BOOL DetectSHA1Acceleration(void)
{
#if defined(CPU_X86_SHA1_ACCELERATION)
#if defined(_MSC_VER)
uint32_t regs0[4] = {0,0,0,0}, regs1[4] = {0,0,0,0}, regs7[4] = {0,0,0,0};
const uint32_t SSSE3_BIT = 1u << 9; /* Function 1, Bit 9 of ECX */
const uint32_t SSE41_BIT = 1u << 19; /* Function 1, Bit 19 of ECX */
const uint32_t SHA_BIT = 1u << 29; /* Function 7, Bit 29 of EBX */
__cpuid(regs0, 0);
const uint32_t highest = regs0[0]; /*EAX*/
if (highest >= 0x01) {
__cpuidex(regs1, 1, 0);
}
if (highest >= 0x07) {
__cpuidex(regs7, 7, 0);
}
return (regs1[2] /*ECX*/ & SSSE3_BIT) && (regs1[2] /*ECX*/ & SSE41_BIT) && (regs7[1] /*EBX*/ & SHA_BIT) ? TRUE : FALSE;
#elif defined(__GNUC__) || defined(__clang__)
/* __builtin_cpu_supports available in GCC 4.8.1 and above */
return __builtin_cpu_supports("ssse3") && __builtin_cpu_supports("sse4.1") && __builtin_cpu_supports("sha") ? TRUE : FALSE;
#elif defined(__INTEL_COMPILER)
/* https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_may_i_use_cpu_feature */
return _may_i_use_cpu_feature(_FEATURE_SSSE3|_FEATURE_SSE4_1|_FEATURE_SHA) ? TRUE : FALSE;
#else
return FALSE;
#endif
#else
return FALSE;
#endif
}
/*
* Detect if the processor supports SHA-256 acceleration. We only check for
* the three ISAs we need - SSSE3, SSE4.1 and SHA. We don't check for OS
* support or XSAVE because that's been enabled since Windows 2000.
*/
BOOL DetectSHA256Acceleration(void)
{
#if defined(CPU_X86_SHA256_ACCELERATION)
#if defined(_MSC_VER)
uint32_t regs0[4] = {0,0,0,0}, regs1[4] = {0,0,0,0}, regs7[4] = {0,0,0,0};
const uint32_t SSSE3_BIT = 1u << 9; /* Function 1, Bit 9 of ECX */
const uint32_t SSE41_BIT = 1u << 19; /* Function 1, Bit 19 of ECX */
const uint32_t SHA_BIT = 1u << 29; /* Function 7, Bit 29 of EBX */
__cpuid(regs0, 0);
const uint32_t highest = regs0[0]; /*EAX*/
if (highest >= 0x01) {
__cpuidex(regs1, 1, 0);
}
if (highest >= 0x07) {
__cpuidex(regs7, 7, 0);
}
return (regs1[2] /*ECX*/ & SSSE3_BIT) && (regs1[2] /*ECX*/ & SSE41_BIT) && (regs7[1] /*EBX*/ & SHA_BIT) ? TRUE : FALSE;
#elif defined(__GNUC__) || defined(__clang__)
/* __builtin_cpu_supports available in GCC 4.8.1 and above */
return __builtin_cpu_supports("ssse3") && __builtin_cpu_supports("sse4.1") && __builtin_cpu_supports("sha") ? TRUE : FALSE;
#elif defined(__INTEL_COMPILER)
/* https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_may_i_use_cpu_feature */
return _may_i_use_cpu_feature(_FEATURE_SSSE3|_FEATURE_SSE4_1|_FEATURE_SHA) ? TRUE : FALSE;
#else
return FALSE;
#endif
#else
return FALSE;
#endif
}

79
src/cpu.h Normal file
View file

@ -0,0 +1,79 @@
/*
* Rufus: The Reliable USB Formatting Utility
* CPU features detection
* Copyright © 2022 Pete Batard <pete@akeo.ie>
* Copyright © 2022 Jeffrey Walton <noloader@gmail.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* Primarily added to support SHA instructions on x86 machines.
* SHA acceleration is becoming as ubiquitous as AES acceleration.
* SHA support was introduced in Intel Goldmont architecture, like
* Celeron J3455 and Pentium J4205. The instructions are now present
* in AMD Ryzen 3 (Zen architecture) and above, and Intel Core
* 10th-gen processors (Ice Lake), 11th-gen processors (Rocket Lake)
* and above.
*
* Typical benchmarks for x86 SHA acceleration is about a 6x to 10x
* speedup over a C/C++ implementation. The rough measurements are
* 1.0 to 1.8 cpb for SHA-1, and 1.5 to 2.5 cpb for SHA-256. On a
* Celeron J3455, that's 1.1 GB/s for SHA-1 and 800 MB/s for SHA-256.
* On a 10th-gen Core i5, that's about 1.65 GB/s for SHA-1 and about
* 1.3 GB/s for SHA-256.
*/
#include "rufus.h"
#pragma once
#ifdef _MSC_VER
#define RUFUS_MSC_VERSION (_MSC_VER)
#if (RUFUS_MSC_VERSION < 1900)
#error "Your compiler is too old to build this application"
#endif
#endif
#if defined(__GNUC__)
#define RUFUS_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
#if (RUFUS_GCC_VERSION < 40900)
#error "Your compiler is too old to build this application"
#endif
#endif
#ifdef __INTEL_COMPILER
#define RUFUS_INTEL_VERSION (__INTEL_COMPILER)
#if (RUFUS_INTEL_VERSION < 1600)
#error "Your compiler is too old to build this application"
#endif
#endif
#if defined(__clang__)
#define RUFUS_CLANG_VERSION (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__)
#if (RUFUS_CLANG_VERSION < 30400)
#error "Your compiler is too old to build this application"
#endif
#endif
#if (defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__i386) || \
defined(_X86_) || defined(__I86__) || defined(__x86_64__))
#define CPU_X86_SHA1_ACCELERATION 1
#define CPU_X86_SHA256_ACCELERATION 1
#endif
extern BOOL cpu_has_sha1_accel, cpu_has_sha256_accel;
extern BOOL DetectSHA1Acceleration(void);
extern BOOL DetectSHA256Acceleration(void);

View file

@ -428,7 +428,7 @@ uint64_t DownloadToFileOrBuffer(const char* url, const char* file, BYTE** buffer
uprintf("Unable to retrieve file length: %s", WinInetErrorString()); uprintf("Unable to retrieve file length: %s", WinInetErrorString());
goto out; goto out;
} }
total_size = (uint64_t)atoll(strsize); total_size = strtoull(strsize, NULL, 10);
if (hProgressDialog != NULL) { if (hProgressDialog != NULL) {
char msg[128]; char msg[128];
uprintf("File length: %s", SizeToHumanReadable(total_size, FALSE, FALSE)); uprintf("File length: %s", SizeToHumanReadable(total_size, FALSE, FALSE));

View file

@ -47,6 +47,7 @@
#include "ui.h" #include "ui.h"
#include "re.h" #include "re.h"
#include "cpu.h"
#include "vhd.h" #include "vhd.h"
#include "wue.h" #include "wue.h"
#include "drive.h" #include "drive.h"
@ -2478,7 +2479,7 @@ static INT_PTR CALLBACK MainCallback(HWND hDlg, UINT message, WPARAM wParam, LPA
break; break;
} }
GetWindowTextA(GetDlgItem(hDlg, IDC_PERSISTENCE_SIZE), tmp, sizeof(tmp)); GetWindowTextA(GetDlgItem(hDlg, IDC_PERSISTENCE_SIZE), tmp, sizeof(tmp));
lPos = atol(tmp); lPos = strtol(tmp, NULL, 10);
persistence_unit_selection = ComboBox_GetCurSel(GetDlgItem(hDlg, IDC_PERSISTENCE_UNITS)); persistence_unit_selection = ComboBox_GetCurSel(GetDlgItem(hDlg, IDC_PERSISTENCE_UNITS));
persistence_size = lPos * MB; persistence_size = lPos * MB;
for (i = 0; i < persistence_unit_selection; i++) for (i = 0; i < persistence_unit_selection; i++)
@ -2520,7 +2521,7 @@ static INT_PTR CALLBACK MainCallback(HWND hDlg, UINT message, WPARAM wParam, LPA
if (ComboBox_GetCurSel(GetDlgItem(hDlg, IDC_PERSISTENCE_UNITS)) == persistence_unit_selection) if (ComboBox_GetCurSel(GetDlgItem(hDlg, IDC_PERSISTENCE_UNITS)) == persistence_unit_selection)
break; break;
GetWindowTextA(GetDlgItem(hMainDialog, IDC_PERSISTENCE_SIZE), tmp, sizeof(tmp)); GetWindowTextA(GetDlgItem(hMainDialog, IDC_PERSISTENCE_SIZE), tmp, sizeof(tmp));
persistence_size = atol(tmp) * MB; persistence_size = strtol(tmp, NULL, 10) * MB;
for (i = 0; i < persistence_unit_selection; i++) for (i = 0; i < persistence_unit_selection; i++)
persistence_size *= 1024; persistence_size *= 1024;
persistence_unit_selection = ComboBox_GetCurSel(GetDlgItem(hDlg, IDC_PERSISTENCE_UNITS)); persistence_unit_selection = ComboBox_GetCurSel(GetDlgItem(hDlg, IDC_PERSISTENCE_UNITS));
@ -3688,6 +3689,10 @@ skip_args_processing:
uprintf("Failed to enable AutoMount"); uprintf("Failed to enable AutoMount");
} }
// Detect CPU acceleration for SHA-1/SHA-256
cpu_has_sha1_accel = DetectSHA1Acceleration();
cpu_has_sha256_accel = DetectSHA256Acceleration();
relaunch: relaunch:
ubprintf("Localization set to '%s'", selected_locale->txt[0]); ubprintf("Localization set to '%s'", selected_locale->txt[0]);
right_to_left_mode = ((selected_locale->ctrl_id) & LOC_RIGHT_TO_LEFT); right_to_left_mode = ((selected_locale->ctrl_id) & LOC_RIGHT_TO_LEFT);
@ -3774,7 +3779,8 @@ relaunch:
SendMessage(hMainDialog, WM_COMMAND, IDC_LOG, 0); SendMessage(hMainDialog, WM_COMMAND, IDC_LOG, 0);
continue; continue;
} }
#if defined(_DEBUG) || defined(TEST) #if defined(_DEBUG) || defined(TEST) || defined(ALPHA)
extern int TestChecksum(void);
// Ctrl-T => Alternate Test mode that doesn't require a full rebuild // Ctrl-T => Alternate Test mode that doesn't require a full rebuild
if ((ctrl_without_focus || ((GetKeyState(VK_CONTROL) & 0x8000) && (msg.message == WM_KEYDOWN))) if ((ctrl_without_focus || ((GetKeyState(VK_CONTROL) & 0x8000) && (msg.message == WM_KEYDOWN)))
&& (msg.wParam == 'T')) { && (msg.wParam == 'T')) {

View file

@ -33,7 +33,7 @@ LANGUAGE LANG_NEUTRAL, SUBLANG_NEUTRAL
IDD_DIALOG DIALOGEX 12, 12, 232, 326 IDD_DIALOG DIALOGEX 12, 12, 232, 326
STYLE DS_SETFONT | DS_MODALFRAME | DS_CENTER | WS_MINIMIZEBOX | WS_POPUP | WS_CAPTION | WS_SYSMENU STYLE DS_SETFONT | DS_MODALFRAME | DS_CENTER | WS_MINIMIZEBOX | WS_POPUP | WS_CAPTION | WS_SYSMENU
EXSTYLE WS_EX_ACCEPTFILES EXSTYLE WS_EX_ACCEPTFILES
CAPTION "Rufus 3.21.1949" CAPTION "Rufus 3.22.1952"
FONT 9, "Segoe UI Symbol", 400, 0, 0x0 FONT 9, "Segoe UI Symbol", 400, 0, 0x0
BEGIN BEGIN
LTEXT "Drive Properties",IDS_DRIVE_PROPERTIES_TXT,8,6,53,12,NOT WS_GROUP LTEXT "Drive Properties",IDS_DRIVE_PROPERTIES_TXT,8,6,53,12,NOT WS_GROUP
@ -396,8 +396,8 @@ END
// //
VS_VERSION_INFO VERSIONINFO VS_VERSION_INFO VERSIONINFO
FILEVERSION 3,21,1949,0 FILEVERSION 3,22,1952,0
PRODUCTVERSION 3,21,1949,0 PRODUCTVERSION 3,22,1952,0
FILEFLAGSMASK 0x3fL FILEFLAGSMASK 0x3fL
#ifdef _DEBUG #ifdef _DEBUG
FILEFLAGS 0x1L FILEFLAGS 0x1L
@ -415,13 +415,13 @@ BEGIN
VALUE "Comments", "https://rufus.ie" VALUE "Comments", "https://rufus.ie"
VALUE "CompanyName", "Akeo Consulting" VALUE "CompanyName", "Akeo Consulting"
VALUE "FileDescription", "Rufus" VALUE "FileDescription", "Rufus"
VALUE "FileVersion", "3.21.1949" VALUE "FileVersion", "3.22.1952"
VALUE "InternalName", "Rufus" VALUE "InternalName", "Rufus"
VALUE "LegalCopyright", "© 2011-2022 Pete Batard (GPL v3)" VALUE "LegalCopyright", "© 2011-2022 Pete Batard (GPL v3)"
VALUE "LegalTrademarks", "https://www.gnu.org/licenses/gpl-3.0.html" VALUE "LegalTrademarks", "https://www.gnu.org/licenses/gpl-3.0.html"
VALUE "OriginalFilename", "rufus-3.21.exe" VALUE "OriginalFilename", "rufus-3.22.exe"
VALUE "ProductName", "Rufus" VALUE "ProductName", "Rufus"
VALUE "ProductVersion", "3.21.1949" VALUE "ProductVersion", "3.22.1952"
END END
END END
BLOCK "VarFileInfo" BLOCK "VarFileInfo"