mirror of
				https://github.com/pbatard/rufus.git
				synced 2024-08-14 23:57:05 +00:00 
			
		
		
		
	Compare commits
	
		
			3 commits
		
	
	
		
			ddcbe8ed81
			...
			fab095c043
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | fab095c043 | ||
|  | 2bc1428975 | ||
|  | 36f4716afd | 
					 13 changed files with 787 additions and 43 deletions
				
			
		
							
								
								
									
										2
									
								
								.github/workflows/lock.yml
									
										
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.github/workflows/lock.yml
									
										
									
									
										vendored
									
									
								
							|  | @ -9,7 +9,7 @@ jobs: | ||||||
|   lock: |   lock: | ||||||
|     runs-on: ubuntu-latest |     runs-on: ubuntu-latest | ||||||
|     steps: |     steps: | ||||||
|       - uses: dessant/lock-threads@v3 |       - uses: dessant/lock-threads@v4 | ||||||
|         with: |         with: | ||||||
|           github-token: ${{ github.token }} |           github-token: ${{ github.token }} | ||||||
|           issue-inactive-days: '90' |           issue-inactive-days: '90' | ||||||
|  |  | ||||||
|  | @ -349,6 +349,7 @@ | ||||||
|   </ItemDefinitionGroup> |   </ItemDefinitionGroup> | ||||||
|   <ItemGroup> |   <ItemGroup> | ||||||
|     <ClCompile Include="..\src\badblocks.c" /> |     <ClCompile Include="..\src\badblocks.c" /> | ||||||
|  |     <ClCompile Include="..\src\cpu.c" /> | ||||||
|     <ClCompile Include="..\src\dos_locale.c" /> |     <ClCompile Include="..\src\dos_locale.c" /> | ||||||
|     <ClCompile Include="..\src\drive.c" /> |     <ClCompile Include="..\src\drive.c" /> | ||||||
|     <ClCompile Include="..\src\format.c" /> |     <ClCompile Include="..\src\format.c" /> | ||||||
|  | @ -380,6 +381,7 @@ | ||||||
|     <ClInclude Include="..\res\grub\grub_version.h" /> |     <ClInclude Include="..\res\grub\grub_version.h" /> | ||||||
|     <ClInclude Include="..\src\badblocks.h" /> |     <ClInclude Include="..\src\badblocks.h" /> | ||||||
|     <ClInclude Include="..\src\bled\bled.h" /> |     <ClInclude Include="..\src\bled\bled.h" /> | ||||||
|  |     <ClInclude Include="..\src\cpu.h" /> | ||||||
|     <ClInclude Include="..\src\drive.h" /> |     <ClInclude Include="..\src\drive.h" /> | ||||||
|     <ClInclude Include="..\src\format.h" /> |     <ClInclude Include="..\src\format.h" /> | ||||||
|     <ClInclude Include="..\src\gpt_types.h" /> |     <ClInclude Include="..\src\gpt_types.h" /> | ||||||
|  |  | ||||||
|  | @ -93,6 +93,9 @@ | ||||||
|     <ClCompile Include="..\src\wue.c"> |     <ClCompile Include="..\src\wue.c"> | ||||||
|       <Filter>Source Files</Filter> |       <Filter>Source Files</Filter> | ||||||
|     </ClCompile> |     </ClCompile> | ||||||
|  |     <ClCompile Include="..\src\cpu.c"> | ||||||
|  |       <Filter>Source Files</Filter> | ||||||
|  |     </ClCompile> | ||||||
|   </ItemGroup> |   </ItemGroup> | ||||||
|   <ItemGroup> |   <ItemGroup> | ||||||
|     <ClInclude Include="..\src\rufus.h"> |     <ClInclude Include="..\src\rufus.h"> | ||||||
|  | @ -191,6 +194,9 @@ | ||||||
|     <ClInclude Include="..\src\vhd.h"> |     <ClInclude Include="..\src\vhd.h"> | ||||||
|       <Filter>Header Files</Filter> |       <Filter>Header Files</Filter> | ||||||
|     </ClInclude> |     </ClInclude> | ||||||
|  |     <ClInclude Include="..\src\cpu.h"> | ||||||
|  |       <Filter>Header Files</Filter> | ||||||
|  |     </ClInclude> | ||||||
|   </ItemGroup> |   </ItemGroup> | ||||||
|   <ItemGroup> |   <ItemGroup> | ||||||
|     <None Include="..\res\rufus.ico"> |     <None Include="..\res\rufus.ico"> | ||||||
|  |  | ||||||
							
								
								
									
										20
									
								
								configure
									
										
									
									
										vendored
									
									
								
							
							
						
						
									
										20
									
								
								configure
									
										
									
									
										vendored
									
									
								
							|  | @ -1,6 +1,6 @@ | ||||||
| #! /bin/sh | #! /bin/sh | ||||||
| # Guess values for system-dependent variables and create Makefiles. | # Guess values for system-dependent variables and create Makefiles. | ||||||
| # Generated by GNU Autoconf 2.71 for rufus 3.21. | # Generated by GNU Autoconf 2.71 for rufus 3.22. | ||||||
| # | # | ||||||
| # Report bugs to <https://github.com/pbatard/rufus/issues>. | # Report bugs to <https://github.com/pbatard/rufus/issues>. | ||||||
| # | # | ||||||
|  | @ -611,8 +611,8 @@ MAKEFLAGS= | ||||||
| # Identity of this package. | # Identity of this package. | ||||||
| PACKAGE_NAME='rufus' | PACKAGE_NAME='rufus' | ||||||
| PACKAGE_TARNAME='rufus' | PACKAGE_TARNAME='rufus' | ||||||
| PACKAGE_VERSION='3.21' | PACKAGE_VERSION='3.22' | ||||||
| PACKAGE_STRING='rufus 3.21' | PACKAGE_STRING='rufus 3.22' | ||||||
| PACKAGE_BUGREPORT='https://github.com/pbatard/rufus/issues' | PACKAGE_BUGREPORT='https://github.com/pbatard/rufus/issues' | ||||||
| PACKAGE_URL='https://rufus.ie' | PACKAGE_URL='https://rufus.ie' | ||||||
| 
 | 
 | ||||||
|  | @ -1269,7 +1269,7 @@ if test "$ac_init_help" = "long"; then | ||||||
|   # Omit some internal or obsolete options to make the list less imposing. |   # Omit some internal or obsolete options to make the list less imposing. | ||||||
|   # This message is too long to be a string in the A/UX 3.1 sh. |   # This message is too long to be a string in the A/UX 3.1 sh. | ||||||
|   cat <<_ACEOF |   cat <<_ACEOF | ||||||
| \`configure' configures rufus 3.21 to adapt to many kinds of systems. | \`configure' configures rufus 3.22 to adapt to many kinds of systems. | ||||||
| 
 | 
 | ||||||
| Usage: $0 [OPTION]... [VAR=VALUE]... | Usage: $0 [OPTION]... [VAR=VALUE]... | ||||||
| 
 | 
 | ||||||
|  | @ -1336,7 +1336,7 @@ fi | ||||||
| 
 | 
 | ||||||
| if test -n "$ac_init_help"; then | if test -n "$ac_init_help"; then | ||||||
|   case $ac_init_help in |   case $ac_init_help in | ||||||
|      short | recursive ) echo "Configuration of rufus 3.21:";; |      short | recursive ) echo "Configuration of rufus 3.22:";; | ||||||
|    esac |    esac | ||||||
|   cat <<\_ACEOF |   cat <<\_ACEOF | ||||||
| 
 | 
 | ||||||
|  | @ -1428,7 +1428,7 @@ fi | ||||||
| test -n "$ac_init_help" && exit $ac_status | test -n "$ac_init_help" && exit $ac_status | ||||||
| if $ac_init_version; then | if $ac_init_version; then | ||||||
|   cat <<\_ACEOF |   cat <<\_ACEOF | ||||||
| rufus configure 3.21 | rufus configure 3.22 | ||||||
| generated by GNU Autoconf 2.71 | generated by GNU Autoconf 2.71 | ||||||
| 
 | 
 | ||||||
| Copyright (C) 2021 Free Software Foundation, Inc. | Copyright (C) 2021 Free Software Foundation, Inc. | ||||||
|  | @ -1504,7 +1504,7 @@ cat >config.log <<_ACEOF | ||||||
| This file contains any messages produced by compilers while | This file contains any messages produced by compilers while | ||||||
| running configure, to aid debugging if configure makes a mistake. | running configure, to aid debugging if configure makes a mistake. | ||||||
| 
 | 
 | ||||||
| It was created by rufus $as_me 3.21, which was | It was created by rufus $as_me 3.22, which was | ||||||
| generated by GNU Autoconf 2.71.  Invocation command line was | generated by GNU Autoconf 2.71.  Invocation command line was | ||||||
| 
 | 
 | ||||||
|   $ $0$ac_configure_args_raw |   $ $0$ac_configure_args_raw | ||||||
|  | @ -2768,7 +2768,7 @@ fi | ||||||
| 
 | 
 | ||||||
| # Define the identity of the package. | # Define the identity of the package. | ||||||
|  PACKAGE='rufus' |  PACKAGE='rufus' | ||||||
|  VERSION='3.21' |  VERSION='3.22' | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h | printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h | ||||||
|  | @ -5310,7 +5310,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 | ||||||
| # report actual input values of CONFIG_FILES etc. instead of their | # report actual input values of CONFIG_FILES etc. instead of their | ||||||
| # values after options handling. | # values after options handling. | ||||||
| ac_log=" | ac_log=" | ||||||
| This file was extended by rufus $as_me 3.21, which was | This file was extended by rufus $as_me 3.22, which was | ||||||
| generated by GNU Autoconf 2.71.  Invocation command line was | generated by GNU Autoconf 2.71.  Invocation command line was | ||||||
| 
 | 
 | ||||||
|   CONFIG_FILES    = $CONFIG_FILES |   CONFIG_FILES    = $CONFIG_FILES | ||||||
|  | @ -5366,7 +5366,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\ | ||||||
| cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 | cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 | ||||||
| ac_cs_config='$ac_cs_config_escaped' | ac_cs_config='$ac_cs_config_escaped' | ||||||
| ac_cs_version="\\ | ac_cs_version="\\ | ||||||
| rufus config.status 3.21 | rufus config.status 3.22 | ||||||
| configured by $0, generated by GNU Autoconf 2.71, | configured by $0, generated by GNU Autoconf 2.71, | ||||||
|   with options \\"\$ac_cs_config\\" |   with options \\"\$ac_cs_config\\" | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -1,4 +1,4 @@ | ||||||
| AC_INIT([rufus], [3.21], [https://github.com/pbatard/rufus/issues], [rufus], [https://rufus.ie]) | AC_INIT([rufus], [3.22], [https://github.com/pbatard/rufus/issues], [rufus], [https://rufus.ie]) | ||||||
| AM_INIT_AUTOMAKE([-Wno-portability foreign no-dist no-dependencies]) | AM_INIT_AUTOMAKE([-Wno-portability foreign no-dist no-dependencies]) | ||||||
| AC_CONFIG_SRCDIR([src/rufus.c]) | AC_CONFIG_SRCDIR([src/rufus.c]) | ||||||
| AC_CONFIG_MACRO_DIR([m4]) | AC_CONFIG_MACRO_DIR([m4]) | ||||||
|  |  | ||||||
|  | @ -14,8 +14,8 @@ AM_V_WINDRES   = $(AM_V_WINDRES_$(V)) | ||||||
| %_rc.o: %.rc ../res/loc/embedded.loc | %_rc.o: %.rc ../res/loc/embedded.loc | ||||||
| 	$(AM_V_WINDRES) $(AM_RCFLAGS) -i $< -o $@ | 	$(AM_V_WINDRES) $(AM_RCFLAGS) -i $< -o $@ | ||||||
| 
 | 
 | ||||||
| rufus_SOURCES = badblocks.c checksum.c dev.c dos.c dos_locale.c drive.c format.c format_ext.c format_fat32.c icon.c iso.c localization.c \ | rufus_SOURCES = badblocks.c checksum.c cpu.c dev.c dos.c dos_locale.c drive.c format.c format_ext.c format_fat32.c icon.c iso.c \ | ||||||
| 	net.c parser.c pki.c process.c re.c rufus.c smart.c stdfn.c stdio.c stdlg.c syslinux.c ui.c vhd.c wue.c | 	localization.c net.c parser.c pki.c process.c re.c rufus.c smart.c stdfn.c stdio.c stdlg.c syslinux.c ui.c vhd.c wue.c | ||||||
| rufus_CFLAGS = -I$(srcdir)/ms-sys/inc -I$(srcdir)/syslinux/libfat -I$(srcdir)/syslinux/libinstaller -I$(srcdir)/syslinux/win -I$(srcdir)/libcdio $(AM_CFLAGS) \ | rufus_CFLAGS = -I$(srcdir)/ms-sys/inc -I$(srcdir)/syslinux/libfat -I$(srcdir)/syslinux/libinstaller -I$(srcdir)/syslinux/win -I$(srcdir)/libcdio $(AM_CFLAGS) \ | ||||||
| 	-DEXT2_FLAT_INCLUDES=0 -DSOLUTION=rufus | 	-DEXT2_FLAT_INCLUDES=0 -DSOLUTION=rufus | ||||||
| rufus_LDFLAGS = $(AM_LDFLAGS) -mwindows -L ../.mingw | rufus_LDFLAGS = $(AM_LDFLAGS) -mwindows -L ../.mingw | ||||||
|  |  | ||||||
|  | @ -88,7 +88,7 @@ CONFIG_CLEAN_FILES = | ||||||
| CONFIG_CLEAN_VPATH_FILES = | CONFIG_CLEAN_VPATH_FILES = | ||||||
| PROGRAMS = $(noinst_PROGRAMS) | PROGRAMS = $(noinst_PROGRAMS) | ||||||
| am_rufus_OBJECTS = rufus-badblocks.$(OBJEXT) rufus-checksum.$(OBJEXT) \
 | am_rufus_OBJECTS = rufus-badblocks.$(OBJEXT) rufus-checksum.$(OBJEXT) \
 | ||||||
| 	rufus-dev.$(OBJEXT) rufus-dos.$(OBJEXT) \
 | 	rufus-cpu.$(OBJEXT) rufus-dev.$(OBJEXT) rufus-dos.$(OBJEXT) \
 | ||||||
| 	rufus-dos_locale.$(OBJEXT) rufus-drive.$(OBJEXT) \
 | 	rufus-dos_locale.$(OBJEXT) rufus-drive.$(OBJEXT) \
 | ||||||
| 	rufus-format.$(OBJEXT) rufus-format_ext.$(OBJEXT) \
 | 	rufus-format.$(OBJEXT) rufus-format_ext.$(OBJEXT) \
 | ||||||
| 	rufus-format_fat32.$(OBJEXT) rufus-icon.$(OBJEXT) \
 | 	rufus-format_fat32.$(OBJEXT) rufus-icon.$(OBJEXT) \
 | ||||||
|  | @ -281,8 +281,8 @@ AM_V_WINDRES_0 = @echo "  RC     $@";$(WINDRES) | ||||||
| AM_V_WINDRES_1 = $(WINDRES) | AM_V_WINDRES_1 = $(WINDRES) | ||||||
| AM_V_WINDRES_ = $(AM_V_WINDRES_$(AM_DEFAULT_VERBOSITY)) | AM_V_WINDRES_ = $(AM_V_WINDRES_$(AM_DEFAULT_VERBOSITY)) | ||||||
| AM_V_WINDRES = $(AM_V_WINDRES_$(V)) | AM_V_WINDRES = $(AM_V_WINDRES_$(V)) | ||||||
| rufus_SOURCES = badblocks.c checksum.c dev.c dos.c dos_locale.c drive.c format.c format_ext.c format_fat32.c icon.c iso.c localization.c \
 | rufus_SOURCES = badblocks.c checksum.c cpu.c dev.c dos.c dos_locale.c drive.c format.c format_ext.c format_fat32.c icon.c iso.c \
 | ||||||
| 	net.c parser.c pki.c process.c re.c rufus.c smart.c stdfn.c stdio.c stdlg.c syslinux.c ui.c vhd.c wue.c | 	localization.c net.c parser.c pki.c process.c re.c rufus.c smart.c stdfn.c stdio.c stdlg.c syslinux.c ui.c vhd.c wue.c | ||||||
| 
 | 
 | ||||||
| rufus_CFLAGS = -I$(srcdir)/ms-sys/inc -I$(srcdir)/syslinux/libfat -I$(srcdir)/syslinux/libinstaller -I$(srcdir)/syslinux/win -I$(srcdir)/libcdio $(AM_CFLAGS) \
 | rufus_CFLAGS = -I$(srcdir)/ms-sys/inc -I$(srcdir)/syslinux/libfat -I$(srcdir)/syslinux/libinstaller -I$(srcdir)/syslinux/win -I$(srcdir)/libcdio $(AM_CFLAGS) \
 | ||||||
| 	-DEXT2_FLAT_INCLUDES=0 -DSOLUTION=rufus | 	-DEXT2_FLAT_INCLUDES=0 -DSOLUTION=rufus | ||||||
|  | @ -357,6 +357,12 @@ rufus-checksum.o: checksum.c | ||||||
| rufus-checksum.obj: checksum.c | rufus-checksum.obj: checksum.c | ||||||
| 	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rufus_CFLAGS) $(CFLAGS) -c -o rufus-checksum.obj `if test -f 'checksum.c'; then $(CYGPATH_W) 'checksum.c'; else $(CYGPATH_W) '$(srcdir)/checksum.c'; fi` | 	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rufus_CFLAGS) $(CFLAGS) -c -o rufus-checksum.obj `if test -f 'checksum.c'; then $(CYGPATH_W) 'checksum.c'; else $(CYGPATH_W) '$(srcdir)/checksum.c'; fi` | ||||||
| 
 | 
 | ||||||
|  | rufus-cpu.o: cpu.c | ||||||
|  | 	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rufus_CFLAGS) $(CFLAGS) -c -o rufus-cpu.o `test -f 'cpu.c' || echo '$(srcdir)/'`cpu.c | ||||||
|  | 
 | ||||||
|  | rufus-cpu.obj: cpu.c | ||||||
|  | 	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rufus_CFLAGS) $(CFLAGS) -c -o rufus-cpu.obj `if test -f 'cpu.c'; then $(CYGPATH_W) 'cpu.c'; else $(CYGPATH_W) '$(srcdir)/cpu.c'; fi` | ||||||
|  | 
 | ||||||
| rufus-dev.o: dev.c | rufus-dev.o: dev.c | ||||||
| 	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rufus_CFLAGS) $(CFLAGS) -c -o rufus-dev.o `test -f 'dev.c' || echo '$(srcdir)/'`dev.c | 	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(rufus_CFLAGS) $(CFLAGS) -c -o rufus-dev.o `test -f 'dev.c' || echo '$(srcdir)/'`dev.c | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
							
								
								
									
										530
									
								
								src/checksum.c
									
										
									
									
									
								
							
							
						
						
									
										530
									
								
								src/checksum.c
									
										
									
									
									
								
							|  | @ -6,6 +6,7 @@ | ||||||
|  * Copyright © 2004 g10 Code GmbH |  * Copyright © 2004 g10 Code GmbH | ||||||
|  * Copyright © 2002-2015 Wei Dai & Igor Pavlov |  * Copyright © 2002-2015 Wei Dai & Igor Pavlov | ||||||
|  * Copyright © 2015-2021 Pete Batard <pete@akeo.ie> |  * Copyright © 2015-2021 Pete Batard <pete@akeo.ie> | ||||||
|  |  * Copyright © 2022 Jeffrey Walton <noloader@gmail.com> | ||||||
|  * |  * | ||||||
|  * This program is free software: you can redistribute it and/or modify |  * This program is free software: you can redistribute it and/or modify | ||||||
|  * it under the terms of the GNU General Public License as published by |  * it under the terms of the GNU General Public License as published by | ||||||
|  | @ -28,6 +29,8 @@ | ||||||
|  * |  * | ||||||
|  * SHA-512 modified from LibTomCrypt - Public Domain |  * SHA-512 modified from LibTomCrypt - Public Domain | ||||||
|  * |  * | ||||||
|  |  * CPU accelerated SHA code taken from SHA-Intrinsics - Public Domain | ||||||
|  |  * | ||||||
|  * MD5 code from various public domain sources sharing the following |  * MD5 code from various public domain sources sharing the following | ||||||
|  * copyright declaration: |  * copyright declaration: | ||||||
|  * |  * | ||||||
|  | @ -61,6 +64,7 @@ | ||||||
| #include <windowsx.h> | #include <windowsx.h> | ||||||
| 
 | 
 | ||||||
| #include "db.h" | #include "db.h" | ||||||
|  | #include "cpu.h" | ||||||
| #include "rufus.h" | #include "rufus.h" | ||||||
| #include "winio.h" | #include "winio.h" | ||||||
| #include "missing.h" | #include "missing.h" | ||||||
|  | @ -68,6 +72,22 @@ | ||||||
| #include "msapi_utf8.h" | #include "msapi_utf8.h" | ||||||
| #include "localization.h" | #include "localization.h" | ||||||
| 
 | 
 | ||||||
|  | /* Includes for SHA-1 and SHA-256 intrinsics */ | ||||||
|  | #if defined(CPU_X86_SHA1_ACCELERATION) || defined(CPU_X86_SHA256_ACCELERATION) | ||||||
|  | #if defined(_MSC_VER) | ||||||
|  | #include <immintrin.h> | ||||||
|  | #elif defined(__GNUC__) | ||||||
|  | #include <stdint.h> | ||||||
|  | #include <x86intrin.h> | ||||||
|  | #endif | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | #if defined(_MSC_VER) | ||||||
|  | #define RUFUS_ENABLE_GCC_ARCH(arch) | ||||||
|  | #else | ||||||
|  | #define RUFUS_ENABLE_GCC_ARCH(arch) __attribute__ ((target (arch))) | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
| #undef BIG_ENDIAN_HOST | #undef BIG_ENDIAN_HOST | ||||||
| 
 | 
 | ||||||
| #define BUFFER_SIZE         (64*KB) | #define BUFFER_SIZE         (64*KB) | ||||||
|  | @ -208,7 +228,7 @@ static void sha512_init(SUM_CONTEXT* ctx) | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /* Transform the message X which consists of 16 32-bit-words (SHA-1) */ | /* Transform the message X which consists of 16 32-bit-words (SHA-1) */ | ||||||
| static void sha1_transform(SUM_CONTEXT *ctx, const uint8_t *data) | static void sha1_transform_cc(SUM_CONTEXT *ctx, const uint8_t *data) | ||||||
| { | { | ||||||
| 	uint32_t a, b, c, d, e, tm, x[16]; | 	uint32_t a, b, c, d, e, tm, x[16]; | ||||||
| 
 | 
 | ||||||
|  | @ -341,8 +361,235 @@ static void sha1_transform(SUM_CONTEXT *ctx, const uint8_t *data) | ||||||
| 	ctx->state[4] += e; | 	ctx->state[4] += e; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | #ifdef CPU_X86_SHA1_ACCELERATION | ||||||
|  | /*
 | ||||||
|  |  * Transform the message X which consists of 16 32-bit-words (SHA-1) | ||||||
|  |  * The code is public domain taken from https://github.com/noloader/SHA-Intrinsics.
 | ||||||
|  |  */ | ||||||
|  | RUFUS_ENABLE_GCC_ARCH("ssse3,sse4.1,sha") | ||||||
|  | static void sha1_transform_x86(uint64_t state64[5], const uint8_t *data, size_t length) | ||||||
|  | { | ||||||
|  | 	__m128i ABCD, E0, E1; | ||||||
|  | 	__m128i MSG0, MSG1, MSG2, MSG3; | ||||||
|  | 	const __m128i MASK = _mm_set_epi64x(0x0001020304050607ULL, 0x08090a0b0c0d0e0fULL); | ||||||
|  | 
 | ||||||
|  | 	/* Rufus uses uint64_t for the state array. Pack it into uint32_t. */ | ||||||
|  | 	uint32_t state[5] = { | ||||||
|  | 		(uint32_t)state64[0], | ||||||
|  | 		(uint32_t)state64[1], | ||||||
|  | 		(uint32_t)state64[2], | ||||||
|  | 		(uint32_t)state64[3], | ||||||
|  | 		(uint32_t)state64[4] | ||||||
|  | 	}; | ||||||
|  | 
 | ||||||
|  | 	/* Load initial values */ | ||||||
|  | 	ABCD = _mm_loadu_si128((const __m128i*) state); | ||||||
|  | 	E0 = _mm_set_epi32(state[4], 0, 0, 0); | ||||||
|  | 	ABCD = _mm_shuffle_epi32(ABCD, 0x1B); | ||||||
|  | 
 | ||||||
|  | 	while (length >= SHA1_BLOCKSIZE) | ||||||
|  | 	{ | ||||||
|  | 		/* Save current state  */ | ||||||
|  | 		const __m128i ABCD_SAVE = ABCD; | ||||||
|  | 		const __m128i E0_SAVE = E0; | ||||||
|  | 
 | ||||||
|  | 		/* Rounds 0-3 */ | ||||||
|  | 		MSG0 = _mm_loadu_si128((const __m128i*)(data + 0)); | ||||||
|  | 		MSG0 = _mm_shuffle_epi8(MSG0, MASK); | ||||||
|  | 		E0 = _mm_add_epi32(E0, MSG0); | ||||||
|  | 		E1 = ABCD; | ||||||
|  | 		ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 0); | ||||||
|  | 
 | ||||||
|  | 		/* Rounds 4-7 */ | ||||||
|  | 		MSG1 = _mm_loadu_si128((const __m128i*)(data + 16)); | ||||||
|  | 		MSG1 = _mm_shuffle_epi8(MSG1, MASK); | ||||||
|  | 		E1 = _mm_sha1nexte_epu32(E1, MSG1); | ||||||
|  | 		E0 = ABCD; | ||||||
|  | 		ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 0); | ||||||
|  | 		MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1); | ||||||
|  | 
 | ||||||
|  | 		/* Rounds 8-11 */ | ||||||
|  | 		MSG2 = _mm_loadu_si128((const __m128i*)(data + 32)); | ||||||
|  | 		MSG2 = _mm_shuffle_epi8(MSG2, MASK); | ||||||
|  | 		E0 = _mm_sha1nexte_epu32(E0, MSG2); | ||||||
|  | 		E1 = ABCD; | ||||||
|  | 		ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 0); | ||||||
|  | 		MSG1 = _mm_sha1msg1_epu32(MSG1, MSG2); | ||||||
|  | 		MSG0 = _mm_xor_si128(MSG0, MSG2); | ||||||
|  | 
 | ||||||
|  | 		/* Rounds 12-15 */ | ||||||
|  | 		MSG3 = _mm_loadu_si128((const __m128i*)(data + 48)); | ||||||
|  | 		MSG3 = _mm_shuffle_epi8(MSG3, MASK); | ||||||
|  | 		E1 = _mm_sha1nexte_epu32(E1, MSG3); | ||||||
|  | 		E0 = ABCD; | ||||||
|  | 		MSG0 = _mm_sha1msg2_epu32(MSG0, MSG3); | ||||||
|  | 		ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 0); | ||||||
|  | 		MSG2 = _mm_sha1msg1_epu32(MSG2, MSG3); | ||||||
|  | 		MSG1 = _mm_xor_si128(MSG1, MSG3); | ||||||
|  | 
 | ||||||
|  | 		/* Rounds 16-19 */ | ||||||
|  | 		E0 = _mm_sha1nexte_epu32(E0, MSG0); | ||||||
|  | 		E1 = ABCD; | ||||||
|  | 		MSG1 = _mm_sha1msg2_epu32(MSG1, MSG0); | ||||||
|  | 		ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 0); | ||||||
|  | 		MSG3 = _mm_sha1msg1_epu32(MSG3, MSG0); | ||||||
|  | 		MSG2 = _mm_xor_si128(MSG2, MSG0); | ||||||
|  | 
 | ||||||
|  | 		/* Rounds 20-23 */ | ||||||
|  | 		E1 = _mm_sha1nexte_epu32(E1, MSG1); | ||||||
|  | 		E0 = ABCD; | ||||||
|  | 		MSG2 = _mm_sha1msg2_epu32(MSG2, MSG1); | ||||||
|  | 		ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 1); | ||||||
|  | 		MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1); | ||||||
|  | 		MSG3 = _mm_xor_si128(MSG3, MSG1); | ||||||
|  | 
 | ||||||
|  | 		/* Rounds 24-27 */ | ||||||
|  | 		E0 = _mm_sha1nexte_epu32(E0, MSG2); | ||||||
|  | 		E1 = ABCD; | ||||||
|  | 		MSG3 = _mm_sha1msg2_epu32(MSG3, MSG2); | ||||||
|  | 		ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 1); | ||||||
|  | 		MSG1 = _mm_sha1msg1_epu32(MSG1, MSG2); | ||||||
|  | 		MSG0 = _mm_xor_si128(MSG0, MSG2); | ||||||
|  | 
 | ||||||
|  | 		/* Rounds 28-31 */ | ||||||
|  | 		E1 = _mm_sha1nexte_epu32(E1, MSG3); | ||||||
|  | 		E0 = ABCD; | ||||||
|  | 		MSG0 = _mm_sha1msg2_epu32(MSG0, MSG3); | ||||||
|  | 		ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 1); | ||||||
|  | 		MSG2 = _mm_sha1msg1_epu32(MSG2, MSG3); | ||||||
|  | 		MSG1 = _mm_xor_si128(MSG1, MSG3); | ||||||
|  | 
 | ||||||
|  | 		/* Rounds 32-35 */ | ||||||
|  | 		E0 = _mm_sha1nexte_epu32(E0, MSG0); | ||||||
|  | 		E1 = ABCD; | ||||||
|  | 		MSG1 = _mm_sha1msg2_epu32(MSG1, MSG0); | ||||||
|  | 		ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 1); | ||||||
|  | 		MSG3 = _mm_sha1msg1_epu32(MSG3, MSG0); | ||||||
|  | 		MSG2 = _mm_xor_si128(MSG2, MSG0); | ||||||
|  | 
 | ||||||
|  | 		/* Rounds 36-39 */ | ||||||
|  | 		E1 = _mm_sha1nexte_epu32(E1, MSG1); | ||||||
|  | 		E0 = ABCD; | ||||||
|  | 		MSG2 = _mm_sha1msg2_epu32(MSG2, MSG1); | ||||||
|  | 		ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 1); | ||||||
|  | 		MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1); | ||||||
|  | 		MSG3 = _mm_xor_si128(MSG3, MSG1); | ||||||
|  | 
 | ||||||
|  | 		/* Rounds 40-43 */ | ||||||
|  | 		E0 = _mm_sha1nexte_epu32(E0, MSG2); | ||||||
|  | 		E1 = ABCD; | ||||||
|  | 		MSG3 = _mm_sha1msg2_epu32(MSG3, MSG2); | ||||||
|  | 		ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 2); | ||||||
|  | 		MSG1 = _mm_sha1msg1_epu32(MSG1, MSG2); | ||||||
|  | 		MSG0 = _mm_xor_si128(MSG0, MSG2); | ||||||
|  | 
 | ||||||
|  | 		/* Rounds 44-47 */ | ||||||
|  | 		E1 = _mm_sha1nexte_epu32(E1, MSG3); | ||||||
|  | 		E0 = ABCD; | ||||||
|  | 		MSG0 = _mm_sha1msg2_epu32(MSG0, MSG3); | ||||||
|  | 		ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 2); | ||||||
|  | 		MSG2 = _mm_sha1msg1_epu32(MSG2, MSG3); | ||||||
|  | 		MSG1 = _mm_xor_si128(MSG1, MSG3); | ||||||
|  | 
 | ||||||
|  | 		/* Rounds 48-51 */ | ||||||
|  | 		E0 = _mm_sha1nexte_epu32(E0, MSG0); | ||||||
|  | 		E1 = ABCD; | ||||||
|  | 		MSG1 = _mm_sha1msg2_epu32(MSG1, MSG0); | ||||||
|  | 		ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 2); | ||||||
|  | 		MSG3 = _mm_sha1msg1_epu32(MSG3, MSG0); | ||||||
|  | 		MSG2 = _mm_xor_si128(MSG2, MSG0); | ||||||
|  | 
 | ||||||
|  | 		/* Rounds 52-55 */ | ||||||
|  | 		E1 = _mm_sha1nexte_epu32(E1, MSG1); | ||||||
|  | 		E0 = ABCD; | ||||||
|  | 		MSG2 = _mm_sha1msg2_epu32(MSG2, MSG1); | ||||||
|  | 		ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 2); | ||||||
|  | 		MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1); | ||||||
|  | 		MSG3 = _mm_xor_si128(MSG3, MSG1); | ||||||
|  | 
 | ||||||
|  | 		/* Rounds 56-59 */ | ||||||
|  | 		E0 = _mm_sha1nexte_epu32(E0, MSG2); | ||||||
|  | 		E1 = ABCD; | ||||||
|  | 		MSG3 = _mm_sha1msg2_epu32(MSG3, MSG2); | ||||||
|  | 		ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 2); | ||||||
|  | 		MSG1 = _mm_sha1msg1_epu32(MSG1, MSG2); | ||||||
|  | 		MSG0 = _mm_xor_si128(MSG0, MSG2); | ||||||
|  | 
 | ||||||
|  | 		/* Rounds 60-63 */ | ||||||
|  | 		E1 = _mm_sha1nexte_epu32(E1, MSG3); | ||||||
|  | 		E0 = ABCD; | ||||||
|  | 		MSG0 = _mm_sha1msg2_epu32(MSG0, MSG3); | ||||||
|  | 		ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 3); | ||||||
|  | 		MSG2 = _mm_sha1msg1_epu32(MSG2, MSG3); | ||||||
|  | 		MSG1 = _mm_xor_si128(MSG1, MSG3); | ||||||
|  | 
 | ||||||
|  | 		/* Rounds 64-67 */ | ||||||
|  | 		E0 = _mm_sha1nexte_epu32(E0, MSG0); | ||||||
|  | 		E1 = ABCD; | ||||||
|  | 		MSG1 = _mm_sha1msg2_epu32(MSG1, MSG0); | ||||||
|  | 		ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 3); | ||||||
|  | 		MSG3 = _mm_sha1msg1_epu32(MSG3, MSG0); | ||||||
|  | 		MSG2 = _mm_xor_si128(MSG2, MSG0); | ||||||
|  | 
 | ||||||
|  | 		/* Rounds 68-71 */ | ||||||
|  | 		E1 = _mm_sha1nexte_epu32(E1, MSG1); | ||||||
|  | 		E0 = ABCD; | ||||||
|  | 		MSG2 = _mm_sha1msg2_epu32(MSG2, MSG1); | ||||||
|  | 		ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 3); | ||||||
|  | 		MSG3 = _mm_xor_si128(MSG3, MSG1); | ||||||
|  | 
 | ||||||
|  | 		/* Rounds 72-75 */ | ||||||
|  | 		E0 = _mm_sha1nexte_epu32(E0, MSG2); | ||||||
|  | 		E1 = ABCD; | ||||||
|  | 		MSG3 = _mm_sha1msg2_epu32(MSG3, MSG2); | ||||||
|  | 		ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 3); | ||||||
|  | 
 | ||||||
|  | 		/* Rounds 76-79 */ | ||||||
|  | 		E1 = _mm_sha1nexte_epu32(E1, MSG3); | ||||||
|  | 		E0 = ABCD; | ||||||
|  | 		ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 3); | ||||||
|  | 
 | ||||||
|  | 		/* Combine state */ | ||||||
|  | 		E0 = _mm_sha1nexte_epu32(E0, E0_SAVE); | ||||||
|  | 		ABCD = _mm_add_epi32(ABCD, ABCD_SAVE); | ||||||
|  | 
 | ||||||
|  | 		data += 64; | ||||||
|  | 		length -= 64; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	/* Save state */ | ||||||
|  | 	ABCD = _mm_shuffle_epi32(ABCD, 0x1B); | ||||||
|  | 	_mm_storeu_si128((__m128i*) state, ABCD); | ||||||
|  | 	state[4] = _mm_extract_epi32(E0, 3); | ||||||
|  | 
 | ||||||
|  | 	/* Repack into uint64_t. */ | ||||||
|  | 	state64[0] = state[0]; | ||||||
|  | 	state64[1] = state[1]; | ||||||
|  | 	state64[2] = state[2]; | ||||||
|  | 	state64[3] = state[3]; | ||||||
|  | 	state64[4] = state[4]; | ||||||
|  | } | ||||||
|  | #endif /* CPU_X86_SHA1_ACCELERATION */ | ||||||
|  | 
 | ||||||
|  | /* Transform the message X which consists of 16 32-bit-words (SHA-1) */ | ||||||
|  | static void sha1_transform(SUM_CONTEXT *ctx, const uint8_t *data) | ||||||
|  | { | ||||||
|  | #ifdef CPU_X86_SHA1_ACCELERATION | ||||||
|  | 	if (cpu_has_sha1_accel) | ||||||
|  | 	{ | ||||||
|  | 		/* SHA-1 acceleration using intrinsics */ | ||||||
|  | 		sha1_transform_x86(ctx->state, data, SHA1_BLOCKSIZE); | ||||||
|  | 	} | ||||||
|  | 	else | ||||||
|  | #endif | ||||||
|  | 	{ | ||||||
|  | 		/* Portable C/C++ implementation */ | ||||||
|  | 		sha1_transform_cc(ctx, data); | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
| /* Transform the message X which consists of 16 32-bit-words (SHA-256) */ | /* Transform the message X which consists of 16 32-bit-words (SHA-256) */ | ||||||
| static __inline void sha256_transform(SUM_CONTEXT *ctx, const uint8_t *data) | static __inline void sha256_transform_cc(SUM_CONTEXT *ctx, const uint8_t *data) | ||||||
| { | { | ||||||
| 	uint32_t a, b, c, d, e, f, g, h, j, x[16]; | 	uint32_t a, b, c, d, e, f, g, h, j, x[16]; | ||||||
| 
 | 
 | ||||||
|  | @ -415,6 +662,243 @@ static __inline void sha256_transform(SUM_CONTEXT *ctx, const uint8_t *data) | ||||||
| 	ctx->state[7] += h; | 	ctx->state[7] += h; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | #ifdef CPU_X86_SHA256_ACCELERATION | ||||||
|  | /*
 | ||||||
|  |  * Transform the message X which consists of 16 32-bit-words (SHA-256) | ||||||
|  |  * The code is public domain taken from https://github.com/noloader/SHA-Intrinsics.
 | ||||||
|  |  */ | ||||||
|  | RUFUS_ENABLE_GCC_ARCH("ssse3,sse4.1,sha") | ||||||
|  | static __inline void sha256_transform_x86(uint64_t state64[8], const uint8_t *data, size_t length) | ||||||
|  | { | ||||||
|  | 	__m128i STATE0, STATE1; | ||||||
|  | 	__m128i MSG, TMP; | ||||||
|  | 	__m128i MSG0, MSG1, MSG2, MSG3; | ||||||
|  | 	const __m128i MASK = _mm_set_epi64x(0x0c0d0e0f08090a0bULL, 0x0405060700010203ULL); | ||||||
|  | 
 | ||||||
|  | 	/* Rufus uses uint64_t for the state array. Pack it into uint32_t. */ | ||||||
|  | 	uint32_t state[8] = { | ||||||
|  | 		(uint32_t)state64[0], | ||||||
|  | 		(uint32_t)state64[1], | ||||||
|  | 		(uint32_t)state64[2], | ||||||
|  | 		(uint32_t)state64[3], | ||||||
|  | 		(uint32_t)state64[4], | ||||||
|  | 		(uint32_t)state64[5], | ||||||
|  | 		(uint32_t)state64[6], | ||||||
|  | 		(uint32_t)state64[7] | ||||||
|  | 	}; | ||||||
|  | 
 | ||||||
|  | 	/* Load initial values */ | ||||||
|  | 	TMP = _mm_loadu_si128((const __m128i*) (state+0)); | ||||||
|  | 	STATE1 = _mm_loadu_si128((const __m128i*) (state+4)); | ||||||
|  | 
 | ||||||
|  | 	TMP = _mm_shuffle_epi32(TMP, 0xB1);          /* CDAB */ | ||||||
|  | 	STATE1 = _mm_shuffle_epi32(STATE1, 0x1B);    /* EFGH */ | ||||||
|  | 	STATE0 = _mm_alignr_epi8(TMP, STATE1, 8);    /* ABEF */ | ||||||
|  | 	STATE1 = _mm_blend_epi16(STATE1, TMP, 0xF0); /* CDGH */ | ||||||
|  | 
 | ||||||
|  | 	while (length >= SHA256_BLOCKSIZE) | ||||||
|  | 	{ | ||||||
|  | 		/* Save current state */ | ||||||
|  | 		const __m128i ABEF_SAVE = STATE0; | ||||||
|  | 		const __m128i CDGH_SAVE = STATE1; | ||||||
|  | 
 | ||||||
|  | 		/* Rounds 0-3 */ | ||||||
|  | 		MSG = _mm_loadu_si128((const __m128i*) (data+0)); | ||||||
|  | 		MSG0 = _mm_shuffle_epi8(MSG, MASK); | ||||||
|  | 		MSG = _mm_add_epi32(MSG0, _mm_set_epi64x(0xE9B5DBA5B5C0FBCFULL, 0x71374491428A2F98ULL)); | ||||||
|  | 		STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); | ||||||
|  | 		MSG = _mm_shuffle_epi32(MSG, 0x0E); | ||||||
|  | 		STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); | ||||||
|  | 
 | ||||||
|  | 		/* Rounds 4-7 */ | ||||||
|  | 		MSG1 = _mm_loadu_si128((const __m128i*) (data+16)); | ||||||
|  | 		MSG1 = _mm_shuffle_epi8(MSG1, MASK); | ||||||
|  | 		MSG = _mm_add_epi32(MSG1, _mm_set_epi64x(0xAB1C5ED5923F82A4ULL, 0x59F111F13956C25BULL)); | ||||||
|  | 		STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); | ||||||
|  | 		MSG = _mm_shuffle_epi32(MSG, 0x0E); | ||||||
|  | 		STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); | ||||||
|  | 		MSG0 = _mm_sha256msg1_epu32(MSG0, MSG1); | ||||||
|  | 
 | ||||||
|  | 		/* Rounds 8-11 */ | ||||||
|  | 		MSG2 = _mm_loadu_si128((const __m128i*) (data+32)); | ||||||
|  | 		MSG2 = _mm_shuffle_epi8(MSG2, MASK); | ||||||
|  | 		MSG = _mm_add_epi32(MSG2, _mm_set_epi64x(0x550C7DC3243185BEULL, 0x12835B01D807AA98ULL)); | ||||||
|  | 		STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); | ||||||
|  | 		MSG = _mm_shuffle_epi32(MSG, 0x0E); | ||||||
|  | 		STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); | ||||||
|  | 		MSG1 = _mm_sha256msg1_epu32(MSG1, MSG2); | ||||||
|  | 
 | ||||||
|  | 		/* Rounds 12-15 */ | ||||||
|  | 		MSG3 = _mm_loadu_si128((const __m128i*) (data+48)); | ||||||
|  | 		MSG3 = _mm_shuffle_epi8(MSG3, MASK); | ||||||
|  | 		MSG = _mm_add_epi32(MSG3, _mm_set_epi64x(0xC19BF1749BDC06A7ULL, 0x80DEB1FE72BE5D74ULL)); | ||||||
|  | 		STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); | ||||||
|  | 		TMP = _mm_alignr_epi8(MSG3, MSG2, 4); | ||||||
|  | 		MSG0 = _mm_add_epi32(MSG0, TMP); | ||||||
|  | 		MSG0 = _mm_sha256msg2_epu32(MSG0, MSG3); | ||||||
|  | 		MSG = _mm_shuffle_epi32(MSG, 0x0E); | ||||||
|  | 		STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); | ||||||
|  | 		MSG2 = _mm_sha256msg1_epu32(MSG2, MSG3); | ||||||
|  | 
 | ||||||
|  | 		/* Rounds 16-19 */ | ||||||
|  | 		MSG = _mm_add_epi32(MSG0, _mm_set_epi64x(0x240CA1CC0FC19DC6ULL, 0xEFBE4786E49B69C1ULL)); | ||||||
|  | 		STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); | ||||||
|  | 		TMP = _mm_alignr_epi8(MSG0, MSG3, 4); | ||||||
|  | 		MSG1 = _mm_add_epi32(MSG1, TMP); | ||||||
|  | 		MSG1 = _mm_sha256msg2_epu32(MSG1, MSG0); | ||||||
|  | 		MSG = _mm_shuffle_epi32(MSG, 0x0E); | ||||||
|  | 		STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); | ||||||
|  | 		MSG3 = _mm_sha256msg1_epu32(MSG3, MSG0); | ||||||
|  | 
 | ||||||
|  | 		/* Rounds 20-23 */ | ||||||
|  | 		MSG = _mm_add_epi32(MSG1, _mm_set_epi64x(0x76F988DA5CB0A9DCULL, 0x4A7484AA2DE92C6FULL)); | ||||||
|  | 		STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); | ||||||
|  | 		TMP = _mm_alignr_epi8(MSG1, MSG0, 4); | ||||||
|  | 		MSG2 = _mm_add_epi32(MSG2, TMP); | ||||||
|  | 		MSG2 = _mm_sha256msg2_epu32(MSG2, MSG1); | ||||||
|  | 		MSG = _mm_shuffle_epi32(MSG, 0x0E); | ||||||
|  | 		STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); | ||||||
|  | 		MSG0 = _mm_sha256msg1_epu32(MSG0, MSG1); | ||||||
|  | 
 | ||||||
|  | 		/* Rounds 24-27 */ | ||||||
|  | 		MSG = _mm_add_epi32(MSG2, _mm_set_epi64x(0xBF597FC7B00327C8ULL, 0xA831C66D983E5152ULL)); | ||||||
|  | 		STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); | ||||||
|  | 		TMP = _mm_alignr_epi8(MSG2, MSG1, 4); | ||||||
|  | 		MSG3 = _mm_add_epi32(MSG3, TMP); | ||||||
|  | 		MSG3 = _mm_sha256msg2_epu32(MSG3, MSG2); | ||||||
|  | 		MSG = _mm_shuffle_epi32(MSG, 0x0E); | ||||||
|  | 		STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); | ||||||
|  | 		MSG1 = _mm_sha256msg1_epu32(MSG1, MSG2); | ||||||
|  | 
 | ||||||
|  | 		/* Rounds 28-31 */ | ||||||
|  | 		MSG = _mm_add_epi32(MSG3, _mm_set_epi64x(0x1429296706CA6351ULL,  0xD5A79147C6E00BF3ULL)); | ||||||
|  | 		STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); | ||||||
|  | 		TMP = _mm_alignr_epi8(MSG3, MSG2, 4); | ||||||
|  | 		MSG0 = _mm_add_epi32(MSG0, TMP); | ||||||
|  | 		MSG0 = _mm_sha256msg2_epu32(MSG0, MSG3); | ||||||
|  | 		MSG = _mm_shuffle_epi32(MSG, 0x0E); | ||||||
|  | 		STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); | ||||||
|  | 		MSG2 = _mm_sha256msg1_epu32(MSG2, MSG3); | ||||||
|  | 
 | ||||||
|  | 		/* Rounds 32-35 */ | ||||||
|  | 		MSG = _mm_add_epi32(MSG0, _mm_set_epi64x(0x53380D134D2C6DFCULL, 0x2E1B213827B70A85ULL)); | ||||||
|  | 		STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); | ||||||
|  | 		TMP = _mm_alignr_epi8(MSG0, MSG3, 4); | ||||||
|  | 		MSG1 = _mm_add_epi32(MSG1, TMP); | ||||||
|  | 		MSG1 = _mm_sha256msg2_epu32(MSG1, MSG0); | ||||||
|  | 		MSG = _mm_shuffle_epi32(MSG, 0x0E); | ||||||
|  | 		STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); | ||||||
|  | 		MSG3 = _mm_sha256msg1_epu32(MSG3, MSG0); | ||||||
|  | 
 | ||||||
|  | 		/* Rounds 36-39 */ | ||||||
|  | 		MSG = _mm_add_epi32(MSG1, _mm_set_epi64x(0x92722C8581C2C92EULL, 0x766A0ABB650A7354ULL)); | ||||||
|  | 		STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); | ||||||
|  | 		TMP = _mm_alignr_epi8(MSG1, MSG0, 4); | ||||||
|  | 		MSG2 = _mm_add_epi32(MSG2, TMP); | ||||||
|  | 		MSG2 = _mm_sha256msg2_epu32(MSG2, MSG1); | ||||||
|  | 		MSG = _mm_shuffle_epi32(MSG, 0x0E); | ||||||
|  | 		STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); | ||||||
|  | 		MSG0 = _mm_sha256msg1_epu32(MSG0, MSG1); | ||||||
|  | 
 | ||||||
|  | 		/* Rounds 40-43 */ | ||||||
|  | 		MSG = _mm_add_epi32(MSG2, _mm_set_epi64x(0xC76C51A3C24B8B70ULL, 0xA81A664BA2BFE8A1ULL)); | ||||||
|  | 		STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); | ||||||
|  | 		TMP = _mm_alignr_epi8(MSG2, MSG1, 4); | ||||||
|  | 		MSG3 = _mm_add_epi32(MSG3, TMP); | ||||||
|  | 		MSG3 = _mm_sha256msg2_epu32(MSG3, MSG2); | ||||||
|  | 		MSG = _mm_shuffle_epi32(MSG, 0x0E); | ||||||
|  | 		STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); | ||||||
|  | 		MSG1 = _mm_sha256msg1_epu32(MSG1, MSG2); | ||||||
|  | 
 | ||||||
|  | 		/* Rounds 44-47 */ | ||||||
|  | 		MSG = _mm_add_epi32(MSG3, _mm_set_epi64x(0x106AA070F40E3585ULL, 0xD6990624D192E819ULL)); | ||||||
|  | 		STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); | ||||||
|  | 		TMP = _mm_alignr_epi8(MSG3, MSG2, 4); | ||||||
|  | 		MSG0 = _mm_add_epi32(MSG0, TMP); | ||||||
|  | 		MSG0 = _mm_sha256msg2_epu32(MSG0, MSG3); | ||||||
|  | 		MSG = _mm_shuffle_epi32(MSG, 0x0E); | ||||||
|  | 		STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); | ||||||
|  | 		MSG2 = _mm_sha256msg1_epu32(MSG2, MSG3); | ||||||
|  | 
 | ||||||
|  | 		/* Rounds 48-51 */ | ||||||
|  | 		MSG = _mm_add_epi32(MSG0, _mm_set_epi64x(0x34B0BCB52748774CULL, 0x1E376C0819A4C116ULL)); | ||||||
|  | 		STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); | ||||||
|  | 		TMP = _mm_alignr_epi8(MSG0, MSG3, 4); | ||||||
|  | 		MSG1 = _mm_add_epi32(MSG1, TMP); | ||||||
|  | 		MSG1 = _mm_sha256msg2_epu32(MSG1, MSG0); | ||||||
|  | 		MSG = _mm_shuffle_epi32(MSG, 0x0E); | ||||||
|  | 		STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); | ||||||
|  | 		MSG3 = _mm_sha256msg1_epu32(MSG3, MSG0); | ||||||
|  | 
 | ||||||
|  | 		/* Rounds 52-55 */ | ||||||
|  | 		MSG = _mm_add_epi32(MSG1, _mm_set_epi64x(0x682E6FF35B9CCA4FULL, 0x4ED8AA4A391C0CB3ULL)); | ||||||
|  | 		STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); | ||||||
|  | 		TMP = _mm_alignr_epi8(MSG1, MSG0, 4); | ||||||
|  | 		MSG2 = _mm_add_epi32(MSG2, TMP); | ||||||
|  | 		MSG2 = _mm_sha256msg2_epu32(MSG2, MSG1); | ||||||
|  | 		MSG = _mm_shuffle_epi32(MSG, 0x0E); | ||||||
|  | 		STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); | ||||||
|  | 
 | ||||||
|  | 		/* Rounds 56-59 */ | ||||||
|  | 		MSG = _mm_add_epi32(MSG2, _mm_set_epi64x(0x8CC7020884C87814ULL, 0x78A5636F748F82EEULL)); | ||||||
|  | 		STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); | ||||||
|  | 		TMP = _mm_alignr_epi8(MSG2, MSG1, 4); | ||||||
|  | 		MSG3 = _mm_add_epi32(MSG3, TMP); | ||||||
|  | 		MSG3 = _mm_sha256msg2_epu32(MSG3, MSG2); | ||||||
|  | 		MSG = _mm_shuffle_epi32(MSG, 0x0E); | ||||||
|  | 		STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); | ||||||
|  | 
 | ||||||
|  | 		/* Rounds 60-63 */ | ||||||
|  | 		MSG = _mm_add_epi32(MSG3, _mm_set_epi64x(0xC67178F2BEF9A3F7ULL, 0xA4506CEB90BEFFFAULL)); | ||||||
|  | 		STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); | ||||||
|  | 		MSG = _mm_shuffle_epi32(MSG, 0x0E); | ||||||
|  | 		STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); | ||||||
|  | 
 | ||||||
|  | 		/* Combine state  */ | ||||||
|  | 		STATE0 = _mm_add_epi32(STATE0, ABEF_SAVE); | ||||||
|  | 		STATE1 = _mm_add_epi32(STATE1, CDGH_SAVE); | ||||||
|  | 
 | ||||||
|  | 		data += 64; | ||||||
|  | 		length -= 64; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	TMP = _mm_shuffle_epi32(STATE0, 0x1B);       /* FEBA */ | ||||||
|  | 	STATE1 = _mm_shuffle_epi32(STATE1, 0xB1);    /* DCHG */ | ||||||
|  | 	STATE0 = _mm_blend_epi16(TMP, STATE1, 0xF0); /* DCBA */ | ||||||
|  | 	STATE1 = _mm_alignr_epi8(STATE1, TMP, 8);    /* ABEF */ | ||||||
|  | 
 | ||||||
|  | 	/* Save state */ | ||||||
|  | 	_mm_storeu_si128((__m128i*) (state+0), STATE0); | ||||||
|  | 	_mm_storeu_si128((__m128i*) (state+4), STATE1); | ||||||
|  | 
 | ||||||
|  | 	/* Repack into uint64_t. */ | ||||||
|  | 	state64[0] = state[0]; | ||||||
|  | 	state64[1] = state[1]; | ||||||
|  | 	state64[2] = state[2]; | ||||||
|  | 	state64[3] = state[3]; | ||||||
|  | 	state64[4] = state[4]; | ||||||
|  | 	state64[5] = state[5]; | ||||||
|  | 	state64[6] = state[6]; | ||||||
|  | 	state64[7] = state[7]; | ||||||
|  | } | ||||||
|  | #endif /* CPU_X86_SHA256_ACCELERATION */ | ||||||
|  | 
 | ||||||
|  | static __inline void sha256_transform(SUM_CONTEXT *ctx, const uint8_t *data) | ||||||
|  | { | ||||||
|  | #ifdef CPU_X86_SHA256_ACCELERATION | ||||||
|  | 	if (cpu_has_sha256_accel) | ||||||
|  | 	{ | ||||||
|  | 		/* SHA-256 acceleration using intrinsics */ | ||||||
|  | 		sha256_transform_x86(ctx->state, data, SHA256_BLOCKSIZE); | ||||||
|  | 	} | ||||||
|  | 	else | ||||||
|  | #endif | ||||||
|  | 	{ | ||||||
|  | 		/* Portable C/C++ implementation */ | ||||||
|  | 		sha256_transform_cc(ctx, data); | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Transform the message X which consists of 16 64-bit-words (SHA-512) |  * Transform the message X which consists of 16 64-bit-words (SHA-512) | ||||||
|  * This is an algorithm that *REALLY* benefits from being executed as 64-bit |  * This is an algorithm that *REALLY* benefits from being executed as 64-bit | ||||||
|  | @ -618,6 +1102,22 @@ static void sha1_write(SUM_CONTEXT *ctx, const uint8_t *buf, size_t len) | ||||||
| 		len -= num; | 		len -= num; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
|  | #ifdef CPU_X86_SHA1_ACCELERATION | ||||||
|  | 	if (cpu_has_sha1_accel) | ||||||
|  | 	{ | ||||||
|  | 		/* Process all full blocks at once */ | ||||||
|  | 		if (len >= SHA1_BLOCKSIZE) { | ||||||
|  | 			/* Calculate full blocks, in bytes */ | ||||||
|  | 			num = (len / SHA1_BLOCKSIZE) * SHA1_BLOCKSIZE; | ||||||
|  | 			/* SHA-1 acceleration using intrinsics */ | ||||||
|  | 			sha1_transform_x86(ctx->state, buf, num); | ||||||
|  | 			buf += num; | ||||||
|  | 			len -= num; | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	else | ||||||
|  | #endif | ||||||
|  | 	{ | ||||||
| 		/* Process data in blocksize chunks */ | 		/* Process data in blocksize chunks */ | ||||||
| 		while (len >= SHA1_BLOCKSIZE) { | 		while (len >= SHA1_BLOCKSIZE) { | ||||||
| 			PREFETCH64(buf + SHA1_BLOCKSIZE); | 			PREFETCH64(buf + SHA1_BLOCKSIZE); | ||||||
|  | @ -625,6 +1125,7 @@ static void sha1_write(SUM_CONTEXT *ctx, const uint8_t *buf, size_t len) | ||||||
| 			buf += SHA1_BLOCKSIZE; | 			buf += SHA1_BLOCKSIZE; | ||||||
| 			len -= SHA1_BLOCKSIZE; | 			len -= SHA1_BLOCKSIZE; | ||||||
| 		} | 		} | ||||||
|  | 	} | ||||||
| 
 | 
 | ||||||
| 	/* Handle any remaining bytes of data. */ | 	/* Handle any remaining bytes of data. */ | ||||||
| 	memcpy(ctx->buf, buf, len); | 	memcpy(ctx->buf, buf, len); | ||||||
|  | @ -653,6 +1154,22 @@ static void sha256_write(SUM_CONTEXT *ctx, const uint8_t *buf, size_t len) | ||||||
| 		len -= num; | 		len -= num; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
|  | #ifdef CPU_X86_SHA256_ACCELERATION | ||||||
|  | 	if (cpu_has_sha256_accel) | ||||||
|  | 	{ | ||||||
|  | 		/* Process all full blocks at once */ | ||||||
|  | 		if (len >= SHA256_BLOCKSIZE) { | ||||||
|  | 			/* Calculate full blocks, in bytes */ | ||||||
|  | 			num = (len / SHA256_BLOCKSIZE) * SHA256_BLOCKSIZE; | ||||||
|  | 			/* SHA-256 acceleration using intrinsics */ | ||||||
|  | 			sha256_transform_x86(ctx->state, buf, num); | ||||||
|  | 			buf += num; | ||||||
|  | 			len -= num; | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	else | ||||||
|  | #endif | ||||||
|  | 	{ | ||||||
| 		/* Process data in blocksize chunks */ | 		/* Process data in blocksize chunks */ | ||||||
| 		while (len >= SHA256_BLOCKSIZE) { | 		while (len >= SHA256_BLOCKSIZE) { | ||||||
| 			PREFETCH64(buf + SHA256_BLOCKSIZE); | 			PREFETCH64(buf + SHA256_BLOCKSIZE); | ||||||
|  | @ -660,6 +1177,7 @@ static void sha256_write(SUM_CONTEXT *ctx, const uint8_t *buf, size_t len) | ||||||
| 			buf += SHA256_BLOCKSIZE; | 			buf += SHA256_BLOCKSIZE; | ||||||
| 			len -= SHA256_BLOCKSIZE; | 			len -= SHA256_BLOCKSIZE; | ||||||
| 		} | 		} | ||||||
|  | 	} | ||||||
| 
 | 
 | ||||||
| 	/* Handle any remaining bytes of data. */ | 	/* Handle any remaining bytes of data. */ | ||||||
| 	memcpy(ctx->buf, buf, len); | 	memcpy(ctx->buf, buf, len); | ||||||
|  | @ -1280,7 +1798,7 @@ BOOL IsFileInDB(const char* path) | ||||||
| 	return FALSE; | 	return FALSE; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| #if defined(_DEBUG) | #if defined(_DEBUG) || defined(TEST) || defined(ALPHA) | ||||||
| /* Convert a lowercase hex string to binary. Returned value must be freed */ | /* Convert a lowercase hex string to binary. Returned value must be freed */ | ||||||
| uint8_t* to_bin(const char* str) | uint8_t* to_bin(const char* str) | ||||||
| { | { | ||||||
|  | @ -1303,7 +1821,7 @@ uint8_t* to_bin(const char* str) | ||||||
| 	return ret; | 	return ret; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| const char* test_msg = "Did you ever hear the tragedy of Darth Plagueis The Wise? " | const char test_msg[] = "Did you ever hear the tragedy of Darth Plagueis The Wise? " | ||||||
| 	"I thought not. It's not a story the Jedi would tell you. It's a Sith legend. " | 	"I thought not. It's not a story the Jedi would tell you. It's a Sith legend. " | ||||||
| 	"Darth Plagueis was a Dark Lord of the Sith, so powerful and so wise he could " | 	"Darth Plagueis was a Dark Lord of the Sith, so powerful and so wise he could " | ||||||
| 	"use the Force to influence the midichlorians to create life... He had such a " | 	"use the Force to influence the midichlorians to create life... He had such a " | ||||||
|  | @ -1356,6 +1874,10 @@ int TestChecksum(void) | ||||||
| 	if (msg == NULL) | 	if (msg == NULL) | ||||||
| 		return -1; | 		return -1; | ||||||
| 
 | 
 | ||||||
|  | 	/* Display accelerations available */ | ||||||
|  | 	uprintf("SHA1   acceleration: %s", (cpu_has_sha1_accel ? "TRUE" : "FALSE")); | ||||||
|  | 	uprintf("SHA256 acceleration: %s", (cpu_has_sha256_accel ? "TRUE" : "FALSE")); | ||||||
|  | 
 | ||||||
| 	for (j = 0; j < CHECKSUM_MAX; j++) { | 	for (j = 0; j < CHECKSUM_MAX; j++) { | ||||||
| 		size_t copy_msg_len[4]; | 		size_t copy_msg_len[4]; | ||||||
| 		copy_msg_len[0] = 0; | 		copy_msg_len[0] = 0; | ||||||
|  |  | ||||||
							
								
								
									
										123
									
								
								src/cpu.c
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										123
									
								
								src/cpu.c
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,123 @@ | ||||||
|  | /*
 | ||||||
|  |  * Rufus: The Reliable USB Formatting Utility | ||||||
|  |  * CPU features detection | ||||||
|  |  * Copyright © 2022 Pete Batard <pete@akeo.ie> | ||||||
|  |  * Copyright © 2022 Jeffrey Walton <noloader@gmail.com> | ||||||
|  |  * | ||||||
|  |  * This program is free software: you can redistribute it and/or modify | ||||||
|  |  * it under the terms of the GNU General Public License as published by | ||||||
|  |  * the Free Software Foundation, either version 3 of the License, or | ||||||
|  |  * (at your option) any later version. | ||||||
|  |  * | ||||||
|  |  * This program is distributed in the hope that it will be useful, | ||||||
|  |  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |  * GNU General Public License for more details. | ||||||
|  |  * | ||||||
|  |  * You should have received a copy of the GNU General Public License | ||||||
|  |  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | ||||||
|  |  */ | ||||||
|  | 
 | ||||||
|  | #include "cpu.h" | ||||||
|  | 
 | ||||||
|  | #if (defined(CPU_X86_SHA1_ACCELERATION) || defined(CPU_X86_SHA256_ACCELERATION)) | ||||||
|  | #if defined(RUFUS_MSC_VERSION) | ||||||
|  | #include <intrin.h> | ||||||
|  | #elif (defined(RUFUS_GCC_VERSION) || defined(RUFUS_CLANG_VERSION)) | ||||||
|  | #include <x86Intrin.h> | ||||||
|  | #elif defined(RUFUS_INTEL_VERSION) | ||||||
|  | #include <immintrin.h> | ||||||
|  | #endif | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | BOOL cpu_has_sha1_accel = FALSE; | ||||||
|  | BOOL cpu_has_sha256_accel = FALSE; | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * Three elements must be in place to make a meaningful call to the | ||||||
|  |  * DetectSHA###Acceleration() calls. First, the compiler must support | ||||||
|  |  * the underlying intrinsics. Second, the platform must provide a | ||||||
|  |  * cpuid() function. And third, the cpu must actually support the SHA-1 | ||||||
|  |  * and SHA-256 instructions. | ||||||
|  |  * | ||||||
|  |  * If any of the conditions are not met, then DetectSHA###Acceleration() | ||||||
|  |  * returns FALSE. | ||||||
|  |  */ | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * Detect if the processor supports SHA-1 acceleration. We only check for | ||||||
|  |  * the three ISAs we need - SSSE3, SSE4.1 and SHA. We don't check for OS | ||||||
|  |  * support or XSAVE because that's been enabled since Windows 2000. | ||||||
|  |  */ | ||||||
|  | BOOL DetectSHA1Acceleration(void) | ||||||
|  | { | ||||||
|  | #if defined(CPU_X86_SHA1_ACCELERATION) | ||||||
|  | #if defined(_MSC_VER) | ||||||
|  | 	uint32_t regs0[4] = {0,0,0,0}, regs1[4] = {0,0,0,0}, regs7[4] = {0,0,0,0}; | ||||||
|  | 	const uint32_t SSSE3_BIT = 1u <<  9; /* Function 1, Bit  9 of ECX */ | ||||||
|  | 	const uint32_t SSE41_BIT = 1u << 19; /* Function 1, Bit 19 of ECX */ | ||||||
|  | 	const uint32_t SHA_BIT   = 1u << 29; /* Function 7, Bit 29 of EBX */ | ||||||
|  | 
 | ||||||
|  | 	__cpuid(regs0, 0); | ||||||
|  | 	const uint32_t highest = regs0[0]; /*EAX*/ | ||||||
|  | 
 | ||||||
|  | 	if (highest >= 0x01) { | ||||||
|  | 		__cpuidex(regs1, 1, 0); | ||||||
|  | 	} | ||||||
|  | 	if (highest >= 0x07) { | ||||||
|  | 		__cpuidex(regs7, 7, 0); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return (regs1[2] /*ECX*/ & SSSE3_BIT) && (regs1[2] /*ECX*/ & SSE41_BIT) && (regs7[1] /*EBX*/ & SHA_BIT) ? TRUE : FALSE; | ||||||
|  | #elif defined(__GNUC__) || defined(__clang__) | ||||||
|  | 	/* __builtin_cpu_supports available in GCC 4.8.1 and above */ | ||||||
|  | 	return __builtin_cpu_supports("ssse3") && __builtin_cpu_supports("sse4.1") && __builtin_cpu_supports("sha") ? TRUE : FALSE; | ||||||
|  | #elif defined(__INTEL_COMPILER) | ||||||
|  | 	/* https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_may_i_use_cpu_feature */ | ||||||
|  | 	return _may_i_use_cpu_feature(_FEATURE_SSSE3|_FEATURE_SSE4_1|_FEATURE_SHA) ? TRUE : FALSE; | ||||||
|  | #else | ||||||
|  | 	return FALSE; | ||||||
|  | #endif | ||||||
|  | #else | ||||||
|  | 	return FALSE; | ||||||
|  | #endif | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * Detect if the processor supports SHA-256 acceleration. We only check for | ||||||
|  |  * the three ISAs we need - SSSE3, SSE4.1 and SHA. We don't check for OS | ||||||
|  |  * support or XSAVE because that's been enabled since Windows 2000. | ||||||
|  |  */ | ||||||
|  | BOOL DetectSHA256Acceleration(void) | ||||||
|  | { | ||||||
|  | #if defined(CPU_X86_SHA256_ACCELERATION) | ||||||
|  | #if defined(_MSC_VER) | ||||||
|  | 	uint32_t regs0[4] = {0,0,0,0}, regs1[4] = {0,0,0,0}, regs7[4] = {0,0,0,0}; | ||||||
|  | 	const uint32_t SSSE3_BIT = 1u <<  9; /* Function 1, Bit  9 of ECX */ | ||||||
|  | 	const uint32_t SSE41_BIT = 1u << 19; /* Function 1, Bit 19 of ECX */ | ||||||
|  | 	const uint32_t SHA_BIT   = 1u << 29; /* Function 7, Bit 29 of EBX */ | ||||||
|  | 
 | ||||||
|  | 	__cpuid(regs0, 0); | ||||||
|  | 	const uint32_t highest = regs0[0]; /*EAX*/ | ||||||
|  | 
 | ||||||
|  | 	if (highest >= 0x01) { | ||||||
|  | 		__cpuidex(regs1, 1, 0); | ||||||
|  | 	} | ||||||
|  | 	if (highest >= 0x07) { | ||||||
|  | 		__cpuidex(regs7, 7, 0); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return (regs1[2] /*ECX*/ & SSSE3_BIT) && (regs1[2] /*ECX*/ & SSE41_BIT) && (regs7[1] /*EBX*/ & SHA_BIT) ? TRUE : FALSE; | ||||||
|  | #elif defined(__GNUC__) || defined(__clang__) | ||||||
|  | 	/* __builtin_cpu_supports available in GCC 4.8.1 and above */ | ||||||
|  | 	return __builtin_cpu_supports("ssse3") && __builtin_cpu_supports("sse4.1") && __builtin_cpu_supports("sha") ? TRUE : FALSE; | ||||||
|  | #elif defined(__INTEL_COMPILER) | ||||||
|  | 	/* https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_may_i_use_cpu_feature */ | ||||||
|  | 	return _may_i_use_cpu_feature(_FEATURE_SSSE3|_FEATURE_SSE4_1|_FEATURE_SHA) ? TRUE : FALSE; | ||||||
|  | #else | ||||||
|  | 	return FALSE; | ||||||
|  | #endif | ||||||
|  | #else | ||||||
|  | 	return FALSE; | ||||||
|  | #endif | ||||||
|  | } | ||||||
							
								
								
									
										79
									
								
								src/cpu.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										79
									
								
								src/cpu.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,79 @@ | ||||||
|  | /*
 | ||||||
|  |  * Rufus: The Reliable USB Formatting Utility | ||||||
|  |  * CPU features detection | ||||||
|  |  * Copyright © 2022 Pete Batard <pete@akeo.ie> | ||||||
|  |  * Copyright © 2022 Jeffrey Walton <noloader@gmail.com> | ||||||
|  |  * | ||||||
|  |  * This program is free software: you can redistribute it and/or modify | ||||||
|  |  * it under the terms of the GNU General Public License as published by | ||||||
|  |  * the Free Software Foundation, either version 3 of the License, or | ||||||
|  |  * (at your option) any later version. | ||||||
|  |  * | ||||||
|  |  * This program is distributed in the hope that it will be useful, | ||||||
|  |  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |  * GNU General Public License for more details. | ||||||
|  |  * | ||||||
|  |  * You should have received a copy of the GNU General Public License | ||||||
|  |  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | ||||||
|  |  */ | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * Primarily added to support SHA instructions on x86 machines. | ||||||
|  |  * SHA acceleration is becoming as ubiquitous as AES acceleration. | ||||||
|  |  * SHA support was introduced in Intel Goldmont architecture, like | ||||||
|  |  * Celeron J3455 and Pentium J4205. The instructions are now present | ||||||
|  |  * in AMD Ryzen 3 (Zen architecture) and above, and Intel Core | ||||||
|  |  * 10th-gen processors (Ice Lake), 11th-gen processors (Rocket Lake) | ||||||
|  |  * and above. | ||||||
|  |  * | ||||||
|  |  * Typical benchmarks for x86 SHA acceleration is about a 6x to 10x | ||||||
|  |  * speedup over a C/C++ implementation. The rough measurements are | ||||||
|  |  * 1.0 to 1.8 cpb for SHA-1, and 1.5 to 2.5 cpb for SHA-256. On a | ||||||
|  |  * Celeron J3455, that's 1.1 GB/s for SHA-1 and 800 MB/s for SHA-256. | ||||||
|  |  * On a 10th-gen Core i5, that's about 1.65 GB/s for SHA-1 and about | ||||||
|  |  * 1.3 GB/s for SHA-256. | ||||||
|  |  */ | ||||||
|  | 
 | ||||||
|  | #include "rufus.h" | ||||||
|  | 
 | ||||||
|  | #pragma once | ||||||
|  | 
 | ||||||
|  | #ifdef _MSC_VER | ||||||
|  | #define RUFUS_MSC_VERSION (_MSC_VER) | ||||||
|  | #if (RUFUS_MSC_VERSION < 1900) | ||||||
|  | #error "Your compiler is too old to build this application" | ||||||
|  | #endif | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | #if defined(__GNUC__) | ||||||
|  | #define RUFUS_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) | ||||||
|  | #if (RUFUS_GCC_VERSION < 40900) | ||||||
|  | #error "Your compiler is too old to build this application" | ||||||
|  | #endif | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | #ifdef __INTEL_COMPILER | ||||||
|  | #define RUFUS_INTEL_VERSION (__INTEL_COMPILER) | ||||||
|  | #if (RUFUS_INTEL_VERSION < 1600) | ||||||
|  | #error "Your compiler is too old to build this application" | ||||||
|  | #endif | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | #if defined(__clang__) | ||||||
|  | #define RUFUS_CLANG_VERSION (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__) | ||||||
|  | #if (RUFUS_CLANG_VERSION < 30400) | ||||||
|  | #error "Your compiler is too old to build this application" | ||||||
|  | #endif | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | #if (defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__i386) || \ | ||||||
|  |      defined(_X86_) || defined(__I86__) || defined(__x86_64__)) | ||||||
|  | #define CPU_X86_SHA1_ACCELERATION 1 | ||||||
|  | #define CPU_X86_SHA256_ACCELERATION 1 | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | extern BOOL cpu_has_sha1_accel, cpu_has_sha256_accel; | ||||||
|  | 
 | ||||||
|  | extern BOOL DetectSHA1Acceleration(void); | ||||||
|  | extern BOOL DetectSHA256Acceleration(void); | ||||||
|  | @ -428,7 +428,7 @@ uint64_t DownloadToFileOrBuffer(const char* url, const char* file, BYTE** buffer | ||||||
| 		uprintf("Unable to retrieve file length: %s", WinInetErrorString()); | 		uprintf("Unable to retrieve file length: %s", WinInetErrorString()); | ||||||
| 		goto out; | 		goto out; | ||||||
| 	} | 	} | ||||||
| 	total_size = (uint64_t)atoll(strsize); | 	total_size = strtoull(strsize, NULL, 10); | ||||||
| 	if (hProgressDialog != NULL) { | 	if (hProgressDialog != NULL) { | ||||||
| 		char msg[128]; | 		char msg[128]; | ||||||
| 		uprintf("File length: %s", SizeToHumanReadable(total_size, FALSE, FALSE)); | 		uprintf("File length: %s", SizeToHumanReadable(total_size, FALSE, FALSE)); | ||||||
|  |  | ||||||
							
								
								
									
										12
									
								
								src/rufus.c
									
										
									
									
									
								
							
							
						
						
									
										12
									
								
								src/rufus.c
									
										
									
									
									
								
							|  | @ -47,6 +47,7 @@ | ||||||
| 
 | 
 | ||||||
| #include "ui.h" | #include "ui.h" | ||||||
| #include "re.h" | #include "re.h" | ||||||
|  | #include "cpu.h" | ||||||
| #include "vhd.h" | #include "vhd.h" | ||||||
| #include "wue.h" | #include "wue.h" | ||||||
| #include "drive.h" | #include "drive.h" | ||||||
|  | @ -2478,7 +2479,7 @@ static INT_PTR CALLBACK MainCallback(HWND hDlg, UINT message, WPARAM wParam, LPA | ||||||
| 					break; | 					break; | ||||||
| 				} | 				} | ||||||
| 				GetWindowTextA(GetDlgItem(hDlg, IDC_PERSISTENCE_SIZE), tmp, sizeof(tmp)); | 				GetWindowTextA(GetDlgItem(hDlg, IDC_PERSISTENCE_SIZE), tmp, sizeof(tmp)); | ||||||
| 				lPos = atol(tmp); | 				lPos = strtol(tmp, NULL, 10); | ||||||
| 				persistence_unit_selection = ComboBox_GetCurSel(GetDlgItem(hDlg, IDC_PERSISTENCE_UNITS)); | 				persistence_unit_selection = ComboBox_GetCurSel(GetDlgItem(hDlg, IDC_PERSISTENCE_UNITS)); | ||||||
| 				persistence_size = lPos * MB; | 				persistence_size = lPos * MB; | ||||||
| 				for (i = 0; i < persistence_unit_selection; i++) | 				for (i = 0; i < persistence_unit_selection; i++) | ||||||
|  | @ -2520,7 +2521,7 @@ static INT_PTR CALLBACK MainCallback(HWND hDlg, UINT message, WPARAM wParam, LPA | ||||||
| 			if (ComboBox_GetCurSel(GetDlgItem(hDlg, IDC_PERSISTENCE_UNITS)) == persistence_unit_selection) | 			if (ComboBox_GetCurSel(GetDlgItem(hDlg, IDC_PERSISTENCE_UNITS)) == persistence_unit_selection) | ||||||
| 				break; | 				break; | ||||||
| 			GetWindowTextA(GetDlgItem(hMainDialog, IDC_PERSISTENCE_SIZE), tmp, sizeof(tmp)); | 			GetWindowTextA(GetDlgItem(hMainDialog, IDC_PERSISTENCE_SIZE), tmp, sizeof(tmp)); | ||||||
| 			persistence_size = atol(tmp) * MB; | 			persistence_size = strtol(tmp, NULL, 10) * MB; | ||||||
| 			for (i = 0; i < persistence_unit_selection; i++) | 			for (i = 0; i < persistence_unit_selection; i++) | ||||||
| 				persistence_size *= 1024; | 				persistence_size *= 1024; | ||||||
| 			persistence_unit_selection = ComboBox_GetCurSel(GetDlgItem(hDlg, IDC_PERSISTENCE_UNITS)); | 			persistence_unit_selection = ComboBox_GetCurSel(GetDlgItem(hDlg, IDC_PERSISTENCE_UNITS)); | ||||||
|  | @ -3688,6 +3689,10 @@ skip_args_processing: | ||||||
| 			uprintf("Failed to enable AutoMount"); | 			uprintf("Failed to enable AutoMount"); | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
|  | 	// Detect CPU acceleration for SHA-1/SHA-256
 | ||||||
|  | 	cpu_has_sha1_accel = DetectSHA1Acceleration(); | ||||||
|  | 	cpu_has_sha256_accel = DetectSHA256Acceleration(); | ||||||
|  | 
 | ||||||
| relaunch: | relaunch: | ||||||
| 	ubprintf("Localization set to '%s'", selected_locale->txt[0]); | 	ubprintf("Localization set to '%s'", selected_locale->txt[0]); | ||||||
| 	right_to_left_mode = ((selected_locale->ctrl_id) & LOC_RIGHT_TO_LEFT); | 	right_to_left_mode = ((selected_locale->ctrl_id) & LOC_RIGHT_TO_LEFT); | ||||||
|  | @ -3774,7 +3779,8 @@ relaunch: | ||||||
| 			SendMessage(hMainDialog, WM_COMMAND, IDC_LOG, 0); | 			SendMessage(hMainDialog, WM_COMMAND, IDC_LOG, 0); | ||||||
| 			continue; | 			continue; | ||||||
| 		} | 		} | ||||||
| #if defined(_DEBUG) || defined(TEST) | #if defined(_DEBUG) || defined(TEST) || defined(ALPHA) | ||||||
|  | extern int TestChecksum(void); | ||||||
| 		// Ctrl-T => Alternate Test mode that doesn't require a full rebuild
 | 		// Ctrl-T => Alternate Test mode that doesn't require a full rebuild
 | ||||||
| 		if ((ctrl_without_focus || ((GetKeyState(VK_CONTROL) & 0x8000) && (msg.message == WM_KEYDOWN))) | 		if ((ctrl_without_focus || ((GetKeyState(VK_CONTROL) & 0x8000) && (msg.message == WM_KEYDOWN))) | ||||||
| 			&& (msg.wParam == 'T')) { | 			&& (msg.wParam == 'T')) { | ||||||
|  |  | ||||||
							
								
								
									
										12
									
								
								src/rufus.rc
									
										
									
									
									
								
							
							
						
						
									
										12
									
								
								src/rufus.rc
									
										
									
									
									
								
							|  | @ -33,7 +33,7 @@ LANGUAGE LANG_NEUTRAL, SUBLANG_NEUTRAL | ||||||
| IDD_DIALOG DIALOGEX 12, 12, 232, 326 | IDD_DIALOG DIALOGEX 12, 12, 232, 326 | ||||||
| STYLE DS_SETFONT | DS_MODALFRAME | DS_CENTER | WS_MINIMIZEBOX | WS_POPUP | WS_CAPTION | WS_SYSMENU | STYLE DS_SETFONT | DS_MODALFRAME | DS_CENTER | WS_MINIMIZEBOX | WS_POPUP | WS_CAPTION | WS_SYSMENU | ||||||
| EXSTYLE WS_EX_ACCEPTFILES | EXSTYLE WS_EX_ACCEPTFILES | ||||||
| CAPTION "Rufus 3.21.1949" | CAPTION "Rufus 3.22.1952" | ||||||
| FONT 9, "Segoe UI Symbol", 400, 0, 0x0 | FONT 9, "Segoe UI Symbol", 400, 0, 0x0 | ||||||
| BEGIN | BEGIN | ||||||
|     LTEXT           "Drive Properties",IDS_DRIVE_PROPERTIES_TXT,8,6,53,12,NOT WS_GROUP |     LTEXT           "Drive Properties",IDS_DRIVE_PROPERTIES_TXT,8,6,53,12,NOT WS_GROUP | ||||||
|  | @ -396,8 +396,8 @@ END | ||||||
| // | // | ||||||
| 
 | 
 | ||||||
| VS_VERSION_INFO VERSIONINFO | VS_VERSION_INFO VERSIONINFO | ||||||
|  FILEVERSION 3,21,1949,0 |  FILEVERSION 3,22,1952,0 | ||||||
|  PRODUCTVERSION 3,21,1949,0 |  PRODUCTVERSION 3,22,1952,0 | ||||||
|  FILEFLAGSMASK 0x3fL |  FILEFLAGSMASK 0x3fL | ||||||
| #ifdef _DEBUG | #ifdef _DEBUG | ||||||
|  FILEFLAGS 0x1L |  FILEFLAGS 0x1L | ||||||
|  | @ -415,13 +415,13 @@ BEGIN | ||||||
|             VALUE "Comments", "https://rufus.ie" |             VALUE "Comments", "https://rufus.ie" | ||||||
|             VALUE "CompanyName", "Akeo Consulting" |             VALUE "CompanyName", "Akeo Consulting" | ||||||
|             VALUE "FileDescription", "Rufus" |             VALUE "FileDescription", "Rufus" | ||||||
|             VALUE "FileVersion", "3.21.1949" |             VALUE "FileVersion", "3.22.1952" | ||||||
|             VALUE "InternalName", "Rufus" |             VALUE "InternalName", "Rufus" | ||||||
|             VALUE "LegalCopyright", "© 2011-2022 Pete Batard (GPL v3)" |             VALUE "LegalCopyright", "© 2011-2022 Pete Batard (GPL v3)" | ||||||
|             VALUE "LegalTrademarks", "https://www.gnu.org/licenses/gpl-3.0.html" |             VALUE "LegalTrademarks", "https://www.gnu.org/licenses/gpl-3.0.html" | ||||||
|             VALUE "OriginalFilename", "rufus-3.21.exe" |             VALUE "OriginalFilename", "rufus-3.22.exe" | ||||||
|             VALUE "ProductName", "Rufus" |             VALUE "ProductName", "Rufus" | ||||||
|             VALUE "ProductVersion", "3.21.1949" |             VALUE "ProductVersion", "3.22.1952" | ||||||
|         END |         END | ||||||
|     END |     END | ||||||
|     BLOCK "VarFileInfo" |     BLOCK "VarFileInfo" | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue