use SSSE3 consistently as opposed to SSE3

This commit is contained in:
tevador 2019-10-06 22:10:02 +02:00
parent 900a936816
commit cce53cb582
10 changed files with 25 additions and 25 deletions

View file

@ -31,7 +31,7 @@ cmake_minimum_required(VERSION 2.8.7)
set (randomx_sources set (randomx_sources
src/aes_hash.cpp src/aes_hash.cpp
src/argon2_ref.c src/argon2_ref.c
src/argon2_sse3.c src/argon2_ssse3.c
src/argon2_avx2.c src/argon2_avx2.c
src/bytecode_machine.cpp src/bytecode_machine.cpp
src/dataset.cpp src/dataset.cpp
@ -107,7 +107,7 @@ if (ARCH_ID STREQUAL "x86_64" OR ARCH_ID STREQUAL "x86-64" OR ARCH_ID STREQUAL "
add_flag("-maes") add_flag("-maes")
check_c_compiler_flag(-mssse3 HAVE_SSSE3) check_c_compiler_flag(-mssse3 HAVE_SSSE3)
if(HAVE_SSSE3) if(HAVE_SSSE3)
set_source_files_properties(src/argon2_sse3.c COMPILE_FLAGS -mssse3) set_source_files_properties(src/argon2_ssse3.c COMPILE_FLAGS -mssse3)
endif() endif()
check_c_compiler_flag(-mavx2 HAVE_AVX2) check_c_compiler_flag(-mavx2 HAVE_AVX2)
if(HAVE_AVX2) if(HAVE_AVX2)

View file

@ -253,7 +253,7 @@ extern "C" {
void randomx_argon2_fill_segment_ref(const argon2_instance_t* instance, void randomx_argon2_fill_segment_ref(const argon2_instance_t* instance,
argon2_position_t position); argon2_position_t position);
randomx_argon2_impl *randomx_argon2_impl_sse3(); randomx_argon2_impl *randomx_argon2_impl_ssse3();
randomx_argon2_impl *randomx_argon2_impl_avx2(); randomx_argon2_impl *randomx_argon2_impl_avx2();
#if defined(__cplusplus) #if defined(__cplusplus)

View file

@ -42,12 +42,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define __SSSE3__ #define __SSSE3__
#endif #endif
void randomx_argon2_fill_segment_sse3(const argon2_instance_t* instance, void randomx_argon2_fill_segment_ssse3(const argon2_instance_t* instance,
argon2_position_t position); argon2_position_t position);
randomx_argon2_impl* randomx_argon2_impl_sse3() { randomx_argon2_impl* randomx_argon2_impl_ssse3() {
#if defined(__SSSE3__) #if defined(__SSSE3__)
return &randomx_argon2_fill_segment_sse3; return &randomx_argon2_fill_segment_ssse3;
#endif #endif
return NULL; return NULL;
} }
@ -58,7 +58,7 @@ randomx_argon2_impl* randomx_argon2_impl_sse3() {
#include "argon2_core.h" #include "argon2_core.h"
#include "blake2/blamka-round-sse3.h" #include "blake2/blamka-round-ssse3.h"
#include "blake2/blake2-impl.h" #include "blake2/blake2-impl.h"
#include "blake2/blake2.h" #include "blake2/blake2.h"
@ -100,7 +100,7 @@ static void fill_block(__m128i* state, const block* ref_block,
} }
} }
void randomx_argon2_fill_segment_sse3(const argon2_instance_t* instance, void randomx_argon2_fill_segment_ssse3(const argon2_instance_t* instance,
argon2_position_t position) { argon2_position_t position) {
block* ref_block = NULL, * curr_block = NULL; block* ref_block = NULL, * curr_block = NULL;
block address_block, input_block; block address_block, input_block;

View file

@ -87,8 +87,8 @@ namespace randomx {
return &randomx_argon2_fill_segment_ref; return &randomx_argon2_fill_segment_ref;
} }
randomx_argon2_impl* impl = nullptr; randomx_argon2_impl* impl = nullptr;
if ((flags & RANDOMX_FLAG_ARGON2) == RANDOMX_FLAG_ARGON2_SSE3) { if ((flags & RANDOMX_FLAG_ARGON2) == RANDOMX_FLAG_ARGON2_SSSE3) {
impl = randomx_argon2_impl_sse3(); impl = randomx_argon2_impl_ssse3();
} }
if ((flags & RANDOMX_FLAG_ARGON2) == RANDOMX_FLAG_ARGON2_AVX2) { if ((flags & RANDOMX_FLAG_ARGON2) == RANDOMX_FLAG_ARGON2_AVX2) {
impl = randomx_argon2_impl_avx2(); impl = randomx_argon2_impl_avx2();

View file

@ -45,7 +45,7 @@ typedef enum {
RANDOMX_FLAG_FULL_MEM = 4, RANDOMX_FLAG_FULL_MEM = 4,
RANDOMX_FLAG_JIT = 8, RANDOMX_FLAG_JIT = 8,
RANDOMX_FLAG_SECURE = 16, RANDOMX_FLAG_SECURE = 16,
RANDOMX_FLAG_ARGON2_SSE3 = 32, RANDOMX_FLAG_ARGON2_SSSE3 = 32,
RANDOMX_FLAG_ARGON2_AVX2 = 64, RANDOMX_FLAG_ARGON2_AVX2 = 64,
RANDOMX_FLAG_ARGON2 = 96 RANDOMX_FLAG_ARGON2 = 96
} randomx_flags; } randomx_flags;
@ -66,7 +66,7 @@ extern "C" {
* RANDOMX_FLAG_JIT - create cache structure with JIT compilation support; this makes * RANDOMX_FLAG_JIT - create cache structure with JIT compilation support; this makes
* subsequent Dataset initialization faster * subsequent Dataset initialization faster
* Optionally, one of these two flags may be selected: * Optionally, one of these two flags may be selected:
* RANDOMX_FLAG_ARGON2_SSE3 - optimized Argon2 for CPUs with the SSSE3 instruction set * RANDOMX_FLAG_ARGON2_SSSE3 - optimized Argon2 for CPUs with the SSSE3 instruction set
* makes subsequent cache initialization faster * makes subsequent cache initialization faster
* RANDOMX_FLAG_ARGON2_AVX2 - optimized Argon2 for CPUs with the AVX2 instruction set * RANDOMX_FLAG_ARGON2_AVX2 - optimized Argon2 for CPUs with the AVX2 instruction set
* makes subsequent cache initialization faster * makes subsequent cache initialization faster

View file

@ -91,7 +91,7 @@ void printUsage(const char* executable) {
std::cout << " --init Q initialize dataset with Q threads (default: 1)" << std::endl; std::cout << " --init Q initialize dataset with Q threads (default: 1)" << std::endl;
std::cout << " --nonces N run N nonces (default: 1000)" << std::endl; std::cout << " --nonces N run N nonces (default: 1000)" << std::endl;
std::cout << " --seed S seed for cache initialization (default: 0)" << std::endl; std::cout << " --seed S seed for cache initialization (default: 0)" << std::endl;
std::cout << " --sse3 use optimized Argon2 for SSSE3 CPUs" << std::endl; std::cout << " --ssse3 use optimized Argon2 for SSSE3 CPUs" << std::endl;
std::cout << " --avx2 use optimized Argon2 for AVX2 CPUs" << std::endl; std::cout << " --avx2 use optimized Argon2 for AVX2 CPUs" << std::endl;
} }
@ -130,7 +130,7 @@ void mine(randomx_vm* vm, std::atomic<uint32_t>& atomicNonce, AtomicHash& result
} }
int main(int argc, char** argv) { int main(int argc, char** argv) {
bool softAes, miningMode, verificationMode, help, largePages, jit, secure, sse3, avx2; bool softAes, miningMode, verificationMode, help, largePages, jit, secure, ssse3, avx2;
int noncesCount, threadCount, initThreadCount; int noncesCount, threadCount, initThreadCount;
uint64_t threadAffinity; uint64_t threadAffinity;
int32_t seedValue; int32_t seedValue;
@ -151,7 +151,7 @@ int main(int argc, char** argv) {
readOption("--jit", argc, argv, jit); readOption("--jit", argc, argv, jit);
readOption("--help", argc, argv, help); readOption("--help", argc, argv, help);
readOption("--secure", argc, argv, secure); readOption("--secure", argc, argv, secure);
readOption("--sse3", argc, argv, sse3); readOption("--ssse3", argc, argv, ssse3);
readOption("--avx2", argc, argv, avx2); readOption("--avx2", argc, argv, avx2);
store32(&seed, seedValue); store32(&seed, seedValue);
@ -171,9 +171,9 @@ int main(int argc, char** argv) {
randomx_cache* cache; randomx_cache* cache;
randomx_flags flags = RANDOMX_FLAG_DEFAULT; randomx_flags flags = RANDOMX_FLAG_DEFAULT;
if (sse3) { if (ssse3) {
flags = (randomx_flags)(flags | RANDOMX_FLAG_ARGON2_SSE3); flags = (randomx_flags)(flags | RANDOMX_FLAG_ARGON2_SSSE3);
std::cout << " - Argon2 implementation: SSE3" << std::endl; std::cout << " - Argon2 implementation: SSSE3" << std::endl;
} }
if (avx2) { if (avx2) {

View file

@ -1018,7 +1018,7 @@ int main() {
vm = nullptr; vm = nullptr;
randomx_release_cache(cache); randomx_release_cache(cache);
cache = randomx_alloc_cache(RANDOMX_FLAG_ARGON2_SSE3); cache = randomx_alloc_cache(RANDOMX_FLAG_ARGON2_SSSE3);
runTest("Cache initialization: SSSE3", cache != nullptr && RANDOMX_ARGON_ITERATIONS == 3 && RANDOMX_ARGON_LANES == 1 && RANDOMX_ARGON_MEMORY == 262144 && stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), []() { runTest("Cache initialization: SSSE3", cache != nullptr && RANDOMX_ARGON_ITERATIONS == 3 && RANDOMX_ARGON_LANES == 1 && RANDOMX_ARGON_MEMORY == 262144 && stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), []() {
initCache("test key 000"); initCache("test key 000");

View file

@ -135,7 +135,7 @@ SET ERRORLEVEL = 0</Command>
<ClCompile Include="..\src\argon2_avx2.c" /> <ClCompile Include="..\src\argon2_avx2.c" />
<ClCompile Include="..\src\argon2_core.c" /> <ClCompile Include="..\src\argon2_core.c" />
<ClCompile Include="..\src\argon2_ref.c" /> <ClCompile Include="..\src\argon2_ref.c" />
<ClCompile Include="..\src\argon2_sse3.c" /> <ClCompile Include="..\src\argon2_ssse3.c" />
<ClCompile Include="..\src\assembly_generator_x86.cpp" /> <ClCompile Include="..\src\assembly_generator_x86.cpp" />
<ClCompile Include="..\src\blake2_generator.cpp" /> <ClCompile Include="..\src\blake2_generator.cpp" />
<ClCompile Include="..\src\blake2\blake2b.c" /> <ClCompile Include="..\src\blake2\blake2b.c" />
@ -168,7 +168,7 @@ SET ERRORLEVEL = 0</Command>
<ClInclude Include="..\src\blake2\blake2.h" /> <ClInclude Include="..\src\blake2\blake2.h" />
<ClInclude Include="..\src\blake2\blamka-round-avx2.h" /> <ClInclude Include="..\src\blake2\blamka-round-avx2.h" />
<ClInclude Include="..\src\blake2\blamka-round-ref.h" /> <ClInclude Include="..\src\blake2\blamka-round-ref.h" />
<ClInclude Include="..\src\blake2\blamka-round-sse3.h" /> <ClInclude Include="..\src\blake2\blamka-round-ssse3.h" />
<ClInclude Include="..\src\blake2\endian.h" /> <ClInclude Include="..\src\blake2\endian.h" />
<ClInclude Include="..\src\blake2_generator.hpp" /> <ClInclude Include="..\src\blake2_generator.hpp" />
<ClInclude Include="..\src\bytecode_machine.hpp" /> <ClInclude Include="..\src\bytecode_machine.hpp" />

View file

@ -81,10 +81,10 @@
<ClCompile Include="..\src\bytecode_machine.cpp"> <ClCompile Include="..\src\bytecode_machine.cpp">
<Filter>Source Files</Filter> <Filter>Source Files</Filter>
</ClCompile> </ClCompile>
<ClCompile Include="..\src\argon2_sse3.c"> <ClCompile Include="..\src\argon2_avx2.c">
<Filter>Source Files</Filter> <Filter>Source Files</Filter>
</ClCompile> </ClCompile>
<ClCompile Include="..\src\argon2_avx2.c"> <ClCompile Include="..\src\argon2_ssse3.c">
<Filter>Source Files</Filter> <Filter>Source Files</Filter>
</ClCompile> </ClCompile>
</ItemGroup> </ItemGroup>
@ -191,10 +191,10 @@
<ClInclude Include="..\src\bytecode_machine.hpp"> <ClInclude Include="..\src\bytecode_machine.hpp">
<Filter>Header Files</Filter> <Filter>Header Files</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="..\src\blake2\blamka-round-sse3.h"> <ClInclude Include="..\src\blake2\blamka-round-avx2.h">
<Filter>Header Files</Filter> <Filter>Header Files</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="..\src\blake2\blamka-round-avx2.h"> <ClInclude Include="..\src\blake2\blamka-round-ssse3.h">
<Filter>Header Files</Filter> <Filter>Header Files</Filter>
</ClInclude> </ClInclude>
</ItemGroup> </ItemGroup>