Automatic detection of CPU capabilities

This commit is contained in:
tevador 2019-10-08 23:09:35 +02:00
parent eda3603bf3
commit e0484dfb99
8 changed files with 232 additions and 28 deletions

View file

@ -34,6 +34,7 @@ src/argon2_ref.c
src/argon2_ssse3.c src/argon2_ssse3.c
src/argon2_avx2.c src/argon2_avx2.c
src/bytecode_machine.cpp src/bytecode_machine.cpp
src/cpu.cpp
src/dataset.cpp src/dataset.cpp
src/soft_aes.cpp src/soft_aes.cpp
src/virtual_memory.cpp src/virtual_memory.cpp
@ -132,6 +133,13 @@ if (ARM_ID STREQUAL "aarch64" OR ARM_ID STREQUAL "arm64" OR ARM_ID STREQUAL "arm
# cheat because cmake and ccache hate each other # cheat because cmake and ccache hate each other
set_property(SOURCE src/jit_compiler_a64_static.S PROPERTY LANGUAGE C) set_property(SOURCE src/jit_compiler_a64_static.S PROPERTY LANGUAGE C)
# Probe for <asm/hwcap.h>. When it is found, HAVE_HWCAP is defined for all
# sources; src/cpu.cpp uses that definition to include <sys/auxv.h> and
# <asm/hwcap.h> and to query AES support via getauxval(AT_HWCAP).
# NOTE(review): only asm/hwcap.h is probed, while cpu.cpp also includes
# sys/auxv.h under the same definition - confirm both exist on all targets.
include(CheckIncludeFile)
check_include_file(asm/hwcap.h HAVE_HWCAP)
if(HAVE_HWCAP)
add_definitions(-DHAVE_HWCAP)
endif()
if(ARCH STREQUAL "native") if(ARCH STREQUAL "native")
add_flag("-march=native") add_flag("-march=native")
else() else()

72
src/cpu.cpp Normal file
View file

@ -0,0 +1,72 @@
/*
Copyright (c) 2019, tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "cpu.hpp"
#if defined(_M_X64) || defined(__x86_64__)
#define HAVE_CPUID
#ifdef _WIN32
#include <intrin.h>
#define cpuid(info, x) __cpuidex(info, x, 0)
#else //GCC
#include <cpuid.h>
// Executes the CPUID instruction for leaf InfoType (sub-leaf 0) and stores
// the resulting EAX, EBX, ECX and EDX values in info[0..3].
// Declared static: this helper is private to this translation unit, and a
// generic name like "cpuid" must not be exported from the library where it
// could collide with a symbol of the same name in the host application.
static void cpuid(int info[4], int InfoType) {
	__cpuid_count(InfoType, 0, info[0], info[1], info[2], info[3]);
}
#endif
#endif
#if defined(HAVE_HWCAP)
#include <sys/auxv.h>
#include <asm/hwcap.h>
#endif
namespace randomx {

#ifdef HAVE_CPUID
	// Returns the low 32 bits of extended control register XCR0.
	// Must only be called when CPUID.1:ECX.OSXSAVE is set; otherwise the
	// XGETBV instruction is not available and raises an invalid-opcode fault.
	static unsigned readXcr0() {
#if defined(_MSC_VER) && !defined(__clang__)
		return static_cast<unsigned>(_xgetbv(0));
#else
		unsigned eax, edx;
		//XGETBV emitted as raw bytes so that assemblers without the mnemonic still work
		__asm__ __volatile__(".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c"(0));
		(void)edx;
		return eax;
#endif
	}
#endif

	// Detects the CPU features used by RandomX at run time.
	// All features start out as "not available" and are only switched on when
	// the platform-specific probe below positively reports them, so platforms
	// without a probe simply report no optional features.
	Cpu::Cpu() : aes_(false), ssse3_(false), avx2_(false) {
#ifdef HAVE_CPUID
		int info[4];
		cpuid(info, 0);
		const int maxLeaf = info[0]; //highest supported standard CPUID leaf
		bool osSavesYmm = false;
		if (maxLeaf >= 0x00000001) {
			cpuid(info, 0x00000001);
			ssse3_ = (info[2] & (1 << 9)) != 0;  //CPUID.1:ECX[9]  = SSSE3
			aes_ = (info[2] & (1 << 25)) != 0;   //CPUID.1:ECX[25] = AES-NI
			//AVX2 code may only run if the OS saves/restores the YMM registers:
			//OSXSAVE (ECX[27]) must be set and XCR0 must have both the
			//SSE (bit 1) and AVX (bit 2) state bits enabled.
			const bool osxsave = (info[2] & (1 << 27)) != 0;
			osSavesYmm = osxsave && (readXcr0() & 0x6) == 0x6;
		}
		if (maxLeaf >= 0x00000007) {
			cpuid(info, 0x00000007);
			//CPUID.7.0:EBX[5] = AVX2; only usable together with OS support
			avx2_ = osSavesYmm && (info[1] & (1 << 5)) != 0;
		}
#elif defined(__aarch64__) && defined(HWCAP_AES)
		//getauxval returns unsigned long; keep the bit mask unsigned
		const unsigned long hwcaps = getauxval(AT_HWCAP);
		aes_ = (hwcaps & HWCAP_AES) != 0;
#endif
		//TODO POWER8 AES
	}
}

49
src/cpu.hpp Normal file
View file

@ -0,0 +1,49 @@
/*
Copyright (c) 2019, tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
namespace randomx {

	// Snapshot of the optional CPU features RandomX can take advantage of.
	// The features are probed once, in the constructor (defined in cpu.cpp),
	// and can then be queried through the accessors below.
	class Cpu {
	public:
		// Runs the feature detection for the current machine.
		Cpu();

		// True if hardware AES instructions were detected.
		bool hasAes() const { return aes_; }

		// True if the SSSE3 instruction set was detected.
		bool hasSsse3() const { return ssse3_; }

		// True if the AVX2 instruction set was detected.
		bool hasAvx2() const { return avx2_; }

	private:
		bool aes_;
		bool ssse3_;
		bool avx2_;
	};

}

View file

@ -33,11 +33,27 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "vm_compiled.hpp" #include "vm_compiled.hpp"
#include "vm_compiled_light.hpp" #include "vm_compiled_light.hpp"
#include "blake2/blake2.h" #include "blake2/blake2.h"
#include "cpu.hpp"
#include <cassert> #include <cassert>
#include <limits> #include <limits>
extern "C" { extern "C" {
/* Assembles the flag set recommended for the machine the code is running on:
   JIT when a compiler back end is available, hardware AES when the CPU has
   it, and the fastest Argon2 implementation that is both compiled in and
   supported by the CPU (AVX2 is preferred over SSSE3). */
randomx_flags randomx_get_flags() {
	randomx_flags recommended = RANDOMX_FLAG_DEFAULT;
	if (RANDOMX_HAVE_COMPILER) {
		recommended = RANDOMX_FLAG_JIT;
	}

	const randomx::Cpu detected;

	if (detected.hasAes()) {
		recommended |= RANDOMX_FLAG_HARD_AES;
	}

	const bool useAvx2 = randomx_argon2_impl_avx2() != nullptr && detected.hasAvx2();
	if (useAvx2) {
		recommended |= RANDOMX_FLAG_ARGON2_AVX2;
	}
	else {
		const bool useSsse3 = randomx_argon2_impl_ssse3() != nullptr && detected.hasSsse3();
		if (useSsse3) {
			recommended |= RANDOMX_FLAG_ARGON2_SSSE3;
		}
	}

	return recommended;
}
randomx_cache *randomx_alloc_cache(randomx_flags flags) { randomx_cache *randomx_alloc_cache(randomx_flags flags) {
randomx_cache *cache = nullptr; randomx_cache *cache = nullptr;

View file

@ -54,10 +54,38 @@ typedef struct randomx_dataset randomx_dataset;
typedef struct randomx_cache randomx_cache; typedef struct randomx_cache randomx_cache;
typedef struct randomx_vm randomx_vm; typedef struct randomx_vm randomx_vm;
#if defined(__cplusplus) #if defined(__cplusplus)
/* CONSTEXPR expands to constexpr when the compiler advertises support for it
   and to nothing otherwise. */
#if defined(__cpp_constexpr)
#define CONSTEXPR constexpr
#else
#define CONSTEXPR
#endif

/* Combines two flag sets (bitwise OR) while keeping the randomx_flags type. */
inline CONSTEXPR randomx_flags operator |(randomx_flags lhs, randomx_flags rhs) {
	return static_cast<randomx_flags>(static_cast<int>(lhs) | static_cast<int>(rhs));
}

/* Intersects two flag sets (bitwise AND) while keeping the randomx_flags type. */
inline CONSTEXPR randomx_flags operator &(randomx_flags lhs, randomx_flags rhs) {
	return static_cast<randomx_flags>(static_cast<int>(lhs) & static_cast<int>(rhs));
}

/* Adds the flags in rhs to lhs in place and returns a reference to lhs. */
inline randomx_flags& operator |=(randomx_flags& lhs, randomx_flags rhs) {
	lhs = lhs | rhs;
	return lhs;
}
extern "C" { extern "C" {
#endif #endif
/**
 * Detects the capabilities of the current machine and returns the matching flags.
 *
 * @return The recommended flags to be used on the current machine.
 *         The result may contain:
 *            RANDOMX_FLAG_JIT          (if a JIT compiler is available in this build)
 *            RANDOMX_FLAG_HARD_AES     (if the CPU reports hardware AES)
 *            RANDOMX_FLAG_ARGON2_AVX2 or RANDOMX_FLAG_ARGON2_SSSE3
 *                                      (at most one; AVX2 takes precedence)
 *         Does not include:
 *            RANDOMX_FLAG_LARGE_PAGES
 *            RANDOMX_FLAG_FULL_MEM
 *            RANDOMX_FLAG_SECURE
 *         These flags must be added manually if desired.
 */
RANDOMX_EXPORT randomx_flags randomx_get_flags(void);
/** /**
* Creates a randomx_cache structure and allocates memory for RandomX Cache. * Creates a randomx_cache structure and allocates memory for RandomX Cache.
* *

View file

@ -93,6 +93,7 @@ void printUsage(const char* executable) {
std::cout << " --seed S seed for cache initialization (default: 0)" << std::endl; std::cout << " --seed S seed for cache initialization (default: 0)" << std::endl;
std::cout << " --ssse3 use optimized Argon2 for SSSE3 CPUs" << std::endl; std::cout << " --ssse3 use optimized Argon2 for SSSE3 CPUs" << std::endl;
std::cout << " --avx2 use optimized Argon2 for AVX2 CPUs" << std::endl; std::cout << " --avx2 use optimized Argon2 for AVX2 CPUs" << std::endl;
std::cout << " --auto select the best options for the current CPU" << std::endl;
} }
struct MemoryException : public std::exception { struct MemoryException : public std::exception {
@ -130,7 +131,7 @@ void mine(randomx_vm* vm, std::atomic<uint32_t>& atomicNonce, AtomicHash& result
} }
int main(int argc, char** argv) { int main(int argc, char** argv) {
bool softAes, miningMode, verificationMode, help, largePages, jit, secure, ssse3, avx2; bool softAes, miningMode, verificationMode, help, largePages, jit, secure, ssse3, avx2, autoFlags;
int noncesCount, threadCount, initThreadCount; int noncesCount, threadCount, initThreadCount;
uint64_t threadAffinity; uint64_t threadAffinity;
int32_t seedValue; int32_t seedValue;
@ -153,6 +154,7 @@ int main(int argc, char** argv) {
readOption("--secure", argc, argv, secure); readOption("--secure", argc, argv, secure);
readOption("--ssse3", argc, argv, ssse3); readOption("--ssse3", argc, argv, ssse3);
readOption("--avx2", argc, argv, avx2); readOption("--avx2", argc, argv, avx2);
readOption("--auto", argc, argv, autoFlags);
store32(&seed, seedValue); store32(&seed, seedValue);
@ -169,31 +171,56 @@ int main(int argc, char** argv) {
std::vector<std::thread> threads; std::vector<std::thread> threads;
randomx_dataset* dataset; randomx_dataset* dataset;
randomx_cache* cache; randomx_cache* cache;
randomx_flags flags = RANDOMX_FLAG_DEFAULT; randomx_flags flags;
if (autoFlags) {
initThreadCount = std::thread::hardware_concurrency();
flags = randomx_get_flags();
}
else {
flags = RANDOMX_FLAG_DEFAULT;
if (ssse3) { if (ssse3) {
flags = (randomx_flags)(flags | RANDOMX_FLAG_ARGON2_SSSE3); flags |= RANDOMX_FLAG_ARGON2_SSSE3;
}
if (avx2) {
flags |= RANDOMX_FLAG_ARGON2_AVX2;
}
if (!softAes) {
flags |= RANDOMX_FLAG_HARD_AES;
}
if (jit) {
flags |= RANDOMX_FLAG_JIT;
}
}
if (largePages) {
flags |= RANDOMX_FLAG_LARGE_PAGES;
}
if (miningMode) {
flags |= RANDOMX_FLAG_FULL_MEM;
}
if (secure) {
flags |= RANDOMX_FLAG_SECURE;
}
if (flags & RANDOMX_FLAG_ARGON2_SSSE3) {
std::cout << " - Argon2 implementation: SSSE3" << std::endl; std::cout << " - Argon2 implementation: SSSE3" << std::endl;
} }
if (avx2) { if (flags & RANDOMX_FLAG_ARGON2_AVX2) {
flags = (randomx_flags)(flags | RANDOMX_FLAG_ARGON2_AVX2);
std::cout << " - Argon2 implementation: AVX2" << std::endl; std::cout << " - Argon2 implementation: AVX2" << std::endl;
} }
if (miningMode) { if (flags & RANDOMX_FLAG_FULL_MEM) {
flags = (randomx_flags)(flags | RANDOMX_FLAG_FULL_MEM);
std::cout << " - full memory mode (2080 MiB)" << std::endl; std::cout << " - full memory mode (2080 MiB)" << std::endl;
} }
else { else {
std::cout << " - light memory mode (256 MiB)" << std::endl; std::cout << " - light memory mode (256 MiB)" << std::endl;
} }
if (jit) { if (flags & RANDOMX_FLAG_JIT) {
flags = (randomx_flags)(flags | RANDOMX_FLAG_JIT);
std::cout << " - JIT compiled mode "; std::cout << " - JIT compiled mode ";
if (secure) { if (flags & RANDOMX_FLAG_SECURE) {
flags = (randomx_flags)(flags | RANDOMX_FLAG_SECURE);
std::cout << "(secure)"; std::cout << "(secure)";
} }
std::cout << std::endl; std::cout << std::endl;
@ -202,16 +229,14 @@ int main(int argc, char** argv) {
std::cout << " - interpreted mode" << std::endl; std::cout << " - interpreted mode" << std::endl;
} }
if (softAes) { if (flags & RANDOMX_FLAG_HARD_AES) {
std::cout << " - software AES mode" << std::endl;
}
else {
flags = (randomx_flags)(flags | RANDOMX_FLAG_HARD_AES);
std::cout << " - hardware AES mode" << std::endl; std::cout << " - hardware AES mode" << std::endl;
} }
else {
std::cout << " - software AES mode" << std::endl;
}
if (largePages) { if (flags & RANDOMX_FLAG_LARGE_PAGES) {
flags = (randomx_flags)(flags | RANDOMX_FLAG_LARGE_PAGES);
std::cout << " - large pages mode" << std::endl; std::cout << " - large pages mode" << std::endl;
} }
else { else {
@ -229,10 +254,10 @@ int main(int argc, char** argv) {
try { try {
randomx::selectArgonImpl(flags); //just to check if flags are valid randomx::selectArgonImpl(flags); //just to check if flags are valid
if (jit && !RANDOMX_HAVE_COMPILER) { if ((flags & RANDOMX_FLAG_JIT) && !RANDOMX_HAVE_COMPILER) {
throw std::runtime_error("JIT compilation is not supported on this platform. Try without --jit"); throw std::runtime_error("JIT compilation is not supported on this platform. Try without --jit");
} }
if (!jit && RANDOMX_HAVE_COMPILER) { if (!(flags & RANDOMX_FLAG_JIT) && RANDOMX_HAVE_COMPILER) {
std::cout << "WARNING: You are using the interpreter mode. Use --jit for optimal performance." << std::endl; std::cout << "WARNING: You are using the interpreter mode. Use --jit for optimal performance." << std::endl;
} }
@ -273,7 +298,7 @@ int main(int argc, char** argv) {
for (int i = 0; i < threadCount; ++i) { for (int i = 0; i < threadCount; ++i) {
randomx_vm *vm = randomx_create_vm(flags, cache, dataset); randomx_vm *vm = randomx_create_vm(flags, cache, dataset);
if (vm == nullptr) { if (vm == nullptr) {
if (!softAes) { if ((flags & RANDOMX_FLAG_HARD_AES)) {
throw std::runtime_error("Cannot create VM with the selected options. Try using --softAes"); throw std::runtime_error("Cannot create VM with the selected options. Try using --softAes");
} }
if (largePages) { if (largePages) {
@ -290,9 +315,6 @@ int main(int argc, char** argv) {
int cpuid = -1; int cpuid = -1;
if (threadAffinity) if (threadAffinity)
cpuid = cpuid_from_mask(threadAffinity, i); cpuid = cpuid_from_mask(threadAffinity, i);
if (softAes)
threads.push_back(std::thread(&mine, vms[i], std::ref(atomicNonce), std::ref(result), noncesCount, i, cpuid));
else
threads.push_back(std::thread(&mine, vms[i], std::ref(atomicNonce), std::ref(result), noncesCount, i, cpuid)); threads.push_back(std::thread(&mine, vms[i], std::ref(atomicNonce), std::ref(result), noncesCount, i, cpuid));
} }
for (unsigned i = 0; i < threads.size(); ++i) { for (unsigned i = 0; i < threads.size(); ++i) {

View file

@ -114,7 +114,6 @@
<ConformanceMode>true</ConformanceMode> <ConformanceMode>true</ConformanceMode>
<AssemblerOutput>AssemblyCode</AssemblerOutput> <AssemblerOutput>AssemblyCode</AssemblerOutput>
<PreprocessorDefinitions>_MBCS;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions> <PreprocessorDefinitions>_MBCS;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
</ClCompile> </ClCompile>
<Link> <Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding> <EnableCOMDATFolding>true</EnableCOMDATFolding>
@ -132,7 +131,9 @@ SET ERRORLEVEL = 0</Command>
</ItemDefinitionGroup> </ItemDefinitionGroup>
<ItemGroup> <ItemGroup>
<ClCompile Include="..\src\allocator.cpp" /> <ClCompile Include="..\src\allocator.cpp" />
<ClCompile Include="..\src\argon2_avx2.c" /> <ClCompile Include="..\src\argon2_avx2.c">
<EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
</ClCompile>
<ClCompile Include="..\src\argon2_core.c" /> <ClCompile Include="..\src\argon2_core.c" />
<ClCompile Include="..\src\argon2_ref.c" /> <ClCompile Include="..\src\argon2_ref.c" />
<ClCompile Include="..\src\argon2_ssse3.c" /> <ClCompile Include="..\src\argon2_ssse3.c" />
@ -140,6 +141,7 @@ SET ERRORLEVEL = 0</Command>
<ClCompile Include="..\src\blake2_generator.cpp" /> <ClCompile Include="..\src\blake2_generator.cpp" />
<ClCompile Include="..\src\blake2\blake2b.c" /> <ClCompile Include="..\src\blake2\blake2b.c" />
<ClCompile Include="..\src\bytecode_machine.cpp" /> <ClCompile Include="..\src\bytecode_machine.cpp" />
<ClCompile Include="..\src\cpu.cpp" />
<ClCompile Include="..\src\vm_compiled_light.cpp" /> <ClCompile Include="..\src\vm_compiled_light.cpp" />
<ClCompile Include="..\src\vm_compiled.cpp" /> <ClCompile Include="..\src\vm_compiled.cpp" />
<ClCompile Include="..\src\dataset.cpp" /> <ClCompile Include="..\src\dataset.cpp" />
@ -173,6 +175,7 @@ SET ERRORLEVEL = 0</Command>
<ClInclude Include="..\src\blake2_generator.hpp" /> <ClInclude Include="..\src\blake2_generator.hpp" />
<ClInclude Include="..\src\bytecode_machine.hpp" /> <ClInclude Include="..\src\bytecode_machine.hpp" />
<ClInclude Include="..\src\common.hpp" /> <ClInclude Include="..\src\common.hpp" />
<ClInclude Include="..\src\cpu.hpp" />
<ClInclude Include="..\src\jit_compiler.hpp" /> <ClInclude Include="..\src\jit_compiler.hpp" />
<ClInclude Include="..\src\jit_compiler_a64.hpp" /> <ClInclude Include="..\src\jit_compiler_a64.hpp" />
<ClInclude Include="..\src\jit_compiler_fallback.hpp" /> <ClInclude Include="..\src\jit_compiler_fallback.hpp" />

View file

@ -87,6 +87,9 @@
<ClCompile Include="..\src\argon2_ssse3.c"> <ClCompile Include="..\src\argon2_ssse3.c">
<Filter>Source Files</Filter> <Filter>Source Files</Filter>
</ClCompile> </ClCompile>
<ClCompile Include="..\src\cpu.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ClInclude Include="..\src\argon2.h"> <ClInclude Include="..\src\argon2.h">
@ -197,6 +200,9 @@
<ClInclude Include="..\src\blake2\blamka-round-ssse3.h"> <ClInclude Include="..\src\blake2\blamka-round-ssse3.h">
<Filter>Header Files</Filter> <Filter>Header Files</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="..\src\cpu.hpp">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<MASM Include="..\src\jit_compiler_x86_static.asm"> <MASM Include="..\src\jit_compiler_x86_static.asm">