Compare commits

...

22 Commits

Author SHA1 Message Date
fuwa 89b7c02bba
rx-wow-fix-3: Revert "Increase the frequency of CBRANCH (#118)"
This reverts commit 22689eda49.
2020-07-13 08:55:05 +03:00
fuwa d2416d6157
rx-wow-fix-2: Revert "Decrease the frequency of FADD/FSUB in favor of FMUL (#77)"
This reverts commit 91cd35ff13.
2020-07-13 08:54:49 +03:00
fuwa b4ada42599
rx-wow-fix-1: AesGenerator4R 2020-07-13 08:54:35 +03:00
wowario 597b1e9c91
RandomWOW parameters 2020-07-13 08:54:21 +03:00
tevador 5ce5f4906c add --noBatch benchmark option 2020-07-04 14:57:56 +02:00
tevador 9905ec9c5a
Merge pull request #188 from cryptonote-social/master
replace hardcoded literal with its appropriate symbol
2020-06-28 16:36:40 +02:00
tevador 863765bbe6
Merge pull request #185 from tevador/pr-crosscomp
Fix windows-target cross-compilation
2020-06-28 16:36:12 +02:00
tevador a1c08a2f41
Merge pull request #187 from tevador/pr-netbsd
Fix compilation and JIT support on NetBSD

1. Disable hugepages (not supported).
2. Force W^X (required).
3. When allocating JIT memory, PROT_EXEC must be reserved
   in order to set the pages executable later.
2020-06-28 16:35:19 +02:00
tevador 708a4e50c5 Fix compilation and JIT support on NetBSD:
1. Disable hugepages (not supported).
2. Force W^X (required).
3. When allocating JIT memory, PROT_EXEC must be reserved
   in order to set the pages executable later.
2020-06-28 16:16:20 +02:00
tevador 6a4afc721f
Merge pull request #189 from tevador/pr-set-cache
Fix potential use-after-free when reallocating cache
2020-06-27 20:42:15 +02:00
tevador 32ab5dea54 fix potential use-after-free when reallocating cache 2020-06-27 20:21:06 +02:00
cryptonote-social a7733de1e7 replace hardcoded literal with its appropriate symbol 2020-06-27 09:53:46 -07:00
tevador bece0a7206 fix #184 2020-06-09 19:10:56 +02:00
tevador 7741eb1e97
Merge pull request #182 from tevador/pr-restore-fpstate
Preserve floating point state when calling randomx_calculate_hash
2020-05-16 23:19:37 +02:00
tevador 148b923f71 fix test 92 not failing properly on GCC/amd64 2020-05-06 13:48:53 +02:00
tevador 6a764e90d0 Preserve floating point state when calling randomx_calculate_hash 2020-05-06 12:42:30 +02:00
tevador ac574e3743
Merge pull request #179 from tevador/pr-hash-batch
Add a missing function to calculate a batch of hashes
2020-02-07 19:33:36 +01:00
tevador 01381ccef3 Add a missing function to calculate a batch of hashes
Add a test for batch calculation
2020-02-06 18:14:38 +01:00
tevador 913e495c53 Merge branch 'master' of git@github.com:tevador/RandomX.git 2020-02-06 18:13:52 +01:00
tevador 72ac5e49b6 Update dll project 2019-12-29 19:14:00 +01:00
tevador bbbb34757b
Add a note about building portable binaries 2019-12-26 12:32:04 +01:00
tevador a223b6b33b
Fixed an incorrect URL the the documentation 2019-12-18 12:30:49 +01:00
19 changed files with 191 additions and 71 deletions

View File

@ -48,6 +48,8 @@ cmake -DARCH=native ..
make
```
To build portable binaries, omit the `ARCH` option when executing cmake.
### Windows
On Windows, it is possible to build using MinGW (same procedure as on Linux) or using Visual Studio (solution file is provided).

View File

@ -255,7 +255,7 @@ The Scratchpad is split into 3 levels to mimic the typical CPU cache hierarchy [
|----------------|----------|----------|----------|------|
ARM Cortex A55|2|6|-|[[24](https://www.anandtech.com/show/11441/dynamiq-and-arms-new-cpus-cortex-a75-a55/4)]
|AMD Zen+|4|12|40|[[25](https://en.wikichip.org/wiki/amd/microarchitectures/zen%2B#Memory_Hierarchy)]|
|Intel Skylake|4|12|42|[[26](https://en.wikichip.org/wiki/amd/microarchitectures/zen%2B#Memory_Hierarchy)]
|Intel Skylake|4|12|42|[[26](https://en.wikichip.org/wiki/intel/microarchitectures/skylake_(client)#Memory_Hierarchy)]
The L3 cache is much larger and located further from the CPU core. As a result, its access latencies are much higher and can cause stalls in program execution.
@ -638,7 +638,7 @@ state3 = 00000000000000000000000000000000
[25] AMD Zen+ Microarchitecture - https://en.wikichip.org/wiki/amd/microarchitectures/zen%2B#Memory_Hierarchy
[26] Intel Skylake Microarchitecture - https://en.wikichip.org/wiki/amd/microarchitectures/zen%2B#Memory_Hierarchy
[26] Intel Skylake Microarchitecture - https://en.wikichip.org/wiki/intel/microarchitectures/skylake_(client)#Memory_Hierarchy
[27] Biryukov et al.: Fast and Tradeoff-Resilient Memory-Hard Functions for
Cryptocurrencies and Password Hashing - https://eprint.iacr.org/2015/430.pdf Table 2, page 8
@ -647,4 +647,4 @@ Cryptocurrencies and Password Hashing - https://eprint.iacr.org/2015/430.pdf Tab
[29] 7-Zip File archiver - https://www.7-zip.org/
[30] TestU01 library - http://simul.iro.umontreal.ca/testu01/tu01.html
[30] TestU01 library - http://simul.iro.umontreal.ca/testu01/tu01.html

View File

@ -175,10 +175,10 @@ template void fillAes1Rx4<false>(void *state, size_t outputSize, void *buffer);
//key0, key1, key2, key3 = Blake2b-512("RandomX AesGenerator4R keys 0-3")
//key4, key5, key6, key7 = Blake2b-512("RandomX AesGenerator4R keys 4-7")
#define AES_GEN_4R_KEY0 0x99e5d23f, 0x2f546d2b, 0xd1833ddb, 0x6421aadd
#define AES_GEN_4R_KEY1 0xa5dfcde5, 0x06f79d53, 0xb6913f55, 0xb20e3450
#define AES_GEN_4R_KEY2 0x171c02bf, 0x0aa4679f, 0x515e7baf, 0x5c3ed904
#define AES_GEN_4R_KEY3 0xd8ded291, 0xcd673785, 0xe78f5d08, 0x85623763
#define AES_GEN_4R_KEY0 0xcf359e95, 0x141f82b7, 0x7ffbe4a6, 0xf890465d
#define AES_GEN_4R_KEY1 0x6741ffdc, 0xbd5c5ac3, 0xfee8278a, 0x6a55c450
#define AES_GEN_4R_KEY2 0x3d324aac, 0xa7279ad2, 0xd524fde4, 0x114c47a4
#define AES_GEN_4R_KEY3 0x76f6db08, 0x42d3dbd9, 0x99a9aeff, 0x810c3a2a
#define AES_GEN_4R_KEY4 0x229effb4, 0x3d518b6d, 0xe3d6a7a6, 0xb5826f73
#define AES_GEN_4R_KEY5 0xb272b7d2, 0xe9024d4e, 0x9c10b3d9, 0xc7566bf3
#define AES_GEN_4R_KEY6 0xf63befa7, 0x2ba9660a, 0xf765a38b, 0xf273c9e7
@ -197,10 +197,6 @@ void fillAes4Rx4(void *state, size_t outputSize, void *buffer) {
key1 = rx_set_int_vec_i128(AES_GEN_4R_KEY1);
key2 = rx_set_int_vec_i128(AES_GEN_4R_KEY2);
key3 = rx_set_int_vec_i128(AES_GEN_4R_KEY3);
key4 = rx_set_int_vec_i128(AES_GEN_4R_KEY4);
key5 = rx_set_int_vec_i128(AES_GEN_4R_KEY5);
key6 = rx_set_int_vec_i128(AES_GEN_4R_KEY6);
key7 = rx_set_int_vec_i128(AES_GEN_4R_KEY7);
state0 = rx_load_vec_i128((rx_vec_i128*)state + 0);
state1 = rx_load_vec_i128((rx_vec_i128*)state + 1);
@ -210,23 +206,23 @@ void fillAes4Rx4(void *state, size_t outputSize, void *buffer) {
while (outptr < outputEnd) {
state0 = aesdec<softAes>(state0, key0);
state1 = aesenc<softAes>(state1, key0);
state2 = aesdec<softAes>(state2, key4);
state3 = aesenc<softAes>(state3, key4);
state2 = aesdec<softAes>(state2, key0);
state3 = aesenc<softAes>(state3, key0);
state0 = aesdec<softAes>(state0, key1);
state1 = aesenc<softAes>(state1, key1);
state2 = aesdec<softAes>(state2, key5);
state3 = aesenc<softAes>(state3, key5);
state2 = aesdec<softAes>(state2, key1);
state3 = aesenc<softAes>(state3, key1);
state0 = aesdec<softAes>(state0, key2);
state1 = aesenc<softAes>(state1, key2);
state2 = aesdec<softAes>(state2, key6);
state3 = aesenc<softAes>(state3, key6);
state2 = aesdec<softAes>(state2, key2);
state3 = aesenc<softAes>(state3, key2);
state0 = aesdec<softAes>(state0, key3);
state1 = aesenc<softAes>(state1, key3);
state2 = aesdec<softAes>(state2, key7);
state3 = aesenc<softAes>(state3, key7);
state2 = aesdec<softAes>(state2, key3);
state3 = aesenc<softAes>(state3, key3);
rx_store_vec_i128((rx_vec_i128*)outptr + 0, state0);
rx_store_vec_i128((rx_vec_i128*)outptr + 1, state1);

View File

@ -15,7 +15,7 @@ RANDOMX_SCRATCHPAD_L2 EQU 262144t
RANDOMX_SCRATCHPAD_L1 EQU 16384t
RANDOMX_JUMP_BITS EQU 8t
RANDOMX_JUMP_OFFSET EQU 8t
RANDOMX_FREQ_IADD_RS EQU 16t
RANDOMX_FREQ_IADD_RS EQU 25t
RANDOMX_FREQ_IADD_M EQU 7t
RANDOMX_FREQ_ISUB_R EQU 16t
RANDOMX_FREQ_ISUB_M EQU 7t
@ -29,19 +29,19 @@ RANDOMX_FREQ_IMUL_RCP EQU 8t
RANDOMX_FREQ_INEG_R EQU 2t
RANDOMX_FREQ_IXOR_R EQU 15t
RANDOMX_FREQ_IXOR_M EQU 5t
RANDOMX_FREQ_IROR_R EQU 8t
RANDOMX_FREQ_IROL_R EQU 2t
RANDOMX_FREQ_IROR_R EQU 10t
RANDOMX_FREQ_IROL_R EQU 0t
RANDOMX_FREQ_ISWAP_R EQU 4t
RANDOMX_FREQ_FSWAP_R EQU 4t
RANDOMX_FREQ_FADD_R EQU 16t
RANDOMX_FREQ_FSWAP_R EQU 8t
RANDOMX_FREQ_FADD_R EQU 20t
RANDOMX_FREQ_FADD_M EQU 5t
RANDOMX_FREQ_FSUB_R EQU 16t
RANDOMX_FREQ_FSUB_R EQU 20t
RANDOMX_FREQ_FSUB_M EQU 5t
RANDOMX_FREQ_FSCAL_R EQU 6t
RANDOMX_FREQ_FMUL_R EQU 32t
RANDOMX_FREQ_FMUL_R EQU 20t
RANDOMX_FREQ_FDIV_M EQU 4t
RANDOMX_FREQ_FSQRT_R EQU 6t
RANDOMX_FREQ_CBRANCH EQU 25t
RANDOMX_FREQ_CBRANCH EQU 16t
RANDOMX_FREQ_CFROUND EQU 1t
RANDOMX_FREQ_ISTORE EQU 16t
RANDOMX_FREQ_NOP EQU 0t

View File

@ -67,7 +67,7 @@ namespace randomx {
constexpr int wtSum = RANDOMX_FREQ_IADD_RS + RANDOMX_FREQ_IADD_M + RANDOMX_FREQ_ISUB_R + \
RANDOMX_FREQ_ISUB_M + RANDOMX_FREQ_IMUL_R + RANDOMX_FREQ_IMUL_M + RANDOMX_FREQ_IMULH_R + \
RANDOMX_FREQ_IMULH_M + RANDOMX_FREQ_ISMULH_R + RANDOMX_FREQ_ISMULH_M + RANDOMX_FREQ_IMUL_RCP + \
RANDOMX_FREQ_INEG_R + RANDOMX_FREQ_IXOR_R + RANDOMX_FREQ_IXOR_M + RANDOMX_FREQ_IROR_R + RANDOMX_FREQ_IROL_R + RANDOMX_FREQ_ISWAP_R + \
RANDOMX_FREQ_INEG_R + RANDOMX_FREQ_IXOR_R + RANDOMX_FREQ_IXOR_M + RANDOMX_FREQ_IROR_R + RANDOMX_FREQ_ISWAP_R + \
RANDOMX_FREQ_FSWAP_R + RANDOMX_FREQ_FADD_R + RANDOMX_FREQ_FADD_M + RANDOMX_FREQ_FSUB_R + RANDOMX_FREQ_FSUB_M + \
RANDOMX_FREQ_FSCAL_R + RANDOMX_FREQ_FMUL_R + RANDOMX_FREQ_FDIV_M + RANDOMX_FREQ_FSQRT_R + RANDOMX_FREQ_CBRANCH + \
RANDOMX_FREQ_CFROUND + RANDOMX_FREQ_ISTORE + RANDOMX_FREQ_NOP;

View File

@ -1,5 +1,6 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
Copyright (c) 2019, Wownero Inc., a Monero Enterprise Alliance partner company
All rights reserved.
@ -38,7 +39,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define RANDOMX_ARGON_LANES 1
//Argon2d salt
#define RANDOMX_ARGON_SALT "RandomX\x03"
#define RANDOMX_ARGON_SALT "RandomWOW\x01"
//Number of random Cache accesses per Dataset item. Minimum is 2.
#define RANDOMX_CACHE_ACCESSES 8
@ -56,16 +57,16 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define RANDOMX_PROGRAM_SIZE 256
//Number of iterations during VM execution.
#define RANDOMX_PROGRAM_ITERATIONS 2048
#define RANDOMX_PROGRAM_ITERATIONS 1024
//Number of chained VM executions per hash.
#define RANDOMX_PROGRAM_COUNT 8
#define RANDOMX_PROGRAM_COUNT 16
//Scratchpad L3 size in bytes. Must be a power of 2.
#define RANDOMX_SCRATCHPAD_L3 2097152
#define RANDOMX_SCRATCHPAD_L3 1048576
//Scratchpad L2 size in bytes. Must be a power of two and less than or equal to RANDOMX_SCRATCHPAD_L3.
#define RANDOMX_SCRATCHPAD_L2 262144
#define RANDOMX_SCRATCHPAD_L2 131072
//Scratchpad L1 size in bytes. Must be a power of two (minimum 64) and less than or equal to RANDOMX_SCRATCHPAD_L2.
#define RANDOMX_SCRATCHPAD_L1 16384
@ -82,7 +83,7 @@ Total sum of frequencies must be 256
*/
//Integer instructions
#define RANDOMX_FREQ_IADD_RS 16
#define RANDOMX_FREQ_IADD_RS 25
#define RANDOMX_FREQ_IADD_M 7
#define RANDOMX_FREQ_ISUB_R 16
#define RANDOMX_FREQ_ISUB_M 7
@ -96,23 +97,23 @@ Total sum of frequencies must be 256
#define RANDOMX_FREQ_INEG_R 2
#define RANDOMX_FREQ_IXOR_R 15
#define RANDOMX_FREQ_IXOR_M 5
#define RANDOMX_FREQ_IROR_R 8
#define RANDOMX_FREQ_IROL_R 2
#define RANDOMX_FREQ_IROR_R 10
#define RANDOMX_FREQ_IROL_R 0
#define RANDOMX_FREQ_ISWAP_R 4
//Floating point instructions
#define RANDOMX_FREQ_FSWAP_R 4
#define RANDOMX_FREQ_FADD_R 16
#define RANDOMX_FREQ_FSWAP_R 8
#define RANDOMX_FREQ_FADD_R 20
#define RANDOMX_FREQ_FADD_M 5
#define RANDOMX_FREQ_FSUB_R 16
#define RANDOMX_FREQ_FSUB_R 20
#define RANDOMX_FREQ_FSUB_M 5
#define RANDOMX_FREQ_FSCAL_R 6
#define RANDOMX_FREQ_FMUL_R 32
#define RANDOMX_FREQ_FMUL_R 20
#define RANDOMX_FREQ_FDIV_M 4
#define RANDOMX_FREQ_FSQRT_R 6
//Control instructions
#define RANDOMX_FREQ_CBRANCH 25
#define RANDOMX_FREQ_CBRANCH 16
#define RANDOMX_FREQ_CFROUND 1
//Store instruction

View File

@ -157,6 +157,21 @@ void rx_set_rounding_mode(uint32_t mode) {
}
}
uint32_t rx_get_rounding_mode() {
switch (fegetround()) {
case FE_DOWNWARD:
return RoundDown;
case FE_UPWARD:
return RoundUp;
case FE_TOWARDZERO:
return RoundToZero;
case FE_TONEAREST:
return RoundToNearest;
default:
UNREACHABLE;
}
}
#endif
#ifdef RANDOMX_USE_X87

View File

@ -173,6 +173,10 @@ FORCE_INLINE void rx_set_rounding_mode(uint32_t mode) {
_mm_setcsr(rx_mxcsr_default | (mode << 13));
}
FORCE_INLINE uint32_t rx_get_rounding_mode() {
return (_mm_getcsr() >> 13) & 3;
}
#elif defined(__PPC64__) && defined(__ALTIVEC__) && defined(__VSX__) //sadly only POWER7 and newer will be able to use SIMD acceleration. Earlier processors cant use doubles or 64 bit integers with SIMD
#include <cstdint>
#include <stdexcept>
@ -736,6 +740,8 @@ void rx_reset_float_state();
void rx_set_rounding_mode(uint32_t mode);
uint32_t rx_get_rounding_mode();
#endif
double loadDoublePortable(const void* addr);

View File

@ -35,3 +35,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#else
#include "jit_compiler_fallback.hpp"
#endif
#if defined(__OpenBSD__) || defined(__NetBSD__)
#define RANDOMX_FORCE_SECURE
#endif

View File

@ -295,7 +295,7 @@ namespace randomx {
void JitCompilerX86::generateProgramPrologue(Program& prog, ProgramConfiguration& pcfg) {
instructionOffsets.clear();
for (unsigned i = 0; i < 8; ++i) {
for (unsigned i = 0; i < RegistersCount; ++i) {
registerUsage[i] = -1;
}

View File

@ -36,13 +36,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "cpu.hpp"
#include <cassert>
#include <limits>
#include <cfenv>
extern "C" {
randomx_flags randomx_get_flags() {
randomx_flags flags = RANDOMX_HAVE_COMPILER ? RANDOMX_FLAG_JIT : RANDOMX_FLAG_DEFAULT;
randomx::Cpu cpu;
#ifdef __OpenBSD__
#ifdef RANDOMX_FORCE_SECURE
if (flags == RANDOMX_FLAG_JIT) {
flags |= RANDOMX_FLAG_SECURE;
}
@ -328,7 +329,7 @@ extern "C" {
void randomx_vm_set_cache(randomx_vm *machine, randomx_cache* cache) {
assert(machine != nullptr);
assert(cache != nullptr && cache->isInitialized());
if (machine->cacheKey != cache->cacheKey) {
if (machine->cacheKey != cache->cacheKey || machine->getMemory() != cache->memory) {
machine->setCache(cache);
machine->cacheKey = cache->cacheKey;
}
@ -349,6 +350,8 @@ extern "C" {
assert(machine != nullptr);
assert(inputSize == 0 || input != nullptr);
assert(output != nullptr);
fenv_t fpstate;
fegetenv(&fpstate);
alignas(16) uint64_t tempHash[8];
int blakeResult = blake2b(tempHash, sizeof(tempHash), input, inputSize, nullptr, 0);
assert(blakeResult == 0);
@ -361,6 +364,7 @@ extern "C" {
}
machine->run(&tempHash);
machine->getFinalResult(output, RANDOMX_HASH_SIZE);
fesetenv(&fpstate);
}
void randomx_calculate_hash_first(randomx_vm* machine, const void* input, size_t inputSize) {
@ -380,4 +384,14 @@ extern "C" {
blake2b(machine->tempHash, sizeof(machine->tempHash), nextInput, nextInputSize, nullptr, 0);
machine->hashAndFill(output, RANDOMX_HASH_SIZE, machine->tempHash);
}
void randomx_calculate_hash_last(randomx_vm* machine, void* output) {
machine->resetRoundingMode();
for (int chain = 0; chain < RANDOMX_PROGRAM_COUNT - 1; ++chain) {
machine->run(machine->tempHash);
blake2b(machine->tempHash, sizeof(machine->tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0);
}
machine->run(machine->tempHash);
machine->getFinalResult(output, RANDOMX_HASH_SIZE);
}
}

View File

@ -240,9 +240,13 @@ RANDOMX_EXPORT void randomx_destroy_vm(randomx_vm *machine);
RANDOMX_EXPORT void randomx_calculate_hash(randomx_vm *machine, const void *input, size_t inputSize, void *output);
/**
* Paired functions used to calculate multiple RandomX hashes more efficiently.
* randomx_calculate_hash_first is called for the first input value.
* randomx_calculate_hash_next will output the hash value of the previous input.
* Set of functions used to calculate multiple RandomX hashes more efficiently.
* randomx_calculate_hash_first will begin a hash calculation.
* randomx_calculate_hash_next will output the hash value of the previous input
* and begin the calculation of the next hash.
* randomx_calculate_hash_last will output the hash value of the previous input.
*
* WARNING: These functions may alter the floating point rounding mode of the calling thread.
*
* @param machine is a pointer to a randomx_vm structure. Must not be NULL.
* @param input is a pointer to memory to be hashed. Must not be NULL.
@ -254,6 +258,7 @@ RANDOMX_EXPORT void randomx_calculate_hash(randomx_vm *machine, const void *inpu
*/
RANDOMX_EXPORT void randomx_calculate_hash_first(randomx_vm* machine, const void* input, size_t inputSize);
RANDOMX_EXPORT void randomx_calculate_hash_next(randomx_vm* machine, const void* nextInput, size_t nextInputSize, void* output);
RANDOMX_EXPORT void randomx_calculate_hash_last(randomx_vm* machine, void* output);
#if defined(__cplusplus)
}

View File

@ -65,7 +65,7 @@ set_thread_affinity(std::thread::native_handle_type thread,
(thread_policy_t)&policy, 1);
#elif defined(_WIN32) || defined(__CYGWIN__)
rc = SetThreadAffinityMask(reinterpret_cast<HANDLE>(thread), 1ULL << cpuid) == 0 ? -2 : 0;
#elif !defined(__OpenBSD__) && !defined(__FreeBSD__) && !defined(__ANDROID__)
#elif !defined(__OpenBSD__) && !defined(__FreeBSD__) && !defined(__ANDROID__) && !defined(__NetBSD__)
cpu_set_t cs;
CPU_ZERO(&cs);
CPU_SET(cpuid, &cs);

View File

@ -40,9 +40,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "../dataset.hpp"
#include "../blake2/endian.h"
#include "../common.hpp"
#include "../jit_compiler.hpp"
#ifdef _WIN32
#include <windows.h>
#include <VersionHelpers.h>
#include <versionhelpers.h>
#endif
#include "affinity.hpp"
@ -94,6 +95,7 @@ void printUsage(const char* executable) {
std::cout << " --ssse3 use optimized Argon2 for SSSE3 CPUs" << std::endl;
std::cout << " --avx2 use optimized Argon2 for AVX2 CPUs" << std::endl;
std::cout << " --auto select the best options for the current CPU" << std::endl;
std::cout << " --noBatch calculate hashes one by one (default: batch)" << std::endl;
}
struct MemoryException : public std::exception {
@ -109,11 +111,14 @@ struct DatasetAllocException : public MemoryException {
}
};
void mine(randomx_vm* vm, std::atomic<uint32_t>& atomicNonce, AtomicHash& result, uint32_t noncesCount, int thread, int cpuid=-1) {
using MineFunc = void(randomx_vm * vm, std::atomic<uint32_t> & atomicNonce, AtomicHash & result, uint32_t noncesCount, int thread, int cpuid);
template<bool batch>
void mine(randomx_vm* vm, std::atomic<uint32_t>& atomicNonce, AtomicHash& result, uint32_t noncesCount, int thread, int cpuid = -1) {
if (cpuid >= 0) {
int rc = set_thread_affinity(cpuid);
if (rc) {
std::cerr << "Failed to set thread affinity for thread " << thread << " (error=" << rc << ")" << std::endl;
std::cerr << "Failed to set thread affinity for thread " << thread << " (error=" << rc << ")" << std::endl;
}
}
uint64_t hash[RANDOMX_HASH_SIZE / sizeof(uint64_t)];
@ -122,19 +127,27 @@ void mine(randomx_vm* vm, std::atomic<uint32_t>& atomicNonce, AtomicHash& result
void* noncePtr = blockTemplate + 39;
auto nonce = atomicNonce.fetch_add(1);
store32(noncePtr, nonce);
randomx_calculate_hash_first(vm, blockTemplate, sizeof(blockTemplate));
if (batch) {
store32(noncePtr, nonce);
randomx_calculate_hash_first(vm, blockTemplate, sizeof(blockTemplate));
}
while (nonce < noncesCount) {
nonce = atomicNonce.fetch_add(1);
if (batch) {
nonce = atomicNonce.fetch_add(1);
}
store32(noncePtr, nonce);
randomx_calculate_hash_next(vm, blockTemplate, sizeof(blockTemplate), &hash);
(batch ? randomx_calculate_hash_next : randomx_calculate_hash)(vm, blockTemplate, sizeof(blockTemplate), &hash);
result.xorWith(hash);
if (!batch) {
nonce = atomicNonce.fetch_add(1);
}
}
}
int main(int argc, char** argv) {
bool softAes, miningMode, verificationMode, help, largePages, jit, secure, ssse3, avx2, autoFlags;
bool softAes, miningMode, verificationMode, help, largePages, jit, secure;
bool ssse3, avx2, autoFlags, noBatch;
int noncesCount, threadCount, initThreadCount;
uint64_t threadAffinity;
int32_t seedValue;
@ -158,10 +171,11 @@ int main(int argc, char** argv) {
readOption("--ssse3", argc, argv, ssse3);
readOption("--avx2", argc, argv, avx2);
readOption("--auto", argc, argv, autoFlags);
readOption("--noBatch", argc, argv, noBatch);
store32(&seed, seedValue);
std::cout << "RandomX benchmark v1.1.7" << std::endl;
std::cout << "RandomX benchmark v1.1.8" << std::endl;
if (help) {
printUsage(argv[0]);
@ -199,7 +213,7 @@ int main(int argc, char** argv) {
}
if (jit) {
flags |= RANDOMX_FLAG_JIT;
#ifdef __OpenBSD__
#ifdef RANDOMX_FORCE_SECURE
flags |= RANDOMX_FLAG_SECURE;
#endif
}
@ -211,7 +225,7 @@ int main(int argc, char** argv) {
if (miningMode) {
flags |= RANDOMX_FLAG_FULL_MEM;
}
#ifndef __OpenBSD__
#ifndef RANDOMX_FORCE_SECURE
if (secure) {
flags |= RANDOMX_FLAG_SECURE;
}
@ -263,6 +277,16 @@ int main(int argc, char** argv) {
std::cout << " - thread affinity (" << mask_to_string(threadAffinity) << ")" << std::endl;
}
MineFunc* func;
if (noBatch) {
func = &mine<false>;
}
else {
func = &mine<true>;
std::cout << " - batch mode" << std::endl;
}
std::cout << "Initializing";
if (miningMode)
std::cout << " (" << initThreadCount << " thread" << (initThreadCount > 1 ? "s)" : ")");
@ -333,14 +357,14 @@ int main(int argc, char** argv) {
int cpuid = -1;
if (threadAffinity)
cpuid = cpuid_from_mask(threadAffinity, i);
threads.push_back(std::thread(&mine, vms[i], std::ref(atomicNonce), std::ref(result), noncesCount, i, cpuid));
threads.push_back(std::thread(func, vms[i], std::ref(atomicNonce), std::ref(result), noncesCount, i, cpuid));
}
for (unsigned i = 0; i < threads.size(); ++i) {
threads[i].join();
}
}
else {
mine(vms[0], std::ref(atomicNonce), std::ref(result), noncesCount, 0);
func(vms[0], std::ref(atomicNonce), std::ref(result), noncesCount, 0, -1);
}
double elapsed = sw.getElapsed();

View File

@ -143,7 +143,7 @@ int main() {
randomx::JitCompiler jit;
jit.generateSuperscalarHash(cache->programs, cache->reciprocalCache);
jit.generateDatasetInitCode();
#ifdef __OpenBSD__
#ifdef RANDOMX_FORCE_SECURE
jit.enableExecution();
#else
jit.enableAll();
@ -954,7 +954,7 @@ int main() {
assert(ibc.memMask == randomx::ScratchpadL3Mask);
});
#ifdef __OpenBSD__
#ifdef RANDOMX_FORCE_SECURE
vm = randomx_create_vm(RANDOMX_FLAG_DEFAULT | RANDOMX_FLAG_SECURE, cache, nullptr);
#else
vm = randomx_create_vm(RANDOMX_FLAG_DEFAULT, cache, nullptr);
@ -1009,10 +1009,10 @@ int main() {
vm = nullptr;
cache = randomx_alloc_cache(RANDOMX_FLAG_JIT);
initCache("test key 000");
#ifdef __OpenBSD__
vm = randomx_create_vm(RANDOMX_FLAG_DEFAULT | RANDOMX_FLAG_SECURE, cache, nullptr);
#ifdef RANDOMX_FORCE_SECURE
vm = randomx_create_vm(RANDOMX_FLAG_JIT | RANDOMX_FLAG_SECURE, cache, nullptr);
#else
vm = randomx_create_vm(RANDOMX_FLAG_DEFAULT, cache, nullptr);
vm = randomx_create_vm(RANDOMX_FLAG_JIT, cache, nullptr);
#endif
}
@ -1026,9 +1026,6 @@ int main() {
runTest("Hash test 2e (compiler)", RANDOMX_HAVE_COMPILER && stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), test_e);
randomx_destroy_vm(vm);
vm = nullptr;
auto flags = randomx_get_flags();
randomx_release_cache(cache);
@ -1054,6 +1051,40 @@ int main() {
assert(cacheMemory[33554431] == 0x1f47f056d05cd99b);
});
if (cache != nullptr)
randomx_release_cache(cache);
cache = randomx_alloc_cache(RANDOMX_FLAG_DEFAULT);
runTest("Hash batch test", RANDOMX_HAVE_COMPILER && stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), []() {
char hash1[RANDOMX_HASH_SIZE];
char hash2[RANDOMX_HASH_SIZE];
char hash3[RANDOMX_HASH_SIZE];
initCache("test key 000");
char input1[] = "This is a test";
char input2[] = "Lorem ipsum dolor sit amet";
char input3[] = "sed do eiusmod tempor incididunt ut labore et dolore magna aliqua";
randomx_calculate_hash_first(vm, input1, sizeof(input1) - 1);
randomx_calculate_hash_next(vm, input2, sizeof(input2) - 1, &hash1);
randomx_calculate_hash_next(vm, input3, sizeof(input3) - 1, &hash2);
randomx_calculate_hash_last(vm, &hash3);
assert(equalsHex(hash1, "639183aae1bf4c9a35884cb46b09cad9175f04efd7684e7262a0ac1c2f0b4e3f"));
assert(equalsHex(hash2, "300a0adb47603dedb42228ccb2b211104f4da45af709cd7547cd049e9489c969"));
assert(equalsHex(hash3, "c36d4ed4191e617309867ed66a443be4075014e2b061bcdaf9ce7b721d2b77a8"));
});
runTest("Preserve rounding mode", RANDOMX_FREQ_CFROUND > 0, []() {
rx_set_rounding_mode(RoundToNearest);
char hash[RANDOMX_HASH_SIZE];
calcStringHash("test key 000", "Lorem ipsum dolor sit amet", &hash);
assert(equalsHex(hash, "300a0adb47603dedb42228ccb2b211104f4da45af709cd7547cd049e9489c969"));
assert(rx_get_rounding_mode() == RoundToNearest);
});
randomx_destroy_vm(vm);
vm = nullptr;
if (cache != nullptr)
randomx_release_cache(cache);

View File

@ -54,6 +54,9 @@ public:
{
return program;
}
const uint8_t* getMemory() const {
return mem.memory;
}
protected:
void initialize();
alignas(64) randomx::Program program;

View File

@ -94,7 +94,12 @@ void* allocMemoryPages(std::size_t bytes) {
if (mem == nullptr)
throw std::runtime_error(getErrorMessage("allocMemoryPages - VirtualAlloc"));
#else
mem = mmap(nullptr, bytes, PAGE_READWRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
#if defined(__NetBSD__)
#define RESERVED_FLAGS PROT_MPROTECT(PROT_EXEC)
#else
#define RESERVED_FLAGS 0
#endif
mem = mmap(nullptr, bytes, PAGE_READWRITE | RESERVED_FLAGS, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
if (mem == MAP_FAILED)
throw std::runtime_error("allocMemoryPages - mmap failed");
#endif
@ -141,7 +146,7 @@ void* allocLargePagesMemory(std::size_t bytes) {
mem = mmap(nullptr, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, VM_FLAGS_SUPERPAGE_SIZE_2MB, 0);
#elif defined(__FreeBSD__)
mem = mmap(nullptr, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER, -1, 0);
#elif defined(__OpenBSD__)
#elif defined(__OpenBSD__) || defined(__NetBSD__)
mem = MAP_FAILED; // OpenBSD does not support huge pages
#else
mem = mmap(nullptr, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, -1, 0);

View File

@ -54,12 +54,17 @@
<ItemGroup>
<ClCompile Include="..\src\aes_hash.cpp" />
<ClCompile Include="..\src\allocator.cpp" />
<ClCompile Include="..\src\argon2_avx2.c">
<EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
</ClCompile>
<ClCompile Include="..\src\argon2_core.c" />
<ClCompile Include="..\src\argon2_ref.c" />
<ClCompile Include="..\src\argon2_ssse3.c" />
<ClCompile Include="..\src\assembly_generator_x86.cpp" />
<ClCompile Include="..\src\blake2\blake2b.c" />
<ClCompile Include="..\src\blake2_generator.cpp" />
<ClCompile Include="..\src\bytecode_machine.cpp" />
<ClCompile Include="..\src\cpu.cpp" />
<ClCompile Include="..\src\dataset.cpp" />
<ClCompile Include="..\src\instruction.cpp" />
<ClCompile Include="..\src\instructions_portable.cpp" />

View File

@ -172,5 +172,14 @@
<ClCompile Include="..\src\bytecode_machine.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\src\argon2_avx2.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\src\argon2_ssse3.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\src\cpu.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
</Project>