Compare commits
44 Commits
Author | SHA1 | Date |
---|---|---|
fuwa | 89b7c02bba | |
fuwa | d2416d6157 | |
fuwa | b4ada42599 | |
wowario | 597b1e9c91 | |
tevador | 5ce5f4906c | |
tevador | 9905ec9c5a | |
tevador | 863765bbe6 | |
tevador | a1c08a2f41 | |
tevador | 708a4e50c5 | |
tevador | 6a4afc721f | |
tevador | 32ab5dea54 | |
cryptonote-social | a7733de1e7 | |
tevador | bece0a7206 | |
tevador | 7741eb1e97 | |
tevador | 148b923f71 | |
tevador | 6a764e90d0 | |
tevador | ac574e3743 | |
tevador | 01381ccef3 | |
tevador | 913e495c53 | |
tevador | 72ac5e49b6 | |
tevador | bbbb34757b | |
tevador | a223b6b33b | |
tevador | 30969c0e25 | |
tevador | 6e842d22bb | |
tevador | aa19c5b9b6 | |
tevador | 70d4b0f2f1 | |
tevador | f872ce0b94 | |
tevador | 3910d49b49 | |
SChernykh | 219c02e1e5 | |
tevador | 6235852e00 | |
tevador | e3561d661e | |
tevador | 65fae68287 | |
tevador | fd96d3df22 | |
Vladimir | 01914b49cd | |
tevador | 7e20c8e56e | |
tevador | 7646cfede6 | |
tevador | 88268ae325 | |
tevador | 57545d1c53 | |
tevador | 79c53ae9b0 | |
tevador | cb299e5a25 | |
tevador | 4381ec3c89 | |
tevador | 5e53ed9409 | |
Bertrand Jacquin | 66c039030f | |
SChernykh | e43267fa86 |
|
@ -0,0 +1,3 @@
|
|||
.gitignore export-ignore
|
||||
.gitattributes export-ignore
|
||||
audits export-ignore
|
|
@ -28,7 +28,9 @@
|
|||
|
||||
cmake_minimum_required(VERSION 2.8.7)
|
||||
|
||||
set (randomx_sources
|
||||
project(RandomX)
|
||||
|
||||
set(randomx_sources
|
||||
src/aes_hash.cpp
|
||||
src/argon2_ref.c
|
||||
src/argon2_ssse3.c
|
||||
|
@ -94,32 +96,50 @@ function(add_flag flag)
|
|||
endfunction()
|
||||
|
||||
# x86-64
|
||||
if (ARCH_ID STREQUAL "x86_64" OR ARCH_ID STREQUAL "x86-64" OR ARCH_ID STREQUAL "amd64")
|
||||
if(ARCH_ID STREQUAL "x86_64" OR ARCH_ID STREQUAL "x86-64" OR ARCH_ID STREQUAL "amd64")
|
||||
list(APPEND randomx_sources
|
||||
src/jit_compiler_x86_static.S
|
||||
src/jit_compiler_x86.cpp)
|
||||
# cheat because cmake and ccache hate each other
|
||||
set_property(SOURCE src/jit_compiler_x86_static.S PROPERTY LANGUAGE C)
|
||||
set_property(SOURCE src/jit_compiler_x86_static.S PROPERTY XCODE_EXPLICIT_FILE_TYPE sourcecode.asm)
|
||||
|
||||
if(ARCH STREQUAL "native")
|
||||
add_flag("-march=native")
|
||||
if(MSVC)
|
||||
enable_language(ASM_MASM)
|
||||
list(APPEND randomx_sources src/jit_compiler_x86_static.asm)
|
||||
|
||||
set_property(SOURCE src/jit_compiler_x86_static.asm PROPERTY LANGUAGE ASM_MASM)
|
||||
|
||||
set_source_files_properties(src/argon2_avx2.c COMPILE_FLAGS /arch:AVX2)
|
||||
|
||||
add_custom_command(OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/src/asm/configuration.asm
|
||||
COMMAND powershell -ExecutionPolicy Bypass -File h2inc.ps1 ..\\src\\configuration.h > ..\\src\\asm\\configuration.asm SET ERRORLEVEL = 0
|
||||
COMMENT "Generating configuration.asm at ${CMAKE_CURRENT_SOURCE_DIR}"
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/vcxproj)
|
||||
add_custom_target(generate-asm
|
||||
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/src/asm/configuration.asm)
|
||||
else()
|
||||
# default build has hardware AES enabled (software AES can be selected at runtime)
|
||||
add_flag("-maes")
|
||||
check_c_compiler_flag(-mssse3 HAVE_SSSE3)
|
||||
if(HAVE_SSSE3)
|
||||
set_source_files_properties(src/argon2_ssse3.c COMPILE_FLAGS -mssse3)
|
||||
endif()
|
||||
check_c_compiler_flag(-mavx2 HAVE_AVX2)
|
||||
if(HAVE_AVX2)
|
||||
set_source_files_properties(src/argon2_avx2.c COMPILE_FLAGS -mavx2)
|
||||
list(APPEND randomx_sources src/jit_compiler_x86_static.S)
|
||||
|
||||
# cheat because cmake and ccache hate each other
|
||||
set_property(SOURCE src/jit_compiler_x86_static.S PROPERTY LANGUAGE C)
|
||||
set_property(SOURCE src/jit_compiler_x86_static.S PROPERTY XCODE_EXPLICIT_FILE_TYPE sourcecode.asm)
|
||||
|
||||
if(ARCH STREQUAL "native")
|
||||
add_flag("-march=native")
|
||||
else()
|
||||
# default build has hardware AES enabled (software AES can be selected at runtime)
|
||||
add_flag("-maes")
|
||||
check_c_compiler_flag(-mssse3 HAVE_SSSE3)
|
||||
if(HAVE_SSSE3)
|
||||
set_source_files_properties(src/argon2_ssse3.c COMPILE_FLAGS -mssse3)
|
||||
endif()
|
||||
check_c_compiler_flag(-mavx2 HAVE_AVX2)
|
||||
if(HAVE_AVX2)
|
||||
set_source_files_properties(src/argon2_avx2.c COMPILE_FLAGS -mavx2)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# PowerPC
|
||||
if (ARCH_ID STREQUAL "ppc64" OR ARCH_ID STREQUAL "ppc64le")
|
||||
if(ARCH_ID STREQUAL "ppc64" OR ARCH_ID STREQUAL "ppc64le")
|
||||
if(ARCH STREQUAL "native")
|
||||
add_flag("-mcpu=native")
|
||||
endif()
|
||||
|
@ -127,7 +147,7 @@ if (ARCH_ID STREQUAL "ppc64" OR ARCH_ID STREQUAL "ppc64le")
|
|||
endif()
|
||||
|
||||
# ARMv8
|
||||
if (ARM_ID STREQUAL "aarch64" OR ARM_ID STREQUAL "arm64" OR ARM_ID STREQUAL "armv8-a")
|
||||
if(ARM_ID STREQUAL "aarch64" OR ARM_ID STREQUAL "arm64" OR ARM_ID STREQUAL "armv8-a")
|
||||
list(APPEND randomx_sources
|
||||
src/jit_compiler_a64_static.S
|
||||
src/jit_compiler_a64.cpp)
|
||||
|
@ -152,14 +172,22 @@ endif()
|
|||
|
||||
set(RANDOMX_INCLUDE "${CMAKE_CURRENT_SOURCE_DIR}/src" CACHE STRING "RandomX Include path")
|
||||
|
||||
add_library(randomx
|
||||
${randomx_sources})
|
||||
add_library(randomx ${randomx_sources})
|
||||
|
||||
if(TARGET generate-asm)
|
||||
add_dependencies(randomx generate-asm)
|
||||
endif()
|
||||
|
||||
set_property(TARGET randomx PROPERTY POSITION_INDEPENDENT_CODE ON)
|
||||
set_property(TARGET randomx PROPERTY CXX_STANDARD 11)
|
||||
set_property(TARGET randomx PROPERTY CXX_STANDARD_REQUIRED ON)
|
||||
set_property(TARGET randomx PROPERTY PUBLIC_HEADER src/randomx.h)
|
||||
|
||||
include(GNUInstallDirs)
|
||||
install(TARGETS randomx
|
||||
ARCHIVE DESTINATION lib
|
||||
LIBRARY DESTINATION lib)
|
||||
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
|
||||
|
||||
add_executable(randomx-tests
|
||||
src/tests/tests.cpp)
|
||||
|
@ -176,7 +204,7 @@ target_link_libraries(randomx-codegen
|
|||
set_property(TARGET randomx-codegen PROPERTY POSITION_INDEPENDENT_CODE ON)
|
||||
set_property(TARGET randomx-codegen PROPERTY CXX_STANDARD 11)
|
||||
|
||||
if (NOT Threads_FOUND AND UNIX AND NOT APPLE)
|
||||
if(NOT Threads_FOUND AND UNIX AND NOT APPLE)
|
||||
set(THREADS_PREFER_PTHREAD_FLAG ON)
|
||||
find_package(Threads)
|
||||
endif()
|
||||
|
|
13
README.md
13
README.md
|
@ -48,6 +48,8 @@ cmake -DARCH=native ..
|
|||
make
|
||||
```
|
||||
|
||||
To build portable binaries, omit the `ARCH` option when executing cmake.
|
||||
|
||||
### Windows
|
||||
|
||||
On Windows, it is possible to build using MinGW (same procedure as on Linux) or using Visual Studio (solution file is provided).
|
||||
|
@ -63,8 +65,12 @@ RandomX was primarily designed as a PoW algorithm for [Monero](https://www.getmo
|
|||
* The key `K` is selected to be the hash of a block in the blockchain - this block is called the 'key block'. For optimal mining and verification performance, the key should change every 2048 blocks (~2.8 days) and there should be a delay of 64 blocks (~2 hours) between the key block and the change of the key `K`. This can be achieved by changing the key when `blockHeight % 2048 == 64` and selecting key block such that `keyBlockHeight % 2048 == 0`.
|
||||
* The input `H` is the standard hashing blob with a selected nonce value.
|
||||
|
||||
RandomX was successfully activated on the Monero network on the 30th November 2019.
|
||||
|
||||
If you wish to use RandomX as a PoW algorithm for your cryptocurrency, please follow the [configuration guidelines](doc/configuration.md).
|
||||
|
||||
**Note**: To achieve ASIC resistance, the key `K` must change and must not be miner-selectable. We recommend to use blockchain data as the key in a similar way to the Monero example above. If blockchain data cannot be used for some reason, use a predefined sequence of keys.
|
||||
|
||||
### CPU performance
|
||||
The table below lists the performance of selected CPUs using the optimal number of threads (T) and large pages (if possible), in hashes per second (H/s). "CNv4" refers to the CryptoNight variant 4 (CN/R) hashrate measured using [XMRig](https://github.com/xmrig/xmrig) v2.14.1. "Fast mode" and "Light mode" are the two modes of RandomX.
|
||||
|
||||
|
@ -106,7 +112,12 @@ Most Intel and AMD CPUs made since 2011 should be fairly efficient at RandomX. M
|
|||
* DDR4 memory is limited to about 4000-6000 H/s per channel (depending on frequency and timings)
|
||||
|
||||
### Does RandomX facilitate botnets/malware mining or web mining?
|
||||
Efficient mining requires more than 2 GiB of memory, which is difficult to hide in an infected computer and disqualifies many low-end machines such as IoT devices. Web mining is infeasible due to the large memory requirement and the lack of directed rounding support for floating point operations in both Javascript and WebAssembly.
|
||||
|
||||
Due to the way the algorithm works, mining malware is much easier to detect. [RandomX Sniffer](https://github.com/tevador/randomx-sniffer) is a proof of concept tool that can detect illicit mining activity on Windows.
|
||||
|
||||
Efficient mining requires more than 2 GiB of memory, which also disqualifies many low-end machines such as IoT devices, which are often parts of large botnets.
|
||||
|
||||
Web mining is infeasible due to the large memory requirement and the lack of directed rounding support for floating point operations in both Javascript and WebAssembly.
|
||||
|
||||
### Since RandomX uses floating point math, does it give reproducible results on different platforms?
|
||||
|
||||
|
|
|
@ -255,7 +255,7 @@ The Scratchpad is split into 3 levels to mimic the typical CPU cache hierarchy [
|
|||
|----------------|----------|----------|----------|------|
|
||||
ARM Cortex A55|2|6|-|[[24](https://www.anandtech.com/show/11441/dynamiq-and-arms-new-cpus-cortex-a75-a55/4)]
|
||||
|AMD Zen+|4|12|40|[[25](https://en.wikichip.org/wiki/amd/microarchitectures/zen%2B#Memory_Hierarchy)]|
|
||||
|Intel Skylake|4|12|42|[[26](https://en.wikichip.org/wiki/amd/microarchitectures/zen%2B#Memory_Hierarchy)]
|
||||
|Intel Skylake|4|12|42|[[26](https://en.wikichip.org/wiki/intel/microarchitectures/skylake_(client)#Memory_Hierarchy)]
|
||||
|
||||
The L3 cache is much larger and located further from the CPU core. As a result, its access latencies are much higher and can cause stalls in program execution.
|
||||
|
||||
|
@ -638,7 +638,7 @@ state3 = 00000000000000000000000000000000
|
|||
|
||||
[25] AMD Zen+ Microarchitecture - https://en.wikichip.org/wiki/amd/microarchitectures/zen%2B#Memory_Hierarchy
|
||||
|
||||
[26] Intel Skylake Microarchitecture - https://en.wikichip.org/wiki/amd/microarchitectures/zen%2B#Memory_Hierarchy
|
||||
[26] Intel Skylake Microarchitecture - https://en.wikichip.org/wiki/intel/microarchitectures/skylake_(client)#Memory_Hierarchy
|
||||
|
||||
[27] Biryukov et al.: Fast and Tradeoff-Resilient Memory-Hard Functions for
|
||||
Cryptocurrencies and Password Hashing - https://eprint.iacr.org/2015/430.pdf Table 2, page 8
|
||||
|
@ -647,4 +647,4 @@ Cryptocurrencies and Password Hashing - https://eprint.iacr.org/2015/430.pdf Tab
|
|||
|
||||
[29] 7-Zip File archiver - https://www.7-zip.org/
|
||||
|
||||
[30] TestU01 library - http://simul.iro.umontreal.ca/testu01/tu01.html
|
||||
[30] TestU01 library - http://simul.iro.umontreal.ca/testu01/tu01.html
|
||||
|
|
|
@ -0,0 +1,13 @@
|
|||
-----BEGIN PGP PUBLIC KEY BLOCK-----
|
||||
|
||||
mDMEXd+PeBYJKwYBBAHaRw8BAQdAZ0nqJ+nRYoScG2QLX62pl+WO1+Mkv6Yyt2Kb
|
||||
ntGUuLq0G3RldmFkb3IgPHRldmFkb3JAZ21haWwuY29tPoiWBBMWCAA+FiEEMoWj
|
||||
LVEwdmMs6CUQWijIaue9c6YFAl3fj3gCGwMFCQWnqDgFCwkIBwIGFQoJCAsCBBYC
|
||||
AwECHgECF4AACgkQWijIaue9c6YBFQD+N1XTUqSCZp9jB/yTHQ9ahSaIUMtmuvdT
|
||||
So2s+quudP4A/R5wLwukpfGN9UZ4cfpmKCJ9jO1HJ2udmlGMsJbQpDAIuDgEXd+P
|
||||
eBIKKwYBBAGXVQEFAQEHQBNbQuPcDojMCkRb5B5u7Ld/AFLClOh+6ElL+u61rIY/
|
||||
AwEIB4h+BBgWCAAmFiEEMoWjLVEwdmMs6CUQWijIaue9c6YFAl3fj3gCGwwFCQWn
|
||||
qDgACgkQWijIaue9c6YJvgD+IY1Q9mCM1P1iZIoXuafRihXJ7UgVXpQqW2yoaUT3
|
||||
bfQA/RkisI2eElYoOjdwPszPP6VfL5+SViwDmDuJG2P5llgE
|
||||
=V4vd
|
||||
-----END PGP PUBLIC KEY BLOCK-----
|
109
src/aes_hash.cpp
109
src/aes_hash.cpp
|
@ -175,10 +175,10 @@ template void fillAes1Rx4<false>(void *state, size_t outputSize, void *buffer);
|
|||
//key0, key1, key2, key3 = Blake2b-512("RandomX AesGenerator4R keys 0-3")
|
||||
//key4, key5, key6, key7 = Blake2b-512("RandomX AesGenerator4R keys 4-7")
|
||||
|
||||
#define AES_GEN_4R_KEY0 0x99e5d23f, 0x2f546d2b, 0xd1833ddb, 0x6421aadd
|
||||
#define AES_GEN_4R_KEY1 0xa5dfcde5, 0x06f79d53, 0xb6913f55, 0xb20e3450
|
||||
#define AES_GEN_4R_KEY2 0x171c02bf, 0x0aa4679f, 0x515e7baf, 0x5c3ed904
|
||||
#define AES_GEN_4R_KEY3 0xd8ded291, 0xcd673785, 0xe78f5d08, 0x85623763
|
||||
#define AES_GEN_4R_KEY0 0xcf359e95, 0x141f82b7, 0x7ffbe4a6, 0xf890465d
|
||||
#define AES_GEN_4R_KEY1 0x6741ffdc, 0xbd5c5ac3, 0xfee8278a, 0x6a55c450
|
||||
#define AES_GEN_4R_KEY2 0x3d324aac, 0xa7279ad2, 0xd524fde4, 0x114c47a4
|
||||
#define AES_GEN_4R_KEY3 0x76f6db08, 0x42d3dbd9, 0x99a9aeff, 0x810c3a2a
|
||||
#define AES_GEN_4R_KEY4 0x229effb4, 0x3d518b6d, 0xe3d6a7a6, 0xb5826f73
|
||||
#define AES_GEN_4R_KEY5 0xb272b7d2, 0xe9024d4e, 0x9c10b3d9, 0xc7566bf3
|
||||
#define AES_GEN_4R_KEY6 0xf63befa7, 0x2ba9660a, 0xf765a38b, 0xf273c9e7
|
||||
|
@ -197,10 +197,6 @@ void fillAes4Rx4(void *state, size_t outputSize, void *buffer) {
|
|||
key1 = rx_set_int_vec_i128(AES_GEN_4R_KEY1);
|
||||
key2 = rx_set_int_vec_i128(AES_GEN_4R_KEY2);
|
||||
key3 = rx_set_int_vec_i128(AES_GEN_4R_KEY3);
|
||||
key4 = rx_set_int_vec_i128(AES_GEN_4R_KEY4);
|
||||
key5 = rx_set_int_vec_i128(AES_GEN_4R_KEY5);
|
||||
key6 = rx_set_int_vec_i128(AES_GEN_4R_KEY6);
|
||||
key7 = rx_set_int_vec_i128(AES_GEN_4R_KEY7);
|
||||
|
||||
state0 = rx_load_vec_i128((rx_vec_i128*)state + 0);
|
||||
state1 = rx_load_vec_i128((rx_vec_i128*)state + 1);
|
||||
|
@ -210,23 +206,23 @@ void fillAes4Rx4(void *state, size_t outputSize, void *buffer) {
|
|||
while (outptr < outputEnd) {
|
||||
state0 = aesdec<softAes>(state0, key0);
|
||||
state1 = aesenc<softAes>(state1, key0);
|
||||
state2 = aesdec<softAes>(state2, key4);
|
||||
state3 = aesenc<softAes>(state3, key4);
|
||||
state2 = aesdec<softAes>(state2, key0);
|
||||
state3 = aesenc<softAes>(state3, key0);
|
||||
|
||||
state0 = aesdec<softAes>(state0, key1);
|
||||
state1 = aesenc<softAes>(state1, key1);
|
||||
state2 = aesdec<softAes>(state2, key5);
|
||||
state3 = aesenc<softAes>(state3, key5);
|
||||
state2 = aesdec<softAes>(state2, key1);
|
||||
state3 = aesenc<softAes>(state3, key1);
|
||||
|
||||
state0 = aesdec<softAes>(state0, key2);
|
||||
state1 = aesenc<softAes>(state1, key2);
|
||||
state2 = aesdec<softAes>(state2, key6);
|
||||
state3 = aesenc<softAes>(state3, key6);
|
||||
state2 = aesdec<softAes>(state2, key2);
|
||||
state3 = aesenc<softAes>(state3, key2);
|
||||
|
||||
state0 = aesdec<softAes>(state0, key3);
|
||||
state1 = aesenc<softAes>(state1, key3);
|
||||
state2 = aesdec<softAes>(state2, key7);
|
||||
state3 = aesenc<softAes>(state3, key7);
|
||||
state2 = aesdec<softAes>(state2, key3);
|
||||
state3 = aesenc<softAes>(state3, key3);
|
||||
|
||||
rx_store_vec_i128((rx_vec_i128*)outptr + 0, state0);
|
||||
rx_store_vec_i128((rx_vec_i128*)outptr + 1, state1);
|
||||
|
@ -239,3 +235,84 @@ void fillAes4Rx4(void *state, size_t outputSize, void *buffer) {
|
|||
|
||||
template void fillAes4Rx4<true>(void *state, size_t outputSize, void *buffer);
|
||||
template void fillAes4Rx4<false>(void *state, size_t outputSize, void *buffer);
|
||||
|
||||
template<bool softAes>
|
||||
void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state) {
|
||||
uint8_t* scratchpadPtr = (uint8_t*)scratchpad;
|
||||
const uint8_t* scratchpadEnd = scratchpadPtr + scratchpadSize;
|
||||
|
||||
// initial state
|
||||
rx_vec_i128 hash_state0 = rx_set_int_vec_i128(AES_HASH_1R_STATE0);
|
||||
rx_vec_i128 hash_state1 = rx_set_int_vec_i128(AES_HASH_1R_STATE1);
|
||||
rx_vec_i128 hash_state2 = rx_set_int_vec_i128(AES_HASH_1R_STATE2);
|
||||
rx_vec_i128 hash_state3 = rx_set_int_vec_i128(AES_HASH_1R_STATE3);
|
||||
|
||||
const rx_vec_i128 key0 = rx_set_int_vec_i128(AES_GEN_1R_KEY0);
|
||||
const rx_vec_i128 key1 = rx_set_int_vec_i128(AES_GEN_1R_KEY1);
|
||||
const rx_vec_i128 key2 = rx_set_int_vec_i128(AES_GEN_1R_KEY2);
|
||||
const rx_vec_i128 key3 = rx_set_int_vec_i128(AES_GEN_1R_KEY3);
|
||||
|
||||
rx_vec_i128 fill_state0 = rx_load_vec_i128((rx_vec_i128*)fill_state + 0);
|
||||
rx_vec_i128 fill_state1 = rx_load_vec_i128((rx_vec_i128*)fill_state + 1);
|
||||
rx_vec_i128 fill_state2 = rx_load_vec_i128((rx_vec_i128*)fill_state + 2);
|
||||
rx_vec_i128 fill_state3 = rx_load_vec_i128((rx_vec_i128*)fill_state + 3);
|
||||
|
||||
constexpr int PREFETCH_DISTANCE = 4096;
|
||||
const char* prefetchPtr = ((const char*)scratchpad) + PREFETCH_DISTANCE;
|
||||
scratchpadEnd -= PREFETCH_DISTANCE;
|
||||
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
//process 64 bytes at a time in 4 lanes
|
||||
while (scratchpadPtr < scratchpadEnd) {
|
||||
hash_state0 = aesenc<softAes>(hash_state0, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 0));
|
||||
hash_state1 = aesdec<softAes>(hash_state1, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 1));
|
||||
hash_state2 = aesenc<softAes>(hash_state2, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 2));
|
||||
hash_state3 = aesdec<softAes>(hash_state3, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 3));
|
||||
|
||||
fill_state0 = aesdec<softAes>(fill_state0, key0);
|
||||
fill_state1 = aesenc<softAes>(fill_state1, key1);
|
||||
fill_state2 = aesdec<softAes>(fill_state2, key2);
|
||||
fill_state3 = aesenc<softAes>(fill_state3, key3);
|
||||
|
||||
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 0, fill_state0);
|
||||
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 1, fill_state1);
|
||||
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 2, fill_state2);
|
||||
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 3, fill_state3);
|
||||
|
||||
rx_prefetch_t0(prefetchPtr);
|
||||
|
||||
scratchpadPtr += 64;
|
||||
prefetchPtr += 64;
|
||||
}
|
||||
prefetchPtr = (const char*) scratchpad;
|
||||
scratchpadEnd += PREFETCH_DISTANCE;
|
||||
}
|
||||
|
||||
rx_store_vec_i128((rx_vec_i128*)fill_state + 0, fill_state0);
|
||||
rx_store_vec_i128((rx_vec_i128*)fill_state + 1, fill_state1);
|
||||
rx_store_vec_i128((rx_vec_i128*)fill_state + 2, fill_state2);
|
||||
rx_store_vec_i128((rx_vec_i128*)fill_state + 3, fill_state3);
|
||||
|
||||
//two extra rounds to achieve full diffusion
|
||||
rx_vec_i128 xkey0 = rx_set_int_vec_i128(AES_HASH_1R_XKEY0);
|
||||
rx_vec_i128 xkey1 = rx_set_int_vec_i128(AES_HASH_1R_XKEY1);
|
||||
|
||||
hash_state0 = aesenc<softAes>(hash_state0, xkey0);
|
||||
hash_state1 = aesdec<softAes>(hash_state1, xkey0);
|
||||
hash_state2 = aesenc<softAes>(hash_state2, xkey0);
|
||||
hash_state3 = aesdec<softAes>(hash_state3, xkey0);
|
||||
|
||||
hash_state0 = aesenc<softAes>(hash_state0, xkey1);
|
||||
hash_state1 = aesdec<softAes>(hash_state1, xkey1);
|
||||
hash_state2 = aesenc<softAes>(hash_state2, xkey1);
|
||||
hash_state3 = aesdec<softAes>(hash_state3, xkey1);
|
||||
|
||||
//output hash
|
||||
rx_store_vec_i128((rx_vec_i128*)hash + 0, hash_state0);
|
||||
rx_store_vec_i128((rx_vec_i128*)hash + 1, hash_state1);
|
||||
rx_store_vec_i128((rx_vec_i128*)hash + 2, hash_state2);
|
||||
rx_store_vec_i128((rx_vec_i128*)hash + 3, hash_state3);
|
||||
}
|
||||
|
||||
template void hashAndFillAes1Rx4<false>(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
|
||||
template void hashAndFillAes1Rx4<true>(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
|
||||
|
|
|
@ -38,3 +38,6 @@ void fillAes1Rx4(void *state, size_t outputSize, void *buffer);
|
|||
|
||||
template<bool softAes>
|
||||
void fillAes4Rx4(void *state, size_t outputSize, void *buffer);
|
||||
|
||||
template<bool softAes>
|
||||
void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
|
||||
|
|
|
@ -47,7 +47,7 @@ namespace randomx {
|
|||
rx_aligned_free(ptr);
|
||||
}
|
||||
|
||||
template class AlignedAllocator<CacheLineSize>;
|
||||
template struct AlignedAllocator<CacheLineSize>;
|
||||
|
||||
void* LargePageAllocator::allocMemory(size_t count) {
|
||||
return allocLargePagesMemory(count);
|
||||
|
|
|
@ -15,7 +15,7 @@ RANDOMX_SCRATCHPAD_L2 EQU 262144t
|
|||
RANDOMX_SCRATCHPAD_L1 EQU 16384t
|
||||
RANDOMX_JUMP_BITS EQU 8t
|
||||
RANDOMX_JUMP_OFFSET EQU 8t
|
||||
RANDOMX_FREQ_IADD_RS EQU 16t
|
||||
RANDOMX_FREQ_IADD_RS EQU 25t
|
||||
RANDOMX_FREQ_IADD_M EQU 7t
|
||||
RANDOMX_FREQ_ISUB_R EQU 16t
|
||||
RANDOMX_FREQ_ISUB_M EQU 7t
|
||||
|
@ -29,19 +29,19 @@ RANDOMX_FREQ_IMUL_RCP EQU 8t
|
|||
RANDOMX_FREQ_INEG_R EQU 2t
|
||||
RANDOMX_FREQ_IXOR_R EQU 15t
|
||||
RANDOMX_FREQ_IXOR_M EQU 5t
|
||||
RANDOMX_FREQ_IROR_R EQU 8t
|
||||
RANDOMX_FREQ_IROL_R EQU 2t
|
||||
RANDOMX_FREQ_IROR_R EQU 10t
|
||||
RANDOMX_FREQ_IROL_R EQU 0t
|
||||
RANDOMX_FREQ_ISWAP_R EQU 4t
|
||||
RANDOMX_FREQ_FSWAP_R EQU 4t
|
||||
RANDOMX_FREQ_FADD_R EQU 16t
|
||||
RANDOMX_FREQ_FSWAP_R EQU 8t
|
||||
RANDOMX_FREQ_FADD_R EQU 20t
|
||||
RANDOMX_FREQ_FADD_M EQU 5t
|
||||
RANDOMX_FREQ_FSUB_R EQU 16t
|
||||
RANDOMX_FREQ_FSUB_R EQU 20t
|
||||
RANDOMX_FREQ_FSUB_M EQU 5t
|
||||
RANDOMX_FREQ_FSCAL_R EQU 6t
|
||||
RANDOMX_FREQ_FMUL_R EQU 32t
|
||||
RANDOMX_FREQ_FMUL_R EQU 20t
|
||||
RANDOMX_FREQ_FDIV_M EQU 4t
|
||||
RANDOMX_FREQ_FSQRT_R EQU 6t
|
||||
RANDOMX_FREQ_CBRANCH EQU 25t
|
||||
RANDOMX_FREQ_CBRANCH EQU 16t
|
||||
RANDOMX_FREQ_CFROUND EQU 1t
|
||||
RANDOMX_FREQ_ISTORE EQU 16t
|
||||
RANDOMX_FREQ_NOP EQU 0t
|
||||
|
|
|
@ -67,7 +67,7 @@ namespace randomx {
|
|||
constexpr int wtSum = RANDOMX_FREQ_IADD_RS + RANDOMX_FREQ_IADD_M + RANDOMX_FREQ_ISUB_R + \
|
||||
RANDOMX_FREQ_ISUB_M + RANDOMX_FREQ_IMUL_R + RANDOMX_FREQ_IMUL_M + RANDOMX_FREQ_IMULH_R + \
|
||||
RANDOMX_FREQ_IMULH_M + RANDOMX_FREQ_ISMULH_R + RANDOMX_FREQ_ISMULH_M + RANDOMX_FREQ_IMUL_RCP + \
|
||||
RANDOMX_FREQ_INEG_R + RANDOMX_FREQ_IXOR_R + RANDOMX_FREQ_IXOR_M + RANDOMX_FREQ_IROR_R + RANDOMX_FREQ_IROL_R + RANDOMX_FREQ_ISWAP_R + \
|
||||
RANDOMX_FREQ_INEG_R + RANDOMX_FREQ_IXOR_R + RANDOMX_FREQ_IXOR_M + RANDOMX_FREQ_IROR_R + RANDOMX_FREQ_ISWAP_R + \
|
||||
RANDOMX_FREQ_FSWAP_R + RANDOMX_FREQ_FADD_R + RANDOMX_FREQ_FADD_M + RANDOMX_FREQ_FSUB_R + RANDOMX_FREQ_FSUB_M + \
|
||||
RANDOMX_FREQ_FSCAL_R + RANDOMX_FREQ_FMUL_R + RANDOMX_FREQ_FDIV_M + RANDOMX_FREQ_FSQRT_R + RANDOMX_FREQ_CBRANCH + \
|
||||
RANDOMX_FREQ_CFROUND + RANDOMX_FREQ_ISTORE + RANDOMX_FREQ_NOP;
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
Copyright (c) 2019, Wownero Inc., a Monero Enterprise Alliance partner company
|
||||
|
||||
All rights reserved.
|
||||
|
||||
|
@ -38,7 +39,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define RANDOMX_ARGON_LANES 1
|
||||
|
||||
//Argon2d salt
|
||||
#define RANDOMX_ARGON_SALT "RandomX\x03"
|
||||
#define RANDOMX_ARGON_SALT "RandomWOW\x01"
|
||||
|
||||
//Number of random Cache accesses per Dataset item. Minimum is 2.
|
||||
#define RANDOMX_CACHE_ACCESSES 8
|
||||
|
@ -56,16 +57,16 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define RANDOMX_PROGRAM_SIZE 256
|
||||
|
||||
//Number of iterations during VM execution.
|
||||
#define RANDOMX_PROGRAM_ITERATIONS 2048
|
||||
#define RANDOMX_PROGRAM_ITERATIONS 1024
|
||||
|
||||
//Number of chained VM executions per hash.
|
||||
#define RANDOMX_PROGRAM_COUNT 8
|
||||
#define RANDOMX_PROGRAM_COUNT 16
|
||||
|
||||
//Scratchpad L3 size in bytes. Must be a power of 2.
|
||||
#define RANDOMX_SCRATCHPAD_L3 2097152
|
||||
#define RANDOMX_SCRATCHPAD_L3 1048576
|
||||
|
||||
//Scratchpad L2 size in bytes. Must be a power of two and less than or equal to RANDOMX_SCRATCHPAD_L3.
|
||||
#define RANDOMX_SCRATCHPAD_L2 262144
|
||||
#define RANDOMX_SCRATCHPAD_L2 131072
|
||||
|
||||
//Scratchpad L1 size in bytes. Must be a power of two (minimum 64) and less than or equal to RANDOMX_SCRATCHPAD_L2.
|
||||
#define RANDOMX_SCRATCHPAD_L1 16384
|
||||
|
@ -82,7 +83,7 @@ Total sum of frequencies must be 256
|
|||
*/
|
||||
|
||||
//Integer instructions
|
||||
#define RANDOMX_FREQ_IADD_RS 16
|
||||
#define RANDOMX_FREQ_IADD_RS 25
|
||||
#define RANDOMX_FREQ_IADD_M 7
|
||||
#define RANDOMX_FREQ_ISUB_R 16
|
||||
#define RANDOMX_FREQ_ISUB_M 7
|
||||
|
@ -96,23 +97,23 @@ Total sum of frequencies must be 256
|
|||
#define RANDOMX_FREQ_INEG_R 2
|
||||
#define RANDOMX_FREQ_IXOR_R 15
|
||||
#define RANDOMX_FREQ_IXOR_M 5
|
||||
#define RANDOMX_FREQ_IROR_R 8
|
||||
#define RANDOMX_FREQ_IROL_R 2
|
||||
#define RANDOMX_FREQ_IROR_R 10
|
||||
#define RANDOMX_FREQ_IROL_R 0
|
||||
#define RANDOMX_FREQ_ISWAP_R 4
|
||||
|
||||
//Floating point instructions
|
||||
#define RANDOMX_FREQ_FSWAP_R 4
|
||||
#define RANDOMX_FREQ_FADD_R 16
|
||||
#define RANDOMX_FREQ_FSWAP_R 8
|
||||
#define RANDOMX_FREQ_FADD_R 20
|
||||
#define RANDOMX_FREQ_FADD_M 5
|
||||
#define RANDOMX_FREQ_FSUB_R 16
|
||||
#define RANDOMX_FREQ_FSUB_R 20
|
||||
#define RANDOMX_FREQ_FSUB_M 5
|
||||
#define RANDOMX_FREQ_FSCAL_R 6
|
||||
#define RANDOMX_FREQ_FMUL_R 32
|
||||
#define RANDOMX_FREQ_FMUL_R 20
|
||||
#define RANDOMX_FREQ_FDIV_M 4
|
||||
#define RANDOMX_FREQ_FSQRT_R 6
|
||||
|
||||
//Control instructions
|
||||
#define RANDOMX_FREQ_CBRANCH 25
|
||||
#define RANDOMX_FREQ_CBRANCH 16
|
||||
#define RANDOMX_FREQ_CFROUND 1
|
||||
|
||||
//Store instruction
|
||||
|
|
|
@ -61,8 +61,17 @@ struct randomx_cache {
|
|||
|
||||
//A pointer to a standard-layout struct object points to its initial member
|
||||
static_assert(std::is_standard_layout<randomx_dataset>(), "randomx_dataset must be a standard-layout struct");
|
||||
|
||||
//the following assert fails when compiling Debug in Visual Studio (JIT mode will crash in Debug)
|
||||
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) && defined(_DEBUG)
|
||||
#define TO_STR(x) #x
|
||||
#define STR(x) TO_STR(x)
|
||||
#pragma message ( __FILE__ "(" STR(__LINE__) ") warning: check std::is_standard_layout<randomx_cache>() is disabled for Debug configuration. JIT mode will crash." )
|
||||
#undef STR
|
||||
#undef TO_STR
|
||||
#else
|
||||
static_assert(std::is_standard_layout<randomx_cache>(), "randomx_cache must be a standard-layout struct");
|
||||
#endif
|
||||
|
||||
namespace randomx {
|
||||
|
||||
|
|
|
@ -157,6 +157,21 @@ void rx_set_rounding_mode(uint32_t mode) {
|
|||
}
|
||||
}
|
||||
|
||||
uint32_t rx_get_rounding_mode() {
|
||||
switch (fegetround()) {
|
||||
case FE_DOWNWARD:
|
||||
return RoundDown;
|
||||
case FE_UPWARD:
|
||||
return RoundUp;
|
||||
case FE_TOWARDZERO:
|
||||
return RoundToZero;
|
||||
case FE_TONEAREST:
|
||||
return RoundToNearest;
|
||||
default:
|
||||
UNREACHABLE;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef RANDOMX_USE_X87
|
||||
|
|
|
@ -102,6 +102,7 @@ typedef __m128d rx_vec_f128;
|
|||
#define rx_aligned_alloc(a, b) _mm_malloc(a,b)
|
||||
#define rx_aligned_free(a) _mm_free(a)
|
||||
#define rx_prefetch_nta(x) _mm_prefetch((const char *)(x), _MM_HINT_NTA)
|
||||
#define rx_prefetch_t0(x) _mm_prefetch((const char *)(x), _MM_HINT_T0)
|
||||
|
||||
#define rx_load_vec_f128 _mm_load_pd
|
||||
#define rx_store_vec_f128 _mm_store_pd
|
||||
|
@ -172,6 +173,10 @@ FORCE_INLINE void rx_set_rounding_mode(uint32_t mode) {
|
|||
_mm_setcsr(rx_mxcsr_default | (mode << 13));
|
||||
}
|
||||
|
||||
FORCE_INLINE uint32_t rx_get_rounding_mode() {
|
||||
return (_mm_getcsr() >> 13) & 3;
|
||||
}
|
||||
|
||||
#elif defined(__PPC64__) && defined(__ALTIVEC__) && defined(__VSX__) //sadly only POWER7 and newer will be able to use SIMD acceleration. Earlier processors cant use doubles or 64 bit integers with SIMD
|
||||
#include <cstdint>
|
||||
#include <stdexcept>
|
||||
|
@ -201,6 +206,7 @@ typedef union{
|
|||
#define rx_aligned_alloc(a, b) malloc(a)
|
||||
#define rx_aligned_free(a) free(a)
|
||||
#define rx_prefetch_nta(x)
|
||||
#define rx_prefetch_t0(x)
|
||||
|
||||
/* Splat 64-bit long long to 2 64-bit long longs */
|
||||
FORCE_INLINE __m128i vec_splat2sd (int64_t scalar)
|
||||
|
@ -399,6 +405,10 @@ inline void rx_prefetch_nta(void* ptr) {
|
|||
asm volatile ("prfm pldl1strm, [%0]\n" : : "r" (ptr));
|
||||
}
|
||||
|
||||
inline void rx_prefetch_t0(const void* ptr) {
|
||||
asm volatile ("prfm pldl1strm, [%0]\n" : : "r" (ptr));
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_load_vec_f128(const double* pd) {
|
||||
return vld1q_f64((const float64_t*)pd);
|
||||
}
|
||||
|
@ -532,6 +542,7 @@ typedef union {
|
|||
#define rx_aligned_alloc(a, b) malloc(a)
|
||||
#define rx_aligned_free(a) free(a)
|
||||
#define rx_prefetch_nta(x)
|
||||
#define rx_prefetch_t0(x)
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_load_vec_f128(const double* pd) {
|
||||
rx_vec_f128 x;
|
||||
|
@ -729,6 +740,8 @@ void rx_reset_float_state();
|
|||
|
||||
void rx_set_rounding_mode(uint32_t mode);
|
||||
|
||||
uint32_t rx_get_rounding_mode();
|
||||
|
||||
#endif
|
||||
|
||||
double loadDoublePortable(const void* addr);
|
||||
|
|
|
@ -35,3 +35,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#else
|
||||
#include "jit_compiler_fallback.hpp"
|
||||
#endif
|
||||
|
||||
#if defined(__OpenBSD__) || defined(__NetBSD__)
|
||||
#define RANDOMX_FORCE_SECURE
|
||||
#endif
|
||||
|
|
|
@ -38,7 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
namespace randomx {
|
||||
|
||||
class Program;
|
||||
class ProgramConfiguration;
|
||||
struct ProgramConfiguration;
|
||||
class SuperscalarProgram;
|
||||
class Instruction;
|
||||
|
||||
|
|
|
@ -25,26 +25,32 @@
|
|||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#if defined(__APPLE__)
|
||||
#define DECL(x) _##x
|
||||
#else
|
||||
#define DECL(x) x
|
||||
#endif
|
||||
|
||||
.arch armv8-a
|
||||
.text
|
||||
.global randomx_program_aarch64
|
||||
.global randomx_program_aarch64_main_loop
|
||||
.global randomx_program_aarch64_vm_instructions
|
||||
.global randomx_program_aarch64_imul_rcp_literals_end
|
||||
.global randomx_program_aarch64_vm_instructions_end
|
||||
.global randomx_program_aarch64_cacheline_align_mask1
|
||||
.global randomx_program_aarch64_cacheline_align_mask2
|
||||
.global randomx_program_aarch64_update_spMix1
|
||||
.global randomx_program_aarch64_vm_instructions_end_light
|
||||
.global randomx_program_aarch64_light_cacheline_align_mask
|
||||
.global randomx_program_aarch64_light_dataset_offset
|
||||
.global randomx_init_dataset_aarch64
|
||||
.global randomx_init_dataset_aarch64_end
|
||||
.global randomx_calc_dataset_item_aarch64
|
||||
.global randomx_calc_dataset_item_aarch64_prefetch
|
||||
.global randomx_calc_dataset_item_aarch64_mix
|
||||
.global randomx_calc_dataset_item_aarch64_store_result
|
||||
.global randomx_calc_dataset_item_aarch64_end
|
||||
.global DECL(randomx_program_aarch64)
|
||||
.global DECL(randomx_program_aarch64_main_loop)
|
||||
.global DECL(randomx_program_aarch64_vm_instructions)
|
||||
.global DECL(randomx_program_aarch64_imul_rcp_literals_end)
|
||||
.global DECL(randomx_program_aarch64_vm_instructions_end)
|
||||
.global DECL(randomx_program_aarch64_cacheline_align_mask1)
|
||||
.global DECL(randomx_program_aarch64_cacheline_align_mask2)
|
||||
.global DECL(randomx_program_aarch64_update_spMix1)
|
||||
.global DECL(randomx_program_aarch64_vm_instructions_end_light)
|
||||
.global DECL(randomx_program_aarch64_light_cacheline_align_mask)
|
||||
.global DECL(randomx_program_aarch64_light_dataset_offset)
|
||||
.global DECL(randomx_init_dataset_aarch64)
|
||||
.global DECL(randomx_init_dataset_aarch64_end)
|
||||
.global DECL(randomx_calc_dataset_item_aarch64)
|
||||
.global DECL(randomx_calc_dataset_item_aarch64_prefetch)
|
||||
.global DECL(randomx_calc_dataset_item_aarch64_mix)
|
||||
.global DECL(randomx_calc_dataset_item_aarch64_store_result)
|
||||
.global DECL(randomx_calc_dataset_item_aarch64_end)
|
||||
|
||||
#include "configuration.h"
|
||||
|
||||
|
@ -101,7 +107,7 @@
|
|||
# v31 -> scale mask = 0x81f000000000000081f0000000000000
|
||||
|
||||
.balign 4
|
||||
randomx_program_aarch64:
|
||||
DECL(randomx_program_aarch64):
|
||||
# Save callee-saved registers
|
||||
sub sp, sp, 192
|
||||
stp x16, x17, [sp]
|
||||
|
@ -189,7 +195,7 @@ randomx_program_aarch64:
|
|||
ldr q14, literal_v14
|
||||
ldr q15, literal_v15
|
||||
|
||||
randomx_program_aarch64_main_loop:
|
||||
DECL(randomx_program_aarch64_main_loop):
|
||||
# spAddr0 = spMix1 & ScratchpadL3Mask64;
|
||||
# spAddr1 = (spMix1 >> 32) & ScratchpadL3Mask64;
|
||||
lsr x18, x10, 32
|
||||
|
@ -262,7 +268,7 @@ randomx_program_aarch64_main_loop:
|
|||
orr v23.16b, v23.16b, v30.16b
|
||||
|
||||
# Execute VM instructions
|
||||
randomx_program_aarch64_vm_instructions:
|
||||
DECL(randomx_program_aarch64_vm_instructions):
|
||||
|
||||
# buffer for generated instructions
|
||||
# FDIV_M is the largest instruction taking up to 12 ARMv8 instructions
|
||||
|
@ -281,7 +287,7 @@ literal_x27: .fill 1,8,0
|
|||
literal_x28: .fill 1,8,0
|
||||
literal_x29: .fill 1,8,0
|
||||
literal_x30: .fill 1,8,0
|
||||
randomx_program_aarch64_imul_rcp_literals_end:
|
||||
DECL(randomx_program_aarch64_imul_rcp_literals_end):
|
||||
|
||||
literal_v0: .fill 2,8,0
|
||||
literal_v1: .fill 2,8,0
|
||||
|
@ -300,14 +306,14 @@ literal_v13: .fill 2,8,0
|
|||
literal_v14: .fill 2,8,0
|
||||
literal_v15: .fill 2,8,0
|
||||
|
||||
randomx_program_aarch64_vm_instructions_end:
|
||||
DECL(randomx_program_aarch64_vm_instructions_end):
|
||||
|
||||
# mx ^= r[readReg2] ^ r[readReg3];
|
||||
eor x9, x9, x18
|
||||
|
||||
# Calculate dataset pointer for dataset prefetch
|
||||
mov w18, w9
|
||||
randomx_program_aarch64_cacheline_align_mask1:
|
||||
DECL(randomx_program_aarch64_cacheline_align_mask1):
|
||||
# Actual mask will be inserted by JIT compiler
|
||||
and x18, x18, 1
|
||||
add x18, x18, x1
|
||||
|
@ -320,12 +326,12 @@ randomx_program_aarch64_cacheline_align_mask1:
|
|||
|
||||
# Calculate dataset pointer for dataset read
|
||||
mov w10, w9
|
||||
randomx_program_aarch64_cacheline_align_mask2:
|
||||
DECL(randomx_program_aarch64_cacheline_align_mask2):
|
||||
# Actual mask will be inserted by JIT compiler
|
||||
and x10, x10, 1
|
||||
add x10, x10, x1
|
||||
|
||||
randomx_program_aarch64_xor_with_dataset_line:
|
||||
DECL(randomx_program_aarch64_xor_with_dataset_line):
|
||||
# xor integer registers with dataset data
|
||||
ldp x18, x19, [x10]
|
||||
eor x4, x4, x18
|
||||
|
@ -340,7 +346,7 @@ randomx_program_aarch64_xor_with_dataset_line:
|
|||
eor x14, x14, x18
|
||||
eor x15, x15, x19
|
||||
|
||||
randomx_program_aarch64_update_spMix1:
|
||||
DECL(randomx_program_aarch64_update_spMix1):
|
||||
# JIT compiler will replace it with "eor x10, config.readReg0, config.readReg1"
|
||||
eor x10, x0, x0
|
||||
|
||||
|
@ -361,7 +367,7 @@ randomx_program_aarch64_update_spMix1:
|
|||
stp q18, q19, [x16, 32]
|
||||
|
||||
subs x3, x3, 1
|
||||
bne randomx_program_aarch64_main_loop
|
||||
bne DECL(randomx_program_aarch64_main_loop)
|
||||
|
||||
# Restore x0
|
||||
ldr x0, [sp], 16
|
||||
|
@ -395,7 +401,7 @@ randomx_program_aarch64_update_spMix1:
|
|||
|
||||
ret
|
||||
|
||||
randomx_program_aarch64_vm_instructions_end_light:
|
||||
DECL(randomx_program_aarch64_vm_instructions_end_light):
|
||||
sub sp, sp, 96
|
||||
stp x0, x1, [sp, 64]
|
||||
stp x2, x30, [sp, 80]
|
||||
|
@ -412,26 +418,26 @@ randomx_program_aarch64_vm_instructions_end_light:
|
|||
# x1 -> pointer to output
|
||||
mov x1, sp
|
||||
|
||||
randomx_program_aarch64_light_cacheline_align_mask:
|
||||
DECL(randomx_program_aarch64_light_cacheline_align_mask):
|
||||
# Actual mask will be inserted by JIT compiler
|
||||
and w2, w9, 1
|
||||
|
||||
# x2 -> item number
|
||||
lsr x2, x2, 6
|
||||
|
||||
randomx_program_aarch64_light_dataset_offset:
|
||||
DECL(randomx_program_aarch64_light_dataset_offset):
|
||||
# Apply dataset offset (filled in by JIT compiler)
|
||||
add x2, x2, 0
|
||||
add x2, x2, 0
|
||||
|
||||
bl randomx_calc_dataset_item_aarch64
|
||||
bl DECL(randomx_calc_dataset_item_aarch64)
|
||||
|
||||
mov x10, sp
|
||||
ldp x0, x1, [sp, 64]
|
||||
ldp x2, x30, [sp, 80]
|
||||
add sp, sp, 96
|
||||
|
||||
b randomx_program_aarch64_xor_with_dataset_line
|
||||
b DECL(randomx_program_aarch64_xor_with_dataset_line)
|
||||
|
||||
|
||||
|
||||
|
@ -442,26 +448,26 @@ randomx_program_aarch64_light_dataset_offset:
|
|||
# x2 -> start item
|
||||
# x3 -> end item
|
||||
|
||||
randomx_init_dataset_aarch64:
|
||||
DECL(randomx_init_dataset_aarch64):
|
||||
# Save x30 (return address)
|
||||
str x30, [sp, -16]!
|
||||
|
||||
# Load pointer to cache memory
|
||||
ldr x0, [x0]
|
||||
|
||||
randomx_init_dataset_aarch64_main_loop:
|
||||
bl randomx_calc_dataset_item_aarch64
|
||||
DECL(randomx_init_dataset_aarch64_main_loop):
|
||||
bl DECL(randomx_calc_dataset_item_aarch64)
|
||||
add x1, x1, 64
|
||||
add x2, x2, 1
|
||||
cmp x2, x3
|
||||
bne randomx_init_dataset_aarch64_main_loop
|
||||
bne DECL(randomx_init_dataset_aarch64_main_loop)
|
||||
|
||||
# Restore x30 (return address)
|
||||
ldr x30, [sp], 16
|
||||
|
||||
ret
|
||||
|
||||
randomx_init_dataset_aarch64_end:
|
||||
DECL(randomx_init_dataset_aarch64_end):
|
||||
|
||||
# Input parameters
|
||||
#
|
||||
|
@ -479,7 +485,7 @@ randomx_init_dataset_aarch64_end:
|
|||
# x12 -> temporary
|
||||
# x13 -> temporary
|
||||
|
||||
randomx_calc_dataset_item_aarch64:
|
||||
DECL(randomx_calc_dataset_item_aarch64):
|
||||
sub sp, sp, 112
|
||||
stp x0, x1, [sp]
|
||||
stp x2, x3, [sp, 16]
|
||||
|
@ -526,7 +532,7 @@ randomx_calc_dataset_item_aarch64:
|
|||
ldr x12, superscalarAdd7
|
||||
eor x7, x0, x12
|
||||
|
||||
b randomx_calc_dataset_item_aarch64_prefetch
|
||||
b DECL(randomx_calc_dataset_item_aarch64_prefetch)
|
||||
|
||||
superscalarMul0: .quad 6364136223846793005
|
||||
superscalarAdd1: .quad 9298411001130361340
|
||||
|
@ -539,7 +545,7 @@ superscalarAdd7: .quad 9549104520008361294
|
|||
|
||||
# Prefetch -> SuperScalar hash -> Mix will be repeated N times
|
||||
|
||||
randomx_calc_dataset_item_aarch64_prefetch:
|
||||
DECL(randomx_calc_dataset_item_aarch64_prefetch):
|
||||
# Actual mask will be inserted by JIT compiler
|
||||
and x11, x10, 1
|
||||
add x11, x8, x11, lsl 6
|
||||
|
@ -547,7 +553,7 @@ randomx_calc_dataset_item_aarch64_prefetch:
|
|||
|
||||
# Generated SuperScalar hash program goes here
|
||||
|
||||
randomx_calc_dataset_item_aarch64_mix:
|
||||
DECL(randomx_calc_dataset_item_aarch64_mix):
|
||||
ldp x12, x13, [x11]
|
||||
eor x0, x0, x12
|
||||
eor x1, x1, x13
|
||||
|
@ -561,7 +567,7 @@ randomx_calc_dataset_item_aarch64_mix:
|
|||
eor x6, x6, x12
|
||||
eor x7, x7, x13
|
||||
|
||||
randomx_calc_dataset_item_aarch64_store_result:
|
||||
DECL(randomx_calc_dataset_item_aarch64_store_result):
|
||||
stp x0, x1, [x9]
|
||||
stp x2, x3, [x9, 16]
|
||||
stp x4, x5, [x9, 32]
|
||||
|
@ -578,4 +584,4 @@ randomx_calc_dataset_item_aarch64_store_result:
|
|||
|
||||
ret
|
||||
|
||||
randomx_calc_dataset_item_aarch64_end:
|
||||
DECL(randomx_calc_dataset_item_aarch64_end):
|
||||
|
|
|
@ -36,7 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
namespace randomx {
|
||||
|
||||
class Program;
|
||||
class ProgramConfiguration;
|
||||
struct ProgramConfiguration;
|
||||
class SuperscalarProgram;
|
||||
|
||||
class JitCompilerFallback {
|
||||
|
|
|
@ -295,7 +295,7 @@ namespace randomx {
|
|||
|
||||
void JitCompilerX86::generateProgramPrologue(Program& prog, ProgramConfiguration& pcfg) {
|
||||
instructionOffsets.clear();
|
||||
for (unsigned i = 0; i < 8; ++i) {
|
||||
for (unsigned i = 0; i < RegistersCount; ++i) {
|
||||
registerUsage[i] = -1;
|
||||
}
|
||||
|
||||
|
|
|
@ -36,7 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
namespace randomx {
|
||||
|
||||
class Program;
|
||||
class ProgramConfiguration;
|
||||
struct ProgramConfiguration;
|
||||
class SuperscalarProgram;
|
||||
class JitCompilerX86;
|
||||
class Instruction;
|
||||
|
|
|
@ -36,13 +36,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "cpu.hpp"
|
||||
#include <cassert>
|
||||
#include <limits>
|
||||
#include <cfenv>
|
||||
|
||||
extern "C" {
|
||||
|
||||
randomx_flags randomx_get_flags() {
|
||||
randomx_flags flags = RANDOMX_HAVE_COMPILER ? RANDOMX_FLAG_JIT : RANDOMX_FLAG_DEFAULT;
|
||||
randomx::Cpu cpu;
|
||||
#ifdef __OpenBSD__
|
||||
#ifdef RANDOMX_FORCE_SECURE
|
||||
if (flags == RANDOMX_FLAG_JIT) {
|
||||
flags |= RANDOMX_FLAG_SECURE;
|
||||
}
|
||||
|
@ -328,7 +329,7 @@ extern "C" {
|
|||
void randomx_vm_set_cache(randomx_vm *machine, randomx_cache* cache) {
|
||||
assert(machine != nullptr);
|
||||
assert(cache != nullptr && cache->isInitialized());
|
||||
if (machine->cacheKey != cache->cacheKey) {
|
||||
if (machine->cacheKey != cache->cacheKey || machine->getMemory() != cache->memory) {
|
||||
machine->setCache(cache);
|
||||
machine->cacheKey = cache->cacheKey;
|
||||
}
|
||||
|
@ -349,6 +350,8 @@ extern "C" {
|
|||
assert(machine != nullptr);
|
||||
assert(inputSize == 0 || input != nullptr);
|
||||
assert(output != nullptr);
|
||||
fenv_t fpstate;
|
||||
fegetenv(&fpstate);
|
||||
alignas(16) uint64_t tempHash[8];
|
||||
int blakeResult = blake2b(tempHash, sizeof(tempHash), input, inputSize, nullptr, 0);
|
||||
assert(blakeResult == 0);
|
||||
|
@ -361,6 +364,34 @@ extern "C" {
|
|||
}
|
||||
machine->run(&tempHash);
|
||||
machine->getFinalResult(output, RANDOMX_HASH_SIZE);
|
||||
fesetenv(&fpstate);
|
||||
}
|
||||
|
||||
void randomx_calculate_hash_first(randomx_vm* machine, const void* input, size_t inputSize) {
|
||||
blake2b(machine->tempHash, sizeof(machine->tempHash), input, inputSize, nullptr, 0);
|
||||
machine->initScratchpad(machine->tempHash);
|
||||
}
|
||||
|
||||
void randomx_calculate_hash_next(randomx_vm* machine, const void* nextInput, size_t nextInputSize, void* output) {
|
||||
machine->resetRoundingMode();
|
||||
for (uint32_t chain = 0; chain < RANDOMX_PROGRAM_COUNT - 1; ++chain) {
|
||||
machine->run(machine->tempHash);
|
||||
blake2b(machine->tempHash, sizeof(machine->tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0);
|
||||
}
|
||||
machine->run(machine->tempHash);
|
||||
|
||||
// Finish current hash and fill the scratchpad for the next hash at the same time
|
||||
blake2b(machine->tempHash, sizeof(machine->tempHash), nextInput, nextInputSize, nullptr, 0);
|
||||
machine->hashAndFill(output, RANDOMX_HASH_SIZE, machine->tempHash);
|
||||
}
|
||||
|
||||
void randomx_calculate_hash_last(randomx_vm* machine, void* output) {
|
||||
machine->resetRoundingMode();
|
||||
for (int chain = 0; chain < RANDOMX_PROGRAM_COUNT - 1; ++chain) {
|
||||
machine->run(machine->tempHash);
|
||||
blake2b(machine->tempHash, sizeof(machine->tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0);
|
||||
}
|
||||
machine->run(machine->tempHash);
|
||||
machine->getFinalResult(output, RANDOMX_HASH_SIZE);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -30,6 +30,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define RANDOMX_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#define RANDOMX_HASH_SIZE 32
|
||||
#define RANDOMX_DATASET_ITEM_SIZE 64
|
||||
|
@ -238,6 +239,27 @@ RANDOMX_EXPORT void randomx_destroy_vm(randomx_vm *machine);
|
|||
*/
|
||||
RANDOMX_EXPORT void randomx_calculate_hash(randomx_vm *machine, const void *input, size_t inputSize, void *output);
|
||||
|
||||
/**
|
||||
* Set of functions used to calculate multiple RandomX hashes more efficiently.
|
||||
* randomx_calculate_hash_first will begin a hash calculation.
|
||||
* randomx_calculate_hash_next will output the hash value of the previous input
|
||||
* and begin the calculation of the next hash.
|
||||
* randomx_calculate_hash_last will output the hash value of the previous input.
|
||||
*
|
||||
* WARNING: These functions may alter the floating point rounding mode of the calling thread.
|
||||
*
|
||||
* @param machine is a pointer to a randomx_vm structure. Must not be NULL.
|
||||
* @param input is a pointer to memory to be hashed. Must not be NULL.
|
||||
* @param inputSize is the number of bytes to be hashed.
|
||||
* @param nextInput is a pointer to memory to be hashed for the next hash. Must not be NULL.
|
||||
* @param nextInputSize is the number of bytes to be hashed for the next hash.
|
||||
* @param output is a pointer to memory where the hash will be stored. Must not
|
||||
* be NULL and at least RANDOMX_HASH_SIZE bytes must be available for writing.
|
||||
*/
|
||||
RANDOMX_EXPORT void randomx_calculate_hash_first(randomx_vm* machine, const void* input, size_t inputSize);
|
||||
RANDOMX_EXPORT void randomx_calculate_hash_next(randomx_vm* machine, const void* nextInput, size_t nextInputSize, void* output);
|
||||
RANDOMX_EXPORT void randomx_calculate_hash_last(randomx_vm* machine, void* output);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -65,7 +65,7 @@ set_thread_affinity(std::thread::native_handle_type thread,
|
|||
(thread_policy_t)&policy, 1);
|
||||
#elif defined(_WIN32) || defined(__CYGWIN__)
|
||||
rc = SetThreadAffinityMask(reinterpret_cast<HANDLE>(thread), 1ULL << cpuid) == 0 ? -2 : 0;
|
||||
#elif !defined(__OpenBSD__)
|
||||
#elif !defined(__OpenBSD__) && !defined(__FreeBSD__) && !defined(__ANDROID__) && !defined(__NetBSD__)
|
||||
cpu_set_t cs;
|
||||
CPU_ZERO(&cs);
|
||||
CPU_SET(cpuid, &cs);
|
||||
|
|
|
@ -40,9 +40,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "../dataset.hpp"
|
||||
#include "../blake2/endian.h"
|
||||
#include "../common.hpp"
|
||||
#include "../jit_compiler.hpp"
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
#include <VersionHelpers.h>
|
||||
#include <versionhelpers.h>
|
||||
#endif
|
||||
#include "affinity.hpp"
|
||||
|
||||
|
@ -94,6 +95,7 @@ void printUsage(const char* executable) {
|
|||
std::cout << " --ssse3 use optimized Argon2 for SSSE3 CPUs" << std::endl;
|
||||
std::cout << " --avx2 use optimized Argon2 for AVX2 CPUs" << std::endl;
|
||||
std::cout << " --auto select the best options for the current CPU" << std::endl;
|
||||
std::cout << " --noBatch calculate hashes one by one (default: batch)" << std::endl;
|
||||
}
|
||||
|
||||
struct MemoryException : public std::exception {
|
||||
|
@ -109,11 +111,14 @@ struct DatasetAllocException : public MemoryException {
|
|||
}
|
||||
};
|
||||
|
||||
void mine(randomx_vm* vm, std::atomic<uint32_t>& atomicNonce, AtomicHash& result, uint32_t noncesCount, int thread, int cpuid=-1) {
|
||||
using MineFunc = void(randomx_vm * vm, std::atomic<uint32_t> & atomicNonce, AtomicHash & result, uint32_t noncesCount, int thread, int cpuid);
|
||||
|
||||
template<bool batch>
|
||||
void mine(randomx_vm* vm, std::atomic<uint32_t>& atomicNonce, AtomicHash& result, uint32_t noncesCount, int thread, int cpuid = -1) {
|
||||
if (cpuid >= 0) {
|
||||
int rc = set_thread_affinity(cpuid);
|
||||
if (rc) {
|
||||
std::cerr << "Failed to set thread affinity for thread " << thread << " (error=" << rc << ")" << std::endl;
|
||||
std::cerr << "Failed to set thread affinity for thread " << thread << " (error=" << rc << ")" << std::endl;
|
||||
}
|
||||
}
|
||||
uint64_t hash[RANDOMX_HASH_SIZE / sizeof(uint64_t)];
|
||||
|
@ -122,16 +127,27 @@ void mine(randomx_vm* vm, std::atomic<uint32_t>& atomicNonce, AtomicHash& result
|
|||
void* noncePtr = blockTemplate + 39;
|
||||
auto nonce = atomicNonce.fetch_add(1);
|
||||
|
||||
while (nonce < noncesCount) {
|
||||
if (batch) {
|
||||
store32(noncePtr, nonce);
|
||||
randomx_calculate_hash(vm, blockTemplate, sizeof(blockTemplate), &hash);
|
||||
randomx_calculate_hash_first(vm, blockTemplate, sizeof(blockTemplate));
|
||||
}
|
||||
|
||||
while (nonce < noncesCount) {
|
||||
if (batch) {
|
||||
nonce = atomicNonce.fetch_add(1);
|
||||
}
|
||||
store32(noncePtr, nonce);
|
||||
(batch ? randomx_calculate_hash_next : randomx_calculate_hash)(vm, blockTemplate, sizeof(blockTemplate), &hash);
|
||||
result.xorWith(hash);
|
||||
nonce = atomicNonce.fetch_add(1);
|
||||
if (!batch) {
|
||||
nonce = atomicNonce.fetch_add(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
bool softAes, miningMode, verificationMode, help, largePages, jit, secure, ssse3, avx2, autoFlags;
|
||||
bool softAes, miningMode, verificationMode, help, largePages, jit, secure;
|
||||
bool ssse3, avx2, autoFlags, noBatch;
|
||||
int noncesCount, threadCount, initThreadCount;
|
||||
uint64_t threadAffinity;
|
||||
int32_t seedValue;
|
||||
|
@ -155,16 +171,23 @@ int main(int argc, char** argv) {
|
|||
readOption("--ssse3", argc, argv, ssse3);
|
||||
readOption("--avx2", argc, argv, avx2);
|
||||
readOption("--auto", argc, argv, autoFlags);
|
||||
readOption("--noBatch", argc, argv, noBatch);
|
||||
|
||||
store32(&seed, seedValue);
|
||||
|
||||
std::cout << "RandomX benchmark v1.1.5" << std::endl;
|
||||
std::cout << "RandomX benchmark v1.1.8" << std::endl;
|
||||
|
||||
if (help || (!miningMode && !verificationMode)) {
|
||||
if (help) {
|
||||
printUsage(argv[0]);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!miningMode && !verificationMode) {
|
||||
std::cout << "Please select either the fast mode (--mine) or the slow mode (--verify)" << std::endl;
|
||||
std::cout << "Run '" << argv[0] << " --help' to see all supported options" << std::endl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
std::atomic<uint32_t> atomicNonce(0);
|
||||
AtomicHash result;
|
||||
std::vector<randomx_vm*> vms;
|
||||
|
@ -190,7 +213,7 @@ int main(int argc, char** argv) {
|
|||
}
|
||||
if (jit) {
|
||||
flags |= RANDOMX_FLAG_JIT;
|
||||
#ifdef __OpenBSD__
|
||||
#ifdef RANDOMX_FORCE_SECURE
|
||||
flags |= RANDOMX_FLAG_SECURE;
|
||||
#endif
|
||||
}
|
||||
|
@ -202,7 +225,7 @@ int main(int argc, char** argv) {
|
|||
if (miningMode) {
|
||||
flags |= RANDOMX_FLAG_FULL_MEM;
|
||||
}
|
||||
#ifndef __OpenBSD__
|
||||
#ifndef RANDOMX_FORCE_SECURE
|
||||
if (secure) {
|
||||
flags |= RANDOMX_FLAG_SECURE;
|
||||
}
|
||||
|
@ -254,6 +277,16 @@ int main(int argc, char** argv) {
|
|||
std::cout << " - thread affinity (" << mask_to_string(threadAffinity) << ")" << std::endl;
|
||||
}
|
||||
|
||||
MineFunc* func;
|
||||
|
||||
if (noBatch) {
|
||||
func = &mine<false>;
|
||||
}
|
||||
else {
|
||||
func = &mine<true>;
|
||||
std::cout << " - batch mode" << std::endl;
|
||||
}
|
||||
|
||||
std::cout << "Initializing";
|
||||
if (miningMode)
|
||||
std::cout << " (" << initThreadCount << " thread" << (initThreadCount > 1 ? "s)" : ")");
|
||||
|
@ -324,14 +357,14 @@ int main(int argc, char** argv) {
|
|||
int cpuid = -1;
|
||||
if (threadAffinity)
|
||||
cpuid = cpuid_from_mask(threadAffinity, i);
|
||||
threads.push_back(std::thread(&mine, vms[i], std::ref(atomicNonce), std::ref(result), noncesCount, i, cpuid));
|
||||
threads.push_back(std::thread(func, vms[i], std::ref(atomicNonce), std::ref(result), noncesCount, i, cpuid));
|
||||
}
|
||||
for (unsigned i = 0; i < threads.size(); ++i) {
|
||||
threads[i].join();
|
||||
}
|
||||
}
|
||||
else {
|
||||
mine(vms[0], std::ref(atomicNonce), std::ref(result), noncesCount, 0);
|
||||
func(vms[0], std::ref(atomicNonce), std::ref(result), noncesCount, 0, -1);
|
||||
}
|
||||
|
||||
double elapsed = sw.getElapsed();
|
||||
|
|
|
@ -143,7 +143,7 @@ int main() {
|
|||
randomx::JitCompiler jit;
|
||||
jit.generateSuperscalarHash(cache->programs, cache->reciprocalCache);
|
||||
jit.generateDatasetInitCode();
|
||||
#ifdef __OpenBSD__
|
||||
#ifdef RANDOMX_FORCE_SECURE
|
||||
jit.enableExecution();
|
||||
#else
|
||||
jit.enableAll();
|
||||
|
@ -954,7 +954,7 @@ int main() {
|
|||
assert(ibc.memMask == randomx::ScratchpadL3Mask);
|
||||
});
|
||||
|
||||
#ifdef __OpenBSD__
|
||||
#ifdef RANDOMX_FORCE_SECURE
|
||||
vm = randomx_create_vm(RANDOMX_FLAG_DEFAULT | RANDOMX_FLAG_SECURE, cache, nullptr);
|
||||
#else
|
||||
vm = randomx_create_vm(RANDOMX_FLAG_DEFAULT, cache, nullptr);
|
||||
|
@ -1009,10 +1009,10 @@ int main() {
|
|||
vm = nullptr;
|
||||
cache = randomx_alloc_cache(RANDOMX_FLAG_JIT);
|
||||
initCache("test key 000");
|
||||
#ifdef __OpenBSD__
|
||||
vm = randomx_create_vm(RANDOMX_FLAG_DEFAULT | RANDOMX_FLAG_SECURE, cache, nullptr);
|
||||
#ifdef RANDOMX_FORCE_SECURE
|
||||
vm = randomx_create_vm(RANDOMX_FLAG_JIT | RANDOMX_FLAG_SECURE, cache, nullptr);
|
||||
#else
|
||||
vm = randomx_create_vm(RANDOMX_FLAG_DEFAULT, cache, nullptr);
|
||||
vm = randomx_create_vm(RANDOMX_FLAG_JIT, cache, nullptr);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -1026,9 +1026,6 @@ int main() {
|
|||
|
||||
runTest("Hash test 2e (compiler)", RANDOMX_HAVE_COMPILER && stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), test_e);
|
||||
|
||||
randomx_destroy_vm(vm);
|
||||
vm = nullptr;
|
||||
|
||||
auto flags = randomx_get_flags();
|
||||
|
||||
randomx_release_cache(cache);
|
||||
|
@ -1054,6 +1051,40 @@ int main() {
|
|||
assert(cacheMemory[33554431] == 0x1f47f056d05cd99b);
|
||||
});
|
||||
|
||||
if (cache != nullptr)
|
||||
randomx_release_cache(cache);
|
||||
cache = randomx_alloc_cache(RANDOMX_FLAG_DEFAULT);
|
||||
|
||||
runTest("Hash batch test", RANDOMX_HAVE_COMPILER && stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), []() {
|
||||
char hash1[RANDOMX_HASH_SIZE];
|
||||
char hash2[RANDOMX_HASH_SIZE];
|
||||
char hash3[RANDOMX_HASH_SIZE];
|
||||
initCache("test key 000");
|
||||
char input1[] = "This is a test";
|
||||
char input2[] = "Lorem ipsum dolor sit amet";
|
||||
char input3[] = "sed do eiusmod tempor incididunt ut labore et dolore magna aliqua";
|
||||
|
||||
randomx_calculate_hash_first(vm, input1, sizeof(input1) - 1);
|
||||
randomx_calculate_hash_next(vm, input2, sizeof(input2) - 1, &hash1);
|
||||
randomx_calculate_hash_next(vm, input3, sizeof(input3) - 1, &hash2);
|
||||
randomx_calculate_hash_last(vm, &hash3);
|
||||
|
||||
assert(equalsHex(hash1, "639183aae1bf4c9a35884cb46b09cad9175f04efd7684e7262a0ac1c2f0b4e3f"));
|
||||
assert(equalsHex(hash2, "300a0adb47603dedb42228ccb2b211104f4da45af709cd7547cd049e9489c969"));
|
||||
assert(equalsHex(hash3, "c36d4ed4191e617309867ed66a443be4075014e2b061bcdaf9ce7b721d2b77a8"));
|
||||
});
|
||||
|
||||
runTest("Preserve rounding mode", RANDOMX_FREQ_CFROUND > 0, []() {
|
||||
rx_set_rounding_mode(RoundToNearest);
|
||||
char hash[RANDOMX_HASH_SIZE];
|
||||
calcStringHash("test key 000", "Lorem ipsum dolor sit amet", &hash);
|
||||
assert(equalsHex(hash, "300a0adb47603dedb42228ccb2b211104f4da45af709cd7547cd049e9489c969"));
|
||||
assert(rx_get_rounding_mode() == RoundToNearest);
|
||||
});
|
||||
|
||||
randomx_destroy_vm(vm);
|
||||
vm = nullptr;
|
||||
|
||||
if (cache != nullptr)
|
||||
randomx_release_cache(cache);
|
||||
|
||||
|
|
|
@ -120,6 +120,12 @@ namespace randomx {
|
|||
blake2b(out, outSize, ®, sizeof(RegisterFile), nullptr, 0);
|
||||
}
|
||||
|
||||
template<class Allocator, bool softAes>
|
||||
void VmBase<Allocator, softAes>::hashAndFill(void* out, size_t outSize, uint64_t *fill_state) {
|
||||
hashAndFillAes1Rx4<softAes>((void*) getScratchpad(), ScratchpadSize, ®.a, fill_state);
|
||||
blake2b(out, outSize, ®, sizeof(RegisterFile), nullptr, 0);
|
||||
}
|
||||
|
||||
template<class Allocator, bool softAes>
|
||||
void VmBase<Allocator, softAes>::initScratchpad(void* seed) {
|
||||
fillAes1Rx4<softAes>(seed, ScratchpadSize, scratchpad);
|
||||
|
|
|
@ -38,6 +38,7 @@ public:
|
|||
virtual ~randomx_vm() = 0;
|
||||
virtual void allocate() = 0;
|
||||
virtual void getFinalResult(void* out, size_t outSize) = 0;
|
||||
virtual void hashAndFill(void* out, size_t outSize, uint64_t *fill_state) = 0;
|
||||
virtual void setDataset(randomx_dataset* dataset) { }
|
||||
virtual void setCache(randomx_cache* cache) { }
|
||||
virtual void initScratchpad(void* seed) = 0;
|
||||
|
@ -53,6 +54,9 @@ public:
|
|||
{
|
||||
return program;
|
||||
}
|
||||
const uint8_t* getMemory() const {
|
||||
return mem.memory;
|
||||
}
|
||||
protected:
|
||||
void initialize();
|
||||
alignas(64) randomx::Program program;
|
||||
|
@ -67,6 +71,7 @@ protected:
|
|||
uint64_t datasetOffset;
|
||||
public:
|
||||
std::string cacheKey;
|
||||
alignas(16) uint64_t tempHash[8]; //8 64-bit values used to store intermediate data
|
||||
};
|
||||
|
||||
namespace randomx {
|
||||
|
@ -78,6 +83,7 @@ namespace randomx {
|
|||
void allocate() override;
|
||||
void initScratchpad(void* seed) override;
|
||||
void getFinalResult(void* out, size_t outSize) override;
|
||||
void hashAndFill(void* out, size_t outSize, uint64_t *fill_state) override;
|
||||
protected:
|
||||
void generateProgram(void* seed);
|
||||
};
|
||||
|
|
|
@ -94,7 +94,12 @@ void* allocMemoryPages(std::size_t bytes) {
|
|||
if (mem == nullptr)
|
||||
throw std::runtime_error(getErrorMessage("allocMemoryPages - VirtualAlloc"));
|
||||
#else
|
||||
mem = mmap(nullptr, bytes, PAGE_READWRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
|
||||
#if defined(__NetBSD__)
|
||||
#define RESERVED_FLAGS PROT_MPROTECT(PROT_EXEC)
|
||||
#else
|
||||
#define RESERVED_FLAGS 0
|
||||
#endif
|
||||
mem = mmap(nullptr, bytes, PAGE_READWRITE | RESERVED_FLAGS, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
|
||||
if (mem == MAP_FAILED)
|
||||
throw std::runtime_error("allocMemoryPages - mmap failed");
|
||||
#endif
|
||||
|
@ -141,7 +146,7 @@ void* allocLargePagesMemory(std::size_t bytes) {
|
|||
mem = mmap(nullptr, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, VM_FLAGS_SUPERPAGE_SIZE_2MB, 0);
|
||||
#elif defined(__FreeBSD__)
|
||||
mem = mmap(nullptr, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER, -1, 0);
|
||||
#elif defined(__OpenBSD__)
|
||||
#elif defined(__OpenBSD__) || defined(__NetBSD__)
|
||||
mem = MAP_FAILED; // OpenBSD does not support huge pages
|
||||
#else
|
||||
mem = mmap(nullptr, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, -1, 0);
|
||||
|
|
|
@ -54,12 +54,17 @@
|
|||
<ItemGroup>
|
||||
<ClCompile Include="..\src\aes_hash.cpp" />
|
||||
<ClCompile Include="..\src\allocator.cpp" />
|
||||
<ClCompile Include="..\src\argon2_avx2.c">
|
||||
<EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\src\argon2_core.c" />
|
||||
<ClCompile Include="..\src\argon2_ref.c" />
|
||||
<ClCompile Include="..\src\argon2_ssse3.c" />
|
||||
<ClCompile Include="..\src\assembly_generator_x86.cpp" />
|
||||
<ClCompile Include="..\src\blake2\blake2b.c" />
|
||||
<ClCompile Include="..\src\blake2_generator.cpp" />
|
||||
<ClCompile Include="..\src\bytecode_machine.cpp" />
|
||||
<ClCompile Include="..\src\cpu.cpp" />
|
||||
<ClCompile Include="..\src\dataset.cpp" />
|
||||
<ClCompile Include="..\src\instruction.cpp" />
|
||||
<ClCompile Include="..\src\instructions_portable.cpp" />
|
||||
|
|
|
@ -172,5 +172,14 @@
|
|||
<ClCompile Include="..\src\bytecode_machine.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\src\argon2_avx2.c">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\src\argon2_ssse3.c">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\src\cpu.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
</Project>
|
Loading…
Reference in New Issue