From 0c5b666df4e7b8630b24173de47b8d987e95538d Mon Sep 17 00:00:00 2001 From: tevador <37503146+tevador@users.noreply.github.com> Date: Mon, 10 Jun 2019 15:57:36 +0200 Subject: [PATCH 1/6] Configuration guidelines (#59) * added detailed guidelines for the selection of configuration values * added additional compile-time checks to prevent bad configurations * removed RANDOMX_SUPERSCALAR_MAX_SIZE parameter --- README.md | 2 +- doc/configuration.md | 269 ++++++++++++++++++++++++++++++++++++ doc/specs.md | 3 +- src/asm/configuration.asm | 1 - src/common.hpp | 17 ++- src/configuration.h | 22 +-- src/superscalar.cpp | 6 +- src/superscalar_program.hpp | 2 +- 8 files changed, 303 insertions(+), 19 deletions(-) create mode 100644 doc/configuration.md diff --git a/README.md b/README.md index 9a6f251..0ec8efb 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ RandomX was primarily designed as a PoW algorithm for [Monero](https://www.getmo * The key `K` is selected to be the hash of a block in the blockchain - this block is called the 'key block'. For optimal mining and verification performance, the key should change every 2048 blocks (~2.8 days) and there should be a delay of 64 blocks (~2 hours) between the key block and the change of the key `K`. This can be achieved by changing the key when `blockHeight % 2048 == 64` and selecting key block such that `keyBlockHeight % 2048 == 0`. * The input `H` is the standard hashing blob. -If you wish to use RandomX as a PoW algorithm for your cryptocurrency, we strongly recommend not using the [default parameters](src/configuration.h) to avoid compatibility with Monero. +If you wish to use RandomX as a PoW algorithm for your cryptocurrency, please follow the [configuration guidelines](doc/configuration.md). 
### CPU mining performance Preliminary performance of selected CPUs using the optimal number of threads (T) and large pages (if possible), in hashes per second (H/s): diff --git a/doc/configuration.md b/doc/configuration.md new file mode 100644 index 0000000..4e564eb --- /dev/null +++ b/doc/configuration.md @@ -0,0 +1,269 @@ +# RandomX configuration + +RandomX has 45 customizable parameters (see table below). We recommend each project using RandomX to select a unique configuration to prevent network attacks from hashpower rental services. + +These parameters can be modified in source file [configuration.h](../src/configuration.h). + +|parameter|description|default value| +|---------|-----|-------| +|`RANDOMX_ARGON_MEMORY`|The number of 1 KiB Argon2 blocks in the Cache| `262144`| +|`RANDOMX_ARGON_ITERATIONS`|The number of Argon2d iterations for Cache initialization|`3`| +|`RANDOMX_ARGON_LANES`|The number of parallel lanes for Cache initialization|`1`| +|`RANDOMX_ARGON_SALT`|Argon2 salt|`"RandomX\x03"`| +|`RANDOMX_CACHE_ACCESSES`|The number of random Cache accesses per Dataset item|`8`| +|`RANDOMX_SUPERSCALAR_LATENCY`|Target latency for SuperscalarHash (in cycles of the reference CPU)|`170`| +|`RANDOMX_DATASET_BASE_SIZE`|Dataset base size in bytes|`2147483648`| +|`RANDOMX_DATASET_EXTRA_SIZE`|Dataset extra size in bytes|`33554368`| +|`RANDOMX_PROGRAM_SIZE`|The number of instructions in a RandomX program|`256`| +|`RANDOMX_PROGRAM_ITERATIONS`|The number of iterations per program|`2048`| +|`RANDOMX_PROGRAM_COUNT`|The number of programs per hash|`8`| +|`RANDOMX_JUMP_BITS`|Jump condition mask size in bits|`8`| +|`RANDOMX_JUMP_OFFSET`|Jump condition mask offset in bits|`8`| +|`RANDOMX_SCRATCHPAD_L3`|Scratchpad size in bytes|`2097152`| +|`RANDOMX_SCRATCHPAD_L2`|Scratchpad L2 size in bytes|`262144`| +|`RANDOMX_SCRATCHPAD_L1`|Scratchpad L1 size in bytes|`16384`| +|`RANDOMX_FREQ_*` (29x)|Instruction frequencies|multiple values| + +Not all of the parameters can be changed safely 
and most parameters have some constraints on what values can be selected. Follow the guidelines below. + +### RANDOMX_ARGON_MEMORY + +This parameter determines the amount of memory needed in the light mode. Memory is specified in KiB (1 KiB = 1024 bytes). + +#### Permitted values +Any integer power of 2. + +#### Notes +Lower sizes will reduce the memory-hardness of the algorithm. + +### RANDOMX_ARGON_ITERATIONS + +Determines the number of passes of Argon2 that are used to generate the Cache. + +#### Permitted values +Any positive integer. + +#### Notes +The time needed to initialize the Cache is proportional to the value of this constant. + +### RANDOMX_ARGON_LANES + +The number of parallel lanes for Cache initialization. + +#### Permitted values +Any positive integer. + +#### Notes +This parameter determines how many threads can be used for Cache initialization. + +### RANDOMX_ARGON_SALT + +Salt value for Cache initialization. + +#### Permitted values +Any string of byte values. + +#### Note +Every implementation should choose a unique salt value. + +### RANDOMX_CACHE_ACCESSES + +The number of random Cache accesses per Dataset item. + +#### Permitted values +Any integer greater than 1. + +#### Notes +This value directly determines the performance ratio between the 'fast' and 'light' modes. + +### RANDOMX_SUPERSCALAR_LATENCY +Target latency for SuperscalarHash, in cycles of the reference CPU. + +#### Permitted values +Any positive integer. + +#### Notes +The default value was tuned so that a high-performance superscalar CPU running at 2-4 GHz will execute SuperscalarHash in a time similar to what it takes to load data from RAM (40-80 ns). Using a lower value will make Dataset generation (and light mode) more memory bound, while increasing this value will make Dataset generation (and light mode) more compute bound. + +### RANDOMX_DATASET_BASE_SIZE + +Dataset base size in bytes. + +#### Permitted values +Integer powers of 2 in the range 64 - 4294967296 (inclusive). 
+ +#### Note +This constant affects the memory requirements in fast mode. Some values are unsafe depending on other parameters. See [Unsafe configurations](#unsafe-configurations). + +### RANDOMX_DATASET_EXTRA_SIZE + +Dataset extra size in bytes. + +#### Permitted values +Non-negative integer divisible by 64. + +#### Note +This constant affects the memory requirements in fast mode. Some values are unsafe depending on other parameters. See [Unsafe configurations](#unsafe-configurations). + +### RANDOMX_PROGRAM_SIZE + +The number of instructions in a RandomX program. + +#### Permitted values +Any positive integer divisible by 8. + +#### Notes +Smaller values will make RandomX more DRAM-latency bound, while higher values will make RandomX more compute-bound. Some values are unsafe. See [Unsafe configurations](#unsafe-configurations). + +### RANDOMX_PROGRAM_ITERATIONS + +The number of iterations per program. + +#### Permitted values +Any positive integer. + +#### Notes +Time per hash increases linearly with this constant. Smaller values will increase the overhead of program compilation, while larger values may allow more time for optimizations. Some values are unsafe. See [Unsafe configurations](#unsafe-configurations). + +### RANDOMX_PROGRAM_COUNT + +The number of programs per hash. + +#### Permitted values +Any positive integer. + +#### Notes +Time per hash increases linearly with this constant. Some values are unsafe. See [Unsafe configurations](#unsafe-configurations). + +### RANDOMX_JUMP_BITS +Jump condition mask size in bits. + +#### Permitted values +Positive integers. The sum of `RANDOMX_JUMP_BITS` and `RANDOMX_JUMP_OFFSET` must not exceed 16. + +#### Notes +This determines the jump probability of the CBRANCH instruction. The default value of 8 results in a jump probability of 1/2<sup>8</sup> = 1/256. Increasing this constant will decrease the rate of jumps (and vice versa). + +### RANDOMX_JUMP_OFFSET +Jump condition mask offset in bits. 
+ +#### Permitted values +Non-negative integers. The sum of `RANDOMX_JUMP_BITS` and `RANDOMX_JUMP_OFFSET` must not exceed 16. + +#### Notes +Since the low-order bits of RandomX registers are slightly biased, this offset moves the condition mask to higher bits, which are less biased. Using values smaller than the default may result in a slightly lower jump probability than the theoretical value calculated from `RANDOMX_JUMP_BITS`. + +### RANDOMX_SCRATCHPAD_L3 +RandomX Scratchpad size in bytes. + +#### Permitted values +Any integer power of 2. Must be larger than or equal to `RANDOMX_SCRATCHPAD_L2`. + +#### Notes + +The default value of 2 MiB was selected to match the typical cache/core ratio of desktop processors. Using a lower value will make RandomX more core-bound, while using larger values will make the algorithm more latency-bound. Some values are unsafe depending on other parameters. See [Unsafe configurations](#unsafe-configurations). + +### RANDOMX_SCRATCHPAD_L2 + +Scratchpad L2 size in bytes. + +#### Permitted values +Any integer power of 2. Must be larger than or equal to `RANDOMX_SCRATCHPAD_L1`. + +#### Notes +The default value of 256 KiB was selected to match the typical per-core L2 cache size of desktop processors. Using a lower value will make RandomX more core-bound, while using larger values will make the algorithm more latency-bound. + +### RANDOMX_SCRATCHPAD_L1 + +Scratchpad L1 size in bytes. + +#### Permitted values +Any integer power of 2. The minimum is 64 bytes. + +#### Notes +The default value of 16 KiB was selected to be about half of the per-core L1 cache size of desktop processors. Using a lower value will make RandomX more core-bound, while using larger values will make the algorithm more latency-bound. + +### RANDOMX_FREQ_* + +Instruction frequencies (per 256 instructions). + +#### Permitted values +There is a total of 29 different instructions. The sum of frequencies must be equal to 256. 
+ +#### Notes + +Making large changes to the default values is not recommended. The only exceptions are the instruction pairs IROR_R/IROL_R, FADD_R/FSUB_R and FADD_M/FSUB_M, which are functionally equivalent. + +## Unsafe configurations + +There are some configurations that are considered 'unsafe' because they affect the security of the algorithm against attacks. If the conditions listed below are not satisfied, the configuration is unsafe and a compilation error is emitted when building the RandomX library. + +These checks can be disabled by defining `RANDOMX_UNSAFE` when building RandomX, e.g. by using `-DRANDOMX_UNSAFE` command line switch in GCC or MSVC. It is not recommended to disable these checks except for testing purposes. + +### 1. Memory-time tradeoffs + +#### Condition +```` +RANDOMX_CACHE_ACCESSES * RANDOMX_ARGON_MEMORY * 1024 + 33554432 >= RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE +```` + +Configurations not satisfying this condition are vulnerable to memory-time tradeoffs, which enable efficient mining in light mode. + +#### Solutions + +* Increase `RANDOMX_CACHE_ACCESSES` or `RANDOMX_ARGON_MEMORY`. +* Decrease `RANDOMX_DATASET_BASE_SIZE` or `RANDOMX_DATASET_EXTRA_SIZE`. + +### 2. Insufficient Scratchpad writes + +#### Condition +```` +(128 + RANDOMX_PROGRAM_SIZE * RANDOMX_FREQ_ISTORE / 256) * (RANDOMX_PROGRAM_COUNT * RANDOMX_PROGRAM_ITERATIONS) >= RANDOMX_SCRATCHPAD_L3 +```` + +Configurations not satisfying this condition are vulnerable to Scratchpad size optimizations due to a low number of writes. + +#### Solutions + +* Increase `RANDOMX_PROGRAM_SIZE`, `RANDOMX_FREQ_ISTORE`, `RANDOMX_PROGRAM_COUNT` or `RANDOMX_PROGRAM_ITERATIONS`. +* Decrease `RANDOMX_SCRATCHPAD_L3`. + +### 3. Program filtering strategies + +#### Condition +``` +RANDOMX_PROGRAM_COUNT > 1 +``` + +Configurations not satisfying this condition are vulnerable to program filtering strategies. + +#### Solution + +* Increase `RANDOMX_PROGRAM_COUNT` to at least 2. + +### 4. 
Low program entropy + +#### Condition +``` +RANDOMX_PROGRAM_SIZE >= 64 +``` + +Configurations not satisfying this condition do not have a sufficient number of instruction combinations. + +#### Solution + +* Increase `RANDOMX_PROGRAM_SIZE` to at least 64. + +### 5. High compilation overhead + +#### Condition +``` +RANDOMX_PROGRAM_ITERATIONS >= 400 +``` + +Configurations not satisfying this condition have a program compilation overhead exceeding 10%. + +#### Solution + +* Increase `RANDOMX_PROGRAM_ITERATIONS` to at least 400. + diff --git a/doc/specs.md b/doc/specs.md index 7872cd8..f59edbc 100644 --- a/doc/specs.md +++ b/doc/specs.md @@ -62,7 +62,6 @@ RandomX has several configurable parameters that are listed in Table 1.2.1 with |`RANDOMX_ARGON_SALT`|Argon2 salt|`"RandomX\x03"`| |`RANDOMX_CACHE_ACCESSES`|The number of random Cache accesses per Dataset item|`8`| |`RANDOMX_SUPERSCALAR_LATENCY`|Target latency for SuperscalarHash (in cycles of the reference CPU)|`170`| -|`RANDOMX_SUPERSCALAR_MAX_SIZE`|The maximum number of instructions of SuperscalarHash|`512`| |`RANDOMX_DATASET_BASE_SIZE`|Dataset base size in bytes|`2147483648`| |`RANDOMX_DATASET_EXTRA_SIZE`|Dataset extra size in bytes|`33554368`| |`RANDOMX_PROGRAM_SIZE`|The number of instructions in a RandomX program|`256`| @@ -786,7 +785,7 @@ SuperscalarHash programs are generated to maximize the usage of all 3 execution Program generation is complete when one of two conditions is met: 1. An instruction is scheduled for execution on cycle that is equal to or greater than `RANDOMX_SUPERSCALAR_LATENCY` -1. The number of generated instructions reaches `RANDOMX_SUPERSCALAR_MAX_SIZE` +1. The number of generated instructions reaches `3 * RANDOMX_SUPERSCALAR_LATENCY + 2`. 
#### 6.3.1 Decoding stage diff --git a/src/asm/configuration.asm b/src/asm/configuration.asm index 47feeed..f2f2069 100644 --- a/src/asm/configuration.asm +++ b/src/asm/configuration.asm @@ -5,7 +5,6 @@ RANDOMX_ARGON_LANES EQU 1t RANDOMX_ARGON_SALT TEXTEQU <"RandomX\x03"> RANDOMX_CACHE_ACCESSES EQU 8t RANDOMX_SUPERSCALAR_LATENCY EQU 170t -RANDOMX_SUPERSCALAR_MAX_SIZE EQU 512t RANDOMX_DATASET_BASE_SIZE EQU 2147483648t RANDOMX_DATASET_EXTRA_SIZE EQU 33554368t RANDOMX_PROGRAM_SIZE EQU 256t diff --git a/src/common.hpp b/src/common.hpp index 9df86f5..08fe7f3 100644 --- a/src/common.hpp +++ b/src/common.hpp @@ -37,7 +37,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace randomx { + static_assert(RANDOMX_ARGON_MEMORY > 0, "RANDOMX_ARGON_MEMORY must be greater than 0."); static_assert((RANDOMX_ARGON_MEMORY & (RANDOMX_ARGON_MEMORY - 1)) == 0, "RANDOMX_ARGON_MEMORY must be a power of 2."); + static_assert(RANDOMX_DATASET_BASE_SIZE >= 64, "RANDOMX_DATASET_BASE_SIZE must be at least 64."); static_assert((RANDOMX_DATASET_BASE_SIZE & (RANDOMX_DATASET_BASE_SIZE - 1)) == 0, "RANDOMX_DATASET_BASE_SIZE must be a power of 2."); static_assert(RANDOMX_DATASET_BASE_SIZE <= 4294967296ULL, "RANDOMX_DATASET_BASE_SIZE must not exceed 4294967296."); static_assert(RANDOMX_DATASET_EXTRA_SIZE % 64 == 0, "RANDOMX_DATASET_EXTRA_SIZE must be divisible by 64."); @@ -48,8 +50,10 @@ namespace randomx { static_assert(RANDOMX_SCRATCHPAD_L3 >= RANDOMX_SCRATCHPAD_L2, "RANDOMX_SCRATCHPAD_L3 must be greater than or equal to RANDOMX_SCRATCHPAD_L2."); static_assert((RANDOMX_SCRATCHPAD_L2 & (RANDOMX_SCRATCHPAD_L2 - 1)) == 0, "RANDOMX_SCRATCHPAD_L2 must be a power of 2."); static_assert(RANDOMX_SCRATCHPAD_L2 >= RANDOMX_SCRATCHPAD_L1, "RANDOMX_SCRATCHPAD_L2 must be greater than or equal to RANDOMX_SCRATCHPAD_L1."); + static_assert(RANDOMX_SCRATCHPAD_L1 >= 64, "RANDOMX_SCRATCHPAD_L1 must be at least 64."); static_assert((RANDOMX_SCRATCHPAD_L1 & (RANDOMX_SCRATCHPAD_L1 - 1)) == 0, 
"RANDOMX_SCRATCHPAD_L1 must be a power of 2."); static_assert(RANDOMX_CACHE_ACCESSES > 1, "RANDOMX_CACHE_ACCESSES must be greater than 1"); + static_assert(RANDOMX_SUPERSCALAR_LATENCY > 0, "RANDOMX_SUPERSCALAR_LATENCY must be greater than 0"); static_assert(RANDOMX_JUMP_BITS > 0, "RANDOMX_JUMP_BITS must be greater than 0."); static_assert(RANDOMX_JUMP_OFFSET >= 0, "RANDOMX_JUMP_OFFSET must be greater than or equal to 0."); static_assert(RANDOMX_JUMP_BITS + RANDOMX_JUMP_OFFSET <= 16, "RANDOMX_JUMP_BITS + RANDOMX_JUMP_OFFSET must not exceed 16."); @@ -64,8 +68,10 @@ namespace randomx { static_assert(wtSum == 256, "Sum of instruction frequencies must be 256."); + constexpr int ArgonBlockSize = 1024; - constexpr int ArgonSaltSize = sizeof(RANDOMX_ARGON_SALT) - 1; + constexpr int ArgonSaltSize = sizeof("" RANDOMX_ARGON_SALT) - 1; + constexpr int SuperscalarMaxSize = 3 * RANDOMX_SUPERSCALAR_LATENCY + 2; constexpr int CacheLineSize = RANDOMX_DATASET_ITEM_SIZE; constexpr int ScratchpadSize = RANDOMX_SCRATCHPAD_L3; constexpr uint32_t CacheLineAlignMask = (RANDOMX_DATASET_BASE_SIZE - 1) & ~(CacheLineSize - 1); @@ -76,6 +82,15 @@ namespace randomx { constexpr int ConditionOffset = RANDOMX_JUMP_OFFSET; constexpr int StoreL3Condition = 14; + //Prevent some unsafe configurations. 
+#ifndef RANDOMX_UNSAFE + /* Evaluated in 64 bits: with the default values the product is 8 * 262144 * 1024 = 2^31, which overflows int and is then not a constant expression. */ static_assert(static_cast<uint64_t>(RANDOMX_CACHE_ACCESSES) * RANDOMX_ARGON_MEMORY * ArgonBlockSize + 33554432 >= static_cast<uint64_t>(RANDOMX_DATASET_BASE_SIZE) + RANDOMX_DATASET_EXTRA_SIZE, "Unsafe configuration: Memory-time tradeoffs"); + static_assert((128 + RANDOMX_PROGRAM_SIZE * RANDOMX_FREQ_ISTORE / 256) * (RANDOMX_PROGRAM_COUNT * RANDOMX_PROGRAM_ITERATIONS) >= RANDOMX_SCRATCHPAD_L3, "Unsafe configuration: Insufficient Scratchpad writes"); + static_assert(RANDOMX_PROGRAM_COUNT > 1, "Unsafe configuration: Program filtering strategies"); + static_assert(RANDOMX_PROGRAM_SIZE >= 64, "Unsafe configuration: Low program entropy"); + static_assert(RANDOMX_PROGRAM_ITERATIONS >= 400, "Unsafe configuration: High compilation overhead"); +#endif + #ifdef TRACE constexpr bool trace = true; #else diff --git a/src/configuration.h b/src/configuration.h index 144ac99..006d07e 100644 --- a/src/configuration.h +++ b/src/configuration.h @@ -31,10 +31,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. //Cache size in KiB. Must be a power of 2. #define RANDOMX_ARGON_MEMORY 262144 -//Number of Argon2d iterations for Cache initialization +//Number of Argon2d iterations for Cache initialization. #define RANDOMX_ARGON_ITERATIONS 3 -//Number of parallel lanes for Cache initialization +//Number of parallel lanes for Cache initialization. #define RANDOMX_ARGON_LANES 1 //Argon2d salt @@ -46,22 +46,19 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. //Target latency for SuperscalarHash (in cycles of the reference CPU). #define RANDOMX_SUPERSCALAR_LATENCY 170 -//The maximum size of a SuperscalarHash program (number of instructions). -#define RANDOMX_SUPERSCALAR_MAX_SIZE 512 - //Dataset base size in bytes. Must be a power of 2. #define RANDOMX_DATASET_BASE_SIZE 2147483648 //Dataset extra size. Must be divisible by 64. #define RANDOMX_DATASET_EXTRA_SIZE 33554368 -//Number of instructions in a RandomX program +//Number of instructions in a RandomX program. 
Must be divisible by 8. #define RANDOMX_PROGRAM_SIZE 256 -//Number of iterations during VM execution +//Number of iterations during VM execution. #define RANDOMX_PROGRAM_ITERATIONS 2048 -//Number of chained VM executions per hash +//Number of chained VM executions per hash. #define RANDOMX_PROGRAM_COUNT 8 //Scratchpad L3 size in bytes. Must be a power of 2. @@ -70,13 +67,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. //Scratchpad L2 size in bytes. Must be a power of two and less than or equal to RANDOMX_SCRATCHPAD_L3. #define RANDOMX_SCRATCHPAD_L2 262144 -//Scratchpad L1 size in bytes. Must be a power of two and less than or equal to RANDOMX_SCRATCHPAD_L2. +//Scratchpad L1 size in bytes. Must be a power of two (minimum 64) and less than or equal to RANDOMX_SCRATCHPAD_L2. #define RANDOMX_SCRATCHPAD_L1 16384 //Jump condition mask size in bits. #define RANDOMX_JUMP_BITS 8 -//Jump condition mask offset in bits. +//Jump condition mask offset in bits. The sum of RANDOMX_JUMP_BITS and RANDOMX_JUMP_OFFSET must not exceed 16. 
#define RANDOMX_JUMP_OFFSET 8 /* @@ -84,6 +81,7 @@ Instruction frequencies (per 256 opcodes) Total sum of frequencies must be 256 */ +//Integer instructions #define RANDOMX_FREQ_IADD_RS 25 #define RANDOMX_FREQ_IADD_M 7 #define RANDOMX_FREQ_ISUB_R 16 @@ -102,6 +100,7 @@ Total sum of frequencies must be 256 #define RANDOMX_FREQ_IROL_R 0 #define RANDOMX_FREQ_ISWAP_R 4 +//Floating point instructions #define RANDOMX_FREQ_FSWAP_R 8 #define RANDOMX_FREQ_FADD_R 20 #define RANDOMX_FREQ_FADD_M 5 @@ -112,11 +111,14 @@ Total sum of frequencies must be 256 #define RANDOMX_FREQ_FDIV_M 4 #define RANDOMX_FREQ_FSQRT_R 6 +//Control instructions #define RANDOMX_FREQ_CBRANCH 16 #define RANDOMX_FREQ_CFROUND 1 +//Store instruction #define RANDOMX_FREQ_ISTORE 16 +//No-op instruction #define RANDOMX_FREQ_NOP 0 /* ------ 256 diff --git a/src/superscalar.cpp b/src/superscalar.cpp index 245641b..73c7571 100644 --- a/src/superscalar.cpp +++ b/src/superscalar.cpp @@ -673,8 +673,8 @@ namespace randomx { //Each decode cycle decodes 16 bytes of x86 code. //Since a decode cycle produces on average 3.45 macro-ops and there are only 3 ALU ports, execution ports are always //saturated first. The cycle limit is present only to guarantee loop termination. - //Program size is limited to RANDOMX_SUPERSCALAR_MAX_SIZE instructions. - for (decodeCycle = 0; decodeCycle < RANDOMX_SUPERSCALAR_LATENCY && !portsSaturated && programSize < RANDOMX_SUPERSCALAR_MAX_SIZE; ++decodeCycle) { + //Program size is limited to SuperscalarMaxSize instructions. 
+ for (decodeCycle = 0; decodeCycle < RANDOMX_SUPERSCALAR_LATENCY && !portsSaturated && programSize < SuperscalarMaxSize; ++decodeCycle) { //select a decode configuration decodeBuffer = decodeBuffer->fetchNext(currentInstruction.getType(), decodeCycle, mulCount, gen); @@ -688,7 +688,7 @@ namespace randomx { //if we have issued all macro-ops for the current RandomX instruction, create a new instruction if (macroOpIndex >= currentInstruction.getInfo().getSize()) { - if (portsSaturated || programSize >= RANDOMX_SUPERSCALAR_MAX_SIZE) + if (portsSaturated || programSize >= SuperscalarMaxSize) break; //select an instruction so that the first macro-op fits into the current slot currentInstruction.createForSlot(gen, decodeBuffer->getCounts()[bufferIndex], decodeBuffer->getIndex(), decodeBuffer->getSize() == bufferIndex + 1, bufferIndex == 0); diff --git a/src/superscalar_program.hpp b/src/superscalar_program.hpp index fd6c712..145006a 100644 --- a/src/superscalar_program.hpp +++ b/src/superscalar_program.hpp @@ -56,7 +56,7 @@ namespace randomx { addrReg = val; } - Instruction programBuffer[RANDOMX_SUPERSCALAR_MAX_SIZE]; + Instruction programBuffer[SuperscalarMaxSize]; uint32_t size; int addrReg; double ipc; From d660798b9f53c73f90494b9af39c5177a4f25407 Mon Sep 17 00:00:00 2001 From: tevador <37503146+tevador@users.noreply.github.com> Date: Mon, 10 Jun 2019 15:58:12 +0200 Subject: [PATCH 2/6] Fix cmake standalone build (#56) --- CMakeLists.txt | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index fec0cda..3c24849 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,6 +26,8 @@ # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF # THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+cmake_minimum_required(VERSION 2.8.7) + set (randomx_sources src/aes_hash.cpp src/argon2_ref.c @@ -48,6 +50,14 @@ src/virtual_machine.cpp src/vm_compiled_light.cpp src/blake2/blake2b.c) +if (NOT ARCH_ID) + set(ARCH_ID ${CMAKE_HOST_SYSTEM_PROCESSOR}) +endif() + +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE Release) +endif() + if (ARCH_ID STREQUAL "x86_64" OR ARCH_ID STREQUAL "x86-64" OR ARCH_ID STREQUAL "amd64") list(APPEND randomx_sources src/jit_compiler_x86_static.S From cc2551b02ba4c81724571125149b852aed40ecfa Mon Sep 17 00:00:00 2001 From: tevador <37503146+tevador@users.noreply.github.com> Date: Mon, 10 Jun 2019 15:58:51 +0200 Subject: [PATCH 3/6] Support building a shared library (#53) --- randomx.sln | 10 ++ src/randomx.h | 30 ++-- vcxproj/randomx-dll.vcxproj | 211 ++++++++++++++++++++++++++++ vcxproj/randomx-dll.vcxproj.filters | 173 +++++++++++++++++++++++ 4 files changed, 411 insertions(+), 13 deletions(-) create mode 100644 vcxproj/randomx-dll.vcxproj create mode 100644 vcxproj/randomx-dll.vcxproj.filters diff --git a/randomx.sln b/randomx.sln index bbe9986..3da2b50 100644 --- a/randomx.sln +++ b/randomx.sln @@ -29,6 +29,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "perf-simulation", "vcxproj\ EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "runtime-distr", "vcxproj\runtime-distr.vcxproj", "{F207EC8C-C55F-46C0-8851-887A71574F54}" EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "randomx-dll", "vcxproj\randomx-dll.vcxproj", "{59560AD8-18E3-463E-A941-BBD808EC7C83}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|x64 = Debug|x64 @@ -133,6 +135,14 @@ Global {F207EC8C-C55F-46C0-8851-887A71574F54}.Release|x64.Build.0 = Release|x64 {F207EC8C-C55F-46C0-8851-887A71574F54}.Release|x86.ActiveCfg = Release|Win32 {F207EC8C-C55F-46C0-8851-887A71574F54}.Release|x86.Build.0 = Release|Win32 + {59560AD8-18E3-463E-A941-BBD808EC7C83}.Debug|x64.ActiveCfg = Debug|x64 + 
{59560AD8-18E3-463E-A941-BBD808EC7C83}.Debug|x64.Build.0 = Debug|x64 + {59560AD8-18E3-463E-A941-BBD808EC7C83}.Debug|x86.ActiveCfg = Debug|Win32 + {59560AD8-18E3-463E-A941-BBD808EC7C83}.Debug|x86.Build.0 = Debug|Win32 + {59560AD8-18E3-463E-A941-BBD808EC7C83}.Release|x64.ActiveCfg = Release|x64 + {59560AD8-18E3-463E-A941-BBD808EC7C83}.Release|x64.Build.0 = Release|x64 + {59560AD8-18E3-463E-A941-BBD808EC7C83}.Release|x86.ActiveCfg = Release|Win32 + {59560AD8-18E3-463E-A941-BBD808EC7C83}.Release|x86.Build.0 = Release|Win32 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/src/randomx.h b/src/randomx.h index c1de61f..8f9b30c 100644 --- a/src/randomx.h +++ b/src/randomx.h @@ -34,6 +34,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define RANDOMX_HASH_SIZE 32 #define RANDOMX_DATASET_ITEM_SIZE 64 +#ifndef RANDOMX_EXPORT +#define RANDOMX_EXPORT +#endif + typedef enum { RANDOMX_FLAG_DEFAULT = 0, RANDOMX_FLAG_LARGE_PAGES = 1, @@ -62,7 +66,7 @@ extern "C" { * NULL is returned if memory allocation fails or if the RANDOMX_FLAG_JIT * is set and JIT compilation is not supported on the current platform. */ -randomx_cache *randomx_alloc_cache(randomx_flags flags); +RANDOMX_EXPORT randomx_cache *randomx_alloc_cache(randomx_flags flags); /** * Initializes the cache memory and SuperscalarHash using the provided key value. @@ -71,14 +75,14 @@ randomx_cache *randomx_alloc_cache(randomx_flags flags); * @param key is a pointer to memory which contains the key value. Must not be NULL. * @param keySize is the number of bytes of the key. */ -void randomx_init_cache(randomx_cache *cache, const void *key, size_t keySize); +RANDOMX_EXPORT void randomx_init_cache(randomx_cache *cache, const void *key, size_t keySize); /** * Releases all memory occupied by the randomx_cache structure. * * @param cache is a pointer to a previously allocated randomx_cache structure. 
*/ -void randomx_release_cache(randomx_cache* cache); +RANDOMX_EXPORT void randomx_release_cache(randomx_cache* cache); /** * Creates a randomx_dataset structure and allocates memory for RandomX Dataset. @@ -89,14 +93,14 @@ void randomx_release_cache(randomx_cache* cache); * @return Pointer to an allocated randomx_dataset structure. * NULL is returned if memory allocation fails. */ -randomx_dataset *randomx_alloc_dataset(randomx_flags flags); +RANDOMX_EXPORT randomx_dataset *randomx_alloc_dataset(randomx_flags flags); /** * Gets the number of items contained in the dataset. * * @return the number of items contained in the dataset. */ -unsigned long randomx_dataset_item_count(void); +RANDOMX_EXPORT unsigned long randomx_dataset_item_count(void); /** * Initializes dataset items. @@ -109,7 +113,7 @@ unsigned long randomx_dataset_item_count(void); * @param startItem is the item number where intialization should start. * @param itemCount is the number of items that should be initialized. */ -void randomx_init_dataset(randomx_dataset *dataset, randomx_cache *cache, unsigned long startItem, unsigned long itemCount); +RANDOMX_EXPORT void randomx_init_dataset(randomx_dataset *dataset, randomx_cache *cache, unsigned long startItem, unsigned long itemCount); /** * Returns a pointer to the internal memory buffer of the dataset structure. The size @@ -119,14 +123,14 @@ void randomx_init_dataset(randomx_dataset *dataset, randomx_cache *cache, unsign * * @return Pointer to the internal memory buffer of the dataset structure. */ -void *randomx_get_dataset_memory(randomx_dataset *dataset); +RANDOMX_EXPORT void *randomx_get_dataset_memory(randomx_dataset *dataset); /** * Releases all memory occupied by the randomx_dataset structure. * * @param dataset is a pointer to a previously allocated randomx_dataset structure. 
*/ -void randomx_release_dataset(randomx_dataset *dataset); +RANDOMX_EXPORT void randomx_release_dataset(randomx_dataset *dataset); /** * Creates and initializes a RandomX virtual machine. @@ -151,7 +155,7 @@ void randomx_release_dataset(randomx_dataset *dataset); * (3) cache parameter is NULL and RANDOMX_FLAG_FULL_MEM is not set * (4) dataset parameter is NULL and RANDOMX_FLAG_FULL_MEM is set */ -randomx_vm *randomx_create_vm(randomx_flags flags, randomx_cache *cache, randomx_dataset *dataset); +RANDOMX_EXPORT randomx_vm *randomx_create_vm(randomx_flags flags, randomx_cache *cache, randomx_dataset *dataset); /** * Reinitializes a virtual machine with a new Cache. This function should be called anytime @@ -161,7 +165,7 @@ randomx_vm *randomx_create_vm(randomx_flags flags, randomx_cache *cache, randomx * without RANDOMX_FLAG_FULL_MEM. Must not be NULL. * @param cache is a pointer to an initialized randomx_cache structure. Must not be NULL. */ -void randomx_vm_set_cache(randomx_vm *machine, randomx_cache* cache); +RANDOMX_EXPORT void randomx_vm_set_cache(randomx_vm *machine, randomx_cache* cache); /** * Reinitializes a virtual machine with a new Dataset. @@ -170,14 +174,14 @@ void randomx_vm_set_cache(randomx_vm *machine, randomx_cache* cache); * with RANDOMX_FLAG_FULL_MEM. Must not be NULL. * @param dataset is a pointer to an initialized randomx_dataset structure. Must not be NULL. */ -void randomx_vm_set_dataset(randomx_vm *machine, randomx_dataset *dataset); +RANDOMX_EXPORT void randomx_vm_set_dataset(randomx_vm *machine, randomx_dataset *dataset); /** * Releases all memory occupied by the randomx_vm structure. * * @param machine is a pointer to a previously created randomx_vm structure. */ -void randomx_destroy_vm(randomx_vm *machine); +RANDOMX_EXPORT void randomx_destroy_vm(randomx_vm *machine); /** * Calculates a RandomX hash value. 
@@ -188,7 +192,7 @@ void randomx_destroy_vm(randomx_vm *machine); * @param output is a pointer to memory where the hash will be stored. Must not * be NULL and at least RANDOMX_HASH_SIZE bytes must be available for writing. */ -void randomx_calculate_hash(randomx_vm *machine, const void *input, size_t inputSize, void *output); +RANDOMX_EXPORT void randomx_calculate_hash(randomx_vm *machine, const void *input, size_t inputSize, void *output); #if defined(__cplusplus) } diff --git a/vcxproj/randomx-dll.vcxproj b/vcxproj/randomx-dll.vcxproj new file mode 100644 index 0000000..e0cf2f3 --- /dev/null +++ b/vcxproj/randomx-dll.vcxproj @@ -0,0 +1,211 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 15.0 + {59560AD8-18E3-463E-A941-BBD808EC7C83} + Win32Proj + randomxdll + 10.0.17763.0 + + + + DynamicLibrary + true + v141 + Unicode + + + DynamicLibrary + false + v141 + true + Unicode + + + DynamicLibrary + true + v141 + Unicode + + + DynamicLibrary + false + v141 + true + Unicode + + + + + + + + + + + + + + + + + + + + + + false + + + true + + + true + + + false + randomx + + + + Use + Level3 + MaxSpeed + true + true + true + WIN32;NDEBUG;RANDOMXDLL_EXPORTS;_WINDOWS;_USRDLL;%(PreprocessorDefinitions) + true + + + Windows + true + true + true + + + + + Use + Level3 + Disabled + true + WIN32;_DEBUG;RANDOMXDLL_EXPORTS;_WINDOWS;_USRDLL;%(PreprocessorDefinitions) + true + + + Windows + true + + + + + Use + Level3 + Disabled + true + _DEBUG;RANDOMXDLL_EXPORTS;_WINDOWS;_USRDLL;%(PreprocessorDefinitions) + true + + + Windows + true + + + + + NotUsing + Level3 + MaxSpeed + true + true + false + NDEBUG;RANDOMXDLL_EXPORTS;_WINDOWS;_USRDLL;RANDOMX_EXPORT=__declspec(dllexport) + true + + + Windows + true + true + true + + + + + + + \ No newline at end of file diff --git a/vcxproj/randomx-dll.vcxproj.filters 
b/vcxproj/randomx-dll.vcxproj.filters new file mode 100644 index 0000000..a30fa8e --- /dev/null +++ b/vcxproj/randomx-dll.vcxproj.filters @@ -0,0 +1,173 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;ipp;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + + + Source Files + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + \ No newline at end of file From eb6f6bb04193f9ca306574c81bae7e55ea977c78 Mon Sep 17 00:00:00 2001 From: hyc Date: Mon, 10 Jun 2019 14:59:25 +0100 Subject: [PATCH 4/6] Fix Windows detection (#58) --- src/jit_compiler_x86_static.S | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/jit_compiler_x86_static.S b/src/jit_compiler_x86_static.S index 8e921fa..eab1b4c 100644 --- a/src/jit_compiler_x86_static.S +++ b/src/jit_compiler_x86_static.S @@ -33,6 +33,10 @@ #define DECL(x) x #endif +#if defined(__WIN32__) || defined(__CYGWIN__) 
+#define WINABI +#endif + .global DECL(randomx_program_prologue) .global DECL(randomx_program_loop_begin) .global DECL(randomx_program_loop_load) @@ -61,7 +65,7 @@ .balign 64 DECL(randomx_program_prologue): -#if defined(__CYGWIN__) +#if defined(WINABI) #include "asm/program_prologue_win64.inc" #else #include "asm/program_prologue_linux.inc" @@ -107,7 +111,7 @@ DECL(randomx_dataset_init): push r13 push r14 push r15 -#if defined(__CYGWIN__) +#if defined(WINABI) push rdi push rsi mov rdi, qword ptr [rcx] ;# cache->memory @@ -140,7 +144,7 @@ call_offset: cmp rbp, qword ptr [rsp] jb init_block_loop pop rax -#if defined(__CYGWIN__) +#if defined(WINABI) pop rsi pop rdi #endif @@ -155,7 +159,7 @@ call_offset: .balign 64 DECL(randomx_program_epilogue): #include "asm/program_epilogue_store.inc" -#if defined(__CYGWIN__) +#if defined(WINABI) #include "asm/program_epilogue_win64.inc" #else #include "asm/program_epilogue_linux.inc" @@ -200,7 +204,7 @@ DECL(randomx_program_end): nop DECL(randomx_reciprocal_fast): -#if !defined(__CYGWIN__) +#if !defined(WINABI) mov rcx, rdi #endif #include "asm/randomx_reciprocal.inc" From 52aa36249e679e787ffe8b3abbb7edc7dcb899ea Mon Sep 17 00:00:00 2001 From: tevador <37503146+tevador@users.noreply.github.com> Date: Mon, 10 Jun 2019 16:00:04 +0200 Subject: [PATCH 5/6] Add Dataset prefetch in interpreted VM (#52) - to formally match the specification - a small increase in interpreted mining speed (~4%) --- src/vm_interpreted.cpp | 6 ++++++ src/vm_interpreted.hpp | 1 + src/vm_interpreted_light.hpp | 1 + 3 files changed, 8 insertions(+) diff --git a/src/vm_interpreted.cpp b/src/vm_interpreted.cpp index 25dc466..dd5f217 100644 --- a/src/vm_interpreted.cpp +++ b/src/vm_interpreted.cpp @@ -246,6 +246,7 @@ namespace randomx { mem.mx ^= r[config.readReg2] ^ r[config.readReg3]; mem.mx &= CacheLineAlignMask; + datasetPrefetch(datasetOffset + mem.mx); datasetRead(datasetOffset + mem.ma, r); std::swap(mem.mx, mem.ma); @@ -279,6 +280,11 @@ namespace randomx { 
r[i] ^= datasetLine[i]; } + template + void InterpretedVm::datasetPrefetch(uint64_t address) { + rx_prefetch_nta(mem.memory + address); + } + #include "instruction_weights.hpp" template diff --git a/src/vm_interpreted.hpp b/src/vm_interpreted.hpp index 9fb137c..dfa1ba7 100644 --- a/src/vm_interpreted.hpp +++ b/src/vm_interpreted.hpp @@ -81,6 +81,7 @@ namespace randomx { void setDataset(randomx_dataset* dataset) override; protected: virtual void datasetRead(uint64_t blockNumber, int_reg_t(&r)[RegistersCount]); + virtual void datasetPrefetch(uint64_t blockNumber); private: void execute(); void precompileProgram(int_reg_t(&r)[RegistersCount], rx_vec_f128(&f)[RegisterCountFlt], rx_vec_f128(&e)[RegisterCountFlt], rx_vec_f128(&a)[RegisterCountFlt]); diff --git a/src/vm_interpreted_light.hpp b/src/vm_interpreted_light.hpp index f3cc2c6..02d678f 100644 --- a/src/vm_interpreted_light.hpp +++ b/src/vm_interpreted_light.hpp @@ -51,6 +51,7 @@ namespace randomx { void setCache(randomx_cache* cache) override; protected: void datasetRead(uint64_t address, int_reg_t(&r)[8]) override; + void datasetPrefetch(uint64_t address) override { } }; using InterpretedLightVmDefault = InterpretedLightVm, true>; From 447634f51d5584cbb4aea95e7f8ddce364a70ae8 Mon Sep 17 00:00:00 2001 From: tevador <37503146+tevador@users.noreply.github.com> Date: Mon, 10 Jun 2019 16:02:25 +0200 Subject: [PATCH 6/6] Use strongly typed enums (#55) --- src/assembly_generator_x86.cpp | 14 ++++++-- src/dataset.cpp | 2 +- src/instruction.hpp | 64 +++++++++++++++++----------------- src/jit_compiler_x86.cpp | 2 +- src/superscalar.cpp | 56 ++++++++++++++--------------- src/superscalar.hpp | 35 +++++++++---------- src/vm_interpreted.hpp | 4 ++- 7 files changed, 94 insertions(+), 83 deletions(-) diff --git a/src/assembly_generator_x86.cpp b/src/assembly_generator_x86.cpp index b9866d6..645fd9d 100644 --- a/src/assembly_generator_x86.cpp +++ b/src/assembly_generator_x86.cpp @@ -67,10 +67,12 @@ namespace randomx { void 
AssemblyGeneratorX86::generateAsm(SuperscalarProgram& prog) { asmCode.str(std::string()); //clear +#ifdef RANDOMX_ALIGN asmCode << "ALIGN 16" << std::endl; +#endif for (unsigned i = 0; i < prog.getSize(); ++i) { Instruction& instr = prog(i); - switch (instr.opcode) + switch ((SuperscalarInstructionType)instr.opcode) { case SuperscalarInstructionType::ISUB_R: asmCode << "sub " << regR[instr.dst] << ", " << regR[instr.src] << std::endl; @@ -95,19 +97,27 @@ namespace randomx { break; case SuperscalarInstructionType::IADD_C8: asmCode << "add " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl; +#ifdef RANDOMX_ALIGN asmCode << "nop" << std::endl; +#endif break; case SuperscalarInstructionType::IXOR_C8: asmCode << "xor " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl; +#ifdef RANDOMX_ALIGN asmCode << "nop" << std::endl; +#endif break; case SuperscalarInstructionType::IADD_C9: asmCode << "add " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl; +#ifdef RANDOMX_ALIGN asmCode << "xchg ax, ax ;nop" << std::endl; +#endif break; case SuperscalarInstructionType::IXOR_C9: asmCode << "xor " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl; +#ifdef RANDOMX_ALIGN asmCode << "xchg ax, ax ;nop" << std::endl; +#endif break; case SuperscalarInstructionType::IMULH_R: asmCode << "mov rax, " << regR[instr.dst] << std::endl; @@ -179,7 +189,7 @@ namespace randomx { asmCode << "uint64_t r8 = r[0], r9 = r[1], r10 = r[2], r11 = r[3], r12 = r[4], r13 = r[5], r14 = r[6], r15 = r[7];" << std::endl; for (unsigned i = 0; i < prog.getSize(); ++i) { Instruction& instr = prog(i); - switch (instr.opcode) + switch ((SuperscalarInstructionType)instr.opcode) { case SuperscalarInstructionType::ISUB_R: asmCode << regR[instr.dst] << " -= " << regR[instr.src] << ";" << std::endl; diff --git a/src/dataset.cpp b/src/dataset.cpp index 193d49a..e382fd0 100644 --- a/src/dataset.cpp +++ b/src/dataset.cpp @@ -125,7 +125,7 @@ namespace 
randomx { randomx::generateSuperscalar(cache->programs[i], gen); for (unsigned j = 0; j < cache->programs[i].getSize(); ++j) { auto& instr = cache->programs[i](j); - if (instr.opcode == randomx::SuperscalarInstructionType::IMUL_RCP) { + if ((SuperscalarInstructionType)instr.opcode == SuperscalarInstructionType::IMUL_RCP) { auto rcp = randomx_reciprocal(instr.getImm32()); instr.setImm32(cache->reciprocalCache.size()); cache->reciprocalCache.push_back(rcp); diff --git a/src/instruction.hpp b/src/instruction.hpp index 44b2c48..b1863b5 100644 --- a/src/instruction.hpp +++ b/src/instruction.hpp @@ -39,38 +39,38 @@ namespace randomx { typedef void(Instruction::*InstructionFormatter)(std::ostream&) const; - namespace InstructionType { - constexpr int IADD_RS = 0; - constexpr int IADD_M = 1; - constexpr int ISUB_R = 2; - constexpr int ISUB_M = 3; - constexpr int IMUL_R = 4; - constexpr int IMUL_M = 5; - constexpr int IMULH_R = 6; - constexpr int IMULH_M = 7; - constexpr int ISMULH_R = 8; - constexpr int ISMULH_M = 9; - constexpr int IMUL_RCP = 10; - constexpr int INEG_R = 11; - constexpr int IXOR_R = 12; - constexpr int IXOR_M = 13; - constexpr int IROR_R = 14; - constexpr int IROL_R = 15; - constexpr int ISWAP_R = 16; - constexpr int FSWAP_R = 17; - constexpr int FADD_R = 18; - constexpr int FADD_M = 19; - constexpr int FSUB_R = 20; - constexpr int FSUB_M = 21; - constexpr int FSCAL_R = 22; - constexpr int FMUL_R = 23; - constexpr int FDIV_M = 24; - constexpr int FSQRT_R = 25; - constexpr int CBRANCH = 26; - constexpr int CFROUND = 27; - constexpr int ISTORE = 28; - constexpr int NOP = 29; - } + enum class InstructionType : uint16_t { + IADD_RS = 0, + IADD_M = 1, + ISUB_R = 2, + ISUB_M = 3, + IMUL_R = 4, + IMUL_M = 5, + IMULH_R = 6, + IMULH_M = 7, + ISMULH_R = 8, + ISMULH_M = 9, + IMUL_RCP = 10, + INEG_R = 11, + IXOR_R = 12, + IXOR_M = 13, + IROR_R = 14, + IROL_R = 15, + ISWAP_R = 16, + FSWAP_R = 17, + FADD_R = 18, + FADD_M = 19, + FSUB_R = 20, + FSUB_M = 21, + FSCAL_R = 
22, + FMUL_R = 23, + FDIV_M = 24, + FSQRT_R = 25, + CBRANCH = 26, + CFROUND = 27, + ISTORE = 28, + NOP = 29, + }; class Instruction { public: diff --git a/src/jit_compiler_x86.cpp b/src/jit_compiler_x86.cpp index bb2ae76..054a171 100644 --- a/src/jit_compiler_x86.cpp +++ b/src/jit_compiler_x86.cpp @@ -306,7 +306,7 @@ namespace randomx { } void JitCompilerX86::generateSuperscalarCode(Instruction& instr, std::vector &reciprocalCache) { - switch (instr.opcode) + switch ((SuperscalarInstructionType)instr.opcode) { case randomx::SuperscalarInstructionType::ISUB_R: emit(REX_SUB_RR); diff --git a/src/superscalar.cpp b/src/superscalar.cpp index 73c7571..39d772f 100644 --- a/src/superscalar.cpp +++ b/src/superscalar.cpp @@ -40,7 +40,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace randomx { - static bool isMultiplication(int type) { + static bool isMultiplication(SuperscalarInstructionType type) { return type == SuperscalarInstructionType::IMUL_R || type == SuperscalarInstructionType::IMULH_R || type == SuperscalarInstructionType::ISMULH_R || type == SuperscalarInstructionType::IMUL_RCP; } @@ -167,7 +167,7 @@ namespace randomx { const MacroOp& getOp(int index) const { return ops_[index]; } - int getType() const { + SuperscalarInstructionType getType() const { return type_; } int getResultOp() const { @@ -196,7 +196,7 @@ namespace randomx { static const SuperscalarInstructionInfo NOP; private: const char* name_; - int type_; + SuperscalarInstructionType type_; std::vector ops_; int latency_; int resultOp_ = 0; @@ -204,13 +204,13 @@ namespace randomx { int srcOp_; SuperscalarInstructionInfo(const char* name) - : name_(name), type_(-1), latency_(0) {} - SuperscalarInstructionInfo(const char* name, int type, const MacroOp& op, int srcOp) + : name_(name), type_(SuperscalarInstructionType::INVALID), latency_(0) {} + SuperscalarInstructionInfo(const char* name, SuperscalarInstructionType type, const MacroOp& op, int srcOp) : name_(name), 
type_(type), latency_(op.getLatency()), srcOp_(srcOp) { ops_.push_back(MacroOp(op)); } template - SuperscalarInstructionInfo(const char* name, int type, const MacroOp(&arr)[N], int resultOp, int dstOp, int srcOp) + SuperscalarInstructionInfo(const char* name, SuperscalarInstructionType type, const MacroOp(&arr)[N], int resultOp, int dstOp, int srcOp) : name_(name), type_(type), latency_(0), resultOp_(resultOp), dstOp_(dstOp), srcOp_(srcOp) { for (unsigned i = 0; i < N; ++i) { ops_.push_back(MacroOp(arr[i])); @@ -267,7 +267,7 @@ namespace randomx { const char* getName() const { return name_; } - const DecoderBuffer* fetchNext(int instrType, int cycle, int mulCount, Blake2Generator& gen) const { + const DecoderBuffer* fetchNext(SuperscalarInstructionType instrType, int cycle, int mulCount, Blake2Generator& gen) const { //If the current RandomX instruction is "IMULH", the next fetch configuration must be 3-3-10 //because the full 128-bit multiplication instruction is 3 bytes long and decodes to 2 uOPs on Intel CPUs. //Intel CPUs can decode at most 4 uOPs per cycle, so this requires a 2-1-1 configuration for a total of 3 macro ops. @@ -345,9 +345,9 @@ namespace randomx { class RegisterInfo { public: - RegisterInfo() : latency(0), lastOpGroup(-1), lastOpPar(-1), value(0) {} + RegisterInfo() : latency(0), lastOpGroup(SuperscalarInstructionType::INVALID), lastOpPar(-1), value(0) {} int latency; - int lastOpGroup; + SuperscalarInstructionType lastOpGroup; int lastOpPar; int value; }; @@ -356,7 +356,7 @@ namespace randomx { class SuperscalarInstruction { public: void toInstr(Instruction& instr) { //translate to a RandomX instruction format - instr.opcode = getType(); + instr.opcode = (int)getType(); instr.dst = dst_; instr.src = src_ >= 0 ? 
src_ : dst_; instr.setMod(mod_); @@ -534,7 +534,7 @@ namespace randomx { return false; } - int getType() { + SuperscalarInstructionType getType() { return info_->getType(); } int getSource() { @@ -543,7 +543,7 @@ namespace randomx { int getDestination() { return dst_; } - int getGroup() { + SuperscalarInstructionType getGroup() { return opGroup_; } int getGroupPar() { @@ -562,7 +562,7 @@ namespace randomx { int dst_ = -1; int mod_; uint32_t imm32_; - int opGroup_; + SuperscalarInstructionType opGroup_; int opGroupPar_; bool canReuse_ = false; bool groupParIsSource_ = false; @@ -849,40 +849,40 @@ namespace randomx { void executeSuperscalar(int_reg_t(&r)[8], SuperscalarProgram& prog, std::vector *reciprocals) { for (unsigned j = 0; j < prog.getSize(); ++j) { Instruction& instr = prog(j); - switch (instr.opcode) + switch ((SuperscalarInstructionType)instr.opcode) { - case randomx::SuperscalarInstructionType::ISUB_R: + case SuperscalarInstructionType::ISUB_R: r[instr.dst] -= r[instr.src]; break; - case randomx::SuperscalarInstructionType::IXOR_R: + case SuperscalarInstructionType::IXOR_R: r[instr.dst] ^= r[instr.src]; break; - case randomx::SuperscalarInstructionType::IADD_RS: + case SuperscalarInstructionType::IADD_RS: r[instr.dst] += r[instr.src] << instr.getModShift(); break; - case randomx::SuperscalarInstructionType::IMUL_R: + case SuperscalarInstructionType::IMUL_R: r[instr.dst] *= r[instr.src]; break; - case randomx::SuperscalarInstructionType::IROR_C: + case SuperscalarInstructionType::IROR_C: r[instr.dst] = rotr(r[instr.dst], instr.getImm32()); break; - case randomx::SuperscalarInstructionType::IADD_C7: - case randomx::SuperscalarInstructionType::IADD_C8: - case randomx::SuperscalarInstructionType::IADD_C9: + case SuperscalarInstructionType::IADD_C7: + case SuperscalarInstructionType::IADD_C8: + case SuperscalarInstructionType::IADD_C9: r[instr.dst] += signExtend2sCompl(instr.getImm32()); break; - case randomx::SuperscalarInstructionType::IXOR_C7: - case 
randomx::SuperscalarInstructionType::IXOR_C8: - case randomx::SuperscalarInstructionType::IXOR_C9: + case SuperscalarInstructionType::IXOR_C7: + case SuperscalarInstructionType::IXOR_C8: + case SuperscalarInstructionType::IXOR_C9: r[instr.dst] ^= signExtend2sCompl(instr.getImm32()); break; - case randomx::SuperscalarInstructionType::IMULH_R: + case SuperscalarInstructionType::IMULH_R: r[instr.dst] = mulh(r[instr.dst], r[instr.src]); break; - case randomx::SuperscalarInstructionType::ISMULH_R: + case SuperscalarInstructionType::ISMULH_R: r[instr.dst] = smulh(r[instr.dst], r[instr.src]); break; - case randomx::SuperscalarInstructionType::IMUL_RCP: + case SuperscalarInstructionType::IMUL_RCP: if (reciprocals != nullptr) r[instr.dst] *= (*reciprocals)[instr.getImm32()]; else diff --git a/src/superscalar.hpp b/src/superscalar.hpp index 96360aa..2e55533 100644 --- a/src/superscalar.hpp +++ b/src/superscalar.hpp @@ -35,25 +35,24 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
namespace randomx { // Intel Ivy Bridge reference - namespace SuperscalarInstructionType { //uOPs (decode) execution ports latency code size - constexpr int ISUB_R = 0; //1 p015 1 3 (sub) - constexpr int IXOR_R = 1; //1 p015 1 3 (xor) - constexpr int IADD_RS = 2; //1 p01 1 4 (lea) - constexpr int IMUL_R = 3; //1 p1 3 4 (imul) - constexpr int IROR_C = 4; //1 p05 1 4 (ror) - constexpr int IADD_C7 = 5; //1 p015 1 7 (add) - constexpr int IXOR_C7 = 6; //1 p015 1 7 (xor) - constexpr int IADD_C8 = 7; //1+0 p015 1 7+1 (add+nop) - constexpr int IXOR_C8 = 8; //1+0 p015 1 7+1 (xor+nop) - constexpr int IADD_C9 = 9; //1+0 p015 1 7+2 (add+nop) - constexpr int IXOR_C9 = 10; //1+0 p015 1 7+2 (xor+nop) - constexpr int IMULH_R = 11; //1+2+1 0+(p1,p5)+0 3 3+3+3 (mov+mul+mov) - constexpr int ISMULH_R = 12; //1+2+1 0+(p1,p5)+0 3 3+3+3 (mov+imul+mov) - constexpr int IMUL_RCP = 13; //1+1 p015+p1 4 10+4 (mov+imul) + enum class SuperscalarInstructionType { //uOPs (decode) execution ports latency code size + ISUB_R = 0, //1 p015 1 3 (sub) + IXOR_R = 1, //1 p015 1 3 (xor) + IADD_RS = 2, //1 p01 1 4 (lea) + IMUL_R = 3, //1 p1 3 4 (imul) + IROR_C = 4, //1 p05 1 4 (ror) + IADD_C7 = 5, //1 p015 1 7 (add) + IXOR_C7 = 6, //1 p015 1 7 (xor) + IADD_C8 = 7, //1+0 p015 1 7+1 (add+nop) + IXOR_C8 = 8, //1+0 p015 1 7+1 (xor+nop) + IADD_C9 = 9, //1+0 p015 1 7+2 (add+nop) + IXOR_C9 = 10, //1+0 p015 1 7+2 (xor+nop) + IMULH_R = 11, //1+2+1 0+(p1,p5)+0 3 3+3+3 (mov+mul+mov) + ISMULH_R = 12, //1+2+1 0+(p1,p5)+0 3 3+3+3 (mov+imul+mov) + IMUL_RCP = 13, //1+1 p015+p1 4 10+4 (mov+imul) - constexpr int COUNT = 14; - constexpr int INVALID = -1; - } + INVALID = -1 + }; void generateSuperscalar(SuperscalarProgram& prog, Blake2Generator& gen); void executeSuperscalar(uint64_t(&r)[8], SuperscalarProgram& prog, std::vector *reciprocals = nullptr); diff --git a/src/vm_interpreted.hpp b/src/vm_interpreted.hpp index dfa1ba7..25795a6 100644 --- a/src/vm_interpreted.hpp +++ b/src/vm_interpreted.hpp @@ -50,7 +50,7 @@ namespace 
randomx { uint64_t imm; int64_t simm; }; - uint16_t type; + InstructionType type; union { int16_t target; uint16_t shift; @@ -58,6 +58,8 @@ namespace randomx { uint32_t memMask; }; + static_assert(sizeof(InstructionByteCode) == 32, "Invalid packing of struct InstructionByteCode"); + template class InterpretedVm : public VmBase { public: