diff --git a/doc/program.asm b/doc/program.asm index d0cbe1d..d3b9cab 100644 --- a/doc/program.asm +++ b/doc/program.asm @@ -18,8 +18,8 @@ randomx_isn_5: xchg r12, r8 randomx_isn_6: ; CBRANCH -188214077, COND 5 - add r9, -188214045 - test r9, 8160 + add r9, -188209981 + test r9, 2088960 jz randomx_isn_0 randomx_isn_7: ; ISTORE L3[r0-784322734], r3 @@ -52,13 +52,13 @@ randomx_isn_12: imul r15, r12 randomx_isn_13: ; CBRANCH 179989705, COND 3 - add r8, 179989705 - test r8, 2040 + add r8, 179988681 + test r8, 522240 jz randomx_isn_7 randomx_isn_14: ; CBRANCH 1801296358, COND 3 - add r10, 1801296366 - test r10, 2040 + add r10, 1801296358 + test r10, 522240 jz randomx_isn_14 randomx_isn_15: ; IADD_RS r6, r2, SHFT 3 @@ -80,8 +80,8 @@ randomx_isn_19: mulpd xmm5, xmm10 randomx_isn_20: ; CBRANCH 1593588996, COND 3 - add r11, 1593589004 - test r11, 2040 + add r11, 1593587972 + test r11, 522240 jz randomx_isn_15 randomx_isn_21: ; IROR_R r7, r2 @@ -102,7 +102,7 @@ randomx_isn_23: randomx_isn_24: ; CBRANCH 149087159, COND 13 add r12, 149087159 - test r12, 2088960 + test r12, 534773760 jz randomx_isn_21 randomx_isn_25: ; FADD_R f3, a0 @@ -208,8 +208,8 @@ randomx_isn_50: subpd xmm3, xmm8 randomx_isn_51: ; CBRANCH -1975981803, COND 14 - add r9, -1975981803 - test r9, 4177920 + add r9, -1973884651 + test r9, 1069547520 jz randomx_isn_25 randomx_isn_52: ; IADD_M r1, L3[1622792] @@ -219,8 +219,8 @@ randomx_isn_53: subpd xmm2, xmm8 randomx_isn_54: ; CBRANCH 1917049931, COND 12 - add r13, 1917049931 - test r13, 1044480 + add r13, 1918098507 + test r13, 267386880 jz randomx_isn_52 randomx_isn_55: ; IXOR_R r2, r3 @@ -249,7 +249,7 @@ randomx_isn_61: randomx_isn_62: ; CBRANCH 1111898647, COND 1 add r14, 1111898647 - test r14, 510 + test r14, 130560 jz randomx_isn_55 randomx_isn_63: ; IMUL_R r6, r5 @@ -288,8 +288,8 @@ randomx_isn_73: mulpd xmm4, xmm8 randomx_isn_74: ; CBRANCH -1200328848, COND 4 - add r15, -1200328848 - test r15, 4080 + add r15, -1200326800 + test r15, 1044480 jz randomx_isn_63 randomx_isn_75: ; FSQRT_R e0 @@ -346,8 +346,8 @@ randomx_isn_88: imul r9, qword ptr [rsi+rax] randomx_isn_89: ; CBRANCH -122257389, COND 13 - add r8, -122249197 - test r8, 2088960 + add r8, -123305965 + test r8, 534773760 jz randomx_isn_75 randomx_isn_90: ; ISTORE L1[r5+228116180], r7 @@ -481,8 +481,8 @@ randomx_isn_122: subpd xmm0, xmm9 randomx_isn_123: ; CBRANCH 269211216, COND 3 - add r9, 269211224 - test r9, 2040 + add r9, 269212240 + test r9, 522240 jz randomx_isn_100 randomx_isn_124: ; FSUB_M f2, L1[r6-1615966581] @@ -564,8 +564,8 @@ randomx_isn_142: addpd xmm1, xmm8 randomx_isn_143: ; CBRANCH 880467599, COND 5 - add r14, 880467631 - test r14, 8160 + add r14, 880471695 + test r14, 2088960 jz randomx_isn_124 randomx_isn_144: ; FMUL_R e1, a1 @@ -585,8 +585,8 @@ randomx_isn_147: add r9, qword ptr [rsi+rax] randomx_isn_148: ; CBRANCH -1843326985, COND 14 - add r10, -1843310601 - test r10, 4177920 + add r10, -1841229833 + test r10, 1069547520 jz randomx_isn_144 randomx_isn_149: ; IADD_RS r4, r3, SHFT 2 @@ -655,8 +655,8 @@ randomx_isn_163: shufpd xmm3, xmm3, 1 randomx_isn_164: ; CBRANCH -2107581963, COND 4 - add r11, -2107581963 - test r11, 4080 + add r11, -2107584011 + test r11, 1044480 jz randomx_isn_149 randomx_isn_165: ; FSUB_R f1, a2 @@ -720,8 +720,8 @@ randomx_isn_180: subpd xmm3, xmm9 randomx_isn_181: ; CBRANCH 556152230, COND 12 - add r12, 556152230 - test r12, 1044480 + add r12, 557200806 + test r12, 267386880 jz randomx_isn_165 randomx_isn_182: ; FSQRT_R e2 @@ -956,8 +956,8 @@ randomx_isn_246: imul r15, r10 randomx_isn_247: ; CBRANCH -8545330, COND 4 - add r8, -8545314 - test r8, 4080 + add r8, -8547378 + test r8, 1044480 jz randomx_isn_213 randomx_isn_248: ; ISTORE L1[r0+1951752498], r5 diff --git a/doc/specs.md b/doc/specs.md index 2d1ee20..2aab074 100644 --- a/doc/specs.md +++ b/doc/specs.md @@ -55,7 +55,8 @@ RandomX has several configurable parameters that are listed in Table 1.2.1 with |`RANDOMX_PROGRAM_SIZE`|The number of instructions in a RandomX program|`256`| |`RANDOMX_PROGRAM_ITERATIONS`|The number of iterations per program|`2048`| |`RANDOMX_PROGRAM_COUNT`|The number of programs per hash|`8`| -|`RANDOMX_JUMP_BITS`|How many register bits must be zero for the CBRANCH instruction to jump|`8`| +|`RANDOMX_JUMP_BITS`|Jump condition mask size in bits|`8`| +|`RANDOMX_JUMP_OFFSET`|Jump condition mask offset in bits|`8`| |`RANDOMX_SCRATCHPAD_L3`|Scratchpad L3 size in bytes|`2097152`| |`RANDOMX_SCRATCHPAD_L2`|Scratchpad L2 size in bytes|`262144`| |`RANDOMX_SCRATCHPAD_L1`|Scratchpad L1 size in bytes|`16384`| @@ -613,16 +614,28 @@ A register is considered as modified by an instruction in the following cases: There are 3 rules for the selection of the `creg` register, evaluated in this order: 1. The register with the lowest value of `lastUsed` tag is selected. -2. In case multiple registers have the same value of the `lastUsed` tag, the register with the lowest value of the `count` tag is selected. -3. In case multiple registers have the same values of both `lastUsed` and `count` tags, a register with the lowest index is selected (`r0` before `r1` etc.). +1. In case multiple registers have the same value of the `lastUsed` tag, the register with the lowest value of the `count` tag is selected. +1. In case multiple registers have the same values of both `lastUsed` and `count` tags, a register with the lowest index is selected (`r0` before `r1` etc.). Whenever a register is selected as the operand of a CBRANCH instruction, its `count` tag is increased by 1. -The CBRANCH instruction performs the following steps (`|` represents a bitwise OR operation, `&` is a bitwise AND operation): +The CBRANCH instruction performs the following steps: -1. A constant value of `imm32 | (1 << mod.cond)` is added to `creg`. -2. `conditionMask` is constructed as `RANDOMX_JUMP_BITS` one-bits shifted left by `mod.cond`. -3. If `creg & conditionMask` is zero, execution jumps to instruction `creg.lastUsed + 1` (the instruction following the instruction where `creg` was last modified). +1. A constant `b` is calculated as `mod.cond + RANDOMX_JUMP_OFFSET`. +1. A constant `conditionImmediate` is constructed as sign-extended `imm32` with bit `b` set to 1 and bit `b-1` set to 0 (if `b > 0`). +1. `conditionImmediate` is added to `creg`. +1. If bits `b` to `b + RANDOMX_JUMP_BITS - 1` of `creg` are zero, execution jumps to instruction `creg.lastUsed + 1` (the instruction following the instruction where `creg` was last modified). + +Bits in immediate and register values are numbered from 0 to 63 with 0 being the least significant bit. For example, for `b = 10` and `RANDOMX_JUMP_BITS = 8`, the bits are arranged like this: + +``` +conditionImmediate = SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSMMMMMMMMMMMMMMMMMMMMM10MMMMMMMMM + creg = ..............................................XXXXXXXX.......... +``` + +`S` is a copied sign bit from `imm32`. `M` denotes bits of `imm32`. The 9th bit is set to 0 and the 10th bit is set to 1. This value would be added to `creg`. + +The second line uses `X` to mark bits of `creg` that would be checked by the condition. If all these bits are 0 after adding `conditionImmediate`, the jump is executed. The construction of the CBRANCH instruction ensures that no inifinite loops are possible in the program. diff --git a/src/assembly_generator_x86.cpp b/src/assembly_generator_x86.cpp index 4492ec2..030ea60 100644 --- a/src/assembly_generator_x86.cpp +++ b/src/assembly_generator_x86.cpp @@ -532,8 +532,11 @@ namespace randomx { int reg = getConditionRegister(registerUsage); int target = registerUsage[reg].lastUsed + 1; registerUsage[reg].count++; - int shift = instr.getModCond(); - asmCode << "\tadd " << regR[reg] << ", " << (int32_t)(instr.getImm32() | (1 << shift)) << std::endl; + int shift = instr.getModCond() + ConditionOffset; + int32_t imm = instr.getImm32() | (1L << shift); + if (ConditionOffset > 0 || shift > 0) + imm &= ~(1L << (shift - 1)); + asmCode << "\tadd " << regR[reg] << ", " << imm << std::endl; asmCode << "\ttest " << regR[reg] << ", " << (ConditionMask << shift) << std::endl; asmCode << "\tjz randomx_isn_" << target << std::endl; //mark all registers as used diff --git a/src/common.hpp b/src/common.hpp index 92a36c9..ccbd301 100644 --- a/src/common.hpp +++ b/src/common.hpp @@ -41,7 +41,9 @@ namespace randomx { static_assert(RANDOMX_SCRATCHPAD_L2 >= RANDOMX_SCRATCHPAD_L1, "RANDOMX_SCRATCHPAD_L2 must be greater than or equal to RANDOMX_SCRATCHPAD_L1."); static_assert((RANDOMX_SCRATCHPAD_L1 & (RANDOMX_SCRATCHPAD_L1 - 1)) == 0, "RANDOMX_SCRATCHPAD_L1 must be a power of 2."); static_assert(RANDOMX_CACHE_ACCESSES > 1, "RANDOMX_CACHE_ACCESSES must be greater than 1"); - static_assert(RANDOMX_JUMP_BITS >= 1 && RANDOMX_JUMP_BITS <= 16, "RANDOMX_JUMP_BITS must be an integer in the range 1-16."); + static_assert(RANDOMX_JUMP_BITS > 0, "RANDOMX_JUMP_BITS must be greater than 0."); + static_assert(RANDOMX_JUMP_OFFSET >= 0, "RANDOMX_JUMP_OFFSET must be greater than or equal to 0."); + static_assert(RANDOMX_JUMP_BITS + RANDOMX_JUMP_OFFSET <= 16, "RANDOMX_JUMP_BITS + RANDOMX_JUMP_OFFSET must not exceed 16."); constexpr int wtSum = RANDOMX_FREQ_IADD_RS + RANDOMX_FREQ_IADD_M + RANDOMX_FREQ_ISUB_R + \ RANDOMX_FREQ_ISUB_M + RANDOMX_FREQ_IMUL_R + RANDOMX_FREQ_IMUL_M + RANDOMX_FREQ_IMULH_R + \ @@ -62,6 +64,7 @@ namespace randomx { constexpr uint64_t DatasetSize = RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE; constexpr uint32_t DatasetExtraItems = RANDOMX_DATASET_EXTRA_SIZE / RANDOMX_DATASET_ITEM_SIZE; constexpr uint32_t ConditionMask = ((1 << RANDOMX_JUMP_BITS) - 1); + constexpr int ConditionOffset = RANDOMX_JUMP_OFFSET; constexpr int StoreL3Condition = 14; #ifdef TRACE diff --git a/src/configuration.h b/src/configuration.h index 71f3932..c77ebb2 100644 --- a/src/configuration.h +++ b/src/configuration.h @@ -64,9 +64,12 @@ along with RandomX. If not, see. //Scratchpad L1 size in bytes. Must be a power of two and less than or equal to RANDOMX_SCRATCHPAD_L2. #define RANDOMX_SCRATCHPAD_L1 (16 * 1024) -//How many register bits must be zero for CBRANCH instruction to jump. Must be an integer in the range 1-16. +//Jump condition mask size in bits. #define RANDOMX_JUMP_BITS 8 +//Jump condition mask offset in bits. +#define RANDOMX_JUMP_OFFSET 8 + /* Instruction frequencies (per 256 opcodes) Total sum of frequencies must be 256 diff --git a/src/jit_compiler_x86.cpp b/src/jit_compiler_x86.cpp index 5182e8f..b8ae884 100644 --- a/src/jit_compiler_x86.cpp +++ b/src/jit_compiler_x86.cpp @@ -775,10 +775,13 @@ namespace randomx { int reg = getConditionRegister(registerUsage); int target = registerUsage[reg].lastUsed + 1; registerUsage[reg].count++; - int shift = instr.getModCond(); emit(REX_ADD_I); emitByte(0xc0 + reg); - emit32(instr.getImm32() | (1 << shift)); + int shift = instr.getModCond() + ConditionOffset; + uint32_t imm = instr.getImm32() | (1UL << shift); + if (ConditionOffset > 0 || shift > 0) + imm &= ~(1UL << (shift - 1)); + emit32(imm); emit(REX_TEST); emitByte(0xc0 + reg); emit32(ConditionMask << shift); diff --git a/src/tests/benchmark.cpp b/src/tests/benchmark.cpp index 4f7a8f6..01ca0fd 100644 --- a/src/tests/benchmark.cpp +++ b/src/tests/benchmark.cpp @@ -229,7 +229,7 @@ int main(int argc, char** argv) { std::cout << "Calculated result: "; result.print(std::cout); if (noncesCount == 1000 && seedValue == 0) - std::cout << "Reference result: a15448785857f9a78703eb5da235dfe73d0d5fc4c8effaebe73869904f5af47d" << std::endl; + std::cout << "Reference result: 47452f6064db799ae580dd71fe0ebe221579cedf837fac7095f1c5edc07cf345" << std::endl; if (!miningMode) { std::cout << "Performance: " << 1000 * elapsed / noncesCount << " ms per hash" << std::endl; } diff --git a/src/vm_interpreted.cpp b/src/vm_interpreted.cpp index 8d1afa4..b24a275 100644 --- a/src/vm_interpreted.cpp +++ b/src/vm_interpreted.cpp @@ -615,9 +615,11 @@ namespace randomx { ibc.isrc = &r[reg]; ibc.target = registerUsage[reg].lastUsed; registerUsage[reg].count++; - int shift = instr.getModCond(); - const uint64_t conditionMask = ConditionMask << instr.getModCond(); + int shift = instr.getModCond() + ConditionOffset; + const uint64_t conditionMask = ConditionMask << shift; ibc.imm = signExtend2sCompl(instr.getImm32()) | (1ULL << shift); + if (ConditionOffset > 0 || shift > 0) //clear the bit below the condition mask - this limits the number of successive jumps to 2 + ibc.imm &= ~(1ULL << (shift - 1)); ibc.memMask = ConditionMask << shift; //mark all registers as used for (unsigned j = 0; j < RegistersCount; ++j) {