Fixed a chance of CBRANCH looping

Fixed CBRANCH jump probability being lower than expected
This commit is contained in:
tevador 2019-05-03 14:02:40 +02:00
parent 5241cb902e
commit 9e5eac8645
8 changed files with 73 additions and 46 deletions

View file

@ -18,8 +18,8 @@ randomx_isn_5:
xchg r12, r8
randomx_isn_6:
; CBRANCH -188214077, COND 5
add r9, -188214045
test r9, 8160
add r9, -188209981
test r9, 2088960
jz randomx_isn_0
randomx_isn_7:
; ISTORE L3[r0-784322734], r3
@ -52,13 +52,13 @@ randomx_isn_12:
imul r15, r12
randomx_isn_13:
; CBRANCH 179989705, COND 3
add r8, 179989705
test r8, 2040
add r8, 179988681
test r8, 522240
jz randomx_isn_7
randomx_isn_14:
; CBRANCH 1801296358, COND 3
add r10, 1801296366
test r10, 2040
add r10, 1801296358
test r10, 522240
jz randomx_isn_14
randomx_isn_15:
; IADD_RS r6, r2, SHFT 3
@ -80,8 +80,8 @@ randomx_isn_19:
mulpd xmm5, xmm10
randomx_isn_20:
; CBRANCH 1593588996, COND 3
add r11, 1593589004
test r11, 2040
add r11, 1593587972
test r11, 522240
jz randomx_isn_15
randomx_isn_21:
; IROR_R r7, r2
@ -102,7 +102,7 @@ randomx_isn_23:
randomx_isn_24:
; CBRANCH 149087159, COND 13
add r12, 149087159
test r12, 2088960
test r12, 534773760
jz randomx_isn_21
randomx_isn_25:
; FADD_R f3, a0
@ -208,8 +208,8 @@ randomx_isn_50:
subpd xmm3, xmm8
randomx_isn_51:
; CBRANCH -1975981803, COND 14
add r9, -1975981803
test r9, 4177920
add r9, -1973884651
test r9, 1069547520
jz randomx_isn_25
randomx_isn_52:
; IADD_M r1, L3[1622792]
@ -219,8 +219,8 @@ randomx_isn_53:
subpd xmm2, xmm8
randomx_isn_54:
; CBRANCH 1917049931, COND 12
add r13, 1917049931
test r13, 1044480
add r13, 1918098507
test r13, 267386880
jz randomx_isn_52
randomx_isn_55:
; IXOR_R r2, r3
@ -249,7 +249,7 @@ randomx_isn_61:
randomx_isn_62:
; CBRANCH 1111898647, COND 1
add r14, 1111898647
test r14, 510
test r14, 130560
jz randomx_isn_55
randomx_isn_63:
; IMUL_R r6, r5
@ -288,8 +288,8 @@ randomx_isn_73:
mulpd xmm4, xmm8
randomx_isn_74:
; CBRANCH -1200328848, COND 4
add r15, -1200328848
test r15, 4080
add r15, -1200326800
test r15, 1044480
jz randomx_isn_63
randomx_isn_75:
; FSQRT_R e0
@ -346,8 +346,8 @@ randomx_isn_88:
imul r9, qword ptr [rsi+rax]
randomx_isn_89:
; CBRANCH -122257389, COND 13
add r8, -122249197
test r8, 2088960
add r8, -123305965
test r8, 534773760
jz randomx_isn_75
randomx_isn_90:
; ISTORE L1[r5+228116180], r7
@ -481,8 +481,8 @@ randomx_isn_122:
subpd xmm0, xmm9
randomx_isn_123:
; CBRANCH 269211216, COND 3
add r9, 269211224
test r9, 2040
add r9, 269212240
test r9, 522240
jz randomx_isn_100
randomx_isn_124:
; FSUB_M f2, L1[r6-1615966581]
@ -564,8 +564,8 @@ randomx_isn_142:
addpd xmm1, xmm8
randomx_isn_143:
; CBRANCH 880467599, COND 5
add r14, 880467631
test r14, 8160
add r14, 880471695
test r14, 2088960
jz randomx_isn_124
randomx_isn_144:
; FMUL_R e1, a1
@ -585,8 +585,8 @@ randomx_isn_147:
add r9, qword ptr [rsi+rax]
randomx_isn_148:
; CBRANCH -1843326985, COND 14
add r10, -1843310601
test r10, 4177920
add r10, -1841229833
test r10, 1069547520
jz randomx_isn_144
randomx_isn_149:
; IADD_RS r4, r3, SHFT 2
@ -655,8 +655,8 @@ randomx_isn_163:
shufpd xmm3, xmm3, 1
randomx_isn_164:
; CBRANCH -2107581963, COND 4
add r11, -2107581963
test r11, 4080
add r11, -2107584011
test r11, 1044480
jz randomx_isn_149
randomx_isn_165:
; FSUB_R f1, a2
@ -720,8 +720,8 @@ randomx_isn_180:
subpd xmm3, xmm9
randomx_isn_181:
; CBRANCH 556152230, COND 12
add r12, 556152230
test r12, 1044480
add r12, 557200806
test r12, 267386880
jz randomx_isn_165
randomx_isn_182:
; FSQRT_R e2
@ -956,8 +956,8 @@ randomx_isn_246:
imul r15, r10
randomx_isn_247:
; CBRANCH -8545330, COND 4
add r8, -8545314
test r8, 4080
add r8, -8547378
test r8, 1044480
jz randomx_isn_213
randomx_isn_248:
; ISTORE L1[r0+1951752498], r5

View file

@ -55,7 +55,8 @@ RandomX has several configurable parameters that are listed in Table 1.2.1 with
|`RANDOMX_PROGRAM_SIZE`|The number of instructions in a RandomX program|`256`|
|`RANDOMX_PROGRAM_ITERATIONS`|The number of iterations per program|`2048`|
|`RANDOMX_PROGRAM_COUNT`|The number of programs per hash|`8`|
|`RANDOMX_JUMP_BITS`|How many register bits must be zero for the CBRANCH instruction to jump|`8`|
|`RANDOMX_JUMP_BITS`|Jump condition mask size in bits|`8`|
|`RANDOMX_JUMP_OFFSET`|Jump condition mask offset in bits|`8`|
|`RANDOMX_SCRATCHPAD_L3`|Scratchpad L3 size in bytes|`2097152`|
|`RANDOMX_SCRATCHPAD_L2`|Scratchpad L2 size in bytes|`262144`|
|`RANDOMX_SCRATCHPAD_L1`|Scratchpad L1 size in bytes|`16384`|
@ -613,16 +614,28 @@ A register is considered as modified by an instruction in the following cases:
There are 3 rules for the selection of the `creg` register, evaluated in this order:
1. The register with the lowest value of `lastUsed` tag is selected.
2. In case multiple registers have the same value of the `lastUsed` tag, the register with the lowest value of the `count` tag is selected.
3. In case multiple registers have the same values of both `lastUsed` and `count` tags, a register with the lowest index is selected (`r0` before `r1` etc.).
1. In case multiple registers have the same value of the `lastUsed` tag, the register with the lowest value of the `count` tag is selected.
1. In case multiple registers have the same values of both `lastUsed` and `count` tags, a register with the lowest index is selected (`r0` before `r1` etc.).
Whenever a register is selected as the operand of a CBRANCH instruction, its `count` tag is increased by 1.
The CBRANCH instruction performs the following steps (`|` represents a bitwise OR operation, `&` is a bitwise AND operation):
The CBRANCH instruction performs the following steps:
1. A constant value of `imm32 | (1 << mod.cond)` is added to `creg`.
2. `conditionMask` is constructed as `RANDOMX_JUMP_BITS` one-bits shifted left by `mod.cond`.
3. If `creg & conditionMask` is zero, execution jumps to instruction `creg.lastUsed + 1` (the instruction following the instruction where `creg` was last modified).
1. A constant `b` is calculated as `mod.cond + RANDOMX_JUMP_OFFSET`.
1. A constant `conditionImmediate` is constructed as sign-extended `imm32` with bit `b` set to 1 and bit `b-1` set to 0 (if `b > 0`).
1. `conditionImmediate` is added to `creg`.
1. If bits `b` to `b + RANDOMX_JUMP_BITS - 1` of `creg` are zero, execution jumps to instruction `creg.lastUsed + 1` (the instruction following the instruction where `creg` was last modified).
Bits in immediate and register values are numbered from 0 to 63 with 0 being the least significant bit. For example, for `b = 10` and `RANDOMX_JUMP_BITS = 8`, the bits are arranged like this:
```
conditionImmediate = SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSMMMMMMMMMMMMMMMMMMMMM10MMMMMMMMM
creg = ..............................................XXXXXXXX..........
```
`S` is a copied sign bit from `imm32`. `M` denotes bits of `imm32`. Bit 9 is set to 0 and bit 10 is set to 1. This value would be added to `creg`.
The second line uses `X` to mark bits of `creg` that would be checked by the condition. If all these bits are 0 after adding `conditionImmediate`, the jump is executed.
The construction of the CBRANCH instruction ensures that no infinite loops are possible in the program.

View file

@ -532,8 +532,11 @@ namespace randomx {
int reg = getConditionRegister(registerUsage);
int target = registerUsage[reg].lastUsed + 1;
registerUsage[reg].count++;
int shift = instr.getModCond();
asmCode << "\tadd " << regR[reg] << ", " << (int32_t)(instr.getImm32() | (1 << shift)) << std::endl;
int shift = instr.getModCond() + ConditionOffset;
int32_t imm = instr.getImm32() | (1L << shift);
if (ConditionOffset > 0 || shift > 0)
imm &= ~(1L << (shift - 1));
asmCode << "\tadd " << regR[reg] << ", " << imm << std::endl;
asmCode << "\ttest " << regR[reg] << ", " << (ConditionMask << shift) << std::endl;
asmCode << "\tjz randomx_isn_" << target << std::endl;
//mark all registers as used

View file

@ -41,7 +41,9 @@ namespace randomx {
static_assert(RANDOMX_SCRATCHPAD_L2 >= RANDOMX_SCRATCHPAD_L1, "RANDOMX_SCRATCHPAD_L2 must be greater than or equal to RANDOMX_SCRATCHPAD_L1.");
static_assert((RANDOMX_SCRATCHPAD_L1 & (RANDOMX_SCRATCHPAD_L1 - 1)) == 0, "RANDOMX_SCRATCHPAD_L1 must be a power of 2.");
static_assert(RANDOMX_CACHE_ACCESSES > 1, "RANDOMX_CACHE_ACCESSES must be greater than 1");
static_assert(RANDOMX_JUMP_BITS >= 1 && RANDOMX_JUMP_BITS <= 16, "RANDOMX_JUMP_BITS must be an integer in the range 1-16.");
static_assert(RANDOMX_JUMP_BITS > 0, "RANDOMX_JUMP_BITS must be greater than 0.");
static_assert(RANDOMX_JUMP_OFFSET >= 0, "RANDOMX_JUMP_OFFSET must be greater than or equal to 0.");
static_assert(RANDOMX_JUMP_BITS + RANDOMX_JUMP_OFFSET <= 16, "RANDOMX_JUMP_BITS + RANDOMX_JUMP_OFFSET must not exceed 16.");
constexpr int wtSum = RANDOMX_FREQ_IADD_RS + RANDOMX_FREQ_IADD_M + RANDOMX_FREQ_ISUB_R + \
RANDOMX_FREQ_ISUB_M + RANDOMX_FREQ_IMUL_R + RANDOMX_FREQ_IMUL_M + RANDOMX_FREQ_IMULH_R + \
@ -62,6 +64,7 @@ namespace randomx {
constexpr uint64_t DatasetSize = RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE;
constexpr uint32_t DatasetExtraItems = RANDOMX_DATASET_EXTRA_SIZE / RANDOMX_DATASET_ITEM_SIZE;
constexpr uint32_t ConditionMask = ((1 << RANDOMX_JUMP_BITS) - 1);
constexpr int ConditionOffset = RANDOMX_JUMP_OFFSET;
constexpr int StoreL3Condition = 14;
#ifdef TRACE

View file

@ -64,9 +64,12 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
//Scratchpad L1 size in bytes. Must be a power of two and less than or equal to RANDOMX_SCRATCHPAD_L2.
#define RANDOMX_SCRATCHPAD_L1 (16 * 1024)
//How many register bits must be zero for CBRANCH instruction to jump. Must be an integer in the range 1-16.
//Jump condition mask size in bits.
#define RANDOMX_JUMP_BITS 8
//Jump condition mask offset in bits.
#define RANDOMX_JUMP_OFFSET 8
/*
Instruction frequencies (per 256 opcodes)
Total sum of frequencies must be 256

View file

@ -775,10 +775,13 @@ namespace randomx {
int reg = getConditionRegister(registerUsage);
int target = registerUsage[reg].lastUsed + 1;
registerUsage[reg].count++;
int shift = instr.getModCond();
emit(REX_ADD_I);
emitByte(0xc0 + reg);
emit32(instr.getImm32() | (1 << shift));
int shift = instr.getModCond() + ConditionOffset;
uint32_t imm = instr.getImm32() | (1UL << shift);
if (ConditionOffset > 0 || shift > 0)
imm &= ~(1UL << (shift - 1));
emit32(imm);
emit(REX_TEST);
emitByte(0xc0 + reg);
emit32(ConditionMask << shift);

View file

@ -229,7 +229,7 @@ int main(int argc, char** argv) {
std::cout << "Calculated result: ";
result.print(std::cout);
if (noncesCount == 1000 && seedValue == 0)
std::cout << "Reference result: a15448785857f9a78703eb5da235dfe73d0d5fc4c8effaebe73869904f5af47d" << std::endl;
std::cout << "Reference result: 47452f6064db799ae580dd71fe0ebe221579cedf837fac7095f1c5edc07cf345" << std::endl;
if (!miningMode) {
std::cout << "Performance: " << 1000 * elapsed / noncesCount << " ms per hash" << std::endl;
}

View file

@ -615,9 +615,11 @@ namespace randomx {
ibc.isrc = &r[reg];
ibc.target = registerUsage[reg].lastUsed;
registerUsage[reg].count++;
int shift = instr.getModCond();
const uint64_t conditionMask = ConditionMask << instr.getModCond();
int shift = instr.getModCond() + ConditionOffset;
const uint64_t conditionMask = ConditionMask << shift;
ibc.imm = signExtend2sCompl(instr.getImm32()) | (1ULL << shift);
if (ConditionOffset > 0 || shift > 0) //clear the bit below the condition mask - this limits the number of successive jumps to 2
ibc.imm &= ~(1ULL << (shift - 1));
ibc.memMask = ConditionMask << shift;
//mark all registers as used
for (unsigned j = 0; j < RegistersCount; ++j) {