diff --git a/doc/program.asm b/doc/program.asm index 8476cbb..6609d89 100644 --- a/doc/program.asm +++ b/doc/program.asm @@ -25,9 +25,9 @@ randomx_isn_6: ; FSQRT_R e3 sqrtpd xmm7, xmm7 randomx_isn_7: - ; ISTORE L1[r0-784322734], r3 + ; ISTORE L3[r0-784322734], r3 lea eax, [r8d-784322734] - and eax, 16376 + and eax, 2097144 mov qword ptr [rsi+rax], r11 randomx_isn_8: ; FMUL_R e1, a1 @@ -55,8 +55,8 @@ randomx_isn_14: ; FSQRT_R e2 sqrtpd xmm6, xmm6 randomx_isn_15: - ; IADD_RS r6, r2, LSH 1 - lea r14, [r14+r10*2] + ; IADD_RS r6, r2, SHFT 3 + lea r14, [r14+r10*8] randomx_isn_16: ; FSUB_M f2, L1[r1-1890725713] lea eax, [r9d-1890725713] @@ -68,22 +68,18 @@ randomx_isn_17: mov ecx, r11d ror r12, cl randomx_isn_18: - ; ISTORE L3[r4+1297827817], r4 + ; ISTORE L1[r4+1297827817], r4 lea eax, [r12d+1297827817] - and eax, 2097144 + and eax, 16376 mov qword ptr [rsi+rax], r12 randomx_isn_19: ; FMUL_R e1, a2 mulpd xmm5, xmm10 randomx_isn_20: - ; COND_R r6, of(r3, 1593588996), LSH 1 - add r8, 2 - test r8, 254 + ; CBRANCH 1593588996, COND 3 + add r8, 1593589004 + test r8, 1016 jz randomx_isn_0 - xor ecx, ecx - cmp r11d, 1593588996 - seto cl - add r14, rcx randomx_isn_21: ; IXOR_M r7, L1[r2+1680388681] lea eax, [r10d+1680388681] @@ -98,26 +94,22 @@ randomx_isn_23: ; FMUL_R e2, a0 mulpd xmm6, xmm8 randomx_isn_24: - ; COND_R r6, no(r0, 149087159), LSH 6 - add r8, 64 - test r8, 8128 + ; CBRANCH 149087159, COND 13 + add r8, 149087159 + test r8, 1040384 jz randomx_isn_21 - xor ecx, ecx - cmp r8d, 149087159 - setno cl - add r14, rcx randomx_isn_25: ; FADD_R f3, a0 addpd xmm3, xmm8 randomx_isn_26: - ; IADD_RS r7, r0, LSH 3 - lea r15, [r15+r8*8] + ; IADD_RS r7, r0, SHFT 2 + lea r15, [r15+r8*4] randomx_isn_27: ; IMUL_R r2, r3 imul r10, r11 randomx_isn_28: - ; IADD_RS r5, r7, 1345488645, LSH 1 - lea r13, [r13+r15*2+1345488645] + ; IADD_RS r5, r7, 1345488645, SHFT 3 + lea r13, [r13+r15*8+1345488645] randomx_isn_29: ; ISTORE L2[r6-950233266], r2 lea eax, [r14d-950233266] @@ -144,8 +136,8 @@ randomx_isn_35: ; IMUL_R r6, 835132161 imul r14, 835132161 randomx_isn_36: - ; IADD_RS r3, r4, LSH 2 - lea r11, [r11+r12*4] + ; IADD_RS r3, r4, SHFT 0 + lea r11, [r11+r12*1] randomx_isn_37: ; ISUB_M r6, L2[r4+1885029796] lea eax, [r12d+1885029796] @@ -173,12 +165,12 @@ randomx_isn_44: ; FADD_R f1, a2 addpd xmm1, xmm10 randomx_isn_45: - ; ISTORE L3[r0+1805562386], r5 + ; ISTORE L1[r0+1805562386], r5 lea eax, [r8d+1805562386] - and eax, 2097144 + and eax, 16376 mov qword ptr [rsi+rax], r13 randomx_isn_46: - ; IADD_RS r0, r7, LSH 0 + ; IADD_RS r0, r7, SHFT 0 lea r8, [r8+r15*1] randomx_isn_47: ; IXOR_R r5, r2 @@ -193,29 +185,21 @@ randomx_isn_50: ; FSUB_R f3, a0 subpd xmm3, xmm8 randomx_isn_51: - ; COND_R r2, be(r3, -1975981803), LSH 7 - add r12, 128 - test r12, 16256 + ; CBRANCH -1975981803, COND 14 + add r12, -1975981803 + test r12, 2080768 jz randomx_isn_25 - xor ecx, ecx - cmp r11d, -1975981803 - setbe cl - add r10, rcx randomx_isn_52: - ; IADD_RS r1, r1, LSH 2 + ; IADD_RS r1, r1, SHFT 2 lea r9, [r9+r9*4] randomx_isn_53: ; FSUB_R f2, a0 subpd xmm2, xmm8 randomx_isn_54: - ; COND_R r5, ns(r1, 1917049931), LSH 6 - add r8, 64 - test r8, 8128 + ; CBRANCH 1917049931, COND 12 + add r8, 1917049931 + test r8, 520192 jz randomx_isn_52 - xor ecx, ecx - cmp r9d, 1917049931 - setns cl - add r13, rcx randomx_isn_55: ; IXOR_R r2, r3 xor r10, r11 @@ -226,7 +210,7 @@ randomx_isn_57: ; IMUL_R r5, r1 imul r13, r9 randomx_isn_58: - ; IADD_RS r5, r1, -999103579, LSH 0 + ; IADD_RS r5, r1, -999103579, SHFT 0 lea r13, [r13+r9*1-999103579] randomx_isn_59: ; FMUL_R e2, a2 @@ -236,8 +220,8 @@ randomx_isn_60: mov ecx, r14d ror r10, cl randomx_isn_61: - ; IADD_RS r0, r3, LSH 1 - lea r8, [r8+r11*2] + ; IADD_RS r0, r3, SHFT 3 + lea r8, [r8+r11*8] randomx_isn_62: ; FSQRT_R e3 sqrtpd xmm7, xmm7 @@ -259,8 +243,8 @@ randomx_isn_66: ; ISUB_R r4, 841292629 sub r12, 841292629 randomx_isn_67: - ; IADD_RS r4, r6, LSH 2 - lea r12, [r12+r14*4] + ; IADD_RS r4, r6, SHFT 3 + lea r12, [r12+r14*8] randomx_isn_68: ; FSUB_M f3, L1[r4+613549729] lea eax, [r12d+613549729] @@ -268,8 +252,8 @@ randomx_isn_68: cvtdq2pd xmm12, qword ptr [rsi+rax] subpd xmm3, xmm12 randomx_isn_69: - ; IADD_RS r6, r4, LSH 0 - lea r14, [r14+r12*1] + ; IADD_RS r6, r4, SHFT 3 + lea r14, [r14+r12*8] randomx_isn_70: ; FSUB_M f1, L1[r5+629563256] lea eax, [r13d+629563256] @@ -286,14 +270,10 @@ randomx_isn_73: ; FMUL_R e0, a0 mulpd xmm4, xmm8 randomx_isn_74: - ; COND_R r6, ns(r3, -1200328848), LSH 2 - add r9, 4 - test r9, 508 + ; CBRANCH -1200328848, COND 4 + add r9, -1200328848 + test r9, 2032 jz randomx_isn_55 - xor ecx, ecx - cmp r11d, -1200328848 - setns cl - add r14, rcx randomx_isn_75: ; FMUL_R e0, a3 mulpd xmm4, xmm11 @@ -312,12 +292,12 @@ randomx_isn_78: ; FMUL_R e2, a1 mulpd xmm6, xmm9 randomx_isn_79: - ; IADD_RS r3, r1, LSH 1 - lea r11, [r11+r9*2] + ; IADD_RS r3, r1, SHFT 2 + lea r11, [r11+r9*4] randomx_isn_80: - ; ISTORE L3[r2+1885666804], r4 + ; ISTORE L1[r2+1885666804], r4 lea eax, [r10d+1885666804] - and eax, 2097144 + and eax, 16376 mov qword ptr [rsi+rax], r12 randomx_isn_81: ; IMULH_R r3, r0 @@ -348,23 +328,19 @@ randomx_isn_88: ; IMUL_R r1, r3 imul r9, r11 randomx_isn_89: - ; COND_R r2, no(r0, -122257389), LSH 6 - add r8, 64 - test r8, 8128 + ; CBRANCH -122257389, COND 13 + add r8, -122249197 + test r8, 1040384 jz randomx_isn_75 - xor ecx, ecx - cmp r8d, -122257389 - setno cl - add r10, rcx randomx_isn_90: ; ISTORE L1[r5+228116180], r7 lea eax, [r13d+228116180] and eax, 16376 mov qword ptr [rsi+rax], r15 randomx_isn_91: - ; ISTORE L1[r6+650356254], r5 + ; ISTORE L3[r6+650356254], r5 lea eax, [r14d+650356254] - and eax, 16376 + and eax, 2097144 mov qword ptr [rsi+rax], r13 randomx_isn_92: ; FSUB_R f2, a0 @@ -426,8 +402,8 @@ randomx_isn_107: imul r13 mov r14, rdx randomx_isn_108: - ; IADD_RS r7, r0, LSH 1 - lea r15, [r15+r8*2] + ; IADD_RS r7, r0, SHFT 2 + lea r15, [r15+r8*4] randomx_isn_109: ; IMUL_R r6, r5 imul r14, r13 @@ -441,14 +417,14 @@ randomx_isn_111: cvtdq2pd xmm12, qword ptr [rsi+rax] addpd xmm2, xmm12 randomx_isn_112: - ; IADD_RS r0, r3, LSH 0 - lea r8, [r8+r11*1] + ; IADD_RS r0, r3, SHFT 1 + lea r8, [r8+r11*2] randomx_isn_113: - ; IADD_RS r3, r4, LSH 1 + ; IADD_RS r3, r4, SHFT 1 lea r11, [r11+r12*2] randomx_isn_114: - ; IADD_RS r2, r4, LSH 2 - lea r10, [r10+r12*4] + ; IADD_RS r2, r4, SHFT 1 + lea r10, [r10+r12*2] randomx_isn_115: ; IMUL_M r7, L1[r2-106928748] lea eax, [r10d-106928748] @@ -461,17 +437,17 @@ randomx_isn_117: ; FSUB_R f2, a2 subpd xmm2, xmm10 randomx_isn_118: - ; IADD_RS r2, r2, LSH 0 - lea r10, [r10+r10*1] + ; IADD_RS r2, r2, SHFT 3 + lea r10, [r10+r10*8] randomx_isn_119: ; ISUB_R r7, -342152774 sub r15, -342152774 randomx_isn_120: - ; IADD_RS r4, r1, LSH 1 - lea r12, [r12+r9*2] + ; IADD_RS r4, r1, SHFT 0 + lea r12, [r12+r9*1] randomx_isn_121: - ; IADD_RS r4, r7, LSH 2 - lea r12, [r12+r15*4] + ; IADD_RS r4, r7, SHFT 3 + lea r12, [r12+r15*8] randomx_isn_122: ; FSUB_R f0, a1 subpd xmm0, xmm9 @@ -501,7 +477,7 @@ randomx_isn_128: ; FSUB_R f3, a1 subpd xmm3, xmm9 randomx_isn_129: - ; IADD_RS r1, r2, LSH 2 + ; IADD_RS r1, r2, SHFT 2 lea r9, [r9+r10*4] randomx_isn_130: ; FSUB_R f1, a1 @@ -528,8 +504,8 @@ randomx_isn_136: ; ISUB_R r3, r6 sub r11, r14 randomx_isn_137: - ; IADD_RS r4, r1, LSH 0 - lea r12, [r12+r9*1] + ; IADD_RS r4, r1, SHFT 1 + lea r12, [r12+r9*2] randomx_isn_138: ; ISTORE L1[r0+56684410], r0 lea eax, [r8d+56684410] @@ -552,14 +528,10 @@ randomx_isn_142: ; FADD_R f1, a0 addpd xmm1, xmm8 randomx_isn_143: - ; COND_R r5, ge(r1, 880467599), LSH 2 - add r14, 4 - test r14, 508 + ; CBRANCH 880467599, COND 5 + add r14, 880467631 + test r14, 4064 jz randomx_isn_110 - xor ecx, ecx - cmp r9d, 880467599 - setge cl - add r13, rcx randomx_isn_144: ; FSUB_M f1, L1[r5+1283529302] lea eax, [r13d+1283529302] @@ -570,17 +542,17 @@ randomx_isn_145: ; ISUB_R r5, r3 sub r13, r11 randomx_isn_146: - ; IADD_RS r0, r3, LSH 1 - lea r8, [r8+r11*2] + ; IADD_RS r0, r3, SHFT 3 + lea r8, [r8+r11*8] randomx_isn_147: - ; IADD_RS r1, r3, LSH 1 - lea r9, [r9+r11*2] + ; IADD_RS r1, r3, SHFT 2 + lea r9, [r9+r11*4] randomx_isn_148: ; FSQRT_R e1 sqrtpd xmm5, xmm5 randomx_isn_149: - ; IADD_RS r4, r3, LSH 1 - lea r12, [r12+r11*2] + ; IADD_RS r4, r3, SHFT 2 + lea r12, [r12+r11*4] randomx_isn_150: ; FADD_M f1, L1[r0-1977073973] lea eax, [r8d-1977073973] @@ -588,8 +560,8 @@ randomx_isn_150: cvtdq2pd xmm12, qword ptr [rsi+rax] addpd xmm1, xmm12 randomx_isn_151: - ; IADD_RS r1, r0, LSH 1 - lea r9, [r9+r8*2] + ; IADD_RS r1, r0, SHFT 3 + lea r9, [r9+r8*8] randomx_isn_152: ; FSUB_R f1, a0 subpd xmm1, xmm8 @@ -622,8 +594,8 @@ randomx_isn_158: and eax, 16376 mov qword ptr [rsi+rax], r12 randomx_isn_159: - ; IADD_RS r7, r2, LSH 3 - lea r15, [r15+r10*8] + ; IADD_RS r7, r2, SHFT 2 + lea r15, [r15+r10*4] randomx_isn_160: ; IMUL_RCP r7, 2040763167 mov rax, 9705702723791900149 @@ -632,8 +604,8 @@ randomx_isn_161: ; FADD_R f3, a3 addpd xmm3, xmm11 randomx_isn_162: - ; IADD_RS r6, r4, LSH 1 - lea r14, [r14+r12*2] + ; IADD_RS r6, r4, SHFT 3 + lea r14, [r14+r12*8] randomx_isn_163: ; ISWAP_R r3, r5 xchg r11, r13 @@ -697,8 +669,8 @@ randomx_isn_177: ; IMUL_M r3, L3[232968] imul r11, qword ptr [rsi+232968] randomx_isn_178: - ; IADD_RS r5, r3, -2108568616, LSH 1 - lea r13, [r13+r11*2-2108568616] + ; IADD_RS r5, r3, -2108568616, SHFT 0 + lea r13, [r13+r11*1-2108568616] randomx_isn_179: ; IADD_M r3, L1[r4+1322108729] lea eax, [r12d+1322108729] @@ -714,8 +686,8 @@ randomx_isn_182: ; FMUL_R e2, a2 mulpd xmm6, xmm10 randomx_isn_183: - ; IADD_RS r6, r2, LSH 0 - lea r14, [r14+r10*1] + ; IADD_RS r6, r2, SHFT 1 + lea r14, [r14+r10*2] randomx_isn_184: ; FADD_R f2, a3 addpd xmm2, xmm11 @@ -726,7 +698,7 @@ randomx_isn_186: ; FSCAL_R f3 xorps xmm3, xmm15 randomx_isn_187: - ; IADD_RS r6, r6, LSH 3 + ; IADD_RS r6, r6, SHFT 3 lea r14, [r14+r14*8] randomx_isn_188: ; FSCAL_R f2 @@ -779,8 +751,8 @@ randomx_isn_199: ; FSUB_R f3, a3 subpd xmm3, xmm11 randomx_isn_200: - ; IADD_RS r2, r5, LSH 2 - lea r10, [r10+r13*4] + ; IADD_RS r2, r5, SHFT 0 + lea r10, [r10+r13*1] randomx_isn_201: ; ISUB_M r6, L2[r3+376384700] lea eax, [r11d+376384700] @@ -803,14 +775,14 @@ randomx_isn_205: and eax, 262136 add r15, qword ptr [rsi+rax] randomx_isn_206: - ; IADD_RS r3, r5, LSH 0 - lea r11, [r11+r13*1] + ; IADD_RS r3, r5, SHFT 2 + lea r11, [r11+r13*4] randomx_isn_207: ; FSCAL_R f1 xorps xmm1, xmm15 randomx_isn_208: - ; IADD_RS r6, r3, LSH 1 - lea r14, [r14+r11*2] + ; IADD_RS r6, r3, SHFT 0 + lea r14, [r14+r11*1] randomx_isn_209: ; FSUB_M f0, L1[r4-557177119] lea eax, [r12d-557177119] @@ -866,18 +838,18 @@ randomx_isn_221: ; IMUL_R r1, r0 imul r9, r8 randomx_isn_222: - ; IADD_RS r1, r0, LSH 2 + ; IADD_RS r1, r0, SHFT 2 lea r9, [r9+r8*4] randomx_isn_223: ; FSCAL_R f2 xorps xmm2, xmm15 randomx_isn_224: - ; IADD_RS r5, r4, 312567979, LSH 1 - lea r13, [r13+r12*2+312567979] + ; IADD_RS r5, r4, 312567979, SHFT 3 + lea r13, [r13+r12*8+312567979] randomx_isn_225: - ; ISTORE L2[r2+260885699], r1 + ; ISTORE L3[r2+260885699], r1 lea eax, [r10d+260885699] - and eax, 262136 + and eax, 2097144 mov qword ptr [rsi+rax], r9 randomx_isn_226: ; ISUB_R r6, -791575725 @@ -897,8 +869,8 @@ randomx_isn_229: ; ISWAP_R r0, r6 xchg r8, r14 randomx_isn_230: - ; IADD_RS r2, r7, LSH 2 - lea r10, [r10+r15*4] + ; IADD_RS r2, r7, SHFT 3 + lea r10, [r10+r15*8] randomx_isn_231: ; FMUL_R e1, a0 mulpd xmm5, xmm8 @@ -923,8 +895,8 @@ randomx_isn_237: ; FSUB_R f1, a3 subpd xmm1, xmm11 randomx_isn_238: - ; IADD_RS r4, r2, LSH 1 - lea r12, [r12+r10*2] + ; IADD_RS r4, r2, SHFT 0 + lea r12, [r12+r10*1] randomx_isn_239: ; IMUL_RCP r7, 3065786637 mov rax, 12921343181238534701 @@ -957,36 +929,32 @@ randomx_isn_246: and eax, 262136 sub r15, qword ptr [rsi+rax] randomx_isn_247: - ; COND_R r2, be(r5, -8545330), LSH 2 - add r9, 4 - test r9, 508 + ; CBRANCH -8545330, COND 4 + add r9, -8545314 + test r9, 2032 jz randomx_isn_223 - xor ecx, ecx - cmp r13d, -8545330 - setbe cl - add r10, rcx randomx_isn_248: ; ISTORE L1[r0+1951752498], r5 lea eax, [r8d+1951752498] and eax, 16376 mov qword ptr [rsi+rax], r13 randomx_isn_249: - ; IADD_RS r6, r5, LSH 2 - lea r14, [r14+r13*4] + ; IADD_RS r6, r5, SHFT 3 + lea r14, [r14+r13*8] randomx_isn_250: ; FADD_R f3, a0 addpd xmm3, xmm8 randomx_isn_251: - ; IADD_RS r0, r0, LSH 0 + ; IADD_RS r0, r0, SHFT 0 lea r8, [r8+r8*1] randomx_isn_252: ; ISUB_R r4, r2 sub r12, r10 randomx_isn_253: - ; IADD_RS r5, r4, 256175395, LSH 0 - lea r13, [r13+r12*1+256175395] + ; IADD_RS r5, r4, 256175395, SHFT 3 + lea r13, [r13+r12*8+256175395] randomx_isn_254: - ; IADD_RS r6, r7, LSH 2 + ; IADD_RS r6, r7, SHFT 2 lea r14, [r14+r15*4] randomx_isn_255: ; IROR_R r7, r3 diff --git a/src/assembly_generator_x86.cpp b/src/assembly_generator_x86.cpp index 165d016..259901c 100644 --- a/src/assembly_generator_x86.cpp +++ b/src/assembly_generator_x86.cpp @@ -44,14 +44,12 @@ namespace randomx { static const char* regScratchpadAddr = "rsi"; void AssemblyGeneratorX86::generateProgram(Program& prog) { - for (unsigned i = 0; i < 8; ++i) { + for (unsigned i = 0; i < RegistersCount; ++i) { registerUsage[i] = -1; } asmCode.str(std::string()); //clear for (unsigned i = 0; i < prog.getSize(); ++i) { -#if RANDOMX_JUMP asmCode << "randomx_isn_" << i << ":" << std::endl; -#endif Instruction& instr = prog(i); instr.src %= RegistersCount; instr.dst %= RegistersCount; @@ -261,7 +259,7 @@ namespace randomx { void AssemblyGeneratorX86::genAddressRegDst(Instruction& instr, int maskAlign = 8) { asmCode << "\tlea eax, [" << regR32[instr.dst] << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl; int mask; - if (instr.getModCond()) { + if (instr.getModCond() < StoreL3Condition) { mask = instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask; } else { @@ -277,9 +275,9 @@ namespace randomx { void AssemblyGeneratorX86::h_IADD_RS(Instruction& instr, int i) { registerUsage[instr.dst] = i; if(instr.dst == RegisterNeedsDisplacement) - asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.getModMem())) << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl; + asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.getModShift())) << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl; else - asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.getModMem())) << "]" << std::endl; + asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.getModShift())) << "]" << std::endl; traceint(instr); } @@ -542,55 +540,18 @@ namespace randomx { tracenop(instr); } - static inline const char* condition(Instruction& instr) { - switch (instr.getModCond()) - { - case 0: - return "be"; - case 1: - return "a"; - case 2: - return "s"; - case 3: - return "ns"; - case 4: - return "o"; - case 5: - return "no"; - case 6: - return "l"; - case 7: - return "ge"; - default: - UNREACHABLE; - } - } - - void AssemblyGeneratorX86::handleCondition(Instruction& instr, int i) { - const int shift = instr.getModShift(); - const int conditionMask = ((1 << RANDOMX_JUMP_BITS) - 1) << shift; + void AssemblyGeneratorX86::h_CBRANCH(Instruction& instr, int i) { int reg = getConditionRegister(); int target = registerUsage[reg] + 1; - registerUsage[reg] = i; - asmCode << "\tadd " << regR[reg] << ", " << (1 << shift) << std::endl; - asmCode << "\ttest " << regR[reg] << ", " << conditionMask << std::endl; + int shift = instr.getModCond(); + asmCode << "\tadd " << regR[reg] << ", " << (int32_t)(instr.getImm32() | (1 << shift)) << std::endl; + asmCode << "\ttest " << regR[reg] << ", " << (ConditionMask << shift) << std::endl; asmCode << "\tjz randomx_isn_" << target << std::endl; - for (unsigned j = 0; j < 8; ++j) { //mark all registers as used + for (unsigned j = 0; j < RegistersCount; ++j) { //mark all registers as used registerUsage[j] = i; } } - void AssemblyGeneratorX86::h_COND_R(Instruction& instr, int i) { -#if RANDOMX_JUMP - handleCondition(instr, i); -#endif - asmCode << "\txor ecx, ecx" << std::endl; - asmCode << "\tcmp " << regR32[instr.src] << ", " << (int32_t)instr.getImm32() << std::endl; - asmCode << "\tset" << condition(instr) << " cl" << std::endl; - asmCode << "\tadd " << regR[instr.dst] << ", rcx" << std::endl; - traceint(instr); - } - void AssemblyGeneratorX86::h_ISTORE(Instruction& instr, int i) { genAddressRegDst(instr); asmCode << "\tmov qword ptr [" << regScratchpadAddr << "+rax], " << regR[instr.src] << std::endl; @@ -632,7 +593,7 @@ namespace randomx { INST_HANDLE(FMUL_R) INST_HANDLE(FDIV_M) INST_HANDLE(FSQRT_R) - INST_HANDLE(COND_R) + INST_HANDLE(CBRANCH) INST_HANDLE(CFROUND) INST_HANDLE(ISTORE) INST_HANDLE(NOP) diff --git a/src/assembly_generator_x86.hpp b/src/assembly_generator_x86.hpp index 1c27364..f0cc89a 100644 --- a/src/assembly_generator_x86.hpp +++ b/src/assembly_generator_x86.hpp @@ -44,7 +44,6 @@ namespace randomx { void genAddressRegDst(Instruction&, int); int32_t genAddressImm(Instruction&); int getConditionRegister(); - void handleCondition(Instruction&, int); void generateCode(Instruction&, int); void traceint(Instruction&); void traceflt(Instruction&); @@ -76,7 +75,7 @@ namespace randomx { void h_FMUL_R(Instruction&, int); void h_FDIV_M(Instruction&, int); void h_FSQRT_R(Instruction&, int); - void h_COND_R(Instruction&, int); + void h_CBRANCH(Instruction&, int); void h_CFROUND(Instruction&, int); void h_ISTORE(Instruction&, int); void h_NOP(Instruction&, int); diff --git a/src/common.hpp b/src/common.hpp index ce6755a..09811e5 100644 --- a/src/common.hpp +++ b/src/common.hpp @@ -40,13 +40,14 @@ namespace randomx { static_assert(RANDOMX_SCRATCHPAD_L2 >= RANDOMX_SCRATCHPAD_L1, "RANDOMX_SCRATCHPAD_L2 must be greater than or equal to RANDOMX_SCRATCHPAD_L1."); static_assert((RANDOMX_SCRATCHPAD_L1 & (RANDOMX_SCRATCHPAD_L1 - 1)) == 0, "RANDOMX_SCRATCHPAD_L1 must be a power of 2."); static_assert(RANDOMX_CACHE_ACCESSES > 1, "RANDOMX_CACHE_ACCESSES must be greater than 1"); + static_assert(RANDOMX_JUMP_BITS >= 1 && RANDOMX_JUMP_BITS <= 16, "RANDOMX_JUMP_BITS must be an integer in the range 1-16."); constexpr int wtSum = RANDOMX_FREQ_IADD_RS + RANDOMX_FREQ_IADD_M + RANDOMX_FREQ_ISUB_R + \ RANDOMX_FREQ_ISUB_M + RANDOMX_FREQ_IMUL_R + RANDOMX_FREQ_IMUL_M + RANDOMX_FREQ_IMULH_R + \ RANDOMX_FREQ_IMULH_M + RANDOMX_FREQ_ISMULH_R + RANDOMX_FREQ_ISMULH_M + RANDOMX_FREQ_IMUL_RCP + \ RANDOMX_FREQ_INEG_R + RANDOMX_FREQ_IXOR_R + RANDOMX_FREQ_IXOR_M + RANDOMX_FREQ_IROR_R + RANDOMX_FREQ_ISWAP_R + \ RANDOMX_FREQ_FSWAP_R + RANDOMX_FREQ_FADD_R + RANDOMX_FREQ_FADD_M + RANDOMX_FREQ_FSUB_R + RANDOMX_FREQ_FSUB_M + \ - RANDOMX_FREQ_FSCAL_R + RANDOMX_FREQ_FMUL_R + RANDOMX_FREQ_FDIV_M + RANDOMX_FREQ_FSQRT_R + RANDOMX_FREQ_COND_R + \ + RANDOMX_FREQ_FSCAL_R + RANDOMX_FREQ_FMUL_R + RANDOMX_FREQ_FDIV_M + RANDOMX_FREQ_FSQRT_R + RANDOMX_FREQ_CBRANCH + \ RANDOMX_FREQ_CFROUND + RANDOMX_FREQ_ISTORE + RANDOMX_FREQ_NOP; static_assert(wtSum == 256, "Sum of instruction frequencies must be 256."); @@ -59,6 +60,8 @@ namespace randomx { constexpr uint32_t CacheSize = RANDOMX_ARGON_MEMORY * ArgonBlockSize; constexpr uint64_t DatasetSize = RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE; constexpr uint32_t DatasetExtraItems = RANDOMX_DATASET_EXTRA_SIZE / RANDOMX_DATASET_ITEM_SIZE; + constexpr uint32_t ConditionMask = ((1 << RANDOMX_JUMP_BITS) - 1); + constexpr int StoreL3Condition = 14; #ifdef TRACE constexpr bool trace = true; @@ -76,8 +79,6 @@ namespace randomx { #endif #endif -#define RANDOMX_JUMP (RANDOMX_JUMP_BITS > 0) - using addr_t = uint32_t; using int_reg_t = uint64_t; diff --git a/src/configuration.h b/src/configuration.h index d155e4e..99385cc 100644 --- a/src/configuration.h +++ b/src/configuration.h @@ -64,7 +64,7 @@ along with RandomX. If not, see. //Scratchpad L1 size in bytes. Must be a power of two and less than or equal to RANDOMX_SCRATCHPAD_L2. #define RANDOMX_SCRATCHPAD_L1 (16 * 1024) -//How many register bits must be zero for a jump condition to be triggered. If set to 0, jumps are disabled. +//How many register bits must be zero for CBRANCH instruction to jump. Must be an integer in the range 1-16. #define RANDOMX_JUMP_BITS 7 /* @@ -100,8 +100,9 @@ Total sum of frequencies must be 256 #define RANDOMX_FREQ_FDIV_M 4 #define RANDOMX_FREQ_FSQRT_R 6 -#define RANDOMX_FREQ_COND_R 8 +#define RANDOMX_FREQ_CBRANCH 8 #define RANDOMX_FREQ_CFROUND 1 + #define RANDOMX_FREQ_ISTORE 16 #define RANDOMX_FREQ_NOP 0 diff --git a/src/instruction.cpp b/src/instruction.cpp index e1dc557..5adf09d 100644 --- a/src/instruction.cpp +++ b/src/instruction.cpp @@ -33,7 +33,7 @@ namespace randomx { } void Instruction::genAddressRegDst(std::ostream& os) const { - if (getModCond()) + if (getModCond() < StoreL3Condition) os << (getModMem() ? "L1" : "L2"); else os << "L3"; @@ -49,7 +49,7 @@ namespace randomx { if(dst == RegisterNeedsDisplacement) { os << ", " << (int32_t)getImm32(); } - os << ", LSH " << (int)getModMem() << std::endl; + os << ", SHFT " << (int)getModShift() << std::endl; } void Instruction::h_IADD_M(std::ostream& os) const { @@ -278,8 +278,8 @@ namespace randomx { } } - void Instruction::h_COND_R(std::ostream& os) const { - os << "r" << (int)dst << ", " << condition(getModCond()) << "(r" << (int)src << ", " << (int32_t)getImm32() << "), LSH " << (int)(getModShift()) << std::endl; + void Instruction::h_CBRANCH(std::ostream& os) const { + os << (int32_t)getImm32() << ", COND " << (int)(getModCond()) << std::endl; } void Instruction::h_ISTORE(std::ostream& os) const { @@ -321,7 +321,7 @@ namespace randomx { INST_NAME(FMUL_R) INST_NAME(FDIV_M) INST_NAME(FSQRT_R) - INST_NAME(COND_R) + INST_NAME(CBRANCH) INST_NAME(CFROUND) INST_NAME(ISTORE) INST_NAME(NOP) @@ -354,7 +354,7 @@ namespace randomx { INST_HANDLE(FMUL_R) INST_HANDLE(FDIV_M) INST_HANDLE(FSQRT_R) - INST_HANDLE(COND_R) + INST_HANDLE(CBRANCH) INST_HANDLE(CFROUND) INST_HANDLE(ISTORE) INST_HANDLE(NOP) diff --git a/src/instruction.hpp b/src/instruction.hpp index f6dbc3b..8b1e6ec 100644 --- a/src/instruction.hpp +++ b/src/instruction.hpp @@ -57,7 +57,7 @@ namespace randomx { constexpr int FMUL_R = 23; constexpr int FDIV_M = 24; constexpr int FSQRT_R = 25; - constexpr int COND_R = 26; + constexpr int CBRANCH = 26; constexpr int CFROUND = 27; constexpr int ISTORE = 28; constexpr int NOP = 29; @@ -81,11 +81,11 @@ namespace randomx { int getModMem() const { return mod % 4; //bits 0-1 } - int getModCond() const { - return (mod >> 2) % 8; //bits 2-4 - } int getModShift() const { - return mod >> 5; //bits 5-7 + return (mod >> 2) % 4; //bits 2-3 + } + int getModCond() const { + return mod >> 4; //bits 4-7 } void setMod(uint8_t val) { mod = val; @@ -129,7 +129,7 @@ namespace randomx { void h_FMUL_R(std::ostream&) const; void h_FDIV_M(std::ostream&) const; void h_FSQRT_R(std::ostream&) const; - void h_COND_R(std::ostream&) const; + void h_CBRANCH(std::ostream&) const; void h_CFROUND(std::ostream&) const; void h_ISTORE(std::ostream&) const; void h_NOP(std::ostream&) const; diff --git a/src/jit_compiler_x86.cpp b/src/jit_compiler_x86.cpp index 793be0e..10b1059 100644 --- a/src/jit_compiler_x86.cpp +++ b/src/jit_compiler_x86.cpp @@ -297,12 +297,10 @@ namespace randomx { } void JitCompilerX86::generateProgramPrologue(Program& prog, ProgramConfiguration& pcfg) { -#if RANDOMX_JUMP instructionOffsets.clear(); for (unsigned i = 0; i < 8; ++i) { registerUsage[i] = -1; } -#endif codePos = prologueSize; memcpy(code + codePos - 48, &pcfg.eMask, sizeof(pcfg.eMask)); emit(REX_XOR_RAX_R64); @@ -334,9 +332,7 @@ namespace randomx { } void JitCompilerX86::generateCode(Instruction& instr, int i) { -#if RANDOMX_JUMP instructionOffsets.push_back(codePos); -#endif auto generator = engine[instr.opcode]; (this->*generator)(instr, i); } @@ -457,7 +453,7 @@ namespace randomx { } emit32(instr.getImm32()); emitByte(AND_EAX_I); - if (instr.getModCond()) { + if (instr.getModCond() < StoreL3Condition) { int32_t maskL1 = align16 ? ScratchpadL1Mask16 : ScratchpadL1Mask; int32_t maskL2 = align16 ? ScratchpadL2Mask16 : ScratchpadL2Mask; emit32(instr.getModMem() ? maskL1 : maskL2); @@ -478,7 +474,7 @@ namespace randomx { emitByte(0xac); else emitByte(0x04 + 8 * instr.dst); - genSIB(instr.getModMem(), instr.src, instr.dst); + genSIB(instr.getModShift(), instr.src, instr.dst); if (instr.dst == RegisterNeedsDisplacement) emit32(instr.getImm32()); } @@ -774,56 +770,10 @@ namespace randomx { emit(AND_OR_MOV_LDMXCSR); } - static inline uint8_t jumpCondition(Instruction& instr, bool invert = false) { - switch (instr.getModCond() ^ invert) - { - case 0: - return 0x76; //jbe - case 1: - return 0x77; //ja - case 2: - return 0x78; //js - case 3: - return 0x79; //jns - case 4: - return 0x70; //jo - case 5: - return 0x71; //jno - case 6: - return 0x7c; //jl - case 7: - return 0x7d; //jge - } - } - - static inline uint8_t condition(Instruction& instr) { - switch (instr.getModCond()) - { - case 0: - return 0x96; //setbe - case 1: - return 0x97; //seta - case 2: - return 0x98; //sets - case 3: - return 0x99; //setns - case 4: - return 0x90; //seto - case 5: - return 0x91; //setno - case 6: - return 0x9c; //setl - case 7: - return 0x9d; //setge - default: - UNREACHABLE; - } - } - int JitCompilerX86::getConditionRegister() { int min = INT_MAX; int minIndex; - for (unsigned i = 0; i < 8; ++i) { + for (unsigned i = 0; i < RegistersCount; ++i) { if (registerUsage[i] < min) { min = registerUsage[i]; minIndex = i; @@ -832,40 +782,23 @@ namespace randomx { return minIndex; } - void JitCompilerX86::handleCondition(Instruction& instr, int i) { - const int shift = instr.getModShift(); - const int conditionMask = ((1 << RANDOMX_JUMP_BITS) - 1) << shift; + void JitCompilerX86::h_CBRANCH(Instruction& instr, int i) { int reg = getConditionRegister(); int target = registerUsage[reg] + 1; + int shift = instr.getModCond(); emit(REX_ADD_I); emitByte(0xc0 + reg); - emit32(1 << shift); + emit32(instr.getImm32() | (1 << shift)); emit(REX_TEST); emitByte(0xc0 + reg); - emit32(conditionMask); + emit32(ConditionMask << shift); emit(JZ); emit32(instructionOffsets[target] - (codePos + 4)); - for (unsigned j = 0; j < 8; ++j) { //mark all registers as used + for (unsigned j = 0; j < RegistersCount; ++j) { //mark all registers as used registerUsage[j] = i; } } - void JitCompilerX86::h_COND_R(Instruction& instr, int i) { -#if RANDOMX_JUMP - handleCondition(instr, i); -#endif - emit(XOR_ECX_ECX); - emit(REX_CMP_R32I); - emitByte(0xf8 + instr.src); - emit32(instr.getImm32()); - emitByte(0x0f); - emitByte(condition(instr)); - emitByte(0xc1); - emit(REX_ADD_RM); - emitByte(0xc1 + 8 * instr.dst); - - } - void JitCompilerX86::h_ISTORE(Instruction& instr, int i) { genAddressRegDst(instr); emit(REX_MOV_MR); @@ -907,7 +840,7 @@ namespace randomx { INST_HANDLE(FMUL_R) INST_HANDLE(FDIV_M) INST_HANDLE(FSQRT_R) - INST_HANDLE(COND_R) + INST_HANDLE(CBRANCH) INST_HANDLE(CFROUND) INST_HANDLE(ISTORE) INST_HANDLE(NOP) diff --git a/src/jit_compiler_x86.hpp b/src/jit_compiler_x86.hpp index 64e7016..98f1cb4 100644 --- a/src/jit_compiler_x86.hpp +++ b/src/jit_compiler_x86.hpp @@ -70,8 +70,6 @@ namespace randomx { void genAddressImm(Instruction&); void genSIB(int scale, int index, int base); - void handleCondition(Instruction&, int); - void generateCode(Instruction&, int); void generateSuperscalarCode(Instruction &, std::vector &); @@ -136,7 +134,7 @@ namespace randomx { void h_FMUL_R(Instruction&, int); void h_FDIV_M(Instruction&, int); void h_FSQRT_R(Instruction&, int); - void h_COND_R(Instruction&, int); + void h_CBRANCH(Instruction&, int); void h_CFROUND(Instruction&, int); void h_ISTORE(Instruction&, int); void h_NOP(Instruction&, int); diff --git a/src/tests/benchmark.cpp b/src/tests/benchmark.cpp index e5014db..cedd3b0 100644 --- a/src/tests/benchmark.cpp +++ b/src/tests/benchmark.cpp @@ -229,7 +229,7 @@ int main(int argc, char** argv) { std::cout << "Calculated result: "; result.print(std::cout); if (noncesCount == 1000 && seedValue == 0) - std::cout << "Reference result: 89336a85bf6d1e83eb20fbc92170705ded9b42285b30178ed8e855d65c4c4b69" << std::endl; + std::cout << "Reference result: 804fed4a3dc4ed12917a210aad295925544e688e28549d7178eb27f412476a10" << std::endl; if (!miningMode) { std::cout << "Performance: " << 1000 * elapsed / noncesCount << " ms per hash" << std::endl; } diff --git a/src/vm_interpreted.cpp b/src/vm_interpreted.cpp index 2f69855..fbb96ad 100644 --- a/src/vm_interpreted.cpp +++ b/src/vm_interpreted.cpp @@ -180,16 +180,11 @@ namespace randomx { *ibc.fdst = _mm_sqrt_pd(*ibc.fdst); } break; - case InstructionType::COND_R: { -#if RANDOMX_JUMP - *ibc.creg += (1 << ibc.shift); - const uint64_t conditionMask = ((1ULL << RANDOMX_JUMP_BITS) - 1) << ibc.shift; - if ((*ibc.creg & conditionMask) == 0) { + case InstructionType::CBRANCH: { + *ibc.isrc += ibc.imm; + if ((*ibc.isrc & ibc.memMask) == 0) { pc = ibc.target; - break; } -#endif - *ibc.idst += condition(ibc.condition, *ibc.isrc, ibc.imm) ? 1 : 0; } break; case InstructionType::CFROUND: { @@ -308,12 +303,12 @@ namespace randomx { ibc.idst = &r[dst]; if (dst != RegisterNeedsDisplacement) { ibc.isrc = &r[src]; - ibc.shift = instr.getModMem(); + ibc.shift = instr.getModShift(); ibc.imm = 0; } else { ibc.isrc = &r[src]; - ibc.shift = instr.getModMem(); + ibc.shift = instr.getModShift(); ibc.imm = signExtend2sCompl(instr.getImm32()); } registerUsage[dst] = i; @@ -626,19 +621,16 @@ namespace randomx { ibc.fdst = &e[dst]; } break; - CASE_REP(COND_R) { - auto dst = instr.dst % RegistersCount; - auto src = instr.src % RegistersCount; - ibc.type = InstructionType::COND_R; - ibc.idst = &r[dst]; - ibc.isrc = &r[src]; - ibc.condition = instr.getModCond(); - ibc.imm = instr.getImm32(); + CASE_REP(CBRANCH) { + ibc.type = InstructionType::CBRANCH; //jump condition int reg = getConditionRegister(registerUsage); + ibc.isrc = &r[reg]; ibc.target = registerUsage[reg]; - ibc.shift = instr.getModShift(); - ibc.creg = &r[reg]; + int shift = instr.getModCond(); + const uint64_t conditionMask = ConditionMask << instr.getModCond(); + ibc.imm = signExtend2sCompl(instr.getImm32()) | (1ULL << shift); + ibc.memMask = ConditionMask << shift; for (unsigned j = 0; j < RegistersCount; ++j) { //mark all registers as used registerUsage[j] = i; } @@ -658,7 +650,7 @@ namespace randomx { ibc.idst = &r[dst]; ibc.isrc = &r[src]; ibc.imm = signExtend2sCompl(instr.getImm32()); - if (instr.getModCond()) + if (instr.getModCond() < StoreL3Condition) ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); else ibc.memMask = ScratchpadL3Mask; diff --git a/src/vm_interpreted.hpp b/src/vm_interpreted.hpp index e3a3eb4..eafb216 100644 --- a/src/vm_interpreted.hpp +++ b/src/vm_interpreted.hpp @@ -41,12 +41,12 @@ namespace randomx { uint64_t imm; int64_t simm; }; - int_reg_t* creg; - uint16_t condition; - int16_t target; - uint32_t memMask; uint16_t type; - uint16_t shift; + union { + int16_t target; + uint16_t shift; + }; + uint32_t memMask; }; template