diff --git a/src/AssemblyGeneratorX86.cpp b/src/AssemblyGeneratorX86.cpp index a916372..0c75461 100644 --- a/src/AssemblyGeneratorX86.cpp +++ b/src/AssemblyGeneratorX86.cpp @@ -76,7 +76,7 @@ namespace RandomX { asmCode << "xor " << regR[instr.dst] << ", " << regR[instr.src] << std::endl; break; case RandomX::SuperscalarInstructionType::IADD_RS: - asmCode << "lea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.mod % 4)) << "]" << std::endl; + asmCode << "lea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.getModShift2())) << "]" << std::endl; break; case RandomX::SuperscalarInstructionType::IMUL_R: asmCode << "imul " << regR[instr.dst] << ", " << regR[instr.src] << std::endl; @@ -185,7 +185,7 @@ namespace RandomX { asmCode << regR[instr.dst] << " ^= " << regR[instr.src] << ";" << std::endl; break; case RandomX::SuperscalarInstructionType::IADD_RS: - asmCode << regR[instr.dst] << " += " << regR[instr.src] << "*" << (1 << (instr.mod % 4)) << ";" << std::endl; + asmCode << regR[instr.dst] << " += " << regR[instr.src] << "*" << (1 << (instr.getModShift2())) << ";" << std::endl; break; case RandomX::SuperscalarInstructionType::IMUL_R: asmCode << regR[instr.dst] << " *= " << regR[instr.src] << ";" << std::endl; @@ -258,12 +258,19 @@ namespace RandomX { void AssemblyGeneratorX86::genAddressReg(Instruction& instr, const char* reg = "eax") { asmCode << "\tlea " << reg << ", [" << regR32[instr.src] << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl; - asmCode << "\tand " << reg << ", " << ((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask) << std::endl; + asmCode << "\tand " << reg << ", " << ((instr.getModMem()) ? ScratchpadL1Mask : ScratchpadL2Mask) << std::endl; } void AssemblyGeneratorX86::genAddressRegDst(Instruction& instr, int maskAlign = 8) { asmCode << "\tlea eax, [" << regR32[instr.dst] << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl; - asmCode << "\tand eax" << ", " << ((instr.mod % 4) ? (ScratchpadL1Mask & (-maskAlign)) : (ScratchpadL2Mask & (-maskAlign))) << std::endl; + int mask; + if (instr.getModCond()) { + mask = instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask; + } + else { + mask = ScratchpadL3Mask; + } + asmCode << "\tand eax" << ", " << (mask & (-maskAlign)) << std::endl; } int32_t AssemblyGeneratorX86::genAddressImm(Instruction& instr) { @@ -274,9 +281,9 @@ namespace RandomX { void AssemblyGeneratorX86::h_IADD_RS(Instruction& instr, int i) { registerUsage[instr.dst] = i; if(instr.dst == RegisterNeedsDisplacement) - asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.mod % 4)) << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl; + asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.getModShift2())) << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl; else - asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.mod % 4)) << "]" << std::endl; + asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.getModShift2())) << "]" << std::endl; traceint(instr); } @@ -607,7 +614,7 @@ namespace RandomX { } static inline const char* condition(Instruction& instr) { - switch ((instr.mod >> 2) & 7) + switch (instr.getModCond()) { case 0: return "be"; @@ -631,7 +638,7 @@ namespace RandomX { } void AssemblyGeneratorX86::handleCondition(Instruction& instr, int i) { - const int shift = (instr.mod >> 5); + const int shift = instr.getModShift3(); const int conditionMask = ((1 << RANDOMX_CONDITION_BITS) - 1) << shift; int reg = getConditionRegister(); int target = registerUsage[reg] + 1; @@ -647,7 +654,7 @@ namespace RandomX { //4 uOPs void AssemblyGeneratorX86::h_COND_R(Instruction& instr, int i) { handleCondition(instr, i); - asmCode << "\txor rcx, rcx" << std::endl; + asmCode << "\txor ecx, ecx" << std::endl; asmCode << "\tcmp " << regR32[instr.src] << ", " << (int32_t)instr.getImm32() << std::endl; asmCode << "\tset" << condition(instr) << " cl" << std::endl; asmCode << "\tadd " << regR[instr.dst] << ", rcx" << std::endl; @@ -657,7 +664,7 @@ namespace RandomX { //6 uOPs void AssemblyGeneratorX86::h_COND_M(Instruction& instr, int i) { handleCondition(instr, i); - asmCode << "\txor rcx, rcx" << std::endl; + asmCode << "\txor ecx, ecx" << std::endl; genAddressReg(instr); asmCode << "\tcmp dword ptr [rsi+rax], " << (int32_t)instr.getImm32() << std::endl; asmCode << "\tset" << condition(instr) << " cl" << std::endl; diff --git a/src/Instruction.cpp b/src/Instruction.cpp index e4aa772..528798d 100644 --- a/src/Instruction.cpp +++ b/src/Instruction.cpp @@ -33,7 +33,11 @@ namespace RandomX { } void Instruction::genAddressRegDst(std::ostream& os) const { - os << ((mod % 4) ? "L1" : "L2") << "[r" << (int)dst << std::showpos << (int32_t)getImm32() << std::noshowpos << "]"; + if (getModCond()) + os << ((mod % 4) ? "L1" : "L2"); + else + os << "L3"; + os << "[r" << (int)dst << std::showpos << (int32_t)getImm32() << std::noshowpos << "]"; } void Instruction::genAddressImm(std::ostream& os) const { diff --git a/src/Instruction.hpp b/src/Instruction.hpp index 9baf8ce..e6b9d2b 100644 --- a/src/Instruction.hpp +++ b/src/Instruction.hpp @@ -74,7 +74,6 @@ namespace RandomX { uint8_t opcode; uint8_t dst; uint8_t src; - uint8_t mod; uint32_t getImm32() const { return load32(&imm32); } @@ -88,7 +87,23 @@ namespace RandomX { i.print(os); return os; } + int getModMem() const { + return mod % 4; + } + int getModCond() const { + return (mod >> 2) & 7; + } + int getModShift3() const { + return mod >> 5; + } + int getModShift2() const { + return mod >> 6; + } + void setMod(uint8_t val) { + mod = val; + } private: + uint8_t mod; uint32_t imm32; void print(std::ostream&) const; static const char* names[256]; diff --git a/src/InterpretedVirtualMachine.cpp b/src/InterpretedVirtualMachine.cpp index 132a2c9..827f2e6 100644 --- a/src/InterpretedVirtualMachine.cpp +++ b/src/InterpretedVirtualMachine.cpp @@ -45,6 +45,8 @@ constexpr bool fpuCheck = false; namespace RandomX { + static int_reg_t Zero = 0; + template void InterpretedVirtualMachine::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]) { mem.ds = ds; @@ -108,6 +110,12 @@ namespace RandomX { return std::fpclassify(x) == FP_SUBNORMAL; } + template + FORCE_INLINE void* InterpretedVirtualMachine::getScratchpadAddress(InstructionByteCode& ibc) { + uint32_t addr = (*ibc.isrc + ibc.imm) & ibc.memMask; + return scratchpad + addr; + } + template FORCE_INLINE void InterpretedVirtualMachine::executeBytecode(int& ic, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) { auto& ibc = byteCode[ic]; @@ -120,7 +128,7 @@ namespace RandomX { } break; case InstructionType::IADD_M: { - *ibc.idst += load64(scratchpad + (*ibc.isrc & ibc.memMask)); + *ibc.idst += load64(getScratchpadAddress(ibc)); } break; case InstructionType::IADD_RC: { @@ -132,7 +140,7 @@ namespace RandomX { } break; case InstructionType::ISUB_M: { - *ibc.idst -= load64(scratchpad + (*ibc.isrc & ibc.memMask)); + *ibc.idst -= load64(getScratchpadAddress(ibc)); } break; case InstructionType::IMUL_9C: { @@ -144,7 +152,7 @@ namespace RandomX { } break; case InstructionType::IMUL_M: { - *ibc.idst *= load64(scratchpad + (*ibc.isrc & ibc.memMask)); + *ibc.idst *= load64(getScratchpadAddress(ibc)); } break; case InstructionType::IMULH_R: { @@ -152,7 +160,7 @@ namespace RandomX { } break; case InstructionType::IMULH_M: { - *ibc.idst = mulh(*ibc.idst, load64(scratchpad + (*ibc.isrc & ibc.memMask))); + *ibc.idst = mulh(*ibc.idst, load64(getScratchpadAddress(ibc))); } break; case InstructionType::ISMULH_R: { @@ -160,7 +168,7 @@ namespace RandomX { } break; case InstructionType::ISMULH_M: { - *ibc.idst = smulh(unsigned64ToSigned2sCompl(*ibc.idst), unsigned64ToSigned2sCompl(load64(scratchpad + (*ibc.isrc & ibc.memMask)))); + *ibc.idst = smulh(unsigned64ToSigned2sCompl(*ibc.idst), unsigned64ToSigned2sCompl(load64(getScratchpadAddress(ibc)))); } break; case InstructionType::INEG_R: { @@ -172,7 +180,7 @@ namespace RandomX { } break; case InstructionType::IXOR_M: { - *ibc.idst ^= load64(scratchpad + (*ibc.isrc & ibc.memMask)); + *ibc.idst ^= load64(getScratchpadAddress(ibc)); } break; case InstructionType::IROR_R: { @@ -198,7 +206,7 @@ namespace RandomX { } break; case InstructionType::FADD_M: { - __m128d fsrc = load_cvt_i32x2(scratchpad + (*ibc.isrc & ibc.memMask)); + __m128d fsrc = load_cvt_i32x2(getScratchpadAddress(ibc)); *ibc.fdst = _mm_add_pd(*ibc.fdst, fsrc); } break; @@ -207,7 +215,7 @@ namespace RandomX { } break; case InstructionType::FSUB_M: { - __m128d fsrc = load_cvt_i32x2(scratchpad + (*ibc.isrc & ibc.memMask)); + __m128d fsrc = load_cvt_i32x2(getScratchpadAddress(ibc)); *ibc.fdst = _mm_sub_pd(*ibc.fdst, fsrc); } break; @@ -221,7 +229,7 @@ namespace RandomX { } break; case InstructionType::FDIV_M: { - __m128d fsrc = ieee_set_exponent<-240>(load_cvt_i32x2(scratchpad + (*ibc.isrc & ibc.memMask))); + __m128d fsrc = ieee_set_exponent<-240>(load_cvt_i32x2(getScratchpadAddress(ibc))); *ibc.fdst = _mm_div_pd(*ibc.fdst, fsrc); } break; @@ -262,7 +270,7 @@ namespace RandomX { count_JUMP_not_taken++; #endif #endif - *ibc.idst += condition(ibc.condition, load64(scratchpad + (*ibc.isrc & ibc.memMask)), ibc.imm) ? 1 : 0; + *ibc.idst += condition(ibc.condition, load64(getScratchpadAddress(ibc)), ibc.imm) ? 1 : 0; } break; case InstructionType::CFROUND: { @@ -270,7 +278,7 @@ namespace RandomX { } break; case InstructionType::ISTORE: { - store64(scratchpad + (*ibc.idst & ibc.memMask), *ibc.isrc); + store64(scratchpad + ((*ibc.idst + ibc.imm) & ibc.memMask), *ibc.isrc); } break; case InstructionType::NOP: { @@ -486,7 +494,7 @@ namespace RandomX { r[instr.dst] ^= r[instr.src]; break; case RandomX::SuperscalarInstructionType::IADD_RS: - r[instr.dst] += r[instr.src] << (instr.mod % 4); + r[instr.dst] += r[instr.src] << instr.getModShift2(); break; case RandomX::SuperscalarInstructionType::IMUL_R: r[instr.dst] *= r[instr.src]; @@ -585,14 +593,14 @@ namespace RandomX { auto src = instr.src % RegistersCount; ibc.type = InstructionType::IADD_RS; ibc.idst = &r[dst]; - if (dst != 5) { + if (dst != RegisterNeedsDisplacement) { ibc.isrc = &r[src]; - ibc.shift = instr.mod % 4; + ibc.shift = instr.getModShift2(); ibc.imm = 0; } else { ibc.isrc = &r[src]; - ibc.shift = instr.mod % 4; + ibc.shift = instr.getModShift2(); ibc.imm = signExtend2sCompl(instr.getImm32()); } registerUsage[instr.dst] = i; @@ -603,13 +611,13 @@ namespace RandomX { auto src = instr.src % RegistersCount; ibc.type = InstructionType::IADD_M; ibc.idst = &r[dst]; + ibc.imm = signExtend2sCompl(instr.getImm32()); if (instr.src != instr.dst) { ibc.isrc = &r[src]; - ibc.memMask = ((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask); + ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); } else { - ibc.imm = instr.getImm32(); - ibc.isrc = &ibc.imm; + ibc.isrc = &Zero; ibc.memMask = ScratchpadL3Mask; } registerUsage[instr.dst] = i; @@ -645,13 +653,13 @@ namespace RandomX { auto src = instr.src % RegistersCount; ibc.type = InstructionType::ISUB_M; ibc.idst = &r[dst]; + ibc.imm = signExtend2sCompl(instr.getImm32()); if (instr.src != instr.dst) { ibc.isrc = &r[src]; - ibc.memMask = ((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask); + ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); } else { - ibc.imm = instr.getImm32(); - ibc.isrc = &ibc.imm; + ibc.isrc = &Zero; ibc.memMask = ScratchpadL3Mask; } registerUsage[instr.dst] = i; @@ -685,13 +693,13 @@ namespace RandomX { auto src = instr.src % RegistersCount; ibc.type = InstructionType::IMUL_M; ibc.idst = &r[dst]; + ibc.imm = signExtend2sCompl(instr.getImm32()); if (instr.src != instr.dst) { ibc.isrc = &r[src]; - ibc.memMask = ((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask); + ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); } else { - ibc.imm = instr.getImm32(); - ibc.isrc = &ibc.imm; + ibc.isrc = &Zero; ibc.memMask = ScratchpadL3Mask; } registerUsage[instr.dst] = i; @@ -711,13 +719,13 @@ namespace RandomX { auto src = instr.src % RegistersCount; ibc.type = InstructionType::IMULH_M; ibc.idst = &r[dst]; + ibc.imm = signExtend2sCompl(instr.getImm32()); if (instr.src != instr.dst) { ibc.isrc = &r[src]; - ibc.memMask = ((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask); + ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); } else { - ibc.imm = instr.getImm32(); - ibc.isrc = &ibc.imm; + ibc.isrc = &Zero; ibc.memMask = ScratchpadL3Mask; } registerUsage[instr.dst] = i; @@ -737,13 +745,13 @@ namespace RandomX { auto src = instr.src % RegistersCount; ibc.type = InstructionType::ISMULH_M; ibc.idst = &r[dst]; + ibc.imm = signExtend2sCompl(instr.getImm32()); if (instr.src != instr.dst) { ibc.isrc = &r[src]; - ibc.memMask = ((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask); + ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); } else { - ibc.imm = instr.getImm32(); - ibc.isrc = &ibc.imm; + ibc.isrc = &Zero; ibc.memMask = ScratchpadL3Mask; } registerUsage[instr.dst] = i; @@ -791,13 +799,13 @@ namespace RandomX { auto src = instr.src % RegistersCount; ibc.type = InstructionType::IXOR_M; ibc.idst = &r[dst]; + ibc.imm = signExtend2sCompl(instr.getImm32()); if (instr.src != instr.dst) { ibc.isrc = &r[src]; - ibc.memMask = ((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask); + ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); } else { - ibc.imm = instr.getImm32(); - ibc.isrc = &ibc.imm; + ibc.isrc = &Zero; ibc.memMask = ScratchpadL3Mask; } registerUsage[instr.dst] = i; @@ -871,7 +879,8 @@ namespace RandomX { ibc.type = InstructionType::FADD_M; ibc.fdst = &f[dst]; ibc.isrc = &r[src]; - ibc.memMask = ((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask); + ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); + ibc.imm = signExtend2sCompl(instr.getImm32()); } break; CASE_REP(FSUB_R) { @@ -888,7 +897,8 @@ namespace RandomX { ibc.type = InstructionType::FSUB_M; ibc.fdst = &f[dst]; ibc.isrc = &r[src]; - ibc.memMask = ((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask); + ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); + ibc.imm = signExtend2sCompl(instr.getImm32()); } break; CASE_REP(FSCAL_R) { @@ -911,7 +921,8 @@ namespace RandomX { ibc.type = InstructionType::FDIV_M; ibc.fdst = &e[dst]; ibc.isrc = &r[src]; - ibc.memMask = ((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask); + ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); + ibc.imm = signExtend2sCompl(instr.getImm32()); } break; CASE_REP(FSQRT_R) { @@ -926,12 +937,12 @@ namespace RandomX { ibc.type = InstructionType::COND_R; ibc.idst = &r[dst]; ibc.isrc = &r[src]; - ibc.condition = (instr.mod >> 2) & 7; + ibc.condition = instr.getModCond(); ibc.imm = instr.getImm32(); //jump condition int reg = getConditionRegister(registerUsage); ibc.target = registerUsage[reg]; - ibc.shift = (instr.mod >> 5); + ibc.shift = instr.getModShift3(); ibc.creg = &r[reg]; for (unsigned j = 0; j < 8; ++j) { //mark all registers as used registerUsage[j] = i; @@ -944,13 +955,13 @@ namespace RandomX { ibc.type = InstructionType::COND_M; ibc.idst = &r[dst]; ibc.isrc = &r[src]; - ibc.condition = (instr.mod >> 2) & 7; + ibc.condition = instr.getModCond(); ibc.imm = instr.getImm32(); - ibc.memMask = ((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask); + ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); //jump condition int reg = getConditionRegister(registerUsage); ibc.target = registerUsage[reg]; - ibc.shift = (instr.mod >> 5); + ibc.shift = instr.getModShift3(); ibc.creg = &r[reg]; for (unsigned j = 0; j < 8; ++j) { //mark all registers as used registerUsage[j] = i; @@ -970,7 +981,11 @@ namespace RandomX { ibc.type = InstructionType::ISTORE; ibc.idst = &r[dst]; ibc.isrc = &r[src]; - ibc.memMask = ((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask); + ibc.imm = signExtend2sCompl(instr.getImm32()); + if (instr.getModCond()) + ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); + else + ibc.memMask = ScratchpadL3Mask; } break; CASE_REP(NOP) { diff --git a/src/InterpretedVirtualMachine.hpp b/src/InterpretedVirtualMachine.hpp index 3632112..1dcc441 100644 --- a/src/InterpretedVirtualMachine.hpp +++ b/src/InterpretedVirtualMachine.hpp @@ -132,5 +132,6 @@ namespace RandomX { void executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]); void executeBytecode(int& i, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]); void executeSuperscalar(uint32_t blockNumber, int_reg_t(&r)[8]); + void* getScratchpadAddress(InstructionByteCode& ibc); }; } \ No newline at end of file diff --git a/src/JitCompilerX86.cpp b/src/JitCompilerX86.cpp index 7d17ef2..0ad7350 100644 --- a/src/JitCompilerX86.cpp +++ b/src/JitCompilerX86.cpp @@ -182,7 +182,7 @@ namespace RandomX { static const uint8_t SQRTPD[] = { 0x66, 0x0f, 0x51 }; static const uint8_t AND_OR_MOV_LDMXCSR[] = { 0x25, 0x00, 0x60, 0x00, 0x00, 0x0D, 0xC0, 0x9F, 0x00, 0x00, 0x89, 0x44, 0x24, 0xF8, 0x0F, 0xAE, 0x54, 0x24, 0xF8 }; static const uint8_t ROL_RAX[] = { 0x48, 0xc1, 0xc0 }; - static const uint8_t XOR_RCX_RCX[] = { 0x48, 0x33, 0xC9 }; + static const uint8_t XOR_ECX_ECX[] = { 0x33, 0xC9 }; static const uint8_t REX_CMP_R32I[] = { 0x41, 0x81 }; static const uint8_t REX_CMP_M32I[] = { 0x81, 0x3c, 0x06 }; static const uint8_t MOVAPD[] = { 0x66, 0x0f, 0x29 }; @@ -202,6 +202,7 @@ namespace RandomX { static const uint8_t JZ[] = { 0x0f, 0x84 }; static const uint8_t RET = 0xc3; static const uint8_t LEA_32[] = { 0x67, 0x41, 0x8d }; + static const uint8_t MOVNTI[] = { 0x4c, 0x0f, 0xc3 }; static const uint8_t NOP1[] = { 0x90 }; static const uint8_t NOP2[] = { 0x66, 0x90 }; @@ -360,7 +361,7 @@ namespace RandomX { case RandomX::SuperscalarInstructionType::IADD_RS: emit(REX_LEA); emitByte(0x04 + 8 * instr.dst); - genSIB(instr.mod % 4, instr.src, instr.dst); + genSIB(instr.getModShift2(), instr.src, instr.dst); break; case RandomX::SuperscalarInstructionType::IMUL_R: emit(REX_IMUL_RR); @@ -445,7 +446,7 @@ namespace RandomX { emitByte(AND_EAX_I); else emit(AND_ECX_I); - emit32((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask); + emit32(instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); } void JitCompilerX86::genAddressRegDst(Instruction& instr, bool align16 = false) { @@ -456,9 +457,14 @@ namespace RandomX { } emit32(instr.getImm32()); emitByte(AND_EAX_I); - int32_t maskL1 = align16 ? ScratchpadL1Mask16 : ScratchpadL1Mask; - int32_t maskL2 = align16 ? ScratchpadL2Mask16 : ScratchpadL2Mask; - emit32((instr.mod % 4) ? maskL1 : maskL2); + if (instr.getModCond()) { + int32_t maskL1 = align16 ? ScratchpadL1Mask16 : ScratchpadL1Mask; + int32_t maskL2 = align16 ? ScratchpadL2Mask16 : ScratchpadL2Mask; + emit32(instr.getModMem() ? maskL1 : maskL2); + } + else { + emit32(ScratchpadL3Mask); + } } void JitCompilerX86::genAddressImm(Instruction& instr) { @@ -485,7 +491,7 @@ namespace RandomX { emitByte(0xac); else emitByte(0x04 + 8 * instr.dst); - genSIB(instr.mod % 4, instr.src, instr.dst); + genSIB(instr.getModShift2(), instr.src, instr.dst); if (instr.dst == RegisterNeedsDisplacement) emit32(instr.getImm32()); } @@ -880,7 +886,7 @@ namespace RandomX { } static inline uint8_t jumpCondition(Instruction& instr, bool invert = false) { - switch (((instr.mod >> 2) & 7) ^ invert) + switch (instr.getModCond() ^ invert) { case 0: return 0x76; //jbe @@ -902,7 +908,7 @@ namespace RandomX { } static inline uint8_t condition(Instruction& instr) { - switch ((instr.mod >> 2) & 7) + switch (instr.getModCond()) { case 0: return 0x96; //setbe @@ -938,7 +944,7 @@ namespace RandomX { } void JitCompilerX86::handleCondition(Instruction& instr, int i) { - const int shift = (instr.mod >> 5); + const int shift = instr.getModShift3(); const int conditionMask = ((1 << RANDOMX_CONDITION_BITS) - 1) << shift; int reg = getConditionRegister(); int target = registerUsage[reg] + 1; @@ -973,7 +979,7 @@ namespace RandomX { emit(NOP3); return; } - emit(XOR_RCX_RCX); + emit(XOR_ECX_ECX); emit(REX_CMP_R32I); emitByte(0xf8 + instr.src); emit32(instr.getImm32()); @@ -988,7 +994,7 @@ namespace RandomX { #ifdef RANDOMX_JUMP handleCondition(instr, i); #endif - emit(XOR_RCX_RCX); + emit(XOR_ECX_ECX); genAddressReg(instr); emit(REX_CMP_M32I); emit32(instr.getImm32()); @@ -1001,7 +1007,10 @@ namespace RandomX { void JitCompilerX86::h_ISTORE(Instruction& instr, int i) { genAddressRegDst(instr); - emit(REX_MOV_MR); + //if (instr.getModCond()) + emit(REX_MOV_MR); + //else + // emit(MOVNTI); emitByte(0x04 + 8 * instr.src); emitByte(0x06); } diff --git a/src/configuration.h b/src/configuration.h index 80cf0c4..a266cb9 100644 --- a/src/configuration.h +++ b/src/configuration.h @@ -92,8 +92,8 @@ Total sum of frequencies must be 256 #define RANDOMX_FREQ_ISMULH_M 1 #define RANDOMX_FREQ_IMUL_RCP 8 #define RANDOMX_FREQ_INEG_R 2 -#define RANDOMX_FREQ_IXOR_R 16 -#define RANDOMX_FREQ_IXOR_M 4 +#define RANDOMX_FREQ_IXOR_R 15 +#define RANDOMX_FREQ_IXOR_M 5 #define RANDOMX_FREQ_IROR_R 10 #define RANDOMX_FREQ_IROL_R 0 #define RANDOMX_FREQ_ISWAP_R 4 @@ -108,8 +108,8 @@ Total sum of frequencies must be 256 #define RANDOMX_FREQ_FDIV_M 4 #define RANDOMX_FREQ_FSQRT_R 6 -#define RANDOMX_FREQ_COND_R 7 -#define RANDOMX_FREQ_COND_M 1 +#define RANDOMX_FREQ_COND_R 8 +#define RANDOMX_FREQ_COND_M 0 #define RANDOMX_FREQ_CFROUND 1 #define RANDOMX_FREQ_ISTORE 16 diff --git a/src/instructionWeights.hpp b/src/instructionWeights.hpp index 8c9f566..a95a464 100644 --- a/src/instructionWeights.hpp +++ b/src/instructionWeights.hpp @@ -98,6 +98,7 @@ along with RandomX. If not, see. #define REPCASE32(x) REPCASE31(x) case __COUNTER__: #define REPCASE64(x) REPCASE32(x) REPCASE32(x) #define REPCASE128(x) REPCASE64(x) REPCASE64(x) +#define REPCASE256(x) REPCASE128(x) REPCASE128(x) #define REPCASENX(x,N) REPCASE##N(x) #define REPCASEN(x,N) REPCASENX(x,N) #define CASE_REP(x) REPCASEN(x, WT(x)) \ No newline at end of file diff --git a/src/main.cpp b/src/main.cpp index 2582d0f..6b8aa65 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -176,6 +176,7 @@ void mine(RandomX::VirtualMachine* vm, std::atomic& atomicNonce, Atomi store32(noncePtr, nonce); blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0); fillAes1Rx4((void*)hash, RANDOMX_SCRATCHPAD_L3, scratchpad); + //dump((char*)scratchpad, RANDOMX_SCRATCHPAD_L3, "spad-before.txt"); vm->resetRoundingMode(); vm->setScratchpad(scratchpad); for (int chain = 0; chain < RANDOMX_PROGRAM_COUNT - 1; ++chain) { @@ -194,7 +195,7 @@ void mine(RandomX::VirtualMachine* vm, std::atomic& atomicNonce, Atomi } }*/ vm->getResult(scratchpad, RANDOMX_SCRATCHPAD_L3, hash); - //dump((char*)scratchpad, RANDOMX_SCRATCHPAD_L3, "spad.txt"); + //dump((char*)scratchpad, RANDOMX_SCRATCHPAD_L3, "spad-after.txt"); result.xorWith(hash); if (RandomX::trace) { std::cout << "Nonce: " << nonce << " "; diff --git a/src/program.inc b/src/program.inc index f3a36b8..cced195 100644 --- a/src/program.inc +++ b/src/program.inc @@ -56,7 +56,7 @@ randomx_isn_14: sqrtpd xmm6, xmm6 randomx_isn_15: ; IADD_RS r6, r2, LSH 1 - lea r14, [r14+r10*2] + lea r14, [r14+r10*8] randomx_isn_16: ; FSUB_M f2, L1[r1-1890725713] lea eax, [r9d-1890725713] @@ -68,9 +68,9 @@ randomx_isn_17: mov ecx, r11d ror r12, cl randomx_isn_18: - ; ISTORE L1[r4+1297827817], r4 + ; ISTORE L3[r4+1297827817], r4 lea eax, [r12d+1297827817] - and eax, 16376 + and eax, 2097144 mov qword ptr [rsi+rax], r12 randomx_isn_19: ; FMUL_R e1, a2 @@ -145,7 +145,7 @@ randomx_isn_35: imul r14, 835132161 randomx_isn_36: ; IADD_RS r3, r4, LSH 2 - lea r11, [r11+r12*4] + lea r11, [r11+r12*2] randomx_isn_37: ; ISUB_M r6, L2[r4+1885029796] lea eax, [r12d+1885029796] @@ -173,13 +173,13 @@ randomx_isn_44: ; FADD_R f1, a2 addpd xmm1, xmm10 randomx_isn_45: - ; ISTORE L1[r0+1805562386], r5 + ; ISTORE L3[r0+1805562386], r5 lea eax, [r8d+1805562386] - and eax, 16376 + and eax, 2097144 mov qword ptr [rsi+rax], r13 randomx_isn_46: ; IADD_RS r0, r7, LSH 0 - lea r8, [r8+r15*1] + lea r8, [r8+r15*8] randomx_isn_47: ; IXOR_R r5, r2 xor r13, r10 @@ -227,7 +227,7 @@ randomx_isn_57: imul r13, r9 randomx_isn_58: ; IADD_RS r5, r1, -999103579, LSH 0 - lea r13, [r13+r9*1-999103579] + lea r13, [r13+r9*8-999103579] randomx_isn_59: ; FMUL_R e2, a2 mulpd xmm6, xmm10 @@ -237,7 +237,7 @@ randomx_isn_60: ror r10, cl randomx_isn_61: ; IADD_RS r0, r3, LSH 1 - lea r8, [r8+r11*2] + lea r8, [r8+r11*1] randomx_isn_62: ; FSQRT_R e3 sqrtpd xmm7, xmm7 @@ -260,7 +260,7 @@ randomx_isn_66: sub r12, 841292629 randomx_isn_67: ; IADD_RS r4, r6, LSH 2 - lea r12, [r12+r14*4] + lea r12, [r12+r14*1] randomx_isn_68: ; FSUB_M f3, L1[r4+613549729] lea eax, [r12d+613549729] @@ -315,9 +315,9 @@ randomx_isn_79: ; IADD_RS r3, r1, LSH 1 lea r11, [r11+r9*2] randomx_isn_80: - ; ISTORE L1[r2+1885666804], r4 + ; ISTORE L3[r2+1885666804], r4 lea eax, [r10d+1885666804] - and eax, 16376 + and eax, 2097144 mov qword ptr [rsi+rax], r12 randomx_isn_81: ; IMULH_R r3, r0 @@ -348,14 +348,12 @@ randomx_isn_88: ; IMUL_R r1, r3 imul r9, r11 randomx_isn_89: - ; COND_M r2, no(L1[r0-122257389], -122257389), LSH 6 + ; COND_R r2, no(r0, -122257389), LSH 6 add r8, 64 test r8, 8128 jz randomx_isn_75 xor rcx, rcx - lea eax, [r8d-122257389] - and eax, 16376 - cmp dword ptr [rsi+rax], -122257389 + cmp r8d, -122257389 setno cl add r10, rcx randomx_isn_90: @@ -429,7 +427,7 @@ randomx_isn_107: mov r14, rdx randomx_isn_108: ; IADD_RS r7, r0, LSH 1 - lea r15, [r15+r8*2] + lea r15, [r15+r8*4] randomx_isn_109: ; IMUL_R r6, r5 imul r14, r13 @@ -444,13 +442,13 @@ randomx_isn_111: addpd xmm2, xmm12 randomx_isn_112: ; IADD_RS r0, r3, LSH 0 - lea r8, [r8+r11*1] + lea r8, [r8+r11*2] randomx_isn_113: ; IADD_RS r3, r4, LSH 1 lea r11, [r11+r12*2] randomx_isn_114: ; IADD_RS r2, r4, LSH 2 - lea r10, [r10+r12*4] + lea r10, [r10+r12*8] randomx_isn_115: ; IMUL_M r7, L1[r2-106928748] lea eax, [r10d-106928748] @@ -464,7 +462,7 @@ randomx_isn_117: subpd xmm2, xmm10 randomx_isn_118: ; IADD_RS r2, r2, LSH 0 - lea r10, [r10+r10*1] + lea r10, [r10+r10*2] randomx_isn_119: ; ISUB_R r7, -342152774 sub r15, -342152774 @@ -473,7 +471,7 @@ randomx_isn_120: lea r12, [r12+r9*2] randomx_isn_121: ; IADD_RS r4, r7, LSH 2 - lea r12, [r12+r15*4] + lea r12, [r12+r15*1] randomx_isn_122: ; FSUB_R f0, a1 subpd xmm0, xmm9 @@ -504,7 +502,7 @@ randomx_isn_128: subpd xmm3, xmm9 randomx_isn_129: ; IADD_RS r1, r2, LSH 2 - lea r9, [r9+r10*4] + lea r9, [r9+r10*2] randomx_isn_130: ; FSUB_R f1, a1 subpd xmm1, xmm9 @@ -531,7 +529,7 @@ randomx_isn_136: sub r11, r14 randomx_isn_137: ; IADD_RS r4, r1, LSH 0 - lea r12, [r12+r9*1] + lea r12, [r12+r9*8] randomx_isn_138: ; ISTORE L1[r0+56684410], r0 lea eax, [r8d+56684410] @@ -573,10 +571,10 @@ randomx_isn_145: sub r13, r11 randomx_isn_146: ; IADD_RS r0, r3, LSH 1 - lea r8, [r8+r11*2] + lea r8, [r8+r11*4] randomx_isn_147: ; IADD_RS r1, r3, LSH 1 - lea r9, [r9+r11*2] + lea r9, [r9+r11*1] randomx_isn_148: ; FSQRT_R e1 sqrtpd xmm5, xmm5 @@ -624,7 +622,7 @@ randomx_isn_158: mov qword ptr [rsi+rax], r12 randomx_isn_159: ; IADD_RS r7, r2, LSH 3 - lea r15, [r15+r10*8] + lea r15, [r15+r10*4] randomx_isn_160: ; IMUL_RCP r7, 2040763167 mov rax, 9705702723791900149 @@ -716,7 +714,7 @@ randomx_isn_182: mulpd xmm6, xmm10 randomx_isn_183: ; IADD_RS r6, r2, LSH 0 - lea r14, [r14+r10*1] + lea r14, [r14+r10*8] randomx_isn_184: ; FADD_R f2, a3 addpd xmm2, xmm11 @@ -728,7 +726,7 @@ randomx_isn_186: xorps xmm3, xmm15 randomx_isn_187: ; IADD_RS r6, r6, LSH 3 - lea r14, [r14+r14*8] + lea r14, [r14+r14*4] randomx_isn_188: ; FSCAL_R f2 xorps xmm2, xmm15 @@ -781,7 +779,7 @@ randomx_isn_199: subpd xmm3, xmm11 randomx_isn_200: ; IADD_RS r2, r5, LSH 2 - lea r10, [r10+r13*4] + lea r10, [r10+r13*1] randomx_isn_201: ; ISUB_M r6, L2[r3+376384700] lea eax, [r11d+376384700] @@ -811,7 +809,7 @@ randomx_isn_207: xorps xmm1, xmm15 randomx_isn_208: ; IADD_RS r6, r3, LSH 1 - lea r14, [r14+r11*2] + lea r14, [r14+r11*1] randomx_isn_209: ; FSUB_M f0, L1[r4-557177119] lea eax, [r12d-557177119] @@ -874,7 +872,7 @@ randomx_isn_223: xorps xmm2, xmm15 randomx_isn_224: ; IADD_RS r5, r4, 312567979, LSH 1 - lea r13, [r13+r12*2+312567979] + lea r13, [r13+r12*4+312567979] randomx_isn_225: ; ISTORE L2[r2+260885699], r1 lea eax, [r10d+260885699] @@ -899,7 +897,7 @@ randomx_isn_229: xchg r8, r14 randomx_isn_230: ; IADD_RS r2, r7, LSH 2 - lea r10, [r10+r15*4] + lea r10, [r10+r15*1] randomx_isn_231: ; FMUL_R e1, a0 mulpd xmm5, xmm8 @@ -925,7 +923,7 @@ randomx_isn_237: subpd xmm1, xmm11 randomx_isn_238: ; IADD_RS r4, r2, LSH 1 - lea r12, [r12+r10*2] + lea r12, [r12+r10*4] randomx_isn_239: ; IMUL_RCP r7, 3065786637 mov rax, 12921343181238534701 @@ -958,14 +956,12 @@ randomx_isn_246: and eax, 262136 sub r15, qword ptr [rsi+rax] randomx_isn_247: - ; COND_M r2, be(L1[r5-8545330], -8545330), LSH 2 + ; COND_R r2, be(r5, -8545330), LSH 2 add r9, 4 test r9, 508 jz randomx_isn_223 xor rcx, rcx - lea eax, [r13d-8545330] - and eax, 16376 - cmp dword ptr [rsi+rax], -8545330 + cmp r13d, -8545330 setbe cl add r10, rcx randomx_isn_248: @@ -981,13 +977,13 @@ randomx_isn_250: addpd xmm3, xmm8 randomx_isn_251: ; IADD_RS r0, r0, LSH 0 - lea r8, [r8+r8*1] + lea r8, [r8+r8*4] randomx_isn_252: ; ISUB_R r4, r2 sub r12, r10 randomx_isn_253: ; IADD_RS r5, r4, 256175395, LSH 0 - lea r13, [r13+r12*1+256175395] + lea r13, [r13+r12*4+256175395] randomx_isn_254: ; IADD_RS r6, r7, LSH 2 lea r14, [r14+r15*4] diff --git a/src/superscalarGenerator.cpp b/src/superscalarGenerator.cpp index e6420d1..8184045 100644 --- a/src/superscalarGenerator.cpp +++ b/src/superscalarGenerator.cpp @@ -348,7 +348,7 @@ namespace RandomX { instr.opcode = getType(); instr.dst = dst_; instr.src = src_ >= 0 ? src_ : dst_; - instr.mod = mod_; + instr.setMod(mod_); instr.setImm32(imm32_); }