Unique scratchpad addresses - interpreter

Additional writes to L3
This commit is contained in:
tevador 2019-04-16 18:58:44 +02:00
parent 33a2fd021d
commit 682000b1a9
11 changed files with 161 additions and 112 deletions

View file

@ -76,7 +76,7 @@ namespace RandomX {
asmCode << "xor " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
break;
case RandomX::SuperscalarInstructionType::IADD_RS:
asmCode << "lea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.mod % 4)) << "]" << std::endl;
asmCode << "lea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.getModShift2())) << "]" << std::endl;
break;
case RandomX::SuperscalarInstructionType::IMUL_R:
asmCode << "imul " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
@ -185,7 +185,7 @@ namespace RandomX {
asmCode << regR[instr.dst] << " ^= " << regR[instr.src] << ";" << std::endl;
break;
case RandomX::SuperscalarInstructionType::IADD_RS:
asmCode << regR[instr.dst] << " += " << regR[instr.src] << "*" << (1 << (instr.mod % 4)) << ";" << std::endl;
asmCode << regR[instr.dst] << " += " << regR[instr.src] << "*" << (1 << (instr.getModShift2())) << ";" << std::endl;
break;
case RandomX::SuperscalarInstructionType::IMUL_R:
asmCode << regR[instr.dst] << " *= " << regR[instr.src] << ";" << std::endl;
@ -258,12 +258,19 @@ namespace RandomX {
void AssemblyGeneratorX86::genAddressReg(Instruction& instr, const char* reg = "eax") {
asmCode << "\tlea " << reg << ", [" << regR32[instr.src] << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl;
asmCode << "\tand " << reg << ", " << ((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask) << std::endl;
asmCode << "\tand " << reg << ", " << ((instr.getModMem()) ? ScratchpadL1Mask : ScratchpadL2Mask) << std::endl;
}
void AssemblyGeneratorX86::genAddressRegDst(Instruction& instr, int maskAlign = 8) {
asmCode << "\tlea eax, [" << regR32[instr.dst] << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl;
asmCode << "\tand eax" << ", " << ((instr.mod % 4) ? (ScratchpadL1Mask & (-maskAlign)) : (ScratchpadL2Mask & (-maskAlign))) << std::endl;
int mask;
if (instr.getModCond()) {
mask = instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask;
}
else {
mask = ScratchpadL3Mask;
}
asmCode << "\tand eax" << ", " << (mask & (-maskAlign)) << std::endl;
}
int32_t AssemblyGeneratorX86::genAddressImm(Instruction& instr) {
@ -274,9 +281,9 @@ namespace RandomX {
void AssemblyGeneratorX86::h_IADD_RS(Instruction& instr, int i) {
registerUsage[instr.dst] = i;
if(instr.dst == RegisterNeedsDisplacement)
asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.mod % 4)) << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl;
asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.getModShift2())) << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl;
else
asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.mod % 4)) << "]" << std::endl;
asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.getModShift2())) << "]" << std::endl;
traceint(instr);
}
@ -607,7 +614,7 @@ namespace RandomX {
}
static inline const char* condition(Instruction& instr) {
switch ((instr.mod >> 2) & 7)
switch (instr.getModCond())
{
case 0:
return "be";
@ -631,7 +638,7 @@ namespace RandomX {
}
void AssemblyGeneratorX86::handleCondition(Instruction& instr, int i) {
const int shift = (instr.mod >> 5);
const int shift = instr.getModShift3();
const int conditionMask = ((1 << RANDOMX_CONDITION_BITS) - 1) << shift;
int reg = getConditionRegister();
int target = registerUsage[reg] + 1;
@ -647,7 +654,7 @@ namespace RandomX {
//4 uOPs
void AssemblyGeneratorX86::h_COND_R(Instruction& instr, int i) {
handleCondition(instr, i);
asmCode << "\txor rcx, rcx" << std::endl;
asmCode << "\txor ecx, ecx" << std::endl;
asmCode << "\tcmp " << regR32[instr.src] << ", " << (int32_t)instr.getImm32() << std::endl;
asmCode << "\tset" << condition(instr) << " cl" << std::endl;
asmCode << "\tadd " << regR[instr.dst] << ", rcx" << std::endl;
@ -657,7 +664,7 @@ namespace RandomX {
//6 uOPs
void AssemblyGeneratorX86::h_COND_M(Instruction& instr, int i) {
handleCondition(instr, i);
asmCode << "\txor rcx, rcx" << std::endl;
asmCode << "\txor ecx, ecx" << std::endl;
genAddressReg(instr);
asmCode << "\tcmp dword ptr [rsi+rax], " << (int32_t)instr.getImm32() << std::endl;
asmCode << "\tset" << condition(instr) << " cl" << std::endl;

View file

@ -33,7 +33,11 @@ namespace RandomX {
}
void Instruction::genAddressRegDst(std::ostream& os) const {
os << ((mod % 4) ? "L1" : "L2") << "[r" << (int)dst << std::showpos << (int32_t)getImm32() << std::noshowpos << "]";
if (getModCond())
os << ((mod % 4) ? "L1" : "L2");
else
os << "L3";
os << "[r" << (int)dst << std::showpos << (int32_t)getImm32() << std::noshowpos << "]";
}
void Instruction::genAddressImm(std::ostream& os) const {

View file

@ -74,7 +74,6 @@ namespace RandomX {
uint8_t opcode;
uint8_t dst;
uint8_t src;
uint8_t mod;
uint32_t getImm32() const {
return load32(&imm32);
}
@ -88,7 +87,23 @@ namespace RandomX {
i.print(os);
return os;
}
// Memory-level selector: the two least significant bits of the mod byte.
// Callers treat a non-zero result as "use the L1 scratchpad mask" and zero as "use the L2 mask".
int getModMem() const {
    return mod & 3;
}
// Condition selector: bits 2..4 of the mod byte, yielding a value in 0..7.
// Store-address generation also tests it against zero to choose the L3 mask.
int getModCond() const {
    return (mod & 0x1C) >> 2;
}
// Three-bit shift amount: the top three bits of the mod byte (0..7).
// Used as the bit offset of the condition mask for COND_R / COND_M.
int getModShift3() const {
    return mod / 32;
}
// Two-bit shift amount: the top two bits of the mod byte (0..3).
// Used as the left-shift / LEA scale exponent for IADD_RS.
int getModShift2() const {
    return mod / 64;
}
void setMod(uint8_t val) {
mod = val;
}
private:
uint8_t mod;
uint32_t imm32;
void print(std::ostream&) const;
static const char* names[256];

View file

@ -45,6 +45,8 @@ constexpr bool fpuCheck = false;
namespace RandomX {
static int_reg_t Zero = 0;
template<bool superscalar>
void InterpretedVirtualMachine<superscalar>::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]) {
mem.ds = ds;
@ -108,6 +110,12 @@ namespace RandomX {
return std::fpclassify(x) == FP_SUBNORMAL;
}
// Resolves the scratchpad location addressed by a bytecode instruction:
// the source operand plus the immediate, masked with the instruction's memory mask
// and truncated to 32 bits, is used as an offset from the scratchpad base.
template<bool superscalar>
FORCE_INLINE void* InterpretedVirtualMachine<superscalar>::getScratchpadAddress(InstructionByteCode& ibc) {
    const uint32_t offset = (*ibc.isrc + ibc.imm) & ibc.memMask;
    return scratchpad + offset;
}
template<bool superscalar>
FORCE_INLINE void InterpretedVirtualMachine<superscalar>::executeBytecode(int& ic, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
auto& ibc = byteCode[ic];
@ -120,7 +128,7 @@ namespace RandomX {
} break;
case InstructionType::IADD_M: {
*ibc.idst += load64(scratchpad + (*ibc.isrc & ibc.memMask));
*ibc.idst += load64(getScratchpadAddress(ibc));
} break;
case InstructionType::IADD_RC: {
@ -132,7 +140,7 @@ namespace RandomX {
} break;
case InstructionType::ISUB_M: {
*ibc.idst -= load64(scratchpad + (*ibc.isrc & ibc.memMask));
*ibc.idst -= load64(getScratchpadAddress(ibc));
} break;
case InstructionType::IMUL_9C: {
@ -144,7 +152,7 @@ namespace RandomX {
} break;
case InstructionType::IMUL_M: {
*ibc.idst *= load64(scratchpad + (*ibc.isrc & ibc.memMask));
*ibc.idst *= load64(getScratchpadAddress(ibc));
} break;
case InstructionType::IMULH_R: {
@ -152,7 +160,7 @@ namespace RandomX {
} break;
case InstructionType::IMULH_M: {
*ibc.idst = mulh(*ibc.idst, load64(scratchpad + (*ibc.isrc & ibc.memMask)));
*ibc.idst = mulh(*ibc.idst, load64(getScratchpadAddress(ibc)));
} break;
case InstructionType::ISMULH_R: {
@ -160,7 +168,7 @@ namespace RandomX {
} break;
case InstructionType::ISMULH_M: {
*ibc.idst = smulh(unsigned64ToSigned2sCompl(*ibc.idst), unsigned64ToSigned2sCompl(load64(scratchpad + (*ibc.isrc & ibc.memMask))));
*ibc.idst = smulh(unsigned64ToSigned2sCompl(*ibc.idst), unsigned64ToSigned2sCompl(load64(getScratchpadAddress(ibc))));
} break;
case InstructionType::INEG_R: {
@ -172,7 +180,7 @@ namespace RandomX {
} break;
case InstructionType::IXOR_M: {
*ibc.idst ^= load64(scratchpad + (*ibc.isrc & ibc.memMask));
*ibc.idst ^= load64(getScratchpadAddress(ibc));
} break;
case InstructionType::IROR_R: {
@ -198,7 +206,7 @@ namespace RandomX {
} break;
case InstructionType::FADD_M: {
__m128d fsrc = load_cvt_i32x2(scratchpad + (*ibc.isrc & ibc.memMask));
__m128d fsrc = load_cvt_i32x2(getScratchpadAddress(ibc));
*ibc.fdst = _mm_add_pd(*ibc.fdst, fsrc);
} break;
@ -207,7 +215,7 @@ namespace RandomX {
} break;
case InstructionType::FSUB_M: {
__m128d fsrc = load_cvt_i32x2(scratchpad + (*ibc.isrc & ibc.memMask));
__m128d fsrc = load_cvt_i32x2(getScratchpadAddress(ibc));
*ibc.fdst = _mm_sub_pd(*ibc.fdst, fsrc);
} break;
@ -221,7 +229,7 @@ namespace RandomX {
} break;
case InstructionType::FDIV_M: {
__m128d fsrc = ieee_set_exponent<-240>(load_cvt_i32x2(scratchpad + (*ibc.isrc & ibc.memMask)));
__m128d fsrc = ieee_set_exponent<-240>(load_cvt_i32x2(getScratchpadAddress(ibc)));
*ibc.fdst = _mm_div_pd(*ibc.fdst, fsrc);
} break;
@ -262,7 +270,7 @@ namespace RandomX {
count_JUMP_not_taken++;
#endif
#endif
*ibc.idst += condition(ibc.condition, load64(scratchpad + (*ibc.isrc & ibc.memMask)), ibc.imm) ? 1 : 0;
*ibc.idst += condition(ibc.condition, load64(getScratchpadAddress(ibc)), ibc.imm) ? 1 : 0;
} break;
case InstructionType::CFROUND: {
@ -270,7 +278,7 @@ namespace RandomX {
} break;
case InstructionType::ISTORE: {
store64(scratchpad + (*ibc.idst & ibc.memMask), *ibc.isrc);
store64(scratchpad + ((*ibc.idst + ibc.imm) & ibc.memMask), *ibc.isrc);
} break;
case InstructionType::NOP: {
@ -486,7 +494,7 @@ namespace RandomX {
r[instr.dst] ^= r[instr.src];
break;
case RandomX::SuperscalarInstructionType::IADD_RS:
r[instr.dst] += r[instr.src] << (instr.mod % 4);
r[instr.dst] += r[instr.src] << instr.getModShift2();
break;
case RandomX::SuperscalarInstructionType::IMUL_R:
r[instr.dst] *= r[instr.src];
@ -585,14 +593,14 @@ namespace RandomX {
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IADD_RS;
ibc.idst = &r[dst];
if (dst != 5) {
if (dst != RegisterNeedsDisplacement) {
ibc.isrc = &r[src];
ibc.shift = instr.mod % 4;
ibc.shift = instr.getModShift2();
ibc.imm = 0;
}
else {
ibc.isrc = &r[src];
ibc.shift = instr.mod % 4;
ibc.shift = instr.getModShift2();
ibc.imm = signExtend2sCompl(instr.getImm32());
}
registerUsage[instr.dst] = i;
@ -603,13 +611,13 @@ namespace RandomX {
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IADD_M;
ibc.idst = &r[dst];
ibc.imm = signExtend2sCompl(instr.getImm32());
if (instr.src != instr.dst) {
ibc.isrc = &r[src];
ibc.memMask = ((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask);
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
}
else {
ibc.imm = instr.getImm32();
ibc.isrc = &ibc.imm;
ibc.isrc = &Zero;
ibc.memMask = ScratchpadL3Mask;
}
registerUsage[instr.dst] = i;
@ -645,13 +653,13 @@ namespace RandomX {
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::ISUB_M;
ibc.idst = &r[dst];
ibc.imm = signExtend2sCompl(instr.getImm32());
if (instr.src != instr.dst) {
ibc.isrc = &r[src];
ibc.memMask = ((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask);
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
}
else {
ibc.imm = instr.getImm32();
ibc.isrc = &ibc.imm;
ibc.isrc = &Zero;
ibc.memMask = ScratchpadL3Mask;
}
registerUsage[instr.dst] = i;
@ -685,13 +693,13 @@ namespace RandomX {
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IMUL_M;
ibc.idst = &r[dst];
ibc.imm = signExtend2sCompl(instr.getImm32());
if (instr.src != instr.dst) {
ibc.isrc = &r[src];
ibc.memMask = ((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask);
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
}
else {
ibc.imm = instr.getImm32();
ibc.isrc = &ibc.imm;
ibc.isrc = &Zero;
ibc.memMask = ScratchpadL3Mask;
}
registerUsage[instr.dst] = i;
@ -711,13 +719,13 @@ namespace RandomX {
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IMULH_M;
ibc.idst = &r[dst];
ibc.imm = signExtend2sCompl(instr.getImm32());
if (instr.src != instr.dst) {
ibc.isrc = &r[src];
ibc.memMask = ((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask);
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
}
else {
ibc.imm = instr.getImm32();
ibc.isrc = &ibc.imm;
ibc.isrc = &Zero;
ibc.memMask = ScratchpadL3Mask;
}
registerUsage[instr.dst] = i;
@ -737,13 +745,13 @@ namespace RandomX {
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::ISMULH_M;
ibc.idst = &r[dst];
ibc.imm = signExtend2sCompl(instr.getImm32());
if (instr.src != instr.dst) {
ibc.isrc = &r[src];
ibc.memMask = ((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask);
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
}
else {
ibc.imm = instr.getImm32();
ibc.isrc = &ibc.imm;
ibc.isrc = &Zero;
ibc.memMask = ScratchpadL3Mask;
}
registerUsage[instr.dst] = i;
@ -791,13 +799,13 @@ namespace RandomX {
auto src = instr.src % RegistersCount;
ibc.type = InstructionType::IXOR_M;
ibc.idst = &r[dst];
ibc.imm = signExtend2sCompl(instr.getImm32());
if (instr.src != instr.dst) {
ibc.isrc = &r[src];
ibc.memMask = ((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask);
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
}
else {
ibc.imm = instr.getImm32();
ibc.isrc = &ibc.imm;
ibc.isrc = &Zero;
ibc.memMask = ScratchpadL3Mask;
}
registerUsage[instr.dst] = i;
@ -871,7 +879,8 @@ namespace RandomX {
ibc.type = InstructionType::FADD_M;
ibc.fdst = &f[dst];
ibc.isrc = &r[src];
ibc.memMask = ((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask);
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
ibc.imm = signExtend2sCompl(instr.getImm32());
} break;
CASE_REP(FSUB_R) {
@ -888,7 +897,8 @@ namespace RandomX {
ibc.type = InstructionType::FSUB_M;
ibc.fdst = &f[dst];
ibc.isrc = &r[src];
ibc.memMask = ((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask);
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
ibc.imm = signExtend2sCompl(instr.getImm32());
} break;
CASE_REP(FSCAL_R) {
@ -911,7 +921,8 @@ namespace RandomX {
ibc.type = InstructionType::FDIV_M;
ibc.fdst = &e[dst];
ibc.isrc = &r[src];
ibc.memMask = ((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask);
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
ibc.imm = signExtend2sCompl(instr.getImm32());
} break;
CASE_REP(FSQRT_R) {
@ -926,12 +937,12 @@ namespace RandomX {
ibc.type = InstructionType::COND_R;
ibc.idst = &r[dst];
ibc.isrc = &r[src];
ibc.condition = (instr.mod >> 2) & 7;
ibc.condition = instr.getModCond();
ibc.imm = instr.getImm32();
//jump condition
int reg = getConditionRegister(registerUsage);
ibc.target = registerUsage[reg];
ibc.shift = (instr.mod >> 5);
ibc.shift = instr.getModShift3();
ibc.creg = &r[reg];
for (unsigned j = 0; j < 8; ++j) { //mark all registers as used
registerUsage[j] = i;
@ -944,13 +955,13 @@ namespace RandomX {
ibc.type = InstructionType::COND_M;
ibc.idst = &r[dst];
ibc.isrc = &r[src];
ibc.condition = (instr.mod >> 2) & 7;
ibc.condition = instr.getModCond();
ibc.imm = instr.getImm32();
ibc.memMask = ((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask);
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
//jump condition
int reg = getConditionRegister(registerUsage);
ibc.target = registerUsage[reg];
ibc.shift = (instr.mod >> 5);
ibc.shift = instr.getModShift3();
ibc.creg = &r[reg];
for (unsigned j = 0; j < 8; ++j) { //mark all registers as used
registerUsage[j] = i;
@ -970,7 +981,11 @@ namespace RandomX {
ibc.type = InstructionType::ISTORE;
ibc.idst = &r[dst];
ibc.isrc = &r[src];
ibc.memMask = ((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask);
ibc.imm = signExtend2sCompl(instr.getImm32());
if (instr.getModCond())
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
else
ibc.memMask = ScratchpadL3Mask;
} break;
CASE_REP(NOP) {

View file

@ -132,5 +132,6 @@ namespace RandomX {
void executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
void executeBytecode(int& i, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
void executeSuperscalar(uint32_t blockNumber, int_reg_t(&r)[8]);
void* getScratchpadAddress(InstructionByteCode& ibc);
};
}

View file

@ -182,7 +182,7 @@ namespace RandomX {
static const uint8_t SQRTPD[] = { 0x66, 0x0f, 0x51 };
static const uint8_t AND_OR_MOV_LDMXCSR[] = { 0x25, 0x00, 0x60, 0x00, 0x00, 0x0D, 0xC0, 0x9F, 0x00, 0x00, 0x89, 0x44, 0x24, 0xF8, 0x0F, 0xAE, 0x54, 0x24, 0xF8 };
static const uint8_t ROL_RAX[] = { 0x48, 0xc1, 0xc0 };
static const uint8_t XOR_RCX_RCX[] = { 0x48, 0x33, 0xC9 };
static const uint8_t XOR_ECX_ECX[] = { 0x33, 0xC9 };
static const uint8_t REX_CMP_R32I[] = { 0x41, 0x81 };
static const uint8_t REX_CMP_M32I[] = { 0x81, 0x3c, 0x06 };
static const uint8_t MOVAPD[] = { 0x66, 0x0f, 0x29 };
@ -202,6 +202,7 @@ namespace RandomX {
static const uint8_t JZ[] = { 0x0f, 0x84 };
static const uint8_t RET = 0xc3;
static const uint8_t LEA_32[] = { 0x67, 0x41, 0x8d };
static const uint8_t MOVNTI[] = { 0x4c, 0x0f, 0xc3 };
static const uint8_t NOP1[] = { 0x90 };
static const uint8_t NOP2[] = { 0x66, 0x90 };
@ -360,7 +361,7 @@ namespace RandomX {
case RandomX::SuperscalarInstructionType::IADD_RS:
emit(REX_LEA);
emitByte(0x04 + 8 * instr.dst);
genSIB(instr.mod % 4, instr.src, instr.dst);
genSIB(instr.getModShift2(), instr.src, instr.dst);
break;
case RandomX::SuperscalarInstructionType::IMUL_R:
emit(REX_IMUL_RR);
@ -445,7 +446,7 @@ namespace RandomX {
emitByte(AND_EAX_I);
else
emit(AND_ECX_I);
emit32((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask);
emit32(instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
}
void JitCompilerX86::genAddressRegDst(Instruction& instr, bool align16 = false) {
@ -456,9 +457,14 @@ namespace RandomX {
}
emit32(instr.getImm32());
emitByte(AND_EAX_I);
int32_t maskL1 = align16 ? ScratchpadL1Mask16 : ScratchpadL1Mask;
int32_t maskL2 = align16 ? ScratchpadL2Mask16 : ScratchpadL2Mask;
emit32((instr.mod % 4) ? maskL1 : maskL2);
if (instr.getModCond()) {
int32_t maskL1 = align16 ? ScratchpadL1Mask16 : ScratchpadL1Mask;
int32_t maskL2 = align16 ? ScratchpadL2Mask16 : ScratchpadL2Mask;
emit32(instr.getModMem() ? maskL1 : maskL2);
}
else {
emit32(ScratchpadL3Mask);
}
}
void JitCompilerX86::genAddressImm(Instruction& instr) {
@ -485,7 +491,7 @@ namespace RandomX {
emitByte(0xac);
else
emitByte(0x04 + 8 * instr.dst);
genSIB(instr.mod % 4, instr.src, instr.dst);
genSIB(instr.getModShift2(), instr.src, instr.dst);
if (instr.dst == RegisterNeedsDisplacement)
emit32(instr.getImm32());
}
@ -880,7 +886,7 @@ namespace RandomX {
}
static inline uint8_t jumpCondition(Instruction& instr, bool invert = false) {
switch (((instr.mod >> 2) & 7) ^ invert)
switch (instr.getModCond() ^ invert)
{
case 0:
return 0x76; //jbe
@ -902,7 +908,7 @@ namespace RandomX {
}
static inline uint8_t condition(Instruction& instr) {
switch ((instr.mod >> 2) & 7)
switch (instr.getModCond())
{
case 0:
return 0x96; //setbe
@ -938,7 +944,7 @@ namespace RandomX {
}
void JitCompilerX86::handleCondition(Instruction& instr, int i) {
const int shift = (instr.mod >> 5);
const int shift = instr.getModShift3();
const int conditionMask = ((1 << RANDOMX_CONDITION_BITS) - 1) << shift;
int reg = getConditionRegister();
int target = registerUsage[reg] + 1;
@ -973,7 +979,7 @@ namespace RandomX {
emit(NOP3);
return;
}
emit(XOR_RCX_RCX);
emit(XOR_ECX_ECX);
emit(REX_CMP_R32I);
emitByte(0xf8 + instr.src);
emit32(instr.getImm32());
@ -988,7 +994,7 @@ namespace RandomX {
#ifdef RANDOMX_JUMP
handleCondition(instr, i);
#endif
emit(XOR_RCX_RCX);
emit(XOR_ECX_ECX);
genAddressReg(instr);
emit(REX_CMP_M32I);
emit32(instr.getImm32());
@ -1001,7 +1007,10 @@ namespace RandomX {
void JitCompilerX86::h_ISTORE(Instruction& instr, int i) {
genAddressRegDst(instr);
emit(REX_MOV_MR);
//if (instr.getModCond())
emit(REX_MOV_MR);
//else
// emit(MOVNTI);
emitByte(0x04 + 8 * instr.src);
emitByte(0x06);
}

View file

@ -92,8 +92,8 @@ Total sum of frequencies must be 256
#define RANDOMX_FREQ_ISMULH_M 1
#define RANDOMX_FREQ_IMUL_RCP 8
#define RANDOMX_FREQ_INEG_R 2
#define RANDOMX_FREQ_IXOR_R 16
#define RANDOMX_FREQ_IXOR_M 4
#define RANDOMX_FREQ_IXOR_R 15
#define RANDOMX_FREQ_IXOR_M 5
#define RANDOMX_FREQ_IROR_R 10
#define RANDOMX_FREQ_IROL_R 0
#define RANDOMX_FREQ_ISWAP_R 4
@ -108,8 +108,8 @@ Total sum of frequencies must be 256
#define RANDOMX_FREQ_FDIV_M 4
#define RANDOMX_FREQ_FSQRT_R 6
#define RANDOMX_FREQ_COND_R 7
#define RANDOMX_FREQ_COND_M 1
#define RANDOMX_FREQ_COND_R 8
#define RANDOMX_FREQ_COND_M 0
#define RANDOMX_FREQ_CFROUND 1
#define RANDOMX_FREQ_ISTORE 16

View file

@ -98,6 +98,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
// Case-label repetition helpers.
// REPCASE32(x) expands to REPCASE31(x) followed by one more `case __COUNTER__:` label
// (REPCASE31 is defined outside this excerpt -- presumably the same pattern, one label fewer).
#define REPCASE32(x) REPCASE31(x) case __COUNTER__:
// Larger power-of-two sizes are built by expanding the previous size twice.
#define REPCASE64(x) REPCASE32(x) REPCASE32(x)
#define REPCASE128(x) REPCASE64(x) REPCASE64(x)
#define REPCASE256(x) REPCASE128(x) REPCASE128(x)
// Two-level indirection: REPCASEN lets N be macro-expanded first, then REPCASENX pastes it onto "REPCASE".
#define REPCASENX(x,N) REPCASE##N(x)
#define REPCASEN(x,N) REPCASENX(x,N)
// Selects the REPCASE<N> macro with N = WT(x). WT is defined elsewhere --
// NOTE(review): presumably the instruction's frequency weight; confirm against its definition.
#define CASE_REP(x) REPCASEN(x, WT(x))

View file

@ -176,6 +176,7 @@ void mine(RandomX::VirtualMachine* vm, std::atomic<uint32_t>& atomicNonce, Atomi
store32(noncePtr, nonce);
blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0);
fillAes1Rx4<softAes>((void*)hash, RANDOMX_SCRATCHPAD_L3, scratchpad);
//dump((char*)scratchpad, RANDOMX_SCRATCHPAD_L3, "spad-before.txt");
vm->resetRoundingMode();
vm->setScratchpad(scratchpad);
for (int chain = 0; chain < RANDOMX_PROGRAM_COUNT - 1; ++chain) {
@ -194,7 +195,7 @@ void mine(RandomX::VirtualMachine* vm, std::atomic<uint32_t>& atomicNonce, Atomi
}
}*/
vm->getResult<softAes>(scratchpad, RANDOMX_SCRATCHPAD_L3, hash);
//dump((char*)scratchpad, RANDOMX_SCRATCHPAD_L3, "spad.txt");
//dump((char*)scratchpad, RANDOMX_SCRATCHPAD_L3, "spad-after.txt");
result.xorWith(hash);
if (RandomX::trace) {
std::cout << "Nonce: " << nonce << " ";

View file

@ -56,7 +56,7 @@ randomx_isn_14:
sqrtpd xmm6, xmm6
randomx_isn_15:
; IADD_RS r6, r2, LSH 1
lea r14, [r14+r10*2]
lea r14, [r14+r10*8]
randomx_isn_16:
; FSUB_M f2, L1[r1-1890725713]
lea eax, [r9d-1890725713]
@ -68,9 +68,9 @@ randomx_isn_17:
mov ecx, r11d
ror r12, cl
randomx_isn_18:
; ISTORE L1[r4+1297827817], r4
; ISTORE L3[r4+1297827817], r4
lea eax, [r12d+1297827817]
and eax, 16376
and eax, 2097144
mov qword ptr [rsi+rax], r12
randomx_isn_19:
; FMUL_R e1, a2
@ -145,7 +145,7 @@ randomx_isn_35:
imul r14, 835132161
randomx_isn_36:
; IADD_RS r3, r4, LSH 2
lea r11, [r11+r12*4]
lea r11, [r11+r12*2]
randomx_isn_37:
; ISUB_M r6, L2[r4+1885029796]
lea eax, [r12d+1885029796]
@ -173,13 +173,13 @@ randomx_isn_44:
; FADD_R f1, a2
addpd xmm1, xmm10
randomx_isn_45:
; ISTORE L1[r0+1805562386], r5
; ISTORE L3[r0+1805562386], r5
lea eax, [r8d+1805562386]
and eax, 16376
and eax, 2097144
mov qword ptr [rsi+rax], r13
randomx_isn_46:
; IADD_RS r0, r7, LSH 0
lea r8, [r8+r15*1]
lea r8, [r8+r15*8]
randomx_isn_47:
; IXOR_R r5, r2
xor r13, r10
@ -227,7 +227,7 @@ randomx_isn_57:
imul r13, r9
randomx_isn_58:
; IADD_RS r5, r1, -999103579, LSH 0
lea r13, [r13+r9*1-999103579]
lea r13, [r13+r9*8-999103579]
randomx_isn_59:
; FMUL_R e2, a2
mulpd xmm6, xmm10
@ -237,7 +237,7 @@ randomx_isn_60:
ror r10, cl
randomx_isn_61:
; IADD_RS r0, r3, LSH 1
lea r8, [r8+r11*2]
lea r8, [r8+r11*1]
randomx_isn_62:
; FSQRT_R e3
sqrtpd xmm7, xmm7
@ -260,7 +260,7 @@ randomx_isn_66:
sub r12, 841292629
randomx_isn_67:
; IADD_RS r4, r6, LSH 2
lea r12, [r12+r14*4]
lea r12, [r12+r14*1]
randomx_isn_68:
; FSUB_M f3, L1[r4+613549729]
lea eax, [r12d+613549729]
@ -315,9 +315,9 @@ randomx_isn_79:
; IADD_RS r3, r1, LSH 1
lea r11, [r11+r9*2]
randomx_isn_80:
; ISTORE L1[r2+1885666804], r4
; ISTORE L3[r2+1885666804], r4
lea eax, [r10d+1885666804]
and eax, 16376
and eax, 2097144
mov qword ptr [rsi+rax], r12
randomx_isn_81:
; IMULH_R r3, r0
@ -348,14 +348,12 @@ randomx_isn_88:
; IMUL_R r1, r3
imul r9, r11
randomx_isn_89:
; COND_M r2, no(L1[r0-122257389], -122257389), LSH 6
; COND_R r2, no(r0, -122257389), LSH 6
add r8, 64
test r8, 8128
jz randomx_isn_75
xor rcx, rcx
lea eax, [r8d-122257389]
and eax, 16376
cmp dword ptr [rsi+rax], -122257389
cmp r8d, -122257389
setno cl
add r10, rcx
randomx_isn_90:
@ -429,7 +427,7 @@ randomx_isn_107:
mov r14, rdx
randomx_isn_108:
; IADD_RS r7, r0, LSH 1
lea r15, [r15+r8*2]
lea r15, [r15+r8*4]
randomx_isn_109:
; IMUL_R r6, r5
imul r14, r13
@ -444,13 +442,13 @@ randomx_isn_111:
addpd xmm2, xmm12
randomx_isn_112:
; IADD_RS r0, r3, LSH 0
lea r8, [r8+r11*1]
lea r8, [r8+r11*2]
randomx_isn_113:
; IADD_RS r3, r4, LSH 1
lea r11, [r11+r12*2]
randomx_isn_114:
; IADD_RS r2, r4, LSH 2
lea r10, [r10+r12*4]
lea r10, [r10+r12*8]
randomx_isn_115:
; IMUL_M r7, L1[r2-106928748]
lea eax, [r10d-106928748]
@ -464,7 +462,7 @@ randomx_isn_117:
subpd xmm2, xmm10
randomx_isn_118:
; IADD_RS r2, r2, LSH 0
lea r10, [r10+r10*1]
lea r10, [r10+r10*2]
randomx_isn_119:
; ISUB_R r7, -342152774
sub r15, -342152774
@ -473,7 +471,7 @@ randomx_isn_120:
lea r12, [r12+r9*2]
randomx_isn_121:
; IADD_RS r4, r7, LSH 2
lea r12, [r12+r15*4]
lea r12, [r12+r15*1]
randomx_isn_122:
; FSUB_R f0, a1
subpd xmm0, xmm9
@ -504,7 +502,7 @@ randomx_isn_128:
subpd xmm3, xmm9
randomx_isn_129:
; IADD_RS r1, r2, LSH 2
lea r9, [r9+r10*4]
lea r9, [r9+r10*2]
randomx_isn_130:
; FSUB_R f1, a1
subpd xmm1, xmm9
@ -531,7 +529,7 @@ randomx_isn_136:
sub r11, r14
randomx_isn_137:
; IADD_RS r4, r1, LSH 0
lea r12, [r12+r9*1]
lea r12, [r12+r9*8]
randomx_isn_138:
; ISTORE L1[r0+56684410], r0
lea eax, [r8d+56684410]
@ -573,10 +571,10 @@ randomx_isn_145:
sub r13, r11
randomx_isn_146:
; IADD_RS r0, r3, LSH 1
lea r8, [r8+r11*2]
lea r8, [r8+r11*4]
randomx_isn_147:
; IADD_RS r1, r3, LSH 1
lea r9, [r9+r11*2]
lea r9, [r9+r11*1]
randomx_isn_148:
; FSQRT_R e1
sqrtpd xmm5, xmm5
@ -624,7 +622,7 @@ randomx_isn_158:
mov qword ptr [rsi+rax], r12
randomx_isn_159:
; IADD_RS r7, r2, LSH 3
lea r15, [r15+r10*8]
lea r15, [r15+r10*4]
randomx_isn_160:
; IMUL_RCP r7, 2040763167
mov rax, 9705702723791900149
@ -716,7 +714,7 @@ randomx_isn_182:
mulpd xmm6, xmm10
randomx_isn_183:
; IADD_RS r6, r2, LSH 0
lea r14, [r14+r10*1]
lea r14, [r14+r10*8]
randomx_isn_184:
; FADD_R f2, a3
addpd xmm2, xmm11
@ -728,7 +726,7 @@ randomx_isn_186:
xorps xmm3, xmm15
randomx_isn_187:
; IADD_RS r6, r6, LSH 3
lea r14, [r14+r14*8]
lea r14, [r14+r14*4]
randomx_isn_188:
; FSCAL_R f2
xorps xmm2, xmm15
@ -781,7 +779,7 @@ randomx_isn_199:
subpd xmm3, xmm11
randomx_isn_200:
; IADD_RS r2, r5, LSH 2
lea r10, [r10+r13*4]
lea r10, [r10+r13*1]
randomx_isn_201:
; ISUB_M r6, L2[r3+376384700]
lea eax, [r11d+376384700]
@ -811,7 +809,7 @@ randomx_isn_207:
xorps xmm1, xmm15
randomx_isn_208:
; IADD_RS r6, r3, LSH 1
lea r14, [r14+r11*2]
lea r14, [r14+r11*1]
randomx_isn_209:
; FSUB_M f0, L1[r4-557177119]
lea eax, [r12d-557177119]
@ -874,7 +872,7 @@ randomx_isn_223:
xorps xmm2, xmm15
randomx_isn_224:
; IADD_RS r5, r4, 312567979, LSH 1
lea r13, [r13+r12*2+312567979]
lea r13, [r13+r12*4+312567979]
randomx_isn_225:
; ISTORE L2[r2+260885699], r1
lea eax, [r10d+260885699]
@ -899,7 +897,7 @@ randomx_isn_229:
xchg r8, r14
randomx_isn_230:
; IADD_RS r2, r7, LSH 2
lea r10, [r10+r15*4]
lea r10, [r10+r15*1]
randomx_isn_231:
; FMUL_R e1, a0
mulpd xmm5, xmm8
@ -925,7 +923,7 @@ randomx_isn_237:
subpd xmm1, xmm11
randomx_isn_238:
; IADD_RS r4, r2, LSH 1
lea r12, [r12+r10*2]
lea r12, [r12+r10*4]
randomx_isn_239:
; IMUL_RCP r7, 3065786637
mov rax, 12921343181238534701
@ -958,14 +956,12 @@ randomx_isn_246:
and eax, 262136
sub r15, qword ptr [rsi+rax]
randomx_isn_247:
; COND_M r2, be(L1[r5-8545330], -8545330), LSH 2
; COND_R r2, be(r5, -8545330), LSH 2
add r9, 4
test r9, 508
jz randomx_isn_223
xor rcx, rcx
lea eax, [r13d-8545330]
and eax, 16376
cmp dword ptr [rsi+rax], -8545330
cmp r13d, -8545330
setbe cl
add r10, rcx
randomx_isn_248:
@ -981,13 +977,13 @@ randomx_isn_250:
addpd xmm3, xmm8
randomx_isn_251:
; IADD_RS r0, r0, LSH 0
lea r8, [r8+r8*1]
lea r8, [r8+r8*4]
randomx_isn_252:
; ISUB_R r4, r2
sub r12, r10
randomx_isn_253:
; IADD_RS r5, r4, 256175395, LSH 0
lea r13, [r13+r12*1+256175395]
lea r13, [r13+r12*4+256175395]
randomx_isn_254:
; IADD_RS r6, r7, LSH 2
lea r14, [r14+r15*4]

View file

@ -348,7 +348,7 @@ namespace RandomX {
instr.opcode = getType();
instr.dst = dst_;
instr.src = src_ >= 0 ? src_ : dst_;
instr.mod = mod_;
instr.setMod(mod_);
instr.setImm32(imm32_);
}