Changed IADD_RS to take its shift count from mod.mem (mod bits 0-1) instead of mod bits 6-7

This commit is contained in:
tevador 2019-04-27 23:52:26 +02:00
parent ff88a57a98
commit fd7186f873
8 changed files with 43 additions and 47 deletions

View file

@ -56,7 +56,7 @@ randomx_isn_14:
sqrtpd xmm6, xmm6 sqrtpd xmm6, xmm6
randomx_isn_15: randomx_isn_15:
; IADD_RS r6, r2, LSH 1 ; IADD_RS r6, r2, LSH 1
lea r14, [r14+r10*8] lea r14, [r14+r10*2]
randomx_isn_16: randomx_isn_16:
; FSUB_M f2, L1[r1-1890725713] ; FSUB_M f2, L1[r1-1890725713]
lea eax, [r9d-1890725713] lea eax, [r9d-1890725713]
@ -145,7 +145,7 @@ randomx_isn_35:
imul r14, 835132161 imul r14, 835132161
randomx_isn_36: randomx_isn_36:
; IADD_RS r3, r4, LSH 2 ; IADD_RS r3, r4, LSH 2
lea r11, [r11+r12*2] lea r11, [r11+r12*4]
randomx_isn_37: randomx_isn_37:
; ISUB_M r6, L2[r4+1885029796] ; ISUB_M r6, L2[r4+1885029796]
lea eax, [r12d+1885029796] lea eax, [r12d+1885029796]
@ -179,7 +179,7 @@ randomx_isn_45:
mov qword ptr [rsi+rax], r13 mov qword ptr [rsi+rax], r13
randomx_isn_46: randomx_isn_46:
; IADD_RS r0, r7, LSH 0 ; IADD_RS r0, r7, LSH 0
lea r8, [r8+r15*8] lea r8, [r8+r15*1]
randomx_isn_47: randomx_isn_47:
; IXOR_R r5, r2 ; IXOR_R r5, r2
xor r13, r10 xor r13, r10
@ -227,7 +227,7 @@ randomx_isn_57:
imul r13, r9 imul r13, r9
randomx_isn_58: randomx_isn_58:
; IADD_RS r5, r1, -999103579, LSH 0 ; IADD_RS r5, r1, -999103579, LSH 0
lea r13, [r13+r9*8-999103579] lea r13, [r13+r9*1-999103579]
randomx_isn_59: randomx_isn_59:
; FMUL_R e2, a2 ; FMUL_R e2, a2
mulpd xmm6, xmm10 mulpd xmm6, xmm10
@ -237,7 +237,7 @@ randomx_isn_60:
ror r10, cl ror r10, cl
randomx_isn_61: randomx_isn_61:
; IADD_RS r0, r3, LSH 1 ; IADD_RS r0, r3, LSH 1
lea r8, [r8+r11*1] lea r8, [r8+r11*2]
randomx_isn_62: randomx_isn_62:
; FSQRT_R e3 ; FSQRT_R e3
sqrtpd xmm7, xmm7 sqrtpd xmm7, xmm7
@ -260,7 +260,7 @@ randomx_isn_66:
sub r12, 841292629 sub r12, 841292629
randomx_isn_67: randomx_isn_67:
; IADD_RS r4, r6, LSH 2 ; IADD_RS r4, r6, LSH 2
lea r12, [r12+r14*1] lea r12, [r12+r14*4]
randomx_isn_68: randomx_isn_68:
; FSUB_M f3, L1[r4+613549729] ; FSUB_M f3, L1[r4+613549729]
lea eax, [r12d+613549729] lea eax, [r12d+613549729]
@ -427,7 +427,7 @@ randomx_isn_107:
mov r14, rdx mov r14, rdx
randomx_isn_108: randomx_isn_108:
; IADD_RS r7, r0, LSH 1 ; IADD_RS r7, r0, LSH 1
lea r15, [r15+r8*4] lea r15, [r15+r8*2]
randomx_isn_109: randomx_isn_109:
; IMUL_R r6, r5 ; IMUL_R r6, r5
imul r14, r13 imul r14, r13
@ -442,13 +442,13 @@ randomx_isn_111:
addpd xmm2, xmm12 addpd xmm2, xmm12
randomx_isn_112: randomx_isn_112:
; IADD_RS r0, r3, LSH 0 ; IADD_RS r0, r3, LSH 0
lea r8, [r8+r11*2] lea r8, [r8+r11*1]
randomx_isn_113: randomx_isn_113:
; IADD_RS r3, r4, LSH 1 ; IADD_RS r3, r4, LSH 1
lea r11, [r11+r12*2] lea r11, [r11+r12*2]
randomx_isn_114: randomx_isn_114:
; IADD_RS r2, r4, LSH 2 ; IADD_RS r2, r4, LSH 2
lea r10, [r10+r12*8] lea r10, [r10+r12*4]
randomx_isn_115: randomx_isn_115:
; IMUL_M r7, L1[r2-106928748] ; IMUL_M r7, L1[r2-106928748]
lea eax, [r10d-106928748] lea eax, [r10d-106928748]
@ -462,7 +462,7 @@ randomx_isn_117:
subpd xmm2, xmm10 subpd xmm2, xmm10
randomx_isn_118: randomx_isn_118:
; IADD_RS r2, r2, LSH 0 ; IADD_RS r2, r2, LSH 0
lea r10, [r10+r10*2] lea r10, [r10+r10*1]
randomx_isn_119: randomx_isn_119:
; ISUB_R r7, -342152774 ; ISUB_R r7, -342152774
sub r15, -342152774 sub r15, -342152774
@ -471,7 +471,7 @@ randomx_isn_120:
lea r12, [r12+r9*2] lea r12, [r12+r9*2]
randomx_isn_121: randomx_isn_121:
; IADD_RS r4, r7, LSH 2 ; IADD_RS r4, r7, LSH 2
lea r12, [r12+r15*1] lea r12, [r12+r15*4]
randomx_isn_122: randomx_isn_122:
; FSUB_R f0, a1 ; FSUB_R f0, a1
subpd xmm0, xmm9 subpd xmm0, xmm9
@ -502,7 +502,7 @@ randomx_isn_128:
subpd xmm3, xmm9 subpd xmm3, xmm9
randomx_isn_129: randomx_isn_129:
; IADD_RS r1, r2, LSH 2 ; IADD_RS r1, r2, LSH 2
lea r9, [r9+r10*2] lea r9, [r9+r10*4]
randomx_isn_130: randomx_isn_130:
; FSUB_R f1, a1 ; FSUB_R f1, a1
subpd xmm1, xmm9 subpd xmm1, xmm9
@ -529,7 +529,7 @@ randomx_isn_136:
sub r11, r14 sub r11, r14
randomx_isn_137: randomx_isn_137:
; IADD_RS r4, r1, LSH 0 ; IADD_RS r4, r1, LSH 0
lea r12, [r12+r9*8] lea r12, [r12+r9*1]
randomx_isn_138: randomx_isn_138:
; ISTORE L1[r0+56684410], r0 ; ISTORE L1[r0+56684410], r0
lea eax, [r8d+56684410] lea eax, [r8d+56684410]
@ -571,10 +571,10 @@ randomx_isn_145:
sub r13, r11 sub r13, r11
randomx_isn_146: randomx_isn_146:
; IADD_RS r0, r3, LSH 1 ; IADD_RS r0, r3, LSH 1
lea r8, [r8+r11*4] lea r8, [r8+r11*2]
randomx_isn_147: randomx_isn_147:
; IADD_RS r1, r3, LSH 1 ; IADD_RS r1, r3, LSH 1
lea r9, [r9+r11*1] lea r9, [r9+r11*2]
randomx_isn_148: randomx_isn_148:
; FSQRT_R e1 ; FSQRT_R e1
sqrtpd xmm5, xmm5 sqrtpd xmm5, xmm5
@ -623,7 +623,7 @@ randomx_isn_158:
mov qword ptr [rsi+rax], r12 mov qword ptr [rsi+rax], r12
randomx_isn_159: randomx_isn_159:
; IADD_RS r7, r2, LSH 3 ; IADD_RS r7, r2, LSH 3
lea r15, [r15+r10*4] lea r15, [r15+r10*8]
randomx_isn_160: randomx_isn_160:
; IMUL_RCP r7, 2040763167 ; IMUL_RCP r7, 2040763167
mov rax, 9705702723791900149 mov rax, 9705702723791900149
@ -715,7 +715,7 @@ randomx_isn_182:
mulpd xmm6, xmm10 mulpd xmm6, xmm10
randomx_isn_183: randomx_isn_183:
; IADD_RS r6, r2, LSH 0 ; IADD_RS r6, r2, LSH 0
lea r14, [r14+r10*8] lea r14, [r14+r10*1]
randomx_isn_184: randomx_isn_184:
; FADD_R f2, a3 ; FADD_R f2, a3
addpd xmm2, xmm11 addpd xmm2, xmm11
@ -727,7 +727,7 @@ randomx_isn_186:
xorps xmm3, xmm15 xorps xmm3, xmm15
randomx_isn_187: randomx_isn_187:
; IADD_RS r6, r6, LSH 3 ; IADD_RS r6, r6, LSH 3
lea r14, [r14+r14*4] lea r14, [r14+r14*8]
randomx_isn_188: randomx_isn_188:
; FSCAL_R f2 ; FSCAL_R f2
xorps xmm2, xmm15 xorps xmm2, xmm15
@ -780,7 +780,7 @@ randomx_isn_199:
subpd xmm3, xmm11 subpd xmm3, xmm11
randomx_isn_200: randomx_isn_200:
; IADD_RS r2, r5, LSH 2 ; IADD_RS r2, r5, LSH 2
lea r10, [r10+r13*1] lea r10, [r10+r13*4]
randomx_isn_201: randomx_isn_201:
; ISUB_M r6, L2[r3+376384700] ; ISUB_M r6, L2[r3+376384700]
lea eax, [r11d+376384700] lea eax, [r11d+376384700]
@ -810,7 +810,7 @@ randomx_isn_207:
xorps xmm1, xmm15 xorps xmm1, xmm15
randomx_isn_208: randomx_isn_208:
; IADD_RS r6, r3, LSH 1 ; IADD_RS r6, r3, LSH 1
lea r14, [r14+r11*1] lea r14, [r14+r11*2]
randomx_isn_209: randomx_isn_209:
; FSUB_M f0, L1[r4-557177119] ; FSUB_M f0, L1[r4-557177119]
lea eax, [r12d-557177119] lea eax, [r12d-557177119]
@ -873,7 +873,7 @@ randomx_isn_223:
xorps xmm2, xmm15 xorps xmm2, xmm15
randomx_isn_224: randomx_isn_224:
; IADD_RS r5, r4, 312567979, LSH 1 ; IADD_RS r5, r4, 312567979, LSH 1
lea r13, [r13+r12*4+312567979] lea r13, [r13+r12*2+312567979]
randomx_isn_225: randomx_isn_225:
; ISTORE L2[r2+260885699], r1 ; ISTORE L2[r2+260885699], r1
lea eax, [r10d+260885699] lea eax, [r10d+260885699]
@ -898,7 +898,7 @@ randomx_isn_229:
xchg r8, r14 xchg r8, r14
randomx_isn_230: randomx_isn_230:
; IADD_RS r2, r7, LSH 2 ; IADD_RS r2, r7, LSH 2
lea r10, [r10+r15*1] lea r10, [r10+r15*4]
randomx_isn_231: randomx_isn_231:
; FMUL_R e1, a0 ; FMUL_R e1, a0
mulpd xmm5, xmm8 mulpd xmm5, xmm8
@ -924,7 +924,7 @@ randomx_isn_237:
subpd xmm1, xmm11 subpd xmm1, xmm11
randomx_isn_238: randomx_isn_238:
; IADD_RS r4, r2, LSH 1 ; IADD_RS r4, r2, LSH 1
lea r12, [r12+r10*4] lea r12, [r12+r10*2]
randomx_isn_239: randomx_isn_239:
; IMUL_RCP r7, 3065786637 ; IMUL_RCP r7, 3065786637
mov rax, 12921343181238534701 mov rax, 12921343181238534701
@ -978,13 +978,13 @@ randomx_isn_250:
addpd xmm3, xmm8 addpd xmm3, xmm8
randomx_isn_251: randomx_isn_251:
; IADD_RS r0, r0, LSH 0 ; IADD_RS r0, r0, LSH 0
lea r8, [r8+r8*4] lea r8, [r8+r8*1]
randomx_isn_252: randomx_isn_252:
; ISUB_R r4, r2 ; ISUB_R r4, r2
sub r12, r10 sub r12, r10
randomx_isn_253: randomx_isn_253:
; IADD_RS r5, r4, 256175395, LSH 0 ; IADD_RS r5, r4, 256175395, LSH 0
lea r13, [r13+r12*4+256175395] lea r13, [r13+r12*1+256175395]
randomx_isn_254: randomx_isn_254:
; IADD_RS r6, r7, LSH 2 ; IADD_RS r6, r7, LSH 2
lea r14, [r14+r15*4] lea r14, [r14+r15*4]

View file

@ -71,7 +71,7 @@ namespace randomx {
asmCode << "xor " << regR[instr.dst] << ", " << regR[instr.src] << std::endl; asmCode << "xor " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
break; break;
case SuperscalarInstructionType::IADD_RS: case SuperscalarInstructionType::IADD_RS:
asmCode << "lea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.getModShift2())) << "]" << std::endl; asmCode << "lea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.getModMem())) << "]" << std::endl;
break; break;
case SuperscalarInstructionType::IMUL_R: case SuperscalarInstructionType::IMUL_R:
asmCode << "imul " << regR[instr.dst] << ", " << regR[instr.src] << std::endl; asmCode << "imul " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
@ -180,7 +180,7 @@ namespace randomx {
asmCode << regR[instr.dst] << " ^= " << regR[instr.src] << ";" << std::endl; asmCode << regR[instr.dst] << " ^= " << regR[instr.src] << ";" << std::endl;
break; break;
case SuperscalarInstructionType::IADD_RS: case SuperscalarInstructionType::IADD_RS:
asmCode << regR[instr.dst] << " += " << regR[instr.src] << "*" << (1 << (instr.getModShift2())) << ";" << std::endl; asmCode << regR[instr.dst] << " += " << regR[instr.src] << "*" << (1 << (instr.getModMem())) << ";" << std::endl;
break; break;
case SuperscalarInstructionType::IMUL_R: case SuperscalarInstructionType::IMUL_R:
asmCode << regR[instr.dst] << " *= " << regR[instr.src] << ";" << std::endl; asmCode << regR[instr.dst] << " *= " << regR[instr.src] << ";" << std::endl;
@ -275,9 +275,9 @@ namespace randomx {
void AssemblyGeneratorX86::h_IADD_RS(Instruction& instr, int i) { void AssemblyGeneratorX86::h_IADD_RS(Instruction& instr, int i) {
registerUsage[instr.dst] = i; registerUsage[instr.dst] = i;
if(instr.dst == RegisterNeedsDisplacement) if(instr.dst == RegisterNeedsDisplacement)
asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.getModShift2())) << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl; asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.getModMem())) << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl;
else else
asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.getModShift2())) << "]" << std::endl; asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.getModMem())) << "]" << std::endl;
traceint(instr); traceint(instr);
} }
@ -442,7 +442,6 @@ namespace randomx {
void AssemblyGeneratorX86::h_IMUL_RCP(Instruction& instr, int i) { void AssemblyGeneratorX86::h_IMUL_RCP(Instruction& instr, int i) {
if (instr.getImm32() != 0) { if (instr.getImm32() != 0) {
registerUsage[instr.dst] = i; registerUsage[instr.dst] = i;
uint32_t divisor = instr.getImm32();
asmCode << "\tmov rax, " << randomx_reciprocal(instr.getImm32()) << std::endl; asmCode << "\tmov rax, " << randomx_reciprocal(instr.getImm32()) << std::endl;
asmCode << "\timul " << regR[instr.dst] << ", rax" << std::endl; asmCode << "\timul " << regR[instr.dst] << ", rax" << std::endl;
traceint(instr); traceint(instr);
@ -566,7 +565,7 @@ namespace randomx {
} }
void AssemblyGeneratorX86::handleCondition(Instruction& instr, int i) { void AssemblyGeneratorX86::handleCondition(Instruction& instr, int i) {
const int shift = instr.getModShift3(); const int shift = instr.getModShift();
const int conditionMask = ((1 << RANDOMX_CONDITION_BITS) - 1) << shift; const int conditionMask = ((1 << RANDOMX_CONDITION_BITS) - 1) << shift;
int reg = getConditionRegister(); int reg = getConditionRegister();
int target = registerUsage[reg] + 1; int target = registerUsage[reg] + 1;

View file

@ -31,7 +31,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
//Argon2d salt //Argon2d salt
#define RANDOMX_ARGON_SALT "RandomX\x03" #define RANDOMX_ARGON_SALT "RandomX\x03"
//Number of random Cache accesses per Dataset block. Minimum is 2. //Number of random Cache accesses per Dataset item. Minimum is 2.
#define RANDOMX_CACHE_ACCESSES 8 #define RANDOMX_CACHE_ACCESSES 8
#define RANDOMX_SUPERSCALAR_LATENCY 170 #define RANDOMX_SUPERSCALAR_LATENCY 170

View file

@ -78,16 +78,13 @@ namespace randomx {
return os; return os;
} }
int getModMem() const { int getModMem() const {
return mod % 4; return mod % 4; //bits 0-1
} }
int getModCond() const { int getModCond() const {
return (mod >> 2) & 7; return (mod >> 2) % 8; //bits 2-4
} }
int getModShift3() const { int getModShift() const {
return mod >> 5; return mod >> 5; //bits 5-7
}
int getModShift2() const {
return mod >> 6;
} }
void setMod(uint8_t val) { void setMod(uint8_t val) {
mod = val; mod = val;

View file

@ -357,7 +357,7 @@ namespace randomx {
case randomx::SuperscalarInstructionType::IADD_RS: case randomx::SuperscalarInstructionType::IADD_RS:
emit(REX_LEA); emit(REX_LEA);
emitByte(0x04 + 8 * instr.dst); emitByte(0x04 + 8 * instr.dst);
genSIB(instr.getModShift2(), instr.src, instr.dst); genSIB(instr.getModMem(), instr.src, instr.dst);
break; break;
case randomx::SuperscalarInstructionType::IMUL_R: case randomx::SuperscalarInstructionType::IMUL_R:
emit(REX_IMUL_RR); emit(REX_IMUL_RR);
@ -481,7 +481,7 @@ namespace randomx {
emitByte(0xac); emitByte(0xac);
else else
emitByte(0x04 + 8 * instr.dst); emitByte(0x04 + 8 * instr.dst);
genSIB(instr.getModShift2(), instr.src, instr.dst); genSIB(instr.getModMem(), instr.src, instr.dst);
if (instr.dst == RegisterNeedsDisplacement) if (instr.dst == RegisterNeedsDisplacement)
emit32(instr.getImm32()); emit32(instr.getImm32());
} }
@ -882,7 +882,7 @@ namespace randomx {
} }
void JitCompilerX86::handleCondition(Instruction& instr, int i) { void JitCompilerX86::handleCondition(Instruction& instr, int i) {
const int shift = instr.getModShift3(); const int shift = instr.getModShift();
const int conditionMask = ((1 << RANDOMX_CONDITION_BITS) - 1) << shift; const int conditionMask = ((1 << RANDOMX_CONDITION_BITS) - 1) << shift;
int reg = getConditionRegister(); int reg = getConditionRegister();
int target = registerUsage[reg] + 1; int target = registerUsage[reg] + 1;

View file

@ -849,7 +849,7 @@ namespace randomx {
r[instr.dst] ^= r[instr.src]; r[instr.dst] ^= r[instr.src];
break; break;
case randomx::SuperscalarInstructionType::IADD_RS: case randomx::SuperscalarInstructionType::IADD_RS:
r[instr.dst] += r[instr.src] << instr.getModShift2(); r[instr.dst] += r[instr.src] << instr.getModMem();
break; break;
case randomx::SuperscalarInstructionType::IMUL_R: case randomx::SuperscalarInstructionType::IMUL_R:
r[instr.dst] *= r[instr.src]; r[instr.dst] *= r[instr.src];

View file

@ -225,7 +225,7 @@ int main(int argc, char** argv) {
std::cout << "Calculated result: "; std::cout << "Calculated result: ";
result.print(std::cout); result.print(std::cout);
if (noncesCount == 1000 && seedValue == 0) if (noncesCount == 1000 && seedValue == 0)
std::cout << "Reference result: 918a8bc3ce0e537eec9d3c5e1a8bb3204ae3954f14c50c14810b38e49588a9e0" << std::endl; std::cout << "Reference result: 89336a85bf6d1e83eb20fbc92170705ded9b42285b30178ed8e855d65c4c4b69" << std::endl;
if (!miningMode) { if (!miningMode) {
std::cout << "Performance: " << 1000 * elapsed / noncesCount << " ms per hash" << std::endl; std::cout << "Performance: " << 1000 * elapsed / noncesCount << " ms per hash" << std::endl;
} }

View file

@ -434,12 +434,12 @@ namespace randomx {
ibc.idst = &r[dst]; ibc.idst = &r[dst];
if (dst != RegisterNeedsDisplacement) { if (dst != RegisterNeedsDisplacement) {
ibc.isrc = &r[src]; ibc.isrc = &r[src];
ibc.shift = instr.getModShift2(); ibc.shift = instr.getModMem();
ibc.imm = 0; ibc.imm = 0;
} }
else { else {
ibc.isrc = &r[src]; ibc.isrc = &r[src];
ibc.shift = instr.getModShift2(); ibc.shift = instr.getModMem();
ibc.imm = signExtend2sCompl(instr.getImm32()); ibc.imm = signExtend2sCompl(instr.getImm32());
} }
registerUsage[instr.dst] = i; registerUsage[instr.dst] = i;
@ -763,7 +763,7 @@ namespace randomx {
//jump condition //jump condition
int reg = getConditionRegister(registerUsage); int reg = getConditionRegister(registerUsage);
ibc.target = registerUsage[reg]; ibc.target = registerUsage[reg];
ibc.shift = instr.getModShift3(); ibc.shift = instr.getModShift();
ibc.creg = &r[reg]; ibc.creg = &r[reg];
for (unsigned j = 0; j < 8; ++j) { //mark all registers as used for (unsigned j = 0; j < 8; ++j) { //mark all registers as used
registerUsage[j] = i; registerUsage[j] = i;