// Review preamble for this patch. The text below is a unified diff whose line
// breaks were collapsed in this view; it is kept byte-for-byte. Text placed
// before the first "diff --git" header is ignored by patch tools.
//
// Purpose: add optional tracing, guarded by `trace`, to the x86 assembly
// generator and to the interpreted VM, and adjust several interpreter
// operations.
//
// src/AssemblyGeneratorX86.cpp / .hpp
//   - Adds traceint/traceflt/tracenop. When `trace` is set each one appends a
//     `push` to the generated assembly: traceint pushes regR[instr.dst],
//     traceflt and tracenop push the literal 0.
//   - The instruction handlers touched by the hunks now end with one of these
//     calls. Branches that emit no instruction call tracenop instead of
//     traceint (e.g. h_ISWAP_R when instr.src == instr.dst). h_ISTORE,
//     h_FSTORE and h_NOP always call tracenop.
//
// src/InterpretedVirtualMachine.cpp
//   - Adds file-local print(int_reg_t), print(__m128d) and printState(...),
//     which write registers to std::cout as zero-padded hexadecimal.
//   - IMUL_9C: `+= 9 * dst + imm` becomes `+= 8 * dst + imm`, i.e. the result
//     is 9 * dst + imm, which is what the generator's
//     `lea dst, [dst+dst*8+imm]` computes.
//   - FDIV_M: the loaded operand is now passed through _mm_abs before the
//     division.
//   - COND_R / COND_M: condition() is now called with ibc.condition as the
//     first argument.
//   - After every executed bytecode, when `trace` is set, prints *ibc.idst for
//     types below 20 and for types 31 and 32, otherwise prints 0.
//   - execute(): both scratchpad addresses are now derived from
//     spMix = r[readReg0] ^ r[readReg1] (spAddr0 from spMix, spAddr1 from
//     spMix >> 32) before the loads; f[i] is overwritten with f[i] * e[i]
//     before being stored; trace dumps are printed before the loop, after the
//     loads and before the stores.
//   - ISTORE bytecode setup now fills ibc.memMask from instr.mod % 4.
//
// src/executeProgram-win64.asm
//   - Wraps the existing loop epilogue in `IF 1 ... else ... endif`. The else
//     branch (not assembled while the condition is 1) copies 1024 qwords from
//     [rsp] to the address held in rsi, adds 8192 to rsp, pops twice and
//     restores rsi/rdi.
//
// src/main.cpp
//   - Adds a commented-out loop that would print ProgramLength qwords read
//     from the scratchpad.
//
// NOTE(review): print()/printState() set std::hex and std::setfill('0') on
//   std::cout and never restore them. Both are persistent stream state, so
//   later std::cout output stays hexadecimal and zero-filled unless the caller
//   resets it (the trace code itself uses std::dec for `iter`) - confirm this
//   is acceptable for the non-trace output that follows.
// NOTE(review): print(__m128d) reads the vector through a uint64_t* cast;
//   consider copying the bytes out instead of dereferencing a pointer of a
//   different type - TODO confirm with the supported compilers.
// NOTE(review): the trace check compares ibc.type against the literals 20, 31
//   and 32. The numeric order of InstructionType is not visible in this patch;
//   presumably these separate integer-destination types from the rest -
//   verify against the enum, or use named enumerators.
// NOTE(review): the "ma/mx" lines print mem.ma and mem.mx back to back with no
//   separator; presumably meant to read as one 16-digit value - confirm.
// NOTE(review): with `trace` set the generated code pushes one qword per
//   traced instruction, while the only stack clean-up visible here
//   (`add rsp, 8192`) sits in the disabled else branch. Presumably `trace` and
//   the `IF` switch must be flipped together - verify before enabling tracing
//   on the compiled path.
diff --git a/src/AssemblyGeneratorX86.cpp b/src/AssemblyGeneratorX86.cpp index 27ec601..c20138e 100644 --- a/src/AssemblyGeneratorX86.cpp +++ b/src/AssemblyGeneratorX86.cpp @@ -58,6 +58,24 @@ namespace RandomX { } } + void AssemblyGeneratorX86::traceint(Instruction& instr) { + if (trace) { + asmCode << "\tpush " << regR[instr.dst] << std::endl; + } + } + + void AssemblyGeneratorX86::traceflt(Instruction& instr) { + if (trace) { + asmCode << "\tpush 0" << std::endl; + } + } + + void AssemblyGeneratorX86::tracenop(Instruction& instr) { + if (trace) { + asmCode << "\tpush 0" << std::endl; + } + } + void AssemblyGeneratorX86::generateCode(Instruction& instr, int i) { asmCode << "\t; " << instr; auto generator = engine[instr.opcode]; @@ -86,6 +104,7 @@ namespace RandomX { else { asmCode << "\tadd " << regR[instr.dst] << ", " << (int32_t)instr.imm32 << std::endl; } + traceint(instr); } //2.75 uOP @@ -97,11 +116,13 @@ namespace RandomX { else { asmCode << "\tadd " << regR[instr.dst] << ", qword ptr [rsi+" << genAddressImm(instr) << "]" << std::endl; } + traceint(instr); } //1 uOP void AssemblyGeneratorX86::h_IADD_RC(Instruction& instr, int i) { asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << std::showpos << (int32_t)instr.imm32 << std::noshowpos << "]" << std::endl; + traceint(instr); } //1 uOP @@ -112,6 +133,7 @@ namespace RandomX { else { asmCode << "\tsub " << regR[instr.dst] << ", " << (int32_t)instr.imm32 << std::endl; } + traceint(instr); } //2.75 uOP @@ -123,11 +145,13 @@ namespace RandomX { else { asmCode << "\tsub " << regR[instr.dst] << ", qword ptr [rsi+" << genAddressImm(instr) << "]" << std::endl; } + traceint(instr); } //1 uOP void AssemblyGeneratorX86::h_IMUL_9C(Instruction& instr, int i) { asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.dst] << "*8" << std::showpos << (int32_t)instr.imm32 << std::noshowpos << "]" << std::endl; + traceint(instr); } //1 uOP @@ -138,6 
+162,7 @@ namespace RandomX { else { asmCode << "\timul " << regR[instr.dst] << ", " << (int32_t)instr.imm32 << std::endl; } + traceint(instr); } //2.75 uOP @@ -149,6 +174,7 @@ namespace RandomX { else { asmCode << "\timul " << regR[instr.dst] << ", qword ptr [rsi+" << genAddressImm(instr) << "]" << std::endl; } + traceint(instr); } //4 uOPs @@ -156,6 +182,7 @@ namespace RandomX { asmCode << "\tmov rax, " << regR[instr.dst] << std::endl; asmCode << "\tmul " << regR[instr.src] << std::endl; asmCode << "\tmov " << regR[instr.dst] << ", rdx" << std::endl; + traceint(instr); } //5.75 uOPs @@ -170,6 +197,7 @@ namespace RandomX { asmCode << "\tmul qword ptr [rsi+" << genAddressImm(instr) << "]" << std::endl; } asmCode << "\tmov " << regR[instr.dst] << ", rdx" << std::endl; + traceint(instr); } //4 uOPs @@ -177,6 +205,7 @@ namespace RandomX { asmCode << "\tmov rax, " << regR[instr.dst] << std::endl; asmCode << "\timul " << regR[instr.src] << std::endl; asmCode << "\tmov " << regR[instr.dst] << ", rdx" << std::endl; + traceint(instr); } //5.75 uOPs @@ -191,11 +220,13 @@ namespace RandomX { asmCode << "\timul qword ptr [rsi+" << genAddressImm(instr) << "]" << std::endl; } asmCode << "\tmov " << regR[instr.dst] << ", rdx" << std::endl; + traceint(instr); } //1 uOP void AssemblyGeneratorX86::h_INEG_R(Instruction& instr, int i) { asmCode << "\tneg " << regR[instr.dst] << std::endl; + traceint(instr); } //1 uOP @@ -206,6 +237,7 @@ namespace RandomX { else { asmCode << "\txor " << regR[instr.dst] << ", " << (int32_t)instr.imm32 << std::endl; } + traceint(instr); } //2.75 uOP @@ -217,6 +249,7 @@ namespace RandomX { else { asmCode << "\txor " << regR[instr.dst] << ", qword ptr [rsi+" << genAddressImm(instr) << "]" << std::endl; } + traceint(instr); } //1.75 uOPs @@ -228,6 +261,7 @@ namespace RandomX { else { asmCode << "\tror " << regR[instr.dst] << ", " << (instr.imm32 & 63) << std::endl; } + traceint(instr); } //1.75 uOPs @@ -239,6 +273,7 @@ namespace RandomX { else { asmCode << 
"\trol " << regR[instr.dst] << ", " << (instr.imm32 & 63) << std::endl; } + traceint(instr); } //~6 uOPs @@ -273,7 +308,11 @@ namespace RandomX { if(shift > 0) asmCode << "\tshr " << regR[instr.dst] << ", " << shift << std::endl; } - } + traceint(instr); + } + else { + tracenop(instr); + } } //~8.5 uOPs @@ -300,6 +339,7 @@ namespace RandomX { if (negative) asmCode << "\tneg rax" << std::endl; asmCode << "\tadd " << regR[instr.dst] << ", rax" << std::endl; + traceint(instr); } else if (divisor != 0) { magics_info mi = compute_signed_magic_info(divisor); @@ -325,6 +365,10 @@ namespace RandomX { asmCode << "\tsets al" << std::endl; asmCode << "\tadd rdx, rax" << std::endl; asmCode << "\tadd " << regR[instr.dst] << ", rdx" << std::endl; + traceint(instr); + } + else { + tracenop(instr); } } @@ -332,12 +376,17 @@ namespace RandomX { void AssemblyGeneratorX86::h_ISWAP_R(Instruction& instr, int i) { if (instr.src != instr.dst) { asmCode << "\txchg " << regR[instr.dst] << ", " << regR[instr.src] << std::endl; + traceint(instr); + } + else { + tracenop(instr); } } //1 uOPs void AssemblyGeneratorX86::h_FSWAP_R(Instruction& instr, int i) { asmCode << "\tshufpd " << regFE[instr.dst] << ", " << regFE[instr.dst] << ", 1" << std::endl; + traceflt(instr); } //1 uOP @@ -346,6 +395,7 @@ namespace RandomX { instr.src %= 4; asmCode << "\taddpd " << regF[instr.dst] << ", " << regA[instr.src] << std::endl; //asmCode << "\t" << fsumInstr[instr.mod % 4] << " " << signMask << ", " << regF[instr.dst] << std::endl; + traceflt(instr); } //5 uOPs @@ -354,6 +404,7 @@ namespace RandomX { genAddressReg(instr); asmCode << "\tcvtdq2pd xmm12, qword ptr [rsi+rax]" << std::endl; asmCode << "\taddpd " << regF[instr.dst] << ", xmm12" << std::endl; + traceflt(instr); } //1 uOP @@ -362,6 +413,7 @@ namespace RandomX { instr.src %= 4; asmCode << "\tsubpd " << regF[instr.dst] << ", " << regA[instr.src] << std::endl; //asmCode << "\t" << fsumInstr[instr.mod % 4] << " " << signMask << ", " << regF[instr.dst] 
<< std::endl; + traceflt(instr); } //5 uOPs @@ -370,12 +422,14 @@ namespace RandomX { genAddressReg(instr); asmCode << "\tcvtdq2pd xmm12, qword ptr [rsi+rax]" << std::endl; asmCode << "\tsubpd " << regF[instr.dst] << ", xmm12" << std::endl; + traceflt(instr); } //1 uOP void AssemblyGeneratorX86::h_FSCAL_R(Instruction& instr, int i) { instr.dst %= 4; asmCode << "\txorps " << regF[instr.dst] << ", " << signMask << std::endl; + traceflt(instr); } //1 uOPs @@ -383,6 +437,7 @@ namespace RandomX { instr.dst %= 4; instr.src %= 4; asmCode << "\tmulpd " << regE[instr.dst] << ", " << regA[instr.src] << std::endl; + traceflt(instr); } //7 uOPs @@ -393,6 +448,7 @@ namespace RandomX { asmCode << "\tandps xmm12, xmm14" << std::endl; asmCode << "\tmulpd " << regE[instr.dst] << ", xmm12" << std::endl; asmCode << "\tmaxpd " << regE[instr.dst] << ", " << dblMin << std::endl; + traceflt(instr); } //2 uOPs @@ -401,6 +457,7 @@ namespace RandomX { instr.src %= 4; asmCode << "\tdivpd " << regE[instr.dst] << ", " << regA[instr.src] << std::endl; asmCode << "\tmaxpd " << regE[instr.dst] << ", " << dblMin << std::endl; + traceflt(instr); } //7 uOPs @@ -411,12 +468,14 @@ namespace RandomX { asmCode << "\tandps xmm12, xmm14" << std::endl; asmCode << "\tdivpd " << regE[instr.dst] << ", xmm12" << std::endl; asmCode << "\tmaxpd " << regE[instr.dst] << ", " << dblMin << std::endl; + traceflt(instr); } //1 uOP void AssemblyGeneratorX86::h_FSQRT_R(Instruction& instr, int i) { instr.dst %= 4; asmCode << "\tsqrtpd " << regE[instr.dst] << ", " << regE[instr.dst] << std::endl; + traceflt(instr); } //6 uOPs @@ -429,6 +488,7 @@ namespace RandomX { asmCode << "\tor eax, 40896" << std::endl; asmCode << "\tmov dword ptr [rsp-8], eax" << std::endl; asmCode << "\tldmxcsr dword ptr [rsp-8]" << std::endl; + tracenop(instr); } static inline const char* condition(Instruction& instr) { @@ -461,6 +521,7 @@ namespace RandomX { asmCode << "\tcmp " << regR32[instr.src] << ", " << (int32_t)instr.imm32 << std::endl; 
asmCode << "\tset" << condition(instr) << " cl" << std::endl; asmCode << "\tadd " << regR[instr.dst] << ", rcx" << std::endl; + traceint(instr); } //6 uOPs @@ -470,22 +531,26 @@ namespace RandomX { asmCode << "\tcmp dword ptr [rsi+rax], " << (int32_t)instr.imm32 << std::endl; asmCode << "\tset" << condition(instr) << " cl" << std::endl; asmCode << "\tadd " << regR[instr.dst] << ", rcx" << std::endl; + traceint(instr); } //3 uOPs void AssemblyGeneratorX86::h_ISTORE(Instruction& instr, int i) { genAddressRegDst(instr); asmCode << "\tmov qword ptr [rsi+rax], " << regR[instr.src] << std::endl; + tracenop(instr); } //3 uOPs void AssemblyGeneratorX86::h_FSTORE(Instruction& instr, int i) { genAddressRegDst(instr, 16); asmCode << "\tmovapd xmmword ptr [rsi+rax], " << regFE[instr.src] << std::endl; + tracenop(instr); } void AssemblyGeneratorX86::h_NOP(Instruction& instr, int i) { asmCode << "\tnop" << std::endl; + tracenop(instr); } #include "instructionWeights.hpp" diff --git a/src/AssemblyGeneratorX86.hpp b/src/AssemblyGeneratorX86.hpp index affd65c..9968ebe 100644 --- a/src/AssemblyGeneratorX86.hpp +++ b/src/AssemblyGeneratorX86.hpp @@ -45,6 +45,10 @@ namespace RandomX { void generateCode(Instruction&, int); + void traceint(Instruction&); + void traceflt(Instruction&); + void tracenop(Instruction&); + void h_IADD_R(Instruction&, int); void h_IADD_M(Instruction&, int); void h_IADD_RC(Instruction&, int); diff --git a/src/InterpretedVirtualMachine.cpp b/src/InterpretedVirtualMachine.cpp index 455c089..50347f1 100644 --- a/src/InterpretedVirtualMachine.cpp +++ b/src/InterpretedVirtualMachine.cpp @@ -82,6 +82,31 @@ namespace RandomX { void InterpretedVirtualMachine::executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) { } + static void print(int_reg_t r) { + std::cout << std::hex << std::setw(16) << std::setfill('0') << r << std::endl; + } + + static void print(__m128d f) { + uint64_t lo = *(((uint64_t*)&f) + 0); + uint64_t hi = 
*(((uint64_t*)&f) + 1); + std::cout << std::hex << std::setw(16) << std::setfill('0') << hi << '-' << std::hex << std::setw(16) << std::setfill('0') << lo << std::endl; + } + + static void printState(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) { + for (int i = 0; i < 8; ++i) { + std::cout << "r" << i << " = "; print(r[i]); + } + for (int i = 0; i < 4; ++i) { + std::cout << "f" << i << " = "; print(f[i]); + } + for (int i = 0; i < 4; ++i) { + std::cout << "e" << i << " = "; print(e[i]); + } + for (int i = 0; i < 4; ++i) { + std::cout << "a" << i << " = "; print(a[i]); + } + } + FORCE_INLINE void InterpretedVirtualMachine::executeBytecode(int i, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) { auto& ibc = byteCode[i]; switch (ibc.type) @@ -107,7 +132,7 @@ namespace RandomX { } break; case InstructionType::IMUL_9C: { - *ibc.idst += 9 * *ibc.idst + ibc.imm; + *ibc.idst += 8 * *ibc.idst + ibc.imm; } break; case InstructionType::IMUL_R: { @@ -210,7 +235,7 @@ namespace RandomX { } break; case InstructionType::FDIV_M: { - __m128d fsrc = load_cvt_i32x2(scratchpad + (*ibc.isrc & ibc.memMask)); + __m128d fsrc = _mm_abs(load_cvt_i32x2(scratchpad + (*ibc.isrc & ibc.memMask))); __m128d fdst = _mm_div_pd(*ibc.fdst, fsrc); *ibc.fdst = _mm_max_pd(fdst, _mm_set_pd(DBL_MIN, DBL_MIN)); } break; @@ -220,11 +245,11 @@ namespace RandomX { } break; case InstructionType::COND_R: { - *ibc.idst += condition(*ibc.isrc, ibc.imm, ibc.condition) ? 1 : 0; + *ibc.idst += condition(ibc.condition, *ibc.isrc, ibc.imm) ? 1 : 0; } break; case InstructionType::COND_M: { - *ibc.idst += condition(load64(scratchpad + (*ibc.isrc & ibc.memMask)), ibc.imm, ibc.condition) ? 1 : 0; + *ibc.idst += condition(ibc.condition, load64(scratchpad + (*ibc.isrc & ibc.memMask)), ibc.imm) ? 
1 : 0; } break; case InstructionType::CFROUND: { @@ -242,6 +267,13 @@ namespace RandomX { default: UNREACHABLE; } + if (trace) { + //std::cout << program(i); + if(ibc.type < 20 || ibc.type == 31 || ibc.type == 32) + print(*ibc.idst); + else //if(ibc.type >= 20 && ibc.type <= 30) + print(0); + } } void InterpretedVirtualMachine::execute() { @@ -260,10 +292,20 @@ namespace RandomX { uint32_t spAddr0 = mem.mx; uint32_t spAddr1 = mem.ma; + if (trace) { + std::cout << "execute (reg: r" << readReg0 << ", r" << readReg1 << ", r" << readReg2 << ", r" << readReg3 << ")" << std::endl; + std::cout << "spAddr " << std::hex << std::setw(8) << std::setfill('0') << spAddr1 << " / " << std::setw(8) << std::setfill('0') << spAddr0 << std::endl; + std::cout << "ma/mx " << std::hex << std::setw(8) << std::setfill('0') << mem.ma << std::setw(8) << std::setfill('0') << mem.mx << std::endl; + printState(r, f, e, a); + } + for(unsigned iter = 0; iter < InstructionCount; ++iter) { //std::cout << "Iteration " << iter << std::endl; - spAddr0 ^= r[readReg0]; + uint64_t spMix = r[readReg0] ^ r[readReg1]; + spAddr0 ^= spMix; spAddr0 &= ScratchpadL3Mask64; + spAddr1 ^= spMix >> 32; + spAddr1 &= ScratchpadL3Mask64; r[0] ^= load64(scratchpad + spAddr0 + 0); r[1] ^= load64(scratchpad + spAddr0 + 8); @@ -274,9 +316,6 @@ namespace RandomX { r[6] ^= load64(scratchpad + spAddr0 + 48); r[7] ^= load64(scratchpad + spAddr0 + 56); - spAddr1 ^= r[readReg1]; - spAddr1 &= ScratchpadL3Mask64; - f[0] = load_cvt_i32x2(scratchpad + spAddr1 + 0); f[1] = load_cvt_i32x2(scratchpad + spAddr1 + 8); f[2] = load_cvt_i32x2(scratchpad + spAddr1 + 16); @@ -286,6 +325,14 @@ namespace RandomX { e[2] = _mm_abs(load_cvt_i32x2(scratchpad + spAddr1 + 48)); e[3] = _mm_abs(load_cvt_i32x2(scratchpad + spAddr1 + 56)); + if (trace) { + std::cout << "iteration " << std::dec << iter << std::endl; + std::cout << "spAddr " << std::hex << std::setw(8) << std::setfill('0') << spAddr1 << " / " << std::setw(8) << std::setfill('0') << 
spAddr0 << std::endl; + std::cout << "ma/mx " << std::hex << std::setw(8) << std::setfill('0') << mem.ma << std::setw(8) << std::setfill('0') << mem.mx << std::endl; + printState(r, f, e, a); + std::cout << "-----------------------------------" << std::endl; + } + executeBytecode<0>(r, f, e, a); if (asyncWorker) { @@ -309,6 +356,14 @@ namespace RandomX { std::swap(mem.mx, mem.ma); } + if (trace) { + std::cout << "iteration " << std::dec << iter << std::endl; + std::cout << "spAddr " << std::hex << std::setw(8) << std::setfill('0') << spAddr1 << " / " << std::setw(8) << std::setfill('0') << spAddr0 << std::endl; + std::cout << "ma/mx " << std::hex << std::setw(8) << std::setfill('0') << mem.ma << std::setw(8) << std::setfill('0') << mem.mx << std::endl; + printState(r, f, e, a); + std::cout << "===================================" << std::endl; + } + store64(scratchpad + spAddr1 + 0, r[0]); store64(scratchpad + spAddr1 + 8, r[1]); store64(scratchpad + spAddr1 + 16, r[2]); @@ -318,10 +373,15 @@ namespace RandomX { store64(scratchpad + spAddr1 + 48, r[6]); store64(scratchpad + spAddr1 + 56, r[7]); - _mm_store_pd((double*)(scratchpad + spAddr0 + 0), _mm_mul_pd(f[0], e[0])); - _mm_store_pd((double*)(scratchpad + spAddr0 + 16), _mm_mul_pd(f[1], e[1])); - _mm_store_pd((double*)(scratchpad + spAddr0 + 32), _mm_mul_pd(f[2], e[2])); - _mm_store_pd((double*)(scratchpad + spAddr0 + 48), _mm_mul_pd(f[3], e[3])); + f[0] = _mm_mul_pd(f[0], e[0]); + f[1] = _mm_mul_pd(f[1], e[1]); + f[2] = _mm_mul_pd(f[2], e[2]); + f[3] = _mm_mul_pd(f[3], e[3]); + + _mm_store_pd((double*)(scratchpad + spAddr0 + 0), f[0]); + _mm_store_pd((double*)(scratchpad + spAddr0 + 16), f[1]); + _mm_store_pd((double*)(scratchpad + spAddr0 + 32), f[2]); + _mm_store_pd((double*)(scratchpad + spAddr0 + 48), f[3]); spAddr0 = 0; spAddr1 = 0; @@ -719,6 +779,7 @@ namespace RandomX { ibc.type = InstructionType::ISTORE; ibc.idst = &r[dst]; ibc.isrc = &r[src]; + ibc.memMask = ((instr.mod % 4) ? 
ScratchpadL1Mask : ScratchpadL2Mask); } break; CASE_REP(FSTORE) { diff --git a/src/executeProgram-win64.asm b/src/executeProgram-win64.asm index b3528f2..d7d6f87 100644 --- a/src/executeProgram-win64.asm +++ b/src/executeProgram-win64.asm @@ -152,7 +152,7 @@ program_begin: ;# 256 instructions include program.inc - +IF 1 mov eax, r12d ;# read address register 1 xor eax, r15d ;# read address register 2 xor rbp, rax ;# modify "mx" @@ -189,6 +189,26 @@ program_begin: movapd xmmword ptr [rcx+16], xmm1 movapd xmmword ptr [rcx+32], xmm2 movapd xmmword ptr [rcx+48], xmm3 +else + ; memcpy trace from stack to scratchpad + mov rax, rsi + mov rdx, rdi + + cld + mov rsi, rsp + mov rdi, rax + mov rcx, 1024 + + rep movsq + + add rsp, 8192 + + pop rcx + pop rcx + + mov rsi, rax + mov rdi, rdx +endif sub ebx, 1 jnz program_begin diff --git a/src/main.cpp b/src/main.cpp index 57bafe7..a09c175 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -183,6 +183,12 @@ void mine(RandomX::VirtualMachine* vm, std::atomic& atomicNonce, AtomicHash fillAes1Rx4((void*)hash, sizeof(RandomX::Program), vm->getProgramBuffer()); vm->initialize(); vm->execute(); + /*if (RandomX::trace) { + for (int j = 0; j < RandomX::ProgramLength; ++j) { + uint64_t res = *(uint64_t*)(scratchpad + 8 * (RandomX::ProgramLength - 1 - j)); + std::cout << std::hex << std::setw(16) << std::setfill('0') << res << std::endl; + } + }*/ vm->getResult(scratchpad, RandomX::ScratchpadSize, hash); result.xorWith(hash); if (RandomX::trace) {