Fixed JIT compiler not producing the same code as genAsm and genNative

This commit is contained in:
tevador 2019-02-15 16:43:52 +01:00
parent f0d52fcf4d
commit a145caa185
3 changed files with 50 additions and 39 deletions

View file

@@ -130,13 +130,13 @@ namespace RandomX {
static const uint8_t ADD_RAX_RCX[] = { 0x48, 0x01, 0xC8 }; static const uint8_t ADD_RAX_RCX[] = { 0x48, 0x01, 0xC8 };
static const uint8_t SAR_RAX_I8[] = { 0x48, 0xC1, 0xF8 }; static const uint8_t SAR_RAX_I8[] = { 0x48, 0xC1, 0xF8 };
static const uint8_t NEG_RAX[] = { 0x48, 0xF7, 0xD8 }; static const uint8_t NEG_RAX[] = { 0x48, 0xF7, 0xD8 };
static const uint8_t ADD_R_RAX[] = { 0x49, 0x01 }; static const uint8_t ADD_R_RAX[] = { 0x4C, 0x03 };
static const uint8_t XOR_EAX_EAX[] = { 0x31, 0xC0 }; static const uint8_t XOR_EAX_EAX[] = { 0x33, 0xC0 };
static const uint8_t ADD_RDX_R[] = { 0x4c, 0x01 }; static const uint8_t ADD_RDX_R[] = { 0x4c, 0x01 };
static const uint8_t SUB_RDX_R[] = { 0x4c, 0x29 }; static const uint8_t SUB_RDX_R[] = { 0x4c, 0x29 };
static const uint8_t SAR_RDX_I8[] = { 0x48, 0xC1, 0xFA }; static const uint8_t SAR_RDX_I8[] = { 0x48, 0xC1, 0xFA };
static const uint8_t TEST_RDX_RDX[] = { 0x48, 0x85, 0xD2 }; static const uint8_t TEST_RDX_RDX[] = { 0x48, 0x85, 0xD2 };
static const uint8_t SETS_AL_ADD_RDX_RAX[] = { 0x0F, 0x98, 0xC0, 0x48, 0x01, 0xC2 }; static const uint8_t SETS_AL_ADD_RDX_RAX[] = { 0x0F, 0x98, 0xC0, 0x48, 0x03, 0xD0 };
static const uint8_t REX_NEG[] = { 0x49, 0xF7 }; static const uint8_t REX_NEG[] = { 0x49, 0xF7 };
static const uint8_t REX_XOR_RR[] = { 0x4D, 0x33 }; static const uint8_t REX_XOR_RR[] = { 0x4D, 0x33 };
static const uint8_t REX_XOR_RI[] = { 0x49, 0x81 }; static const uint8_t REX_XOR_RI[] = { 0x49, 0x81 };
@@ -272,7 +272,7 @@ namespace RandomX {
} }
void JitCompilerX86::genSIB(int scale, int index, int base) { void JitCompilerX86::genSIB(int scale, int index, int base) {
emitByte((scale << 5) | (index << 3) | base); emitByte((scale << 6) | (index << 3) | base);
} }
void JitCompilerX86::h_IADD_RC(Instruction& instr) { void JitCompilerX86::h_IADD_RC(Instruction& instr) {
@@ -290,7 +290,7 @@ namespace RandomX {
else { else {
emit(REX_81); emit(REX_81);
emitByte(0xe8 + instr.dst); emitByte(0xe8 + instr.dst);
genAddressImm(instr); emit32(instr.imm32);
} }
} }
@@ -311,7 +311,7 @@ namespace RandomX {
void JitCompilerX86::h_IMUL_9C(Instruction& instr) { void JitCompilerX86::h_IMUL_9C(Instruction& instr) {
emit(REX_LEA); emit(REX_LEA);
emitByte(0x84 + 8 * instr.dst); emitByte(0x84 + 8 * instr.dst);
genSIB(3, instr.src, instr.dst); genSIB(3, instr.dst, instr.dst);
emit32(instr.imm32); emit32(instr.imm32);
} }
@@ -323,7 +323,7 @@ namespace RandomX {
else { else {
emit(REX_IMUL_RRI); emit(REX_IMUL_RRI);
emitByte(0xc0 + 9 * instr.dst); emitByte(0xc0 + 9 * instr.dst);
genAddressImm(instr); emit32(instr.imm32);
} }
} }
@@ -424,7 +424,7 @@ namespace RandomX {
emit(REX_SHR_RDX); emit(REX_SHR_RDX);
emitByte(mi.post_shift); emitByte(mi.post_shift);
} }
emit(REX_ADD_RR); emit(REX_ADD_RM);
emitByte(0xc2 + 8 * instr.dst); emitByte(0xc2 + 8 * instr.dst);
} }
else { //divisor is a power of two else { //divisor is a power of two
@@ -440,7 +440,7 @@ namespace RandomX {
} }
void JitCompilerX86::h_ISDIV_C(Instruction& instr) { void JitCompilerX86::h_ISDIV_C(Instruction& instr) {
int64_t divisor = instr.imm32; int64_t divisor = (int32_t)instr.imm32;
if ((divisor & -divisor) == divisor || (divisor & -divisor) == -divisor) { if ((divisor & -divisor) == divisor || (divisor & -divisor) == -divisor) {
emit(REX_MOV_RR64); emit(REX_MOV_RR64);
emitByte(0xc0 + instr.dst); emitByte(0xc0 + instr.dst);
@@ -493,7 +493,7 @@ namespace RandomX {
emit(TEST_RDX_RDX); emit(TEST_RDX_RDX);
emit(SETS_AL_ADD_RDX_RAX); emit(SETS_AL_ADD_RDX_RAX);
emit(ADD_R_RAX); emit(ADD_R_RAX);
emitByte(0xd0 + instr.dst); emitByte(0xc2 + 8 * instr.dst);
} }
} }
@@ -559,7 +559,7 @@ namespace RandomX {
void JitCompilerX86::h_ISWAP_R(Instruction& instr) { void JitCompilerX86::h_ISWAP_R(Instruction& instr) {
if (instr.src != instr.dst) { if (instr.src != instr.dst) {
emit(REX_XCHG); emit(REX_XCHG);
emitByte(0xc0 + instr.dst + 8 * instr.src); emitByte(0xc0 + instr.src + 8 * instr.dst);
} }
} }

View file

@@ -78,13 +78,14 @@ executeProgram PROC
movdqu xmmword ptr [rsp+16], xmm14 movdqu xmmword ptr [rsp+16], xmm14
movdqu xmmword ptr [rsp+0], xmm15 movdqu xmmword ptr [rsp+0], xmm15
; function arguments ;# function arguments
push rcx ; RegisterFile& registerFile push rcx ;# RegisterFile& registerFile
mov rbp, qword ptr [rdx] ; "mx", "ma" mov rbp, qword ptr [rdx] ;# "mx", "ma"
mov eax, ebp ; "mx" mov rdi, qword ptr [rdx+8] ;# uint8_t* dataset
mov rdi, qword ptr [rdx+8] ; uint8_t* dataset mov rsi, r8 ;# uint8_t* scratchpad
mov rsi, r8 ; convertible_t* scratchpad mov rbx, r9 ;# loop counter
mov rbx, r9 ; loop counter
mov rax, rbp
;# zero integer registers ;# zero integer registers
xor r8, r8 xor r8, r8
@@ -114,16 +115,16 @@ minDbl:
absMask: absMask:
db 255, 255, 255, 255, 255, 255, 255, 127, 255, 255, 255, 255, 255, 255, 255, 127 db 255, 255, 255, 255, 255, 255, 255, 127, 255, 255, 255, 255, 255, 255, 255, 127
signMask: signMask:
db 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0, 0, 128 db 0, 0, 0, 0, 0, 0, 240, 129, 0, 0, 0, 0, 0, 0, 240, 129
ALIGN 64 ALIGN 64
program_begin: program_begin:
xor rax, r8 ;# read address register 1 xor rax, r8 ;# read address register 1
xor rax, r9 xor rax, r10
mov rdx, rax mov rdx, rax
and eax, 1048512 and eax, 2097088
push rax
lea rcx, [rsi+rax] lea rcx, [rsi+rax]
push rcx
xor r8, qword ptr [rcx+0] xor r8, qword ptr [rcx+0]
xor r9, qword ptr [rcx+8] xor r9, qword ptr [rcx+8]
xor r10, qword ptr [rcx+16] xor r10, qword ptr [rcx+16]
@@ -133,9 +134,9 @@ program_begin:
xor r14, qword ptr [rcx+48] xor r14, qword ptr [rcx+48]
xor r15, qword ptr [rcx+56] xor r15, qword ptr [rcx+56]
ror rdx, 32 ror rdx, 32
and edx, 1048512 and edx, 2097088
push rdx
lea rcx, [rsi+rdx] lea rcx, [rsi+rdx]
push rcx
cvtdq2pd xmm0, qword ptr [rcx+0] cvtdq2pd xmm0, qword ptr [rcx+0]
cvtdq2pd xmm1, qword ptr [rcx+8] cvtdq2pd xmm1, qword ptr [rcx+8]
cvtdq2pd xmm2, qword ptr [rcx+16] cvtdq2pd xmm2, qword ptr [rcx+16]
@@ -152,9 +153,10 @@ program_begin:
;# 256 instructions ;# 256 instructions
include program.inc include program.inc
mov eax, r8d ;# read address register 1 mov eax, r12d ;# read address register 1
xor eax, r9d ;# read address register 2 xor eax, r15d ;# read address register 2
xor rbp, rax ;# modify "mx" xor rbp, rax ;# modify "mx"
xor eax, eax
and rbp, -64 ;# align "mx" to the start of a cache line and rbp, -64 ;# align "mx" to the start of a cache line
mov edx, ebp ;# edx = mx mov edx, ebp ;# edx = mx
prefetchnta byte ptr [rdi+rdx] prefetchnta byte ptr [rdi+rdx]
@@ -169,8 +171,7 @@ program_begin:
xor r13, qword ptr [rcx+40] xor r13, qword ptr [rcx+40]
xor r14, qword ptr [rcx+48] xor r14, qword ptr [rcx+48]
xor r15, qword ptr [rcx+56] xor r15, qword ptr [rcx+56]
pop rax pop rcx
lea rcx, [rsi+rax]
mov qword ptr [rcx+0], r8 mov qword ptr [rcx+0], r8
mov qword ptr [rcx+8], r9 mov qword ptr [rcx+8], r9
mov qword ptr [rcx+16], r10 mov qword ptr [rcx+16], r10
@@ -179,8 +180,7 @@ program_begin:
mov qword ptr [rcx+40], r13 mov qword ptr [rcx+40], r13
mov qword ptr [rcx+48], r14 mov qword ptr [rcx+48], r14
mov qword ptr [rcx+56], r15 mov qword ptr [rcx+56], r15
pop rax pop rcx
lea rcx, [rsi+rax]
mulpd xmm0, xmm4 mulpd xmm0, xmm4
mulpd xmm1, xmm5 mulpd xmm1, xmm5
mulpd xmm2, xmm6 mulpd xmm2, xmm6
@@ -189,8 +189,7 @@ program_begin:
movapd xmmword ptr [rcx+16], xmm1 movapd xmmword ptr [rcx+16], xmm1
movapd xmmword ptr [rcx+32], xmm2 movapd xmmword ptr [rcx+32], xmm2
movapd xmmword ptr [rcx+48], xmm3 movapd xmmword ptr [rcx+48], xmm3
xor eax, eax sub ebx, 1
dec ebx
jnz program_begin jnz program_begin
rx_finish: rx_finish:

View file

@@ -123,29 +123,35 @@ void printUsage(const char* executable) {
std::cout << " --genNative generate RandomX code for nonce N" << std::endl; std::cout << " --genNative generate RandomX code for nonce N" << std::endl;
} }
template<bool softAes>
void generateAsm(int nonce) { void generateAsm(int nonce) {
uint64_t hash[8]; alignas(16) uint64_t hash[8];
uint8_t blockTemplate[sizeof(blockTemplate__)]; uint8_t blockTemplate[sizeof(blockTemplate__)];
memcpy(blockTemplate, blockTemplate__, sizeof(blockTemplate)); memcpy(blockTemplate, blockTemplate__, sizeof(blockTemplate));
int* noncePtr = (int*)(blockTemplate + 39); int* noncePtr = (int*)(blockTemplate + 39);
*noncePtr = nonce; *noncePtr = nonce;
blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0); blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0);
uint8_t scratchpad[RandomX::ScratchpadSize];
fillAes1Rx4<softAes>((void*)hash, RandomX::ScratchpadSize, scratchpad);
RandomX::AssemblyGeneratorX86 asmX86; RandomX::AssemblyGeneratorX86 asmX86;
RandomX::Program p; RandomX::Program p;
fillAes1Rx4<false>(hash, sizeof(p), &p); fillAes1Rx4<softAes>(hash, sizeof(p), &p);
asmX86.generateProgram(p); asmX86.generateProgram(p);
asmX86.printCode(std::cout); asmX86.printCode(std::cout);
} }
template<bool softAes>
void generateNative(int nonce) { void generateNative(int nonce) {
uint64_t hash[4]; alignas(16) uint64_t hash[8];
uint8_t blockTemplate[sizeof(blockTemplate__)]; uint8_t blockTemplate[sizeof(blockTemplate__)];
memcpy(blockTemplate, blockTemplate__, sizeof(blockTemplate)); memcpy(blockTemplate, blockTemplate__, sizeof(blockTemplate));
int* noncePtr = (int*)(blockTemplate + 39); int* noncePtr = (int*)(blockTemplate + 39);
*noncePtr = nonce; *noncePtr = nonce;
blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0); blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0);
uint8_t scratchpad[RandomX::ScratchpadSize];
fillAes1Rx4<softAes>((void*)hash, RandomX::ScratchpadSize, scratchpad);
alignas(16) RandomX::Program prog; alignas(16) RandomX::Program prog;
fillAes1Rx4<false>((void*)hash, sizeof(prog), &prog); fillAes1Rx4<softAes>((void*)hash, sizeof(prog), &prog);
for (int i = 0; i < RandomX::ProgramLength; ++i) { for (int i = 0; i < RandomX::ProgramLength; ++i) {
prog(i).dst %= 8; prog(i).dst %= 8;
prog(i).src %= 8; prog(i).src %= 8;
@@ -181,7 +187,7 @@ void mine(RandomX::VirtualMachine* vm, std::atomic<int>& atomicNonce, AtomicHash
result.xorWith(hash); result.xorWith(hash);
if (RandomX::trace) { if (RandomX::trace) {
std::cout << "Nonce: " << nonce << " "; std::cout << "Nonce: " << nonce << " ";
outputHex(std::cout, (char*)hash, sizeof(hash)); outputHex(std::cout, (char*)hash, 16);
std::cout << std::endl; std::cout << std::endl;
} }
nonce = atomicNonce.fetch_add(1); nonce = atomicNonce.fetch_add(1);
@@ -208,12 +214,18 @@ int main(int argc, char** argv) {
readOption("--genNative", argc, argv, genNative); readOption("--genNative", argc, argv, genNative);
if (genAsm) { if (genAsm) {
generateAsm(programCount); if (softAes)
generateAsm<true>(programCount);
else
generateAsm<false>(programCount);
return 0; return 0;
} }
if (genNative) { if (genNative) {
generateNative(programCount); if (softAes)
generateNative<true>(programCount);
else
generateNative<false>(programCount);
return 0; return 0;
} }