Fixed JIT compiler not producing the same code as genAsm and genNative

This commit is contained in:
tevador 2019-02-15 16:43:52 +01:00
parent f0d52fcf4d
commit a145caa185
3 changed files with 50 additions and 39 deletions

View file

@ -130,13 +130,13 @@ namespace RandomX {
// x86-64 machine-code fragments emitted by the JIT compiler.
// NOTE(review): this listing is a rendered diff without +/- markers. Three names
// (ADD_R_RAX, XOR_EAX_EAX, SETS_AL_ADD_RDX_RAX) are defined twice; presumably the
// earlier definition is the pre-commit line and the later one its replacement
// (standard diff ordering) -- confirm against the repository.
// 48 01 C8: REX.W, ADD r/m64,r64, ModRM 11 001 000 -> add rax, rcx
static const uint8_t ADD_RAX_RCX[] = { 0x48, 0x01, 0xC8 };
// 48 C1 F8: REX.W, shift group /7 (SAR) on rax; the imm8 shift count is not part of the table
static const uint8_t SAR_RAX_I8[] = { 0x48, 0xC1, 0xF8 };
// 48 F7 D8: REX.W, group F7 /3 -> neg rax
static const uint8_t NEG_RAX[] = { 0x48, 0xF7, 0xD8 };
// 49 01: REX.W+B with opcode 01 (ADD r/m64,r64); the ModRM byte is emitted separately by the user of this table
static const uint8_t ADD_R_RAX[] = { 0x49, 0x01 };
// 31 C0: XOR r/m32,r32 form of xor eax, eax
static const uint8_t XOR_EAX_EAX[] = { 0x31, 0xC0 };
// 4C 03: REX.W+R with opcode 03 (ADD r64,r/m64); the ModRM byte is emitted separately by the user of this table
static const uint8_t ADD_R_RAX[] = { 0x4C, 0x03 };
// 33 C0: XOR r32,r/m32 form of xor eax, eax (same instruction as 31 C0, different encoding)
static const uint8_t XOR_EAX_EAX[] = { 0x33, 0xC0 };
// 4C 01: REX.W+R with opcode 01 (ADD r/m64,r64); ModRM not included
static const uint8_t ADD_RDX_R[] = { 0x4c, 0x01 };
// 4C 29: REX.W+R with opcode 29 (SUB r/m64,r64); ModRM not included
static const uint8_t SUB_RDX_R[] = { 0x4c, 0x29 };
// 48 C1 FA: REX.W, shift group /7 (SAR) on rdx; the imm8 shift count is not part of the table
static const uint8_t SAR_RDX_I8[] = { 0x48, 0xC1, 0xFA };
// 48 85 D2: REX.W, TEST r/m64,r64 -> test rdx, rdx
static const uint8_t TEST_RDX_RDX[] = { 0x48, 0x85, 0xD2 };
// 0F 98 C0: sets al; 48 01 C2: add rdx, rax encoded with opcode 01 (ADD r/m64,r64)
static const uint8_t SETS_AL_ADD_RDX_RAX[] = { 0x0F, 0x98, 0xC0, 0x48, 0x01, 0xC2 };
// 0F 98 C0: sets al; 48 03 D0: add rdx, rax encoded with opcode 03 (ADD r64,r/m64)
static const uint8_t SETS_AL_ADD_RDX_RAX[] = { 0x0F, 0x98, 0xC0, 0x48, 0x03, 0xD0 };
// 49 F7: REX.W+B with group opcode F7; ModRM (selecting the operation and register) not included
static const uint8_t REX_NEG[] = { 0x49, 0xF7 };
// 4D 33: REX.W+R+B with opcode 33 (XOR r64,r/m64); ModRM not included
static const uint8_t REX_XOR_RR[] = { 0x4D, 0x33 };
// 49 81: REX.W+B with group opcode 81 (operation on r/m64 with imm32); ModRM and immediate not included
static const uint8_t REX_XOR_RI[] = { 0x49, 0x81 };
@ -272,7 +272,7 @@ namespace RandomX {
}
// Emits a SIB (scale-index-base) byte built from the three fields.
// The x86 SIB layout is scale in bits 7..6, index in bits 5..3, base in bits 2..0.
// NOTE(review): this listing is a rendered diff; the two emitByte lines are
// presumably the pre-commit and post-commit versions of the same statement,
// not two consecutive emissions -- confirm against the repository.
void JitCompilerX86::genSIB(int scale, int index, int base) {
// shift of 5 places `scale` in bits 6..5, overlapping the index field
emitByte((scale << 5) | (index << 3) | base);
// shift of 6 places `scale` in bits 7..6, matching the SIB layout
emitByte((scale << 6) | (index << 3) | base);
}
void JitCompilerX86::h_IADD_RC(Instruction& instr) {
@ -290,7 +290,7 @@ namespace RandomX {
else {
emit(REX_81);
emitByte(0xe8 + instr.dst);
genAddressImm(instr);
emit32(instr.imm32);
}
}
@ -311,7 +311,7 @@ namespace RandomX {
// Emits the code for the IMUL_9C instruction: REX_LEA, a ModRM byte, a SIB byte
// and the 32-bit immediate from the instruction.
// NOTE(review): REX_LEA is defined outside this view; presumably a REX prefix plus
// the LEA opcode -- confirm.
// NOTE(review): this listing is a rendered diff; the two genSIB lines are presumably
// the pre-commit and post-commit versions of one statement.
void JitCompilerX86::h_IMUL_9C(Instruction& instr) {
emit(REX_LEA);
// 0x84 = mod 10 (disp32 follows), rm 100 (SIB follows); 8 * dst selects the reg field
emitByte(0x84 + 8 * instr.dst);
// scale field 3 (index * 8) with src as index and dst as base
genSIB(3, instr.src, instr.dst);
// scale field 3 (index * 8) with dst as both index and base, i.e. dst + dst * 8
genSIB(3, instr.dst, instr.dst);
// imm32 occupies the disp32 slot announced by the ModRM byte
emit32(instr.imm32);
}
@ -323,7 +323,7 @@ namespace RandomX {
else {
emit(REX_IMUL_RRI);
emitByte(0xc0 + 9 * instr.dst);
genAddressImm(instr);
emit32(instr.imm32);
}
}
@ -424,7 +424,7 @@ namespace RandomX {
emit(REX_SHR_RDX);
emitByte(mi.post_shift);
}
emit(REX_ADD_RR);
emit(REX_ADD_RM);
emitByte(0xc2 + 8 * instr.dst);
}
else { //divisor is a power of two
@ -440,7 +440,7 @@ namespace RandomX {
}
void JitCompilerX86::h_ISDIV_C(Instruction& instr) {
int64_t divisor = instr.imm32;
int64_t divisor = (int32_t)instr.imm32;
if ((divisor & -divisor) == divisor || (divisor & -divisor) == -divisor) {
emit(REX_MOV_RR64);
emitByte(0xc0 + instr.dst);
@ -493,7 +493,7 @@ namespace RandomX {
emit(TEST_RDX_RDX);
emit(SETS_AL_ADD_RDX_RAX);
emit(ADD_R_RAX);
emitByte(0xd0 + instr.dst);
emitByte(0xc2 + 8 * instr.dst);
}
}
@ -559,7 +559,7 @@ namespace RandomX {
// Emits the code for the ISWAP_R instruction: REX_XCHG followed by a register-direct
// ModRM byte. Nothing is emitted when source and destination are the same register.
// NOTE(review): REX_XCHG is defined outside this view.
// NOTE(review): this listing is a rendered diff; the two emitByte lines are presumably
// the pre-commit and post-commit versions of one statement.
void JitCompilerX86::h_ISWAP_R(Instruction& instr) {
if (instr.src != instr.dst) {
emit(REX_XCHG);
// mod 11, dst in the rm field, src in the reg field
emitByte(0xc0 + instr.dst + 8 * instr.src);
// mod 11, src in the rm field, dst in the reg field
emitByte(0xc0 + instr.src + 8 * instr.dst);
}
}

View file

@ -78,13 +78,14 @@ executeProgram PROC
movdqu xmmword ptr [rsp+16], xmm14
movdqu xmmword ptr [rsp+0], xmm15
; function arguments
push rcx ; RegisterFile& registerFile
mov rbp, qword ptr [rdx] ; "mx", "ma"
mov eax, ebp ; "mx"
mov rdi, qword ptr [rdx+8] ; uint8_t* dataset
mov rsi, r8 ; convertible_t* scratchpad
mov rbx, r9 ; loop counter
;# function arguments
push rcx ;# RegisterFile& registerFile
mov rbp, qword ptr [rdx] ;# "mx", "ma"
mov rdi, qword ptr [rdx+8] ;# uint8_t* dataset
mov rsi, r8 ;# uint8_t* scratchpad
mov rbx, r9 ;# loop counter
mov rax, rbp
;# zero integer registers
xor r8, r8
@ -114,16 +115,16 @@ minDbl:
absMask:
db 255, 255, 255, 255, 255, 255, 255, 127, 255, 255, 255, 255, 255, 255, 255, 127
signMask:
db 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0, 0, 128
db 0, 0, 0, 0, 0, 0, 240, 129, 0, 0, 0, 0, 0, 0, 240, 129
ALIGN 64
program_begin:
xor rax, r8 ;# read address register 1
xor rax, r9
xor rax, r10
mov rdx, rax
and eax, 1048512
push rax
and eax, 2097088
lea rcx, [rsi+rax]
push rcx
xor r8, qword ptr [rcx+0]
xor r9, qword ptr [rcx+8]
xor r10, qword ptr [rcx+16]
@ -133,9 +134,9 @@ program_begin:
xor r14, qword ptr [rcx+48]
xor r15, qword ptr [rcx+56]
ror rdx, 32
and edx, 1048512
push rdx
and edx, 2097088
lea rcx, [rsi+rdx]
push rcx
cvtdq2pd xmm0, qword ptr [rcx+0]
cvtdq2pd xmm1, qword ptr [rcx+8]
cvtdq2pd xmm2, qword ptr [rcx+16]
@ -152,9 +153,10 @@ program_begin:
;# 256 instructions
include program.inc
mov eax, r8d ;# read address register 1
xor eax, r9d ;# read address register 2
mov eax, r12d ;# read address register 1
xor eax, r15d ;# read address register 2
xor rbp, rax ;# modify "mx"
xor eax, eax
and rbp, -64 ;# align "mx" to the start of a cache line
mov edx, ebp ;# edx = mx
prefetchnta byte ptr [rdi+rdx]
@ -169,8 +171,7 @@ program_begin:
xor r13, qword ptr [rcx+40]
xor r14, qword ptr [rcx+48]
xor r15, qword ptr [rcx+56]
pop rax
lea rcx, [rsi+rax]
pop rcx
mov qword ptr [rcx+0], r8
mov qword ptr [rcx+8], r9
mov qword ptr [rcx+16], r10
@ -179,8 +180,7 @@ program_begin:
mov qword ptr [rcx+40], r13
mov qword ptr [rcx+48], r14
mov qword ptr [rcx+56], r15
pop rax
lea rcx, [rsi+rax]
pop rcx
mulpd xmm0, xmm4
mulpd xmm1, xmm5
mulpd xmm2, xmm6
@ -189,8 +189,7 @@ program_begin:
movapd xmmword ptr [rcx+16], xmm1
movapd xmmword ptr [rcx+32], xmm2
movapd xmmword ptr [rcx+48], xmm3
xor eax, eax
dec ebx
sub ebx, 1
jnz program_begin
rx_finish:

View file

@ -123,29 +123,35 @@ void printUsage(const char* executable) {
std::cout << " --genNative generate RandomX code for nonce N" << std::endl;
}
// Generates a program for the given nonce and prints its x86 assembly to std::cout.
// The template parameter is forwarded to fillAes1Rx4.
// NOTE(review): this listing is a rendered diff without +/- markers; where two
// near-identical lines follow each other (the `hash` declaration, the fillAes1Rx4
// call that fills `p`), the first is presumably the pre-commit line and the second
// its replacement -- confirm against the repository.
template<bool softAes>
void generateAsm(int nonce) {
uint64_t hash[8];
alignas(16) uint64_t hash[8];
// work on a local copy of the block template so the nonce can be patched in
uint8_t blockTemplate[sizeof(blockTemplate__)];
memcpy(blockTemplate, blockTemplate__, sizeof(blockTemplate));
// the nonce is written at byte offset 39 of the block template
int* noncePtr = (int*)(blockTemplate + 39);
*noncePtr = nonce;
// hash the patched block template into `hash`
blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0);
// fill a scratchpad-sized buffer from `hash` before filling the program
// NOTE(review): presumably fillAes1Rx4 advances the state in `hash`, so this call
// affects the program bytes generated below -- confirm against fillAes1Rx4
uint8_t scratchpad[RandomX::ScratchpadSize];
fillAes1Rx4<softAes>((void*)hash, RandomX::ScratchpadSize, scratchpad);
RandomX::AssemblyGeneratorX86 asmX86;
RandomX::Program p;
// fill the program object with sizeof(p) bytes derived from `hash`
fillAes1Rx4<false>(hash, sizeof(p), &p);
fillAes1Rx4<softAes>(hash, sizeof(p), &p);
asmX86.generateProgram(p);
asmX86.printCode(std::cout);
}
template<bool softAes>
void generateNative(int nonce) {
uint64_t hash[4];
alignas(16) uint64_t hash[8];
uint8_t blockTemplate[sizeof(blockTemplate__)];
memcpy(blockTemplate, blockTemplate__, sizeof(blockTemplate));
int* noncePtr = (int*)(blockTemplate + 39);
*noncePtr = nonce;
blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0);
uint8_t scratchpad[RandomX::ScratchpadSize];
fillAes1Rx4<softAes>((void*)hash, RandomX::ScratchpadSize, scratchpad);
alignas(16) RandomX::Program prog;
fillAes1Rx4<false>((void*)hash, sizeof(prog), &prog);
fillAes1Rx4<softAes>((void*)hash, sizeof(prog), &prog);
for (int i = 0; i < RandomX::ProgramLength; ++i) {
prog(i).dst %= 8;
prog(i).src %= 8;
@ -181,7 +187,7 @@ void mine(RandomX::VirtualMachine* vm, std::atomic<int>& atomicNonce, AtomicHash
result.xorWith(hash);
if (RandomX::trace) {
std::cout << "Nonce: " << nonce << " ";
outputHex(std::cout, (char*)hash, sizeof(hash));
outputHex(std::cout, (char*)hash, 16);
std::cout << std::endl;
}
nonce = atomicNonce.fetch_add(1);
@ -208,12 +214,18 @@ int main(int argc, char** argv) {
readOption("--genNative", argc, argv, genNative);
if (genAsm) {
generateAsm(programCount);
if (softAes)
generateAsm<true>(programCount);
else
generateAsm<false>(programCount);
return 0;
}
if (genNative) {
generateNative(programCount);
if (softAes)
generateNative<true>(programCount);
else
generateNative<false>(programCount);
return 0;
}