mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2024-08-15 00:23:14 +00:00
Fixed JIT compiler not producing the same code as genAsm and genNative
This commit is contained in:
parent
f0d52fcf4d
commit
a145caa185
3 changed files with 50 additions and 39 deletions
|
@ -130,13 +130,13 @@ namespace RandomX {
|
||||||
static const uint8_t ADD_RAX_RCX[] = { 0x48, 0x01, 0xC8 };
|
static const uint8_t ADD_RAX_RCX[] = { 0x48, 0x01, 0xC8 };
|
||||||
static const uint8_t SAR_RAX_I8[] = { 0x48, 0xC1, 0xF8 };
|
static const uint8_t SAR_RAX_I8[] = { 0x48, 0xC1, 0xF8 };
|
||||||
static const uint8_t NEG_RAX[] = { 0x48, 0xF7, 0xD8 };
|
static const uint8_t NEG_RAX[] = { 0x48, 0xF7, 0xD8 };
|
||||||
static const uint8_t ADD_R_RAX[] = { 0x49, 0x01 };
|
static const uint8_t ADD_R_RAX[] = { 0x4C, 0x03 };
|
||||||
static const uint8_t XOR_EAX_EAX[] = { 0x31, 0xC0 };
|
static const uint8_t XOR_EAX_EAX[] = { 0x33, 0xC0 };
|
||||||
static const uint8_t ADD_RDX_R[] = { 0x4c, 0x01 };
|
static const uint8_t ADD_RDX_R[] = { 0x4c, 0x01 };
|
||||||
static const uint8_t SUB_RDX_R[] = { 0x4c, 0x29 };
|
static const uint8_t SUB_RDX_R[] = { 0x4c, 0x29 };
|
||||||
static const uint8_t SAR_RDX_I8[] = { 0x48, 0xC1, 0xFA };
|
static const uint8_t SAR_RDX_I8[] = { 0x48, 0xC1, 0xFA };
|
||||||
static const uint8_t TEST_RDX_RDX[] = { 0x48, 0x85, 0xD2 };
|
static const uint8_t TEST_RDX_RDX[] = { 0x48, 0x85, 0xD2 };
|
||||||
static const uint8_t SETS_AL_ADD_RDX_RAX[] = { 0x0F, 0x98, 0xC0, 0x48, 0x01, 0xC2 };
|
static const uint8_t SETS_AL_ADD_RDX_RAX[] = { 0x0F, 0x98, 0xC0, 0x48, 0x03, 0xD0 };
|
||||||
static const uint8_t REX_NEG[] = { 0x49, 0xF7 };
|
static const uint8_t REX_NEG[] = { 0x49, 0xF7 };
|
||||||
static const uint8_t REX_XOR_RR[] = { 0x4D, 0x33 };
|
static const uint8_t REX_XOR_RR[] = { 0x4D, 0x33 };
|
||||||
static const uint8_t REX_XOR_RI[] = { 0x49, 0x81 };
|
static const uint8_t REX_XOR_RI[] = { 0x49, 0x81 };
|
||||||
|
@ -272,7 +272,7 @@ namespace RandomX {
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::genSIB(int scale, int index, int base) {
|
void JitCompilerX86::genSIB(int scale, int index, int base) {
|
||||||
emitByte((scale << 5) | (index << 3) | base);
|
emitByte((scale << 6) | (index << 3) | base);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_IADD_RC(Instruction& instr) {
|
void JitCompilerX86::h_IADD_RC(Instruction& instr) {
|
||||||
|
@ -290,7 +290,7 @@ namespace RandomX {
|
||||||
else {
|
else {
|
||||||
emit(REX_81);
|
emit(REX_81);
|
||||||
emitByte(0xe8 + instr.dst);
|
emitByte(0xe8 + instr.dst);
|
||||||
genAddressImm(instr);
|
emit32(instr.imm32);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -311,7 +311,7 @@ namespace RandomX {
|
||||||
void JitCompilerX86::h_IMUL_9C(Instruction& instr) {
|
void JitCompilerX86::h_IMUL_9C(Instruction& instr) {
|
||||||
emit(REX_LEA);
|
emit(REX_LEA);
|
||||||
emitByte(0x84 + 8 * instr.dst);
|
emitByte(0x84 + 8 * instr.dst);
|
||||||
genSIB(3, instr.src, instr.dst);
|
genSIB(3, instr.dst, instr.dst);
|
||||||
emit32(instr.imm32);
|
emit32(instr.imm32);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -323,7 +323,7 @@ namespace RandomX {
|
||||||
else {
|
else {
|
||||||
emit(REX_IMUL_RRI);
|
emit(REX_IMUL_RRI);
|
||||||
emitByte(0xc0 + 9 * instr.dst);
|
emitByte(0xc0 + 9 * instr.dst);
|
||||||
genAddressImm(instr);
|
emit32(instr.imm32);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -424,7 +424,7 @@ namespace RandomX {
|
||||||
emit(REX_SHR_RDX);
|
emit(REX_SHR_RDX);
|
||||||
emitByte(mi.post_shift);
|
emitByte(mi.post_shift);
|
||||||
}
|
}
|
||||||
emit(REX_ADD_RR);
|
emit(REX_ADD_RM);
|
||||||
emitByte(0xc2 + 8 * instr.dst);
|
emitByte(0xc2 + 8 * instr.dst);
|
||||||
}
|
}
|
||||||
else { //divisor is a power of two
|
else { //divisor is a power of two
|
||||||
|
@ -440,7 +440,7 @@ namespace RandomX {
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_ISDIV_C(Instruction& instr) {
|
void JitCompilerX86::h_ISDIV_C(Instruction& instr) {
|
||||||
int64_t divisor = instr.imm32;
|
int64_t divisor = (int32_t)instr.imm32;
|
||||||
if ((divisor & -divisor) == divisor || (divisor & -divisor) == -divisor) {
|
if ((divisor & -divisor) == divisor || (divisor & -divisor) == -divisor) {
|
||||||
emit(REX_MOV_RR64);
|
emit(REX_MOV_RR64);
|
||||||
emitByte(0xc0 + instr.dst);
|
emitByte(0xc0 + instr.dst);
|
||||||
|
@ -493,7 +493,7 @@ namespace RandomX {
|
||||||
emit(TEST_RDX_RDX);
|
emit(TEST_RDX_RDX);
|
||||||
emit(SETS_AL_ADD_RDX_RAX);
|
emit(SETS_AL_ADD_RDX_RAX);
|
||||||
emit(ADD_R_RAX);
|
emit(ADD_R_RAX);
|
||||||
emitByte(0xd0 + instr.dst);
|
emitByte(0xc2 + 8 * instr.dst);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -559,7 +559,7 @@ namespace RandomX {
|
||||||
void JitCompilerX86::h_ISWAP_R(Instruction& instr) {
|
void JitCompilerX86::h_ISWAP_R(Instruction& instr) {
|
||||||
if (instr.src != instr.dst) {
|
if (instr.src != instr.dst) {
|
||||||
emit(REX_XCHG);
|
emit(REX_XCHG);
|
||||||
emitByte(0xc0 + instr.dst + 8 * instr.src);
|
emitByte(0xc0 + instr.src + 8 * instr.dst);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -78,13 +78,14 @@ executeProgram PROC
|
||||||
movdqu xmmword ptr [rsp+16], xmm14
|
movdqu xmmword ptr [rsp+16], xmm14
|
||||||
movdqu xmmword ptr [rsp+0], xmm15
|
movdqu xmmword ptr [rsp+0], xmm15
|
||||||
|
|
||||||
; function arguments
|
;# function arguments
|
||||||
push rcx ; RegisterFile& registerFile
|
push rcx ;# RegisterFile& registerFile
|
||||||
mov rbp, qword ptr [rdx] ; "mx", "ma"
|
mov rbp, qword ptr [rdx] ;# "mx", "ma"
|
||||||
mov eax, ebp ; "mx"
|
mov rdi, qword ptr [rdx+8] ;# uint8_t* dataset
|
||||||
mov rdi, qword ptr [rdx+8] ; uint8_t* dataset
|
mov rsi, r8 ;# uint8_t* scratchpad
|
||||||
mov rsi, r8 ; convertible_t* scratchpad
|
mov rbx, r9 ;# loop counter
|
||||||
mov rbx, r9 ; loop counter
|
|
||||||
|
mov rax, rbp
|
||||||
|
|
||||||
;# zero integer registers
|
;# zero integer registers
|
||||||
xor r8, r8
|
xor r8, r8
|
||||||
|
@ -114,16 +115,16 @@ minDbl:
|
||||||
absMask:
|
absMask:
|
||||||
db 255, 255, 255, 255, 255, 255, 255, 127, 255, 255, 255, 255, 255, 255, 255, 127
|
db 255, 255, 255, 255, 255, 255, 255, 127, 255, 255, 255, 255, 255, 255, 255, 127
|
||||||
signMask:
|
signMask:
|
||||||
db 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0, 0, 128
|
db 0, 0, 0, 0, 0, 0, 240, 129, 0, 0, 0, 0, 0, 0, 240, 129
|
||||||
|
|
||||||
ALIGN 64
|
ALIGN 64
|
||||||
program_begin:
|
program_begin:
|
||||||
xor rax, r8 ;# read address register 1
|
xor rax, r8 ;# read address register 1
|
||||||
xor rax, r9
|
xor rax, r10
|
||||||
mov rdx, rax
|
mov rdx, rax
|
||||||
and eax, 1048512
|
and eax, 2097088
|
||||||
push rax
|
|
||||||
lea rcx, [rsi+rax]
|
lea rcx, [rsi+rax]
|
||||||
|
push rcx
|
||||||
xor r8, qword ptr [rcx+0]
|
xor r8, qword ptr [rcx+0]
|
||||||
xor r9, qword ptr [rcx+8]
|
xor r9, qword ptr [rcx+8]
|
||||||
xor r10, qword ptr [rcx+16]
|
xor r10, qword ptr [rcx+16]
|
||||||
|
@ -133,9 +134,9 @@ program_begin:
|
||||||
xor r14, qword ptr [rcx+48]
|
xor r14, qword ptr [rcx+48]
|
||||||
xor r15, qword ptr [rcx+56]
|
xor r15, qword ptr [rcx+56]
|
||||||
ror rdx, 32
|
ror rdx, 32
|
||||||
and edx, 1048512
|
and edx, 2097088
|
||||||
push rdx
|
|
||||||
lea rcx, [rsi+rdx]
|
lea rcx, [rsi+rdx]
|
||||||
|
push rcx
|
||||||
cvtdq2pd xmm0, qword ptr [rcx+0]
|
cvtdq2pd xmm0, qword ptr [rcx+0]
|
||||||
cvtdq2pd xmm1, qword ptr [rcx+8]
|
cvtdq2pd xmm1, qword ptr [rcx+8]
|
||||||
cvtdq2pd xmm2, qword ptr [rcx+16]
|
cvtdq2pd xmm2, qword ptr [rcx+16]
|
||||||
|
@ -152,9 +153,10 @@ program_begin:
|
||||||
;# 256 instructions
|
;# 256 instructions
|
||||||
include program.inc
|
include program.inc
|
||||||
|
|
||||||
mov eax, r8d ;# read address register 1
|
mov eax, r12d ;# read address register 1
|
||||||
xor eax, r9d ;# read address register 2
|
xor eax, r15d ;# read address register 2
|
||||||
xor rbp, rax ;# modify "mx"
|
xor rbp, rax ;# modify "mx"
|
||||||
|
xor eax, eax
|
||||||
and rbp, -64 ;# align "mx" to the start of a cache line
|
and rbp, -64 ;# align "mx" to the start of a cache line
|
||||||
mov edx, ebp ;# edx = mx
|
mov edx, ebp ;# edx = mx
|
||||||
prefetchnta byte ptr [rdi+rdx]
|
prefetchnta byte ptr [rdi+rdx]
|
||||||
|
@ -169,8 +171,7 @@ program_begin:
|
||||||
xor r13, qword ptr [rcx+40]
|
xor r13, qword ptr [rcx+40]
|
||||||
xor r14, qword ptr [rcx+48]
|
xor r14, qword ptr [rcx+48]
|
||||||
xor r15, qword ptr [rcx+56]
|
xor r15, qword ptr [rcx+56]
|
||||||
pop rax
|
pop rcx
|
||||||
lea rcx, [rsi+rax]
|
|
||||||
mov qword ptr [rcx+0], r8
|
mov qword ptr [rcx+0], r8
|
||||||
mov qword ptr [rcx+8], r9
|
mov qword ptr [rcx+8], r9
|
||||||
mov qword ptr [rcx+16], r10
|
mov qword ptr [rcx+16], r10
|
||||||
|
@ -179,8 +180,7 @@ program_begin:
|
||||||
mov qword ptr [rcx+40], r13
|
mov qword ptr [rcx+40], r13
|
||||||
mov qword ptr [rcx+48], r14
|
mov qword ptr [rcx+48], r14
|
||||||
mov qword ptr [rcx+56], r15
|
mov qword ptr [rcx+56], r15
|
||||||
pop rax
|
pop rcx
|
||||||
lea rcx, [rsi+rax]
|
|
||||||
mulpd xmm0, xmm4
|
mulpd xmm0, xmm4
|
||||||
mulpd xmm1, xmm5
|
mulpd xmm1, xmm5
|
||||||
mulpd xmm2, xmm6
|
mulpd xmm2, xmm6
|
||||||
|
@ -189,8 +189,7 @@ program_begin:
|
||||||
movapd xmmword ptr [rcx+16], xmm1
|
movapd xmmword ptr [rcx+16], xmm1
|
||||||
movapd xmmword ptr [rcx+32], xmm2
|
movapd xmmword ptr [rcx+32], xmm2
|
||||||
movapd xmmword ptr [rcx+48], xmm3
|
movapd xmmword ptr [rcx+48], xmm3
|
||||||
xor eax, eax
|
sub ebx, 1
|
||||||
dec ebx
|
|
||||||
jnz program_begin
|
jnz program_begin
|
||||||
|
|
||||||
rx_finish:
|
rx_finish:
|
||||||
|
|
26
src/main.cpp
26
src/main.cpp
|
@ -123,29 +123,35 @@ void printUsage(const char* executable) {
|
||||||
std::cout << " --genNative generate RandomX code for nonce N" << std::endl;
|
std::cout << " --genNative generate RandomX code for nonce N" << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<bool softAes>
|
||||||
void generateAsm(int nonce) {
|
void generateAsm(int nonce) {
|
||||||
uint64_t hash[8];
|
alignas(16) uint64_t hash[8];
|
||||||
uint8_t blockTemplate[sizeof(blockTemplate__)];
|
uint8_t blockTemplate[sizeof(blockTemplate__)];
|
||||||
memcpy(blockTemplate, blockTemplate__, sizeof(blockTemplate));
|
memcpy(blockTemplate, blockTemplate__, sizeof(blockTemplate));
|
||||||
int* noncePtr = (int*)(blockTemplate + 39);
|
int* noncePtr = (int*)(blockTemplate + 39);
|
||||||
*noncePtr = nonce;
|
*noncePtr = nonce;
|
||||||
blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0);
|
blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0);
|
||||||
|
uint8_t scratchpad[RandomX::ScratchpadSize];
|
||||||
|
fillAes1Rx4<softAes>((void*)hash, RandomX::ScratchpadSize, scratchpad);
|
||||||
RandomX::AssemblyGeneratorX86 asmX86;
|
RandomX::AssemblyGeneratorX86 asmX86;
|
||||||
RandomX::Program p;
|
RandomX::Program p;
|
||||||
fillAes1Rx4<false>(hash, sizeof(p), &p);
|
fillAes1Rx4<softAes>(hash, sizeof(p), &p);
|
||||||
asmX86.generateProgram(p);
|
asmX86.generateProgram(p);
|
||||||
asmX86.printCode(std::cout);
|
asmX86.printCode(std::cout);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<bool softAes>
|
||||||
void generateNative(int nonce) {
|
void generateNative(int nonce) {
|
||||||
uint64_t hash[4];
|
alignas(16) uint64_t hash[8];
|
||||||
uint8_t blockTemplate[sizeof(blockTemplate__)];
|
uint8_t blockTemplate[sizeof(blockTemplate__)];
|
||||||
memcpy(blockTemplate, blockTemplate__, sizeof(blockTemplate));
|
memcpy(blockTemplate, blockTemplate__, sizeof(blockTemplate));
|
||||||
int* noncePtr = (int*)(blockTemplate + 39);
|
int* noncePtr = (int*)(blockTemplate + 39);
|
||||||
*noncePtr = nonce;
|
*noncePtr = nonce;
|
||||||
blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0);
|
blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0);
|
||||||
|
uint8_t scratchpad[RandomX::ScratchpadSize];
|
||||||
|
fillAes1Rx4<softAes>((void*)hash, RandomX::ScratchpadSize, scratchpad);
|
||||||
alignas(16) RandomX::Program prog;
|
alignas(16) RandomX::Program prog;
|
||||||
fillAes1Rx4<false>((void*)hash, sizeof(prog), &prog);
|
fillAes1Rx4<softAes>((void*)hash, sizeof(prog), &prog);
|
||||||
for (int i = 0; i < RandomX::ProgramLength; ++i) {
|
for (int i = 0; i < RandomX::ProgramLength; ++i) {
|
||||||
prog(i).dst %= 8;
|
prog(i).dst %= 8;
|
||||||
prog(i).src %= 8;
|
prog(i).src %= 8;
|
||||||
|
@ -181,7 +187,7 @@ void mine(RandomX::VirtualMachine* vm, std::atomic<int>& atomicNonce, AtomicHash
|
||||||
result.xorWith(hash);
|
result.xorWith(hash);
|
||||||
if (RandomX::trace) {
|
if (RandomX::trace) {
|
||||||
std::cout << "Nonce: " << nonce << " ";
|
std::cout << "Nonce: " << nonce << " ";
|
||||||
outputHex(std::cout, (char*)hash, sizeof(hash));
|
outputHex(std::cout, (char*)hash, 16);
|
||||||
std::cout << std::endl;
|
std::cout << std::endl;
|
||||||
}
|
}
|
||||||
nonce = atomicNonce.fetch_add(1);
|
nonce = atomicNonce.fetch_add(1);
|
||||||
|
@ -208,12 +214,18 @@ int main(int argc, char** argv) {
|
||||||
readOption("--genNative", argc, argv, genNative);
|
readOption("--genNative", argc, argv, genNative);
|
||||||
|
|
||||||
if (genAsm) {
|
if (genAsm) {
|
||||||
generateAsm(programCount);
|
if (softAes)
|
||||||
|
generateAsm<true>(programCount);
|
||||||
|
else
|
||||||
|
generateAsm<false>(programCount);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (genNative) {
|
if (genNative) {
|
||||||
generateNative(programCount);
|
if (softAes)
|
||||||
|
generateNative<true>(programCount);
|
||||||
|
else
|
||||||
|
generateNative<false>(programCount);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue