mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2024-08-15 00:23:14 +00:00
Fixed JIT compiler not producing the same code as genAsm and genNative
This commit is contained in:
parent
f0d52fcf4d
commit
a145caa185
3 changed files with 50 additions and 39 deletions
|
@ -130,13 +130,13 @@ namespace RandomX {
|
|||
// Pre-encoded x86-64 instruction bytes used by the JIT emitter.
// NOTE(review): this text is a diff rendering whose +/- markers were lost, so
// ADD_R_RAX, XOR_EAX_EAX and SETS_AL_ADD_RDX_RAX each appear twice below
// (two encodings of the same table entry). Only one of each pair can exist
// in the real file.

// 48 01 C8: REX.W, ADD r/m64,r64, ModRM 11 001 000 -> add rax, rcx
static const uint8_t ADD_RAX_RCX[] = { 0x48, 0x01, 0xC8 };
|
||||
// 48 C1 F8: REX.W, shift group /7 (SAR) on rax; the imm8 count is not part of the table
static const uint8_t SAR_RAX_I8[] = { 0x48, 0xC1, 0xF8 };
|
||||
// 48 F7 D8: REX.W, F7 /3 (NEG) on rax
static const uint8_t NEG_RAX[] = { 0x48, 0xF7, 0xD8 };
|
||||
// First form of the pair: REX.W+B prefix with opcode 01 (ADD r/m64, r64).
// No ModRM byte is included in the table.
static const uint8_t ADD_R_RAX[] = { 0x49, 0x01 };
|
||||
// First form of the pair: opcode 31 (XOR r/m32, r32), ModRM C0 -> xor eax, eax
static const uint8_t XOR_EAX_EAX[] = { 0x31, 0xC0 };
|
||||
// Second form of the pair: REX.W+R prefix with opcode 03 (ADD r64, r/m64),
// i.e. the operand-swapped encoding of ADD.
static const uint8_t ADD_R_RAX[] = { 0x4C, 0x03 };
|
||||
// Second form of the pair: opcode 33 (XOR r32, r/m32), ModRM C0 -> also xor eax, eax
static const uint8_t XOR_EAX_EAX[] = { 0x33, 0xC0 };
|
||||
// 4C 01: REX.W+R, ADD r/m64, r64; ModRM is not part of the table
static const uint8_t ADD_RDX_R[] = { 0x4c, 0x01 };
|
||||
// 4C 29: REX.W+R, SUB r/m64, r64; ModRM is not part of the table
static const uint8_t SUB_RDX_R[] = { 0x4c, 0x29 };
|
||||
// 48 C1 FA: REX.W, shift group /7 (SAR) on rdx; the imm8 count is not part of the table
static const uint8_t SAR_RDX_I8[] = { 0x48, 0xC1, 0xFA };
|
||||
// 48 85 D2: REX.W, TEST r/m64, r64 -> test rdx, rdx
static const uint8_t TEST_RDX_RDX[] = { 0x48, 0x85, 0xD2 };
|
||||
// First form of the pair: 0F 98 C0 (sets al) followed by 48 01 C2 (add rdx, rax via opcode 01)
static const uint8_t SETS_AL_ADD_RDX_RAX[] = { 0x0F, 0x98, 0xC0, 0x48, 0x01, 0xC2 };
|
||||
// Second form of the pair: same two instructions, with add rdx, rax encoded via opcode 03 (48 03 D0)
static const uint8_t SETS_AL_ADD_RDX_RAX[] = { 0x0F, 0x98, 0xC0, 0x48, 0x03, 0xD0 };
|
||||
// 49 F7: REX.W+B with the F7 group opcode; ModRM is not part of the table
static const uint8_t REX_NEG[] = { 0x49, 0xF7 };
|
||||
// 4D 33: REX.W+R+B, XOR r64, r/m64; ModRM is not part of the table
static const uint8_t REX_XOR_RR[] = { 0x4D, 0x33 };
|
||||
// 49 81: REX.W+B with the 81 group opcode (imm32 forms); ModRM and immediate are not part of the table
static const uint8_t REX_XOR_RI[] = { 0x49, 0x81 };
|
||||
|
@ -272,7 +272,7 @@ namespace RandomX {
|
|||
}
|
||||
|
||||
// Emits a single SIB byte assembled from the scale, index and base fields.
// NOTE(review): diff markers were lost in this rendering, so two versions of
// the emitByte line appear below. The x86 SIB layout is scale[7:6],
// index[5:3], base[2:0]; the `<< 6` form places scale in its proper field,
// whereas `<< 5` makes scale's low bit collide with the top bit of index.
void JitCompilerX86::genSIB(int scale, int index, int base) {
|
||||
emitByte((scale << 5) | (index << 3) | base);
|
||||
emitByte((scale << 6) | (index << 3) | base);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_IADD_RC(Instruction& instr) {
|
||||
|
@ -290,7 +290,7 @@ namespace RandomX {
|
|||
else {
|
||||
emit(REX_81);
|
||||
emitByte(0xe8 + instr.dst);
|
||||
genAddressImm(instr);
|
||||
emit32(instr.imm32);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -311,7 +311,7 @@ namespace RandomX {
|
|||
// Emits the code for IMUL_9C as one address computation: the REX_LEA bytes,
// a ModRM byte, a SIB byte and a 32-bit displacement taken from instr.imm32.
// NOTE(review): diff markers were lost in this rendering, so two genSIB calls
// appear below; they differ only in the index register (instr.src vs
// instr.dst). With index == base == dst and scale field 3 (x8), the address
// is dst + dst*8 + imm32, i.e. 9*dst + imm32.
void JitCompilerX86::h_IMUL_9C(Instruction& instr) {
|
||||
// REX_LEA is defined outside this hunk; presumably REX prefix + LEA opcode — confirm.
emit(REX_LEA);
|
||||
// ModRM: 0x84 = mod 10 (disp32), rm 100 (SIB follows); reg field = dst
emitByte(0x84 + 8 * instr.dst);
|
||||
genSIB(3, instr.src, instr.dst);
|
||||
genSIB(3, instr.dst, instr.dst);
|
||||
emit32(instr.imm32);
|
||||
}
|
||||
|
||||
|
@ -323,7 +323,7 @@ namespace RandomX {
|
|||
else {
|
||||
emit(REX_IMUL_RRI);
|
||||
emitByte(0xc0 + 9 * instr.dst);
|
||||
genAddressImm(instr);
|
||||
emit32(instr.imm32);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -424,7 +424,7 @@ namespace RandomX {
|
|||
emit(REX_SHR_RDX);
|
||||
emitByte(mi.post_shift);
|
||||
}
|
||||
emit(REX_ADD_RR);
|
||||
emit(REX_ADD_RM);
|
||||
emitByte(0xc2 + 8 * instr.dst);
|
||||
}
|
||||
else { //divisor is a power of two
|
||||
|
@ -440,7 +440,7 @@ namespace RandomX {
|
|||
}
|
||||
|
||||
void JitCompilerX86::h_ISDIV_C(Instruction& instr) {
|
||||
int64_t divisor = instr.imm32;
|
||||
int64_t divisor = (int32_t)instr.imm32;
|
||||
if ((divisor & -divisor) == divisor || (divisor & -divisor) == -divisor) {
|
||||
emit(REX_MOV_RR64);
|
||||
emitByte(0xc0 + instr.dst);
|
||||
|
@ -493,7 +493,7 @@ namespace RandomX {
|
|||
emit(TEST_RDX_RDX);
|
||||
emit(SETS_AL_ADD_RDX_RAX);
|
||||
emit(ADD_R_RAX);
|
||||
emitByte(0xd0 + instr.dst);
|
||||
emitByte(0xc2 + 8 * instr.dst);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -559,7 +559,7 @@ namespace RandomX {
|
|||
// Emits a register exchange between instr.src and instr.dst.
// Nothing is emitted when both operands name the same register.
// NOTE(review): diff markers were lost in this rendering, so two versions of
// the ModRM byte appear below. Both are register-direct (mod = 11); the first
// puts dst in the rm field and src in the reg field, the second swaps them.
// REX_XCHG is defined outside this hunk.
void JitCompilerX86::h_ISWAP_R(Instruction& instr) {
|
||||
if (instr.src != instr.dst) {
|
||||
emit(REX_XCHG);
|
||||
emitByte(0xc0 + instr.dst + 8 * instr.src);
|
||||
emitByte(0xc0 + instr.src + 8 * instr.dst);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -78,13 +78,14 @@ executeProgram PROC
|
|||
movdqu xmmword ptr [rsp+16], xmm14
|
||||
movdqu xmmword ptr [rsp+0], xmm15
|
||||
|
||||
; function arguments
|
||||
push rcx ; RegisterFile& registerFile
|
||||
mov rbp, qword ptr [rdx] ; "mx", "ma"
|
||||
mov eax, ebp ; "mx"
|
||||
mov rdi, qword ptr [rdx+8] ; uint8_t* dataset
|
||||
mov rsi, r8 ; convertible_t* scratchpad
|
||||
mov rbx, r9 ; loop counter
|
||||
;# function arguments
|
||||
push rcx ;# RegisterFile& registerFile
|
||||
mov rbp, qword ptr [rdx] ;# "mx", "ma"
|
||||
mov rdi, qword ptr [rdx+8] ;# uint8_t* dataset
|
||||
mov rsi, r8 ;# uint8_t* scratchpad
|
||||
mov rbx, r9 ;# loop counter
|
||||
|
||||
mov rax, rbp
|
||||
|
||||
;# zero integer registers
|
||||
xor r8, r8
|
||||
|
@ -114,16 +115,16 @@ minDbl:
|
|||
absMask:
|
||||
db 255, 255, 255, 255, 255, 255, 255, 127, 255, 255, 255, 255, 255, 255, 255, 127
|
||||
signMask:
|
||||
db 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0, 0, 128
|
||||
db 0, 0, 0, 0, 0, 0, 240, 129, 0, 0, 0, 0, 0, 0, 240, 129
|
||||
|
||||
ALIGN 64
|
||||
program_begin:
|
||||
xor rax, r8 ;# read address register 1
|
||||
xor rax, r9
|
||||
xor rax, r10
|
||||
mov rdx, rax
|
||||
and eax, 1048512
|
||||
push rax
|
||||
and eax, 2097088
|
||||
lea rcx, [rsi+rax]
|
||||
push rcx
|
||||
xor r8, qword ptr [rcx+0]
|
||||
xor r9, qword ptr [rcx+8]
|
||||
xor r10, qword ptr [rcx+16]
|
||||
|
@ -133,9 +134,9 @@ program_begin:
|
|||
xor r14, qword ptr [rcx+48]
|
||||
xor r15, qword ptr [rcx+56]
|
||||
ror rdx, 32
|
||||
and edx, 1048512
|
||||
push rdx
|
||||
and edx, 2097088
|
||||
lea rcx, [rsi+rdx]
|
||||
push rcx
|
||||
cvtdq2pd xmm0, qword ptr [rcx+0]
|
||||
cvtdq2pd xmm1, qword ptr [rcx+8]
|
||||
cvtdq2pd xmm2, qword ptr [rcx+16]
|
||||
|
@ -152,9 +153,10 @@ program_begin:
|
|||
;# 256 instructions
|
||||
include program.inc
|
||||
|
||||
mov eax, r8d ;# read address register 1
|
||||
xor eax, r9d ;# read address register 2
|
||||
mov eax, r12d ;# read address register 1
|
||||
xor eax, r15d ;# read address register 2
|
||||
xor rbp, rax ;# modify "mx"
|
||||
xor eax, eax
|
||||
and rbp, -64 ;# align "mx" to the start of a cache line
|
||||
mov edx, ebp ;# edx = mx
|
||||
prefetchnta byte ptr [rdi+rdx]
|
||||
|
@ -169,8 +171,7 @@ program_begin:
|
|||
xor r13, qword ptr [rcx+40]
|
||||
xor r14, qword ptr [rcx+48]
|
||||
xor r15, qword ptr [rcx+56]
|
||||
pop rax
|
||||
lea rcx, [rsi+rax]
|
||||
pop rcx
|
||||
mov qword ptr [rcx+0], r8
|
||||
mov qword ptr [rcx+8], r9
|
||||
mov qword ptr [rcx+16], r10
|
||||
|
@ -179,8 +180,7 @@ program_begin:
|
|||
mov qword ptr [rcx+40], r13
|
||||
mov qword ptr [rcx+48], r14
|
||||
mov qword ptr [rcx+56], r15
|
||||
pop rax
|
||||
lea rcx, [rsi+rax]
|
||||
pop rcx
|
||||
mulpd xmm0, xmm4
|
||||
mulpd xmm1, xmm5
|
||||
mulpd xmm2, xmm6
|
||||
|
@ -189,8 +189,7 @@ program_begin:
|
|||
movapd xmmword ptr [rcx+16], xmm1
|
||||
movapd xmmword ptr [rcx+32], xmm2
|
||||
movapd xmmword ptr [rcx+48], xmm3
|
||||
xor eax, eax
|
||||
dec ebx
|
||||
sub ebx, 1
|
||||
jnz program_begin
|
||||
|
||||
rx_finish:
|
||||
|
|
26
src/main.cpp
26
src/main.cpp
|
@ -123,29 +123,35 @@ void printUsage(const char* executable) {
|
|||
std::cout << " --genNative generate RandomX code for nonce N" << std::endl;
|
||||
}
|
||||
|
||||
// Prints the x86 assembly listing of the program derived from `nonce`.
// Steps visible here: copy blockTemplate__, store the nonce at byte offset 39,
// hash the template with blake2b, expand the hash into a scratchpad and then
// into a RandomX::Program with fillAes1Rx4, and print the generated code to
// std::cout.
// NOTE(review): diff markers were lost in this rendering, so the `hash`
// declaration and the Program fill call each appear twice (without/with
// alignas(16), and with <false>/<softAes>); only one of each pair can exist
// in the real file.
template<bool softAes>
|
||||
void generateAsm(int nonce) {
|
||||
uint64_t hash[8];
|
||||
alignas(16) uint64_t hash[8];
|
||||
uint8_t blockTemplate[sizeof(blockTemplate__)];
|
||||
memcpy(blockTemplate, blockTemplate__, sizeof(blockTemplate));
|
||||
// NOTE(review): offset 39 is not 4-byte aligned, so this is an unaligned int store through a cast pointer.
int* noncePtr = (int*)(blockTemplate + 39);
|
||||
*noncePtr = nonce;
|
||||
blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0);
|
||||
// The scratchpad is filled from `hash` before the program is generated.
uint8_t scratchpad[RandomX::ScratchpadSize];
|
||||
fillAes1Rx4<softAes>((void*)hash, RandomX::ScratchpadSize, scratchpad);
|
||||
RandomX::AssemblyGeneratorX86 asmX86;
|
||||
RandomX::Program p;
|
||||
fillAes1Rx4<false>(hash, sizeof(p), &p);
|
||||
fillAes1Rx4<softAes>(hash, sizeof(p), &p);
|
||||
asmX86.generateProgram(p);
|
||||
asmX86.printCode(std::cout);
|
||||
}
|
||||
|
||||
template<bool softAes>
|
||||
void generateNative(int nonce) {
|
||||
uint64_t hash[4];
|
||||
alignas(16) uint64_t hash[8];
|
||||
uint8_t blockTemplate[sizeof(blockTemplate__)];
|
||||
memcpy(blockTemplate, blockTemplate__, sizeof(blockTemplate));
|
||||
int* noncePtr = (int*)(blockTemplate + 39);
|
||||
*noncePtr = nonce;
|
||||
blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0);
|
||||
uint8_t scratchpad[RandomX::ScratchpadSize];
|
||||
fillAes1Rx4<softAes>((void*)hash, RandomX::ScratchpadSize, scratchpad);
|
||||
alignas(16) RandomX::Program prog;
|
||||
fillAes1Rx4<false>((void*)hash, sizeof(prog), &prog);
|
||||
fillAes1Rx4<softAes>((void*)hash, sizeof(prog), &prog);
|
||||
for (int i = 0; i < RandomX::ProgramLength; ++i) {
|
||||
prog(i).dst %= 8;
|
||||
prog(i).src %= 8;
|
||||
|
@ -181,7 +187,7 @@ void mine(RandomX::VirtualMachine* vm, std::atomic<int>& atomicNonce, AtomicHash
|
|||
result.xorWith(hash);
|
||||
if (RandomX::trace) {
|
||||
std::cout << "Nonce: " << nonce << " ";
|
||||
outputHex(std::cout, (char*)hash, sizeof(hash));
|
||||
outputHex(std::cout, (char*)hash, 16);
|
||||
std::cout << std::endl;
|
||||
}
|
||||
nonce = atomicNonce.fetch_add(1);
|
||||
|
@ -208,12 +214,18 @@ int main(int argc, char** argv) {
|
|||
readOption("--genNative", argc, argv, genNative);
|
||||
|
||||
if (genAsm) {
|
||||
generateAsm(programCount);
|
||||
if (softAes)
|
||||
generateAsm<true>(programCount);
|
||||
else
|
||||
generateAsm<false>(programCount);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (genNative) {
|
||||
generateNative(programCount);
|
||||
if (softAes)
|
||||
generateNative<true>(programCount);
|
||||
else
|
||||
generateNative<false>(programCount);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue