mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2024-08-15 00:23:14 +00:00
Inlined calls for memory read
This commit is contained in:
parent
6519fed4d1
commit
2f6a599ff6
5 changed files with 3658 additions and 598 deletions
|
@ -59,37 +59,55 @@ namespace RandomX {
|
|||
(this->*generator)(instr, i);
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::genar(Instruction& instr) {
|
||||
void AssemblyGeneratorX86::genar(Instruction& instr, int i) {
|
||||
asmCode << "\txor " << regR[instr.rega % RegistersCount] << ", 0" << std::hex << instr.addra << "h" << std::dec << std::endl;
|
||||
asmCode << "\tmov ecx, " << regR32[instr.rega % RegistersCount] << std::endl;
|
||||
asmCode << "\ttest ebp, 63" << std::endl;
|
||||
asmCode << "\tjnz short rx_body_" << i << std::endl;
|
||||
switch (instr.loca & 3)
|
||||
{
|
||||
case 0:
|
||||
case 1:
|
||||
case 2:
|
||||
asmCode << "\tcall rx_readint_l1" << std::endl;
|
||||
return;
|
||||
asmCode << "\tcall rx_read_l1" << std::endl;
|
||||
asmCode << "rx_body_" << i << ":" << std::endl;
|
||||
asmCode << "\txor rdi, rcx" << std::endl;
|
||||
asmCode << "\tand ecx, " << (ScratchpadL1 - 1) << std::endl;
|
||||
break;
|
||||
default: //3
|
||||
asmCode << "\tcall rx_readint_l2" << std::endl;
|
||||
return;
|
||||
asmCode << "\tcall rx_read_l2" << std::endl;
|
||||
asmCode << "rx_body_" << i << ":" << std::endl;
|
||||
asmCode << "\txor rdi, rcx" << std::endl;
|
||||
asmCode << "\tand ecx, " << (ScratchpadL2 - 1) << std::endl;
|
||||
break;
|
||||
}
|
||||
asmCode << "\tmov rax, qword ptr [rsi+rcx*8]" << std::endl;
|
||||
}
|
||||
|
||||
|
||||
void AssemblyGeneratorX86::genaf(Instruction& instr) {
|
||||
void AssemblyGeneratorX86::genaf(Instruction& instr, int i) {
|
||||
asmCode << "\txor " << regR[instr.rega % RegistersCount] << ", 0" << std::hex << instr.addra << "h" << std::dec << std::endl;
|
||||
asmCode << "\tmov ecx, " << regR32[instr.rega % RegistersCount] << std::endl;
|
||||
asmCode << "\ttest ebp, 63" << std::endl;
|
||||
asmCode << "\tjnz short rx_body_" << i << std::endl;
|
||||
switch (instr.loca & 3)
|
||||
{
|
||||
case 0:
|
||||
case 1:
|
||||
case 2:
|
||||
asmCode << "\tcall rx_readfloat_l1" << std::endl;
|
||||
return;
|
||||
asmCode << "\tcall rx_read_l1" << std::endl;
|
||||
asmCode << "rx_body_" << i << ":" << std::endl;
|
||||
asmCode << "\txor rdi, rcx" << std::endl;
|
||||
asmCode << "\tand ecx, " << (ScratchpadL1 - 1) << std::endl;
|
||||
break;
|
||||
default: //3
|
||||
asmCode << "\tcall rx_readfloat_l2" << std::endl;
|
||||
return;
|
||||
asmCode << "\tcall rx_read_l2" << std::endl;
|
||||
asmCode << "rx_body_" << i << ":" << std::endl;
|
||||
asmCode << "\txor rdi, rcx" << std::endl;
|
||||
asmCode << "\tand ecx, " << (ScratchpadL2 - 1) << std::endl;
|
||||
break;
|
||||
}
|
||||
asmCode << "\tcvtdq2pd xmm0, qword ptr [rsi+rcx*8]" << std::endl;
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::genbr0(Instruction& instr, const char* instrx86) {
|
||||
|
@ -209,35 +227,35 @@ namespace RandomX {
|
|||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_ADD_64(Instruction& instr, int i) {
|
||||
genar(instr);
|
||||
genar(instr, i);
|
||||
asmCode << "\tadd rax, ";
|
||||
genbr1(instr);
|
||||
gencr(instr);
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_ADD_32(Instruction& instr, int i) {
|
||||
genar(instr);
|
||||
genar(instr, i);
|
||||
asmCode << "\tadd eax, ";
|
||||
genbr132(instr);
|
||||
gencr(instr);
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_SUB_64(Instruction& instr, int i) {
|
||||
genar(instr);
|
||||
genar(instr, i);
|
||||
asmCode << "\tsub rax, ";
|
||||
genbr1(instr);
|
||||
gencr(instr);
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_SUB_32(Instruction& instr, int i) {
|
||||
genar(instr);
|
||||
genar(instr, i);
|
||||
asmCode << "\tsub eax, ";
|
||||
genbr132(instr);
|
||||
gencr(instr);
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_MUL_64(Instruction& instr, int i) {
|
||||
genar(instr);
|
||||
genar(instr, i);
|
||||
asmCode << "\timul rax, ";
|
||||
if ((instr.locb & 7) >= 6) {
|
||||
asmCode << "rax, ";
|
||||
|
@ -247,7 +265,7 @@ namespace RandomX {
|
|||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_MULH_64(Instruction& instr, int i) {
|
||||
genar(instr);
|
||||
genar(instr, i);
|
||||
asmCode << "\tmov rcx, ";
|
||||
genbr1(instr);
|
||||
asmCode << "\tmul rcx" << std::endl;
|
||||
|
@ -256,7 +274,7 @@ namespace RandomX {
|
|||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_MUL_32(Instruction& instr, int i) {
|
||||
genar(instr);
|
||||
genar(instr, i);
|
||||
asmCode << "\tmov ecx, eax" << std::endl;
|
||||
asmCode << "\tmov eax, ";
|
||||
genbr132(instr);
|
||||
|
@ -265,7 +283,7 @@ namespace RandomX {
|
|||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_IMUL_32(Instruction& instr, int i) {
|
||||
genar(instr);
|
||||
genar(instr, i);
|
||||
asmCode << "\tmovsxd rcx, eax" << std::endl;
|
||||
if ((instr.locb & 7) >= 6) {
|
||||
asmCode << "\tmov rax, " << instr.imm32 << std::endl;
|
||||
|
@ -278,7 +296,7 @@ namespace RandomX {
|
|||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_IMULH_64(Instruction& instr, int i) {
|
||||
genar(instr);
|
||||
genar(instr, i);
|
||||
asmCode << "\tmov rcx, ";
|
||||
genbr1(instr);
|
||||
asmCode << "\timul rcx" << std::endl;
|
||||
|
@ -287,7 +305,7 @@ namespace RandomX {
|
|||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_DIV_64(Instruction& instr, int i) {
|
||||
genar(instr);
|
||||
genar(instr, i);
|
||||
if ((instr.locb & 7) >= 6) {
|
||||
if (instr.imm32 == 0) {
|
||||
asmCode << "\tmov ecx, 1" << std::endl;
|
||||
|
@ -308,7 +326,7 @@ namespace RandomX {
|
|||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_IDIV_64(Instruction& instr, int i) {
|
||||
genar(instr);
|
||||
genar(instr, i);
|
||||
asmCode << "\tmov edx, ";
|
||||
genbr132(instr);
|
||||
asmCode << "\tcmp edx, -1" << std::endl;
|
||||
|
@ -329,91 +347,91 @@ namespace RandomX {
|
|||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_AND_64(Instruction& instr, int i) {
|
||||
genar(instr);
|
||||
genar(instr, i);
|
||||
asmCode << "\tand rax, ";
|
||||
genbr1(instr);
|
||||
gencr(instr);
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_AND_32(Instruction& instr, int i) {
|
||||
genar(instr);
|
||||
genar(instr, i);
|
||||
asmCode << "\tand eax, ";
|
||||
genbr132(instr);
|
||||
gencr(instr);
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_OR_64(Instruction& instr, int i) {
|
||||
genar(instr);
|
||||
genar(instr, i);
|
||||
asmCode << "\tor rax, ";
|
||||
genbr1(instr);
|
||||
gencr(instr);
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_OR_32(Instruction& instr, int i) {
|
||||
genar(instr);
|
||||
genar(instr, i);
|
||||
asmCode << "\tor eax, ";
|
||||
genbr132(instr);
|
||||
gencr(instr);
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_XOR_64(Instruction& instr, int i) {
|
||||
genar(instr);
|
||||
genar(instr, i);
|
||||
asmCode << "\txor rax, ";
|
||||
genbr1(instr);
|
||||
gencr(instr);
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_XOR_32(Instruction& instr, int i) {
|
||||
genar(instr);
|
||||
genar(instr, i);
|
||||
asmCode << "\txor eax, ";
|
||||
genbr132(instr);
|
||||
gencr(instr);
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_SHL_64(Instruction& instr, int i) {
|
||||
genar(instr);
|
||||
genar(instr, i);
|
||||
genbr0(instr, "shl");
|
||||
gencr(instr);
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_SHR_64(Instruction& instr, int i) {
|
||||
genar(instr);
|
||||
genar(instr, i);
|
||||
genbr0(instr, "shr");
|
||||
gencr(instr);
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_SAR_64(Instruction& instr, int i) {
|
||||
genar(instr);
|
||||
genar(instr, i);
|
||||
genbr0(instr, "sar");
|
||||
gencr(instr);
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_ROL_64(Instruction& instr, int i) {
|
||||
genar(instr);
|
||||
genar(instr, i);
|
||||
genbr0(instr, "rol");
|
||||
gencr(instr);
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_ROR_64(Instruction& instr, int i) {
|
||||
genar(instr);
|
||||
genar(instr, i);
|
||||
genbr0(instr, "ror");
|
||||
gencr(instr);
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_FPADD(Instruction& instr, int i) {
|
||||
genaf(instr);
|
||||
genaf(instr, i);
|
||||
genbf(instr, "addpd");
|
||||
gencf(instr);
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_FPSUB(Instruction& instr, int i) {
|
||||
genaf(instr);
|
||||
genaf(instr, i);
|
||||
genbf(instr, "subpd");
|
||||
gencf(instr);
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_FPMUL(Instruction& instr, int i) {
|
||||
genaf(instr);
|
||||
genaf(instr, i);
|
||||
genbf(instr, "mulpd");
|
||||
asmCode << "\tmovaps xmm1, xmm0" << std::endl;
|
||||
asmCode << "\tcmpeqpd xmm1, xmm1" << std::endl;
|
||||
|
@ -422,7 +440,7 @@ namespace RandomX {
|
|||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_FPDIV(Instruction& instr, int i) {
|
||||
genaf(instr);
|
||||
genaf(instr, i);
|
||||
genbf(instr, "divpd");
|
||||
asmCode << "\tmovaps xmm1, xmm0" << std::endl;
|
||||
asmCode << "\tcmpeqpd xmm1, xmm1" << std::endl;
|
||||
|
@ -431,14 +449,14 @@ namespace RandomX {
|
|||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_FPSQRT(Instruction& instr, int i) {
|
||||
genaf(instr);
|
||||
genaf(instr, i);
|
||||
asmCode << "\tandps xmm0, xmm10" << std::endl;
|
||||
asmCode << "\tsqrtpd xmm0, xmm0" << std::endl;
|
||||
gencf(instr);
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_FPROUND(Instruction& instr, int i) {
|
||||
genar(instr);
|
||||
genar(instr, i);
|
||||
//asmCode << "\tmov rcx, rax" << std::endl;
|
||||
asmCode << "\tshl eax, 13" << std::endl;
|
||||
//asmCode << "\tand rcx, -2048" << std::endl;
|
||||
|
@ -472,7 +490,7 @@ namespace RandomX {
|
|||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_CALL(Instruction& instr, int i) {
|
||||
genar(instr);
|
||||
genar(instr, i);
|
||||
asmCode << "\tcmp " << regR32[instr.regb % RegistersCount] << ", " << instr.imm32 << std::endl;
|
||||
asmCode << "\t" << jumpCondition(instr);
|
||||
asmCode << " short taken_call_" << i << std::endl;
|
||||
|
@ -487,7 +505,7 @@ namespace RandomX {
|
|||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_RET(Instruction& instr, int i) {
|
||||
genar(instr);
|
||||
genar(instr, i);
|
||||
asmCode << "\tcmp rsp, " << regStackBeginAddr << std::endl;
|
||||
asmCode << "\tje short not_taken_ret_" << i << std::endl;
|
||||
asmCode << "\txor rax, qword ptr [rsp + 8]" << std::endl;
|
||||
|
|
|
@ -38,8 +38,8 @@ namespace RandomX {
|
|||
static InstructionGenerator engine[256];
|
||||
std::stringstream asmCode;
|
||||
|
||||
void genar(Instruction&);
|
||||
void genaf(Instruction&);
|
||||
void genar(Instruction&, int);
|
||||
void genaf(Instruction&, int);
|
||||
void genbr0(Instruction&, const char*);
|
||||
void genbr1(Instruction&);
|
||||
void genbr132(Instruction&);
|
||||
|
|
|
@ -98,7 +98,7 @@ namespace RandomX {
|
|||
};
|
||||
|
||||
struct MemoryRegisters {
|
||||
addr_t ma, mx;
|
||||
addr_t mx, ma;
|
||||
dataset_t ds;
|
||||
};
|
||||
|
||||
|
|
|
@ -82,7 +82,7 @@ executeProgram PROC
|
|||
|
||||
; function arguments
|
||||
push rcx ; RegisterFile& registerFile
|
||||
mov edi, dword ptr [rdx] ; "mx"
|
||||
mov rdi, qword ptr [rdx] ; "mx", "ma"
|
||||
mov rax, qword ptr [rdx+8] ; uint8_t* dataset
|
||||
push rax
|
||||
mov rsi, r8 ; convertible_t* scratchpad
|
||||
|
@ -216,7 +216,7 @@ TransformAddress MACRO reg32, reg64
|
|||
;xor reg32, -8 ;# C = all except 0 to 7
|
||||
ENDM
|
||||
|
||||
ReadMemoryRandom MACRO spmask, float
|
||||
ReadMemoryRandom MACRO spmask
|
||||
;# IN ecx = random 32-bit address
|
||||
;# OUT rax = 64-bit integer return value
|
||||
;# OUT xmm0 = 128-bit floating point return value
|
||||
|
@ -225,19 +225,6 @@ ReadMemoryRandom MACRO spmask, float
|
|||
;# GLOBAL rsi = address of the scratchpad
|
||||
;# GLOBAL rdi = low 32 bits = "mx", high 32 bits = "ma"
|
||||
;# MODIFY rcx, rdx
|
||||
LOCAL L_prefetch_read, L_return
|
||||
test ebp, 63
|
||||
jz short L_prefetch_read ;# "ic" divisible by 64 -> prefetch + read
|
||||
xor rdi, rcx ;# randomize "mx"
|
||||
L_return:
|
||||
and ecx, spmask ;# limit address to the specified scratchpad size
|
||||
IF float
|
||||
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
|
||||
ELSE
|
||||
mov rax, qword ptr [rsi+rcx*8]
|
||||
ENDIF
|
||||
ret
|
||||
L_prefetch_read:
|
||||
; prefetch cacheline "mx"
|
||||
mov rax, qword ptr [rbx] ;# load the dataset address
|
||||
and rdi, -64 ;# align "mx" to the start of a cache line
|
||||
|
@ -249,34 +236,6 @@ L_prefetch_read:
|
|||
push rcx
|
||||
TransformAddress ecx, rcx ;# TransformAddress function
|
||||
and ecx, spmask-7 ;# limit address to the specified scratchpad size aligned to multiple of 8
|
||||
call rx_read_dataset
|
||||
pop rcx
|
||||
jmp short L_return
|
||||
ENDM
|
||||
|
||||
ALIGN 64
|
||||
rx_readint_l1:
|
||||
ReadMemoryRandom 2047, 0
|
||||
|
||||
ALIGN 64
|
||||
rx_readint_l2:
|
||||
ReadMemoryRandom 32767, 0
|
||||
|
||||
ALIGN 64
|
||||
rx_readfloat_l1:
|
||||
ReadMemoryRandom 2047, 1
|
||||
|
||||
ALIGN 64
|
||||
rx_readfloat_l2:
|
||||
ReadMemoryRandom 32767, 1
|
||||
|
||||
ALIGN 64
|
||||
rx_read_dataset:
|
||||
;# IN rax = dataset address
|
||||
;# IN ecx = scratchpad index - must be divisible by 8
|
||||
;# IN edx = dataset index - must be divisible by 64
|
||||
;# GLOBAL rsi = address of the scratchpad
|
||||
;# MODIFY rax, rcx, rdx
|
||||
lea rcx, [rsi+rcx*8] ;# scratchpad cache line
|
||||
lea rax, [rax+rdx] ;# dataset cache line
|
||||
mov rdx, qword ptr [rax+0] ;# load first dataset quadword (prefetched into the cache by now)
|
||||
|
@ -295,7 +254,18 @@ rx_read_dataset:
|
|||
xor qword ptr [rcx+48], rdx
|
||||
mov rdx, qword ptr [rax+56]
|
||||
xor qword ptr [rcx+56], rdx
|
||||
pop rcx
|
||||
ret
|
||||
ENDM
|
||||
|
||||
ALIGN 64
|
||||
rx_read_l1:
|
||||
ReadMemoryRandom 2047
|
||||
|
||||
ALIGN 64
|
||||
rx_read_l2:
|
||||
ReadMemoryRandom 32767
|
||||
|
||||
executeProgram ENDP
|
||||
|
||||
_RANDOMX_EXECUTE_PROGRAM ENDS
|
||||
|
|
4096
src/program.inc
4096
src/program.inc
File diff suppressed because it is too large
Load diff
Loading…
Reference in a new issue