Inlined calls for memory read

This commit is contained in:
tevador 2019-01-07 17:44:43 +01:00
parent 6519fed4d1
commit 2f6a599ff6
5 changed files with 3658 additions and 598 deletions

View file

@ -59,37 +59,55 @@ namespace RandomX {
(this->*generator)(instr, i);
}
// Appends to asmCode the assembly that reads integer operand A; the generated code ends by
// loading a qword from [rsi+rcx*8] into rax.
// `i` is used only to build the per-instruction label rx_body_<i>.
// NOTE(review): two signatures and two call variants (`rx_readint_*` + `return;` next to
// `rx_read_*` + label/xor/and) are both present. This looks like a rendered diff with removed
// and added lines interleaved -- confirm against the real file before compiling.
void AssemblyGeneratorX86::genar(Instruction& instr) {
void AssemblyGeneratorX86::genar(Instruction& instr, int i) {
// xor the register selected by rega with the constant addra (printed as 0<hex>h), then copy its 32-bit form to ecx
asmCode << "\txor " << regR[instr.rega % RegistersCount] << ", 0" << std::hex << instr.addra << "h" << std::dec << std::endl;
asmCode << "\tmov ecx, " << regR32[instr.rega % RegistersCount] << std::endl;
// generated code jumps to rx_body_<i>, skipping the call emitted below, when (ebp & 63) is nonzero
asmCode << "\ttest ebp, 63" << std::endl;
asmCode << "\tjnz short rx_body_" << i << std::endl;
// low two bits of loca pick the variant: 0-2 use the L1 helper and mask, 3 uses the L2 helper and mask
switch (instr.loca & 3)
{
case 0:
case 1:
case 2:
asmCode << "\tcall rx_readint_l1" << std::endl;
return;
asmCode << "\tcall rx_read_l1" << std::endl;
asmCode << "rx_body_" << i << ":" << std::endl;
asmCode << "\txor rdi, rcx" << std::endl;
// limit the address in ecx with the mask ScratchpadL1 - 1
asmCode << "\tand ecx, " << (ScratchpadL1 - 1) << std::endl;
break;
default: //3
asmCode << "\tcall rx_readint_l2" << std::endl;
return;
asmCode << "\tcall rx_read_l2" << std::endl;
asmCode << "rx_body_" << i << ":" << std::endl;
asmCode << "\txor rdi, rcx" << std::endl;
// limit the address in ecx with the mask ScratchpadL2 - 1
asmCode << "\tand ecx, " << (ScratchpadL2 - 1) << std::endl;
break;
}
// integer result: qword at rsi + rcx*8 goes to rax
asmCode << "\tmov rax, qword ptr [rsi+rcx*8]" << std::endl;
}
// Appends to asmCode the assembly that reads floating-point operand A; the generated code ends
// with `cvtdq2pd xmm0, qword ptr [rsi+rcx*8]`, leaving the converted value in xmm0.
// `i` is used only to build the per-instruction label rx_body_<i>.
// NOTE(review): two signatures and two call variants (`rx_readfloat_*` + `return;` next to
// `rx_read_*` + label/xor/and) are both present. This looks like a rendered diff with removed
// and added lines interleaved -- confirm against the real file before compiling.
void AssemblyGeneratorX86::genaf(Instruction& instr) {
void AssemblyGeneratorX86::genaf(Instruction& instr, int i) {
// xor the register selected by rega with the constant addra (printed as 0<hex>h), then copy its 32-bit form to ecx
asmCode << "\txor " << regR[instr.rega % RegistersCount] << ", 0" << std::hex << instr.addra << "h" << std::dec << std::endl;
asmCode << "\tmov ecx, " << regR32[instr.rega % RegistersCount] << std::endl;
// generated code jumps to rx_body_<i>, skipping the call emitted below, when (ebp & 63) is nonzero
asmCode << "\ttest ebp, 63" << std::endl;
asmCode << "\tjnz short rx_body_" << i << std::endl;
// low two bits of loca pick the variant: 0-2 use the L1 helper and mask, 3 uses the L2 helper and mask
switch (instr.loca & 3)
{
case 0:
case 1:
case 2:
asmCode << "\tcall rx_readfloat_l1" << std::endl;
return;
asmCode << "\tcall rx_read_l1" << std::endl;
asmCode << "rx_body_" << i << ":" << std::endl;
asmCode << "\txor rdi, rcx" << std::endl;
// limit the address in ecx with the mask ScratchpadL1 - 1
asmCode << "\tand ecx, " << (ScratchpadL1 - 1) << std::endl;
break;
default: //3
asmCode << "\tcall rx_readfloat_l2" << std::endl;
return;
asmCode << "\tcall rx_read_l2" << std::endl;
asmCode << "rx_body_" << i << ":" << std::endl;
asmCode << "\txor rdi, rcx" << std::endl;
// limit the address in ecx with the mask ScratchpadL2 - 1
asmCode << "\tand ecx, " << (ScratchpadL2 - 1) << std::endl;
break;
}
// floating-point result: the two packed 32-bit integers at rsi + rcx*8 are converted to doubles in xmm0
asmCode << "\tcvtdq2pd xmm0, qword ptr [rsi+rcx*8]" << std::endl;
}
void AssemblyGeneratorX86::genbr0(Instruction& instr, const char* instrx86) {
@ -209,35 +227,35 @@ namespace RandomX {
}
// Generates the ADD_64 handler: genar emits the operand-A read (result in rax), then the text
// "\tadd rax, " is written and genbr1 / gencr are called.
// NOTE(review): genbr1 and gencr are defined outside this view; presumably they emit operand B
// and the result store -- confirm.
// NOTE(review): both genar(instr) and genar(instr, i) appear; they look like the removed and
// added lines of a diff -- confirm.
void AssemblyGeneratorX86::h_ADD_64(Instruction& instr, int i) {
genar(instr);
genar(instr, i);
asmCode << "\tadd rax, ";
genbr1(instr);
gencr(instr);
}
// Generates the ADD_32 handler: genar emits the operand-A read, then the text "\tadd eax, " is
// written and genbr132 / gencr are called.
// NOTE(review): genbr132 and gencr are defined outside this view; presumably they emit the
// 32-bit operand B and the result store -- confirm.
// NOTE(review): both genar(instr) and genar(instr, i) appear; they look like the removed and
// added lines of a diff -- confirm.
void AssemblyGeneratorX86::h_ADD_32(Instruction& instr, int i) {
genar(instr);
genar(instr, i);
asmCode << "\tadd eax, ";
genbr132(instr);
gencr(instr);
}
// Generates the SUB_64 handler: genar emits the operand-A read (result in rax), then the text
// "\tsub rax, " is written and genbr1 / gencr are called.
// NOTE(review): genbr1 and gencr are defined outside this view; presumably they emit operand B
// and the result store -- confirm.
// NOTE(review): both genar(instr) and genar(instr, i) appear; they look like the removed and
// added lines of a diff -- confirm.
void AssemblyGeneratorX86::h_SUB_64(Instruction& instr, int i) {
genar(instr);
genar(instr, i);
asmCode << "\tsub rax, ";
genbr1(instr);
gencr(instr);
}
// Generates the SUB_32 handler: genar emits the operand-A read, then the text "\tsub eax, " is
// written and genbr132 / gencr are called.
// NOTE(review): genbr132 and gencr are defined outside this view; presumably they emit the
// 32-bit operand B and the result store -- confirm.
// NOTE(review): both genar(instr) and genar(instr, i) appear; they look like the removed and
// added lines of a diff -- confirm.
void AssemblyGeneratorX86::h_SUB_32(Instruction& instr, int i) {
genar(instr);
genar(instr, i);
asmCode << "\tsub eax, ";
genbr132(instr);
gencr(instr);
}
void AssemblyGeneratorX86::h_MUL_64(Instruction& instr, int i) {
genar(instr);
genar(instr, i);
asmCode << "\timul rax, ";
if ((instr.locb & 7) >= 6) {
asmCode << "rax, ";
@ -247,7 +265,7 @@ namespace RandomX {
}
void AssemblyGeneratorX86::h_MULH_64(Instruction& instr, int i) {
genar(instr);
genar(instr, i);
asmCode << "\tmov rcx, ";
genbr1(instr);
asmCode << "\tmul rcx" << std::endl;
@ -256,7 +274,7 @@ namespace RandomX {
}
void AssemblyGeneratorX86::h_MUL_32(Instruction& instr, int i) {
genar(instr);
genar(instr, i);
asmCode << "\tmov ecx, eax" << std::endl;
asmCode << "\tmov eax, ";
genbr132(instr);
@ -265,7 +283,7 @@ namespace RandomX {
}
void AssemblyGeneratorX86::h_IMUL_32(Instruction& instr, int i) {
genar(instr);
genar(instr, i);
asmCode << "\tmovsxd rcx, eax" << std::endl;
if ((instr.locb & 7) >= 6) {
asmCode << "\tmov rax, " << instr.imm32 << std::endl;
@ -278,7 +296,7 @@ namespace RandomX {
}
void AssemblyGeneratorX86::h_IMULH_64(Instruction& instr, int i) {
genar(instr);
genar(instr, i);
asmCode << "\tmov rcx, ";
genbr1(instr);
asmCode << "\timul rcx" << std::endl;
@ -287,7 +305,7 @@ namespace RandomX {
}
void AssemblyGeneratorX86::h_DIV_64(Instruction& instr, int i) {
genar(instr);
genar(instr, i);
if ((instr.locb & 7) >= 6) {
if (instr.imm32 == 0) {
asmCode << "\tmov ecx, 1" << std::endl;
@ -308,7 +326,7 @@ namespace RandomX {
}
void AssemblyGeneratorX86::h_IDIV_64(Instruction& instr, int i) {
genar(instr);
genar(instr, i);
asmCode << "\tmov edx, ";
genbr132(instr);
asmCode << "\tcmp edx, -1" << std::endl;
@ -329,91 +347,91 @@ namespace RandomX {
}
// Generates the AND_64 handler: genar emits the operand-A read (result in rax), then the text
// "\tand rax, " is written and genbr1 / gencr are called.
// NOTE(review): genbr1 and gencr are defined outside this view; presumably they emit operand B
// and the result store -- confirm.
// NOTE(review): both genar(instr) and genar(instr, i) appear; they look like the removed and
// added lines of a diff -- confirm.
void AssemblyGeneratorX86::h_AND_64(Instruction& instr, int i) {
genar(instr);
genar(instr, i);
asmCode << "\tand rax, ";
genbr1(instr);
gencr(instr);
}
// Generates the AND_32 handler: genar emits the operand-A read, then the text "\tand eax, " is
// written and genbr132 / gencr are called.
// NOTE(review): genbr132 and gencr are defined outside this view; presumably they emit the
// 32-bit operand B and the result store -- confirm.
// NOTE(review): both genar(instr) and genar(instr, i) appear; they look like the removed and
// added lines of a diff -- confirm.
void AssemblyGeneratorX86::h_AND_32(Instruction& instr, int i) {
genar(instr);
genar(instr, i);
asmCode << "\tand eax, ";
genbr132(instr);
gencr(instr);
}
// Generates the OR_64 handler: genar emits the operand-A read (result in rax), then the text
// "\tor rax, " is written and genbr1 / gencr are called.
// NOTE(review): genbr1 and gencr are defined outside this view; presumably they emit operand B
// and the result store -- confirm.
// NOTE(review): both genar(instr) and genar(instr, i) appear; they look like the removed and
// added lines of a diff -- confirm.
void AssemblyGeneratorX86::h_OR_64(Instruction& instr, int i) {
genar(instr);
genar(instr, i);
asmCode << "\tor rax, ";
genbr1(instr);
gencr(instr);
}
// Generates the OR_32 handler: genar emits the operand-A read, then the text "\tor eax, " is
// written and genbr132 / gencr are called.
// NOTE(review): genbr132 and gencr are defined outside this view; presumably they emit the
// 32-bit operand B and the result store -- confirm.
// NOTE(review): both genar(instr) and genar(instr, i) appear; they look like the removed and
// added lines of a diff -- confirm.
void AssemblyGeneratorX86::h_OR_32(Instruction& instr, int i) {
genar(instr);
genar(instr, i);
asmCode << "\tor eax, ";
genbr132(instr);
gencr(instr);
}
// Generates the XOR_64 handler: genar emits the operand-A read (result in rax), then the text
// "\txor rax, " is written and genbr1 / gencr are called.
// NOTE(review): genbr1 and gencr are defined outside this view; presumably they emit operand B
// and the result store -- confirm.
// NOTE(review): both genar(instr) and genar(instr, i) appear; they look like the removed and
// added lines of a diff -- confirm.
void AssemblyGeneratorX86::h_XOR_64(Instruction& instr, int i) {
genar(instr);
genar(instr, i);
asmCode << "\txor rax, ";
genbr1(instr);
gencr(instr);
}
// Generates the XOR_32 handler: genar emits the operand-A read, then the text "\txor eax, " is
// written and genbr132 / gencr are called.
// NOTE(review): genbr132 and gencr are defined outside this view; presumably they emit the
// 32-bit operand B and the result store -- confirm.
// NOTE(review): both genar(instr) and genar(instr, i) appear; they look like the removed and
// added lines of a diff -- confirm.
void AssemblyGeneratorX86::h_XOR_32(Instruction& instr, int i) {
genar(instr);
genar(instr, i);
asmCode << "\txor eax, ";
genbr132(instr);
gencr(instr);
}
// Generates the SHL_64 handler: genar emits the operand-A read (result in rax), then genbr0 is
// called with the x86 mnemonic "shl", followed by gencr.
// NOTE(review): genbr0's body and gencr are outside this view; presumably they emit the shift by
// operand B and the result store -- confirm.
// NOTE(review): both genar(instr) and genar(instr, i) appear; they look like the removed and
// added lines of a diff -- confirm.
void AssemblyGeneratorX86::h_SHL_64(Instruction& instr, int i) {
genar(instr);
genar(instr, i);
genbr0(instr, "shl");
gencr(instr);
}
// Generates the SHR_64 handler: genar emits the operand-A read (result in rax), then genbr0 is
// called with the x86 mnemonic "shr", followed by gencr.
// NOTE(review): genbr0's body and gencr are outside this view; presumably they emit the shift by
// operand B and the result store -- confirm.
// NOTE(review): both genar(instr) and genar(instr, i) appear; they look like the removed and
// added lines of a diff -- confirm.
void AssemblyGeneratorX86::h_SHR_64(Instruction& instr, int i) {
genar(instr);
genar(instr, i);
genbr0(instr, "shr");
gencr(instr);
}
// Generates the SAR_64 handler: genar emits the operand-A read (result in rax), then genbr0 is
// called with the x86 mnemonic "sar", followed by gencr.
// NOTE(review): genbr0's body and gencr are outside this view; presumably they emit the shift by
// operand B and the result store -- confirm.
// NOTE(review): both genar(instr) and genar(instr, i) appear; they look like the removed and
// added lines of a diff -- confirm.
void AssemblyGeneratorX86::h_SAR_64(Instruction& instr, int i) {
genar(instr);
genar(instr, i);
genbr0(instr, "sar");
gencr(instr);
}
// Generates the ROL_64 handler: genar emits the operand-A read (result in rax), then genbr0 is
// called with the x86 mnemonic "rol", followed by gencr.
// NOTE(review): genbr0's body and gencr are outside this view; presumably they emit the rotate by
// operand B and the result store -- confirm.
// NOTE(review): both genar(instr) and genar(instr, i) appear; they look like the removed and
// added lines of a diff -- confirm.
void AssemblyGeneratorX86::h_ROL_64(Instruction& instr, int i) {
genar(instr);
genar(instr, i);
genbr0(instr, "rol");
gencr(instr);
}
// Generates the ROR_64 handler: genar emits the operand-A read (result in rax), then genbr0 is
// called with the x86 mnemonic "ror", followed by gencr.
// NOTE(review): genbr0's body and gencr are outside this view; presumably they emit the rotate by
// operand B and the result store -- confirm.
// NOTE(review): both genar(instr) and genar(instr, i) appear; they look like the removed and
// added lines of a diff -- confirm.
void AssemblyGeneratorX86::h_ROR_64(Instruction& instr, int i) {
genar(instr);
genar(instr, i);
genbr0(instr, "ror");
gencr(instr);
}
// Generates the FPADD handler: genaf emits the floating-point operand-A read (result in xmm0),
// then genbf is called with the mnemonic "addpd", followed by gencf.
// NOTE(review): genbf and gencf are defined outside this view; presumably they emit the operation
// with operand B and the result store -- confirm.
// NOTE(review): both genaf(instr) and genaf(instr, i) appear; they look like the removed and
// added lines of a diff -- confirm.
void AssemblyGeneratorX86::h_FPADD(Instruction& instr, int i) {
genaf(instr);
genaf(instr, i);
genbf(instr, "addpd");
gencf(instr);
}
// Generates the FPSUB handler: genaf emits the floating-point operand-A read (result in xmm0),
// then genbf is called with the mnemonic "subpd", followed by gencf.
// NOTE(review): genbf and gencf are defined outside this view; presumably they emit the operation
// with operand B and the result store -- confirm.
// NOTE(review): both genaf(instr) and genaf(instr, i) appear; they look like the removed and
// added lines of a diff -- confirm.
void AssemblyGeneratorX86::h_FPSUB(Instruction& instr, int i) {
genaf(instr);
genaf(instr, i);
genbf(instr, "subpd");
gencf(instr);
}
void AssemblyGeneratorX86::h_FPMUL(Instruction& instr, int i) {
genaf(instr);
genaf(instr, i);
genbf(instr, "mulpd");
asmCode << "\tmovaps xmm1, xmm0" << std::endl;
asmCode << "\tcmpeqpd xmm1, xmm1" << std::endl;
@ -422,7 +440,7 @@ namespace RandomX {
}
void AssemblyGeneratorX86::h_FPDIV(Instruction& instr, int i) {
genaf(instr);
genaf(instr, i);
genbf(instr, "divpd");
asmCode << "\tmovaps xmm1, xmm0" << std::endl;
asmCode << "\tcmpeqpd xmm1, xmm1" << std::endl;
@ -431,14 +449,14 @@ namespace RandomX {
}
// Generates the FPSQRT handler: genaf emits the floating-point operand-A read (result in xmm0),
// then xmm0 is ANDed with xmm10 and replaced by its packed square root before gencf is called.
// NOTE(review): xmm10 presumably holds a mask that clears the sign bits so sqrtpd never sees a
// negative input -- confirm where xmm10 is initialised.
// NOTE(review): both genaf(instr) and genaf(instr, i) appear; they look like the removed and
// added lines of a diff -- confirm.
void AssemblyGeneratorX86::h_FPSQRT(Instruction& instr, int i) {
genaf(instr);
genaf(instr, i);
asmCode << "\tandps xmm0, xmm10" << std::endl;
asmCode << "\tsqrtpd xmm0, xmm0" << std::endl;
gencf(instr);
}
void AssemblyGeneratorX86::h_FPROUND(Instruction& instr, int i) {
genar(instr);
genar(instr, i);
//asmCode << "\tmov rcx, rax" << std::endl;
asmCode << "\tshl eax, 13" << std::endl;
//asmCode << "\tand rcx, -2048" << std::endl;
@ -472,7 +490,7 @@ namespace RandomX {
}
void AssemblyGeneratorX86::h_CALL(Instruction& instr, int i) {
genar(instr);
genar(instr, i);
asmCode << "\tcmp " << regR32[instr.regb % RegistersCount] << ", " << instr.imm32 << std::endl;
asmCode << "\t" << jumpCondition(instr);
asmCode << " short taken_call_" << i << std::endl;
@ -487,7 +505,7 @@ namespace RandomX {
}
void AssemblyGeneratorX86::h_RET(Instruction& instr, int i) {
genar(instr);
genar(instr, i);
asmCode << "\tcmp rsp, " << regStackBeginAddr << std::endl;
asmCode << "\tje short not_taken_ret_" << i << std::endl;
asmCode << "\txor rax, qword ptr [rsp + 8]" << std::endl;

View file

@ -38,8 +38,8 @@ namespace RandomX {
static InstructionGenerator engine[256];
std::stringstream asmCode;
// Operand-A read generators: genar for integer handlers, genaf for floating-point handlers.
// The int parameter of the two-argument forms is the value the definitions use to build the
// rx_body_<i> label.
// NOTE(review): one- and two-parameter declarations are both listed; they look like the removed
// and added lines of a diff -- confirm which pair the real header contains.
void genar(Instruction&);
void genaf(Instruction&);
void genar(Instruction&, int);
void genaf(Instruction&, int);
void genbr0(Instruction&, const char*);
void genbr1(Instruction&);
void genbr132(Instruction&);

View file

@ -98,7 +98,7 @@ namespace RandomX {
};
// Holds the two addr_t values mx and ma plus a dataset_t member ds.
// NOTE(review): the two addr_t lines declare the same members in opposite order; they look like
// the removed and added lines of a diff -- confirm which one the real file contains.
// NOTE(review): member order is presumably significant: the assembly in this commit loads a single
// qword and documents it as low 32 bits = "mx", high 32 bits = "ma", which would require mx to
// come first and addr_t to be 32 bits wide -- confirm.
struct MemoryRegisters {
addr_t ma, mx;
addr_t mx, ma;
dataset_t ds;
};

View file

@ -82,7 +82,7 @@ executeProgram PROC
; function arguments
push rcx ; RegisterFile& registerFile
mov edi, dword ptr [rdx] ; "mx"
mov rdi, qword ptr [rdx] ; "mx", "ma"
mov rax, qword ptr [rdx+8] ; uint8_t* dataset
push rax
mov rsi, r8 ; convertible_t* scratchpad
@ -216,7 +216,7 @@ TransformAddress MACRO reg32, reg64
;xor reg32, -8 ;# C = all except 0 to 7
ENDM
ReadMemoryRandom MACRO spmask, float
ReadMemoryRandom MACRO spmask
;# IN ecx = random 32-bit address
;# OUT rax = 64-bit integer return value
;# OUT xmm0 = 128-bit floating point return value
@ -225,19 +225,6 @@ ReadMemoryRandom MACRO spmask, float
;# GLOBAL rsi = address of the scratchpad
;# GLOBAL rdi = low 32 bits = "mx", high 32 bits = "ma"
;# MODIFY rcx, rdx
LOCAL L_prefetch_read, L_return
test ebp, 63
jz short L_prefetch_read ;# "ic" divisible by 64 -> prefetch + read
xor rdi, rcx ;# randomize "mx"
L_return:
and ecx, spmask ;# limit address to the specified scratchpad size
IF float
cvtdq2pd xmm0, qword ptr [rsi+rcx*8]
ELSE
mov rax, qword ptr [rsi+rcx*8]
ENDIF
ret
L_prefetch_read:
; prefetch cacheline "mx"
mov rax, qword ptr [rbx] ;# load the dataset address
and rdi, -64 ;# align "mx" to the start of a cache line
@ -249,34 +236,6 @@ L_prefetch_read:
push rcx
TransformAddress ecx, rcx ;# TransformAddress function
and ecx, spmask-7 ;# limit address to the specified scratchpad size aligned to multiple of 8
call rx_read_dataset
pop rcx
jmp short L_return
ENDM
ALIGN 64
rx_readint_l1:
ReadMemoryRandom 2047, 0
ALIGN 64
rx_readint_l2:
ReadMemoryRandom 32767, 0
ALIGN 64
rx_readfloat_l1:
ReadMemoryRandom 2047, 1
ALIGN 64
rx_readfloat_l2:
ReadMemoryRandom 32767, 1
ALIGN 64
rx_read_dataset:
;# IN rax = dataset address
;# IN ecx = scratchpad index - must be divisible by 8
;# IN edx = dataset index - must be divisible by 64
;# GLOBAL rsi = address of the scratchpad
;# MODIFY rax, rcx, rdx
lea rcx, [rsi+rcx*8] ;# scratchpad cache line
lea rax, [rax+rdx] ;# dataset cache line
mov rdx, qword ptr [rax+0] ;# load first dataset quadword (prefetched into the cache by now)
@ -295,7 +254,18 @@ rx_read_dataset:
xor qword ptr [rcx+48], rdx
mov rdx, qword ptr [rax+56]
xor qword ptr [rcx+56], rdx
pop rcx
ret
ENDM
ALIGN 64
rx_read_l1:
ReadMemoryRandom 2047
ALIGN 64
rx_read_l2:
ReadMemoryRandom 32767
executeProgram ENDP
_RANDOMX_EXECUTE_PROGRAM ENDS

File diff suppressed because it is too large Load diff