Mirror of https://git.wownero.com/wownero/RandomWOW.git (synced 2024-08-15 00:23:14 +00:00)
Merge pull request #124 from SChernykh/master
Optimized loading from scratchpad
This commit is contained in:
commit
5fb26fc607
7 changed files with 63 additions and 14 deletions
|
@ -1,5 +1,3 @@
|
||||||
mov rdx, rax
|
|
||||||
and eax, RANDOMX_SCRATCHPAD_MASK
|
|
||||||
lea rcx, [rsi+rax]
|
lea rcx, [rsi+rax]
|
||||||
push rcx
|
push rcx
|
||||||
xor r8, qword ptr [rcx+0]
|
xor r8, qword ptr [rcx+0]
|
||||||
|
@ -10,8 +8,6 @@
|
||||||
xor r13, qword ptr [rcx+40]
|
xor r13, qword ptr [rcx+40]
|
||||||
xor r14, qword ptr [rcx+48]
|
xor r14, qword ptr [rcx+48]
|
||||||
xor r15, qword ptr [rcx+56]
|
xor r15, qword ptr [rcx+56]
|
||||||
ror rdx, 32
|
|
||||||
and edx, RANDOMX_SCRATCHPAD_MASK
|
|
||||||
lea rcx, [rsi+rdx]
|
lea rcx, [rsi+rdx]
|
||||||
push rcx
|
push rcx
|
||||||
cvtdq2pd xmm0, qword ptr [rcx+0]
|
cvtdq2pd xmm0, qword ptr [rcx+0]
|
||||||
|
|
|
@ -1,4 +1,3 @@
|
||||||
xor eax, eax
|
|
||||||
pop rcx
|
pop rcx
|
||||||
mov qword ptr [rcx+0], r8
|
mov qword ptr [rcx+0], r8
|
||||||
mov qword ptr [rcx+8], r9
|
mov qword ptr [rcx+8], r9
|
||||||
|
|
|
@ -243,7 +243,7 @@ namespace randomx {
|
||||||
generateProgramPrologue(prog, pcfg);
|
generateProgramPrologue(prog, pcfg);
|
||||||
memcpy(code + codePos, codeReadDataset, readDatasetSize);
|
memcpy(code + codePos, codeReadDataset, readDatasetSize);
|
||||||
codePos += readDatasetSize;
|
codePos += readDatasetSize;
|
||||||
generateProgramEpilogue(prog);
|
generateProgramEpilogue(prog, pcfg);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::generateProgramLight(Program& prog, ProgramConfiguration& pcfg, uint32_t datasetOffset) {
|
void JitCompilerX86::generateProgramLight(Program& prog, ProgramConfiguration& pcfg, uint32_t datasetOffset) {
|
||||||
|
@ -254,7 +254,7 @@ namespace randomx {
|
||||||
emitByte(CALL);
|
emitByte(CALL);
|
||||||
emit32(superScalarHashOffset - (codePos + 4));
|
emit32(superScalarHashOffset - (codePos + 4));
|
||||||
emit(codeReadDatasetLightSshFin, readDatasetLightFinSize);
|
emit(codeReadDatasetLightSshFin, readDatasetLightFinSize);
|
||||||
generateProgramEpilogue(prog);
|
generateProgramEpilogue(prog, pcfg);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<size_t N>
|
template<size_t N>
|
||||||
|
@ -298,12 +298,13 @@ namespace randomx {
|
||||||
for (unsigned i = 0; i < 8; ++i) {
|
for (unsigned i = 0; i < 8; ++i) {
|
||||||
registerUsage[i] = -1;
|
registerUsage[i] = -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
codePos = ((uint8_t*)randomx_program_prologue_first_load) - ((uint8_t*)randomx_program_prologue);
|
||||||
|
code[codePos + sizeof(REX_XOR_RAX_R64)] = 0xc0 + pcfg.readReg0;
|
||||||
|
code[codePos + sizeof(REX_XOR_RAX_R64) * 2 + 1] = 0xc0 + pcfg.readReg1;
|
||||||
|
|
||||||
codePos = prologueSize;
|
codePos = prologueSize;
|
||||||
memcpy(code + codePos - 48, &pcfg.eMask, sizeof(pcfg.eMask));
|
memcpy(code + codePos - 48, &pcfg.eMask, sizeof(pcfg.eMask));
|
||||||
emit(REX_XOR_RAX_R64);
|
|
||||||
emitByte(0xc0 + pcfg.readReg0);
|
|
||||||
emit(REX_XOR_RAX_R64);
|
|
||||||
emitByte(0xc0 + pcfg.readReg1);
|
|
||||||
memcpy(code + codePos, codeLoopLoad, loopLoadSize);
|
memcpy(code + codePos, codeLoopLoad, loopLoadSize);
|
||||||
codePos += loopLoadSize;
|
codePos += loopLoadSize;
|
||||||
for (unsigned i = 0; i < prog.getSize(); ++i) {
|
for (unsigned i = 0; i < prog.getSize(); ++i) {
|
||||||
|
@ -318,7 +319,12 @@ namespace randomx {
|
||||||
emitByte(0xc0 + pcfg.readReg3);
|
emitByte(0xc0 + pcfg.readReg3);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::generateProgramEpilogue(Program& prog) {
|
void JitCompilerX86::generateProgramEpilogue(Program& prog, ProgramConfiguration& pcfg) {
|
||||||
|
emit(REX_MOV_RR64);
|
||||||
|
emitByte(0xc0 + pcfg.readReg0);
|
||||||
|
emit(REX_XOR_RAX_R64);
|
||||||
|
emitByte(0xc0 + pcfg.readReg1);
|
||||||
|
emit((const uint8_t*)&randomx_prefetch_scratchpad, ((uint8_t*)&randomx_prefetch_scratchpad_end) - ((uint8_t*)&randomx_prefetch_scratchpad));
|
||||||
memcpy(code + codePos, codeLoopStore, loopStoreSize);
|
memcpy(code + codePos, codeLoopStore, loopStoreSize);
|
||||||
codePos += loopStoreSize;
|
codePos += loopStoreSize;
|
||||||
emit(SUB_EBX);
|
emit(SUB_EBX);
|
||||||
|
|
|
@ -73,7 +73,7 @@ namespace randomx {
|
||||||
int32_t codePos;
|
int32_t codePos;
|
||||||
|
|
||||||
void generateProgramPrologue(Program&, ProgramConfiguration&);
|
void generateProgramPrologue(Program&, ProgramConfiguration&);
|
||||||
void generateProgramEpilogue(Program&);
|
void generateProgramEpilogue(Program&, ProgramConfiguration&);
|
||||||
void genAddressReg(Instruction&, bool);
|
void genAddressReg(Instruction&, bool);
|
||||||
void genAddressRegDst(Instruction&);
|
void genAddressRegDst(Instruction&);
|
||||||
void genAddressImm(Instruction&);
|
void genAddressImm(Instruction&);
|
||||||
|
|
|
@ -37,7 +37,10 @@
|
||||||
#define WINABI
|
#define WINABI
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
.global DECL(randomx_prefetch_scratchpad)
|
||||||
|
.global DECL(randomx_prefetch_scratchpad_end)
|
||||||
.global DECL(randomx_program_prologue)
|
.global DECL(randomx_program_prologue)
|
||||||
|
.global DECL(randomx_program_prologue_first_load)
|
||||||
.global DECL(randomx_program_loop_begin)
|
.global DECL(randomx_program_loop_begin)
|
||||||
.global DECL(randomx_program_loop_load)
|
.global DECL(randomx_program_loop_load)
|
||||||
.global DECL(randomx_program_start)
|
.global DECL(randomx_program_start)
|
||||||
|
@ -65,6 +68,16 @@
|
||||||
|
|
||||||
#define db .byte
|
#define db .byte
|
||||||
|
|
||||||
|
;# Scratchpad prefetch fragment, delimited by the randomx_prefetch_scratchpad and
;# randomx_prefetch_scratchpad_end labels. It contains no ret: generateProgramEpilogue
;# copies the bytes between the two labels inline with emit(), directly after the
;# bytes it emits for pcfg.readReg0 and pcfg.readReg1.
;# In:    rax = 64-bit value whose two 32-bit halves are turned into offsets
;#        rsi = base added to both offsets (presumably the scratchpad pointer, TODO confirm)
;# Out:   rax = low half of the input AND RANDOMX_SCRATCHPAD_MASK, zero-extended
;#        rdx = high half of the input AND RANDOMX_SCRATCHPAD_MASK, zero-extended
;# Clobb: flags
;# NOTE(review): the loop-load code in this diff addresses memory with
;# lea rcx, [rsi+rax] and lea rcx, [rsi+rdx]; presumably it consumes the offsets
;# left here on the next iteration. Verify against the full loop.
DECL(randomx_prefetch_scratchpad):
|
||||||
|
;# Keep a full 64-bit copy before the 32-bit AND below zeroes the upper half of rax.
mov rdx, rax
|
||||||
|
;# First offset: low 32 bits, masked. The 32-bit write zero-extends into rax.
and eax, RANDOMX_SCRATCHPAD_MASK
|
||||||
|
;# Prefetch hint for the first address.
prefetcht0 [rsi+rax]
|
||||||
|
;# Swap the halves of the copy so the original upper 32 bits land in edx.
ror rdx, 32
|
||||||
|
;# Second offset: original high 32 bits, masked. Zero-extends into rdx.
and edx, RANDOMX_SCRATCHPAD_MASK
|
||||||
|
;# Prefetch hint for the second address.
prefetcht0 [rsi+rdx]
|
||||||
|
|
||||||
|
;# End marker only; the JIT uses it to compute the length of the fragment above.
DECL(randomx_prefetch_scratchpad_end):
|
||||||
|
|
||||||
.balign 64
|
.balign 64
|
||||||
DECL(randomx_program_prologue):
|
DECL(randomx_program_prologue):
|
||||||
#if defined(WINABI)
|
#if defined(WINABI)
|
||||||
|
@ -75,6 +88,14 @@ DECL(randomx_program_prologue):
|
||||||
movapd xmm13, xmmword ptr [mantissaMask+rip]
|
movapd xmm13, xmmword ptr [mantissaMask+rip]
|
||||||
movapd xmm14, xmmword ptr [exp240+rip]
|
movapd xmm14, xmmword ptr [exp240+rip]
|
||||||
movapd xmm15, xmmword ptr [scaleMask+rip]
|
movapd xmm15, xmmword ptr [scaleMask+rip]
|
||||||
|
|
||||||
|
;# First address computation, placed after the prologue constant loads and before
;# the jump into the main loop.
;# The two xor rax, r8 instructions below are templates. generateProgramPrologue sets
;# codePos to the offset of this label from randomx_program_prologue and overwrites
;# one byte in each instruction, at sizeof(REX_XOR_RAX_R64) and at
;# sizeof(REX_XOR_RAX_R64) * 2 + 1, with 0xc0 + pcfg.readReg0 and 0xc0 + pcfg.readReg1.
;# r8 is therefore only a placeholder operand. Do not reorder or re-encode these two
;# instructions without updating those offsets in the JIT compiler.
;# Out:   rax = low 32 bits of the xor result AND RANDOMX_SCRATCHPAD_MASK, zero-extended
;#        rdx = high 32 bits of the xor result AND RANDOMX_SCRATCHPAD_MASK, zero-extended
;# Clobb: flags
DECL(randomx_program_prologue_first_load):
|
||||||
|
;# Template, source register byte patched with pcfg.readReg0.
xor rax, r8
|
||||||
|
;# Template, source register byte patched with pcfg.readReg1.
xor rax, r8
|
||||||
|
;# Keep a full 64-bit copy before the 32-bit AND below zeroes the upper half of rax.
mov rdx, rax
|
||||||
|
;# First offset: low 32 bits, masked. The 32-bit write zero-extends into rax.
and eax, RANDOMX_SCRATCHPAD_MASK
|
||||||
|
;# Swap the halves of the copy so the original upper 32 bits land in edx.
ror rdx, 32
|
||||||
|
;# Second offset: original high 32 bits, masked. Zero-extends into rdx.
and edx, RANDOMX_SCRATCHPAD_MASK
|
||||||
;# Enter the main loop with both offsets prepared in rax and rdx.
jmp DECL(randomx_program_loop_begin)
|
jmp DECL(randomx_program_loop_begin)
|
||||||
|
|
||||||
.balign 64
|
.balign 64
|
||||||
|
|
|
@ -28,7 +28,10 @@ IFDEF RAX
|
||||||
|
|
||||||
_RANDOMX_JITX86_STATIC SEGMENT PAGE READ EXECUTE
|
_RANDOMX_JITX86_STATIC SEGMENT PAGE READ EXECUTE
|
||||||
|
|
||||||
|
PUBLIC randomx_prefetch_scratchpad
|
||||||
|
PUBLIC randomx_prefetch_scratchpad_end
|
||||||
PUBLIC randomx_program_prologue
|
PUBLIC randomx_program_prologue
|
||||||
|
PUBLIC randomx_program_prologue_first_load
|
||||||
PUBLIC randomx_program_loop_begin
|
PUBLIC randomx_program_loop_begin
|
||||||
PUBLIC randomx_program_loop_load
|
PUBLIC randomx_program_loop_load
|
||||||
PUBLIC randomx_program_start
|
PUBLIC randomx_program_start
|
||||||
|
@ -54,15 +57,36 @@ RANDOMX_CACHE_MASK EQU (RANDOMX_ARGON_MEMORY*16-1)
|
||||||
RANDOMX_ALIGN EQU 4096
|
RANDOMX_ALIGN EQU 4096
|
||||||
SUPERSCALAR_OFFSET EQU ((((RANDOMX_ALIGN + 32 * RANDOMX_PROGRAM_SIZE) - 1) / (RANDOMX_ALIGN) + 1) * (RANDOMX_ALIGN))
|
SUPERSCALAR_OFFSET EQU ((((RANDOMX_ALIGN + 32 * RANDOMX_PROGRAM_SIZE) - 1) / (RANDOMX_ALIGN) + 1) * (RANDOMX_ALIGN))
|
||||||
|
|
||||||
|
;# Scratchpad prefetch fragment. It contains no ret: generateProgramEpilogue copies the
;# bytes from randomx_prefetch_scratchpad up to randomx_prefetch_scratchpad_end inline
;# with emit(), directly after the bytes it emits for pcfg.readReg0 and pcfg.readReg1.
;# In:    rax = 64-bit value whose two 32-bit halves are turned into offsets
;#        rsi = base added to both offsets (presumably the scratchpad pointer, TODO confirm)
;# Out:   rax = low half of the input AND RANDOMX_SCRATCHPAD_MASK, zero-extended
;#        rdx = high half of the input AND RANDOMX_SCRATCHPAD_MASK, zero-extended
;# Clobb: flags
randomx_prefetch_scratchpad PROC
|
||||||
|
;# Keep a full 64-bit copy before the 32-bit AND below zeroes the upper half of rax.
mov rdx, rax
|
||||||
|
;# First offset: low 32 bits, masked. The 32-bit write zero-extends into rax.
and eax, RANDOMX_SCRATCHPAD_MASK
|
||||||
|
;# Prefetch hint for the first address.
prefetcht0 [rsi+rax]
|
||||||
|
;# Swap the halves of the copy so the original upper 32 bits land in edx.
ror rdx, 32
|
||||||
|
;# Second offset: original high 32 bits, masked. Zero-extends into rdx.
and edx, RANDOMX_SCRATCHPAD_MASK
|
||||||
|
;# Prefetch hint for the second address.
prefetcht0 [rsi+rdx]
|
||||||
|
randomx_prefetch_scratchpad ENDP
|
||||||
|
|
||||||
|
;# Empty procedure used purely as an end-of-fragment address marker; the JIT subtracts
;# the start address from this one to get the number of bytes to copy.
randomx_prefetch_scratchpad_end PROC
|
||||||
|
randomx_prefetch_scratchpad_end ENDP
|
||||||
|
|
||||||
ALIGN 64
|
ALIGN 64
|
||||||
randomx_program_prologue PROC
|
randomx_program_prologue PROC
|
||||||
include asm/program_prologue_win64.inc
|
include asm/program_prologue_win64.inc
|
||||||
movapd xmm13, xmmword ptr [mantissaMask]
|
movapd xmm13, xmmword ptr [mantissaMask]
|
||||||
movapd xmm14, xmmword ptr [exp240]
|
movapd xmm14, xmmword ptr [exp240]
|
||||||
movapd xmm15, xmmword ptr [scaleMask]
|
movapd xmm15, xmmword ptr [scaleMask]
|
||||||
jmp randomx_program_loop_begin
|
|
||||||
randomx_program_prologue ENDP
|
randomx_program_prologue ENDP
|
||||||
|
|
||||||
|
;# First address computation. In the new version randomx_program_prologue no longer ends
;# with a jmp, so execution falls through from it into this procedure.
;# The two xor rax, r8 instructions below are templates. generateProgramPrologue sets
;# codePos to the offset of this label from randomx_program_prologue and overwrites
;# one byte in each instruction, at sizeof(REX_XOR_RAX_R64) and at
;# sizeof(REX_XOR_RAX_R64) * 2 + 1, with 0xc0 + pcfg.readReg0 and 0xc0 + pcfg.readReg1.
;# r8 is therefore only a placeholder operand. Do not reorder or re-encode these two
;# instructions without updating those offsets in the JIT compiler.
;# Out:   rax = low 32 bits of the xor result AND RANDOMX_SCRATCHPAD_MASK, zero-extended
;#        rdx = high 32 bits of the xor result AND RANDOMX_SCRATCHPAD_MASK, zero-extended
;# Clobb: flags
randomx_program_prologue_first_load PROC
|
||||||
|
;# Template, source register byte patched with pcfg.readReg0.
xor rax, r8
|
||||||
|
;# Template, source register byte patched with pcfg.readReg1.
xor rax, r8
|
||||||
|
;# Keep a full 64-bit copy before the 32-bit AND below zeroes the upper half of rax.
mov rdx, rax
|
||||||
|
;# First offset: low 32 bits, masked. The 32-bit write zero-extends into rax.
and eax, RANDOMX_SCRATCHPAD_MASK
|
||||||
|
;# Swap the halves of the copy so the original upper 32 bits land in edx.
ror rdx, 32
|
||||||
|
;# Second offset: original high 32 bits, masked. Zero-extends into rdx.
and edx, RANDOMX_SCRATCHPAD_MASK
|
||||||
|
;# Enter the main loop with both offsets prepared in rax and rdx.
jmp randomx_program_loop_begin
|
||||||
|
randomx_program_prologue_first_load ENDP
|
||||||
|
|
||||||
ALIGN 64
|
ALIGN 64
|
||||||
include asm/program_xmm_constants.inc
|
include asm/program_xmm_constants.inc
|
||||||
|
|
||||||
|
|
|
@ -29,7 +29,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
extern "C" {
|
extern "C" {
|
||||||
|
void randomx_prefetch_scratchpad();
|
||||||
|
void randomx_prefetch_scratchpad_end();
|
||||||
void randomx_program_prologue();
|
void randomx_program_prologue();
|
||||||
|
void randomx_program_prologue_first_load();
|
||||||
void randomx_program_loop_begin();
|
void randomx_program_loop_begin();
|
||||||
void randomx_program_loop_load();
|
void randomx_program_loop_load();
|
||||||
void randomx_program_start();
|
void randomx_program_start();
|
||||||
|
|
Loading…
Reference in a new issue