mirror of
				https://git.wownero.com/wownero/RandomWOW.git
				synced 2024-08-15 00:23:14 +00:00 
			
		
		
		
	Merge pull request #124 from SChernykh/master
Optimized loading from scratchpad
This commit is contained in:
		
						commit
						5fb26fc607
					
				
					 7 changed files with 63 additions and 14 deletions
				
			
		|  | @ -1,5 +1,3 @@ | |||
| 	mov rdx, rax | ||||
| 	and eax, RANDOMX_SCRATCHPAD_MASK | ||||
| 	lea rcx, [rsi+rax] | ||||
| 	push rcx | ||||
| 	xor r8,  qword ptr [rcx+0] | ||||
|  | @ -10,8 +8,6 @@ | |||
| 	xor r13, qword ptr [rcx+40] | ||||
| 	xor r14, qword ptr [rcx+48] | ||||
| 	xor r15, qword ptr [rcx+56] | ||||
| 	ror rdx, 32 | ||||
| 	and edx, RANDOMX_SCRATCHPAD_MASK | ||||
| 	lea rcx, [rsi+rdx] | ||||
| 	push rcx | ||||
| 	cvtdq2pd xmm0, qword ptr [rcx+0] | ||||
|  |  | |||
|  | @ -1,4 +1,3 @@ | |||
| 	xor eax, eax | ||||
| 	pop rcx | ||||
| 	mov qword ptr [rcx+0], r8 | ||||
| 	mov qword ptr [rcx+8], r9 | ||||
|  |  | |||
|  | @ -243,7 +243,7 @@ namespace randomx { | |||
| 		generateProgramPrologue(prog, pcfg); | ||||
| 		memcpy(code + codePos, codeReadDataset, readDatasetSize); | ||||
| 		codePos += readDatasetSize; | ||||
| 		generateProgramEpilogue(prog); | ||||
| 		generateProgramEpilogue(prog, pcfg); | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::generateProgramLight(Program& prog, ProgramConfiguration& pcfg, uint32_t datasetOffset) { | ||||
|  | @ -254,7 +254,7 @@ namespace randomx { | |||
| 		emitByte(CALL); | ||||
| 		emit32(superScalarHashOffset - (codePos + 4)); | ||||
| 		emit(codeReadDatasetLightSshFin, readDatasetLightFinSize); | ||||
| 		generateProgramEpilogue(prog); | ||||
| 		generateProgramEpilogue(prog, pcfg); | ||||
| 	} | ||||
| 
 | ||||
| 	template<size_t N> | ||||
|  | @ -298,12 +298,13 @@ namespace randomx { | |||
| 		for (unsigned i = 0; i < 8; ++i) { | ||||
| 			registerUsage[i] = -1; | ||||
| 		} | ||||
| 
 | ||||
| 		codePos = ((uint8_t*)randomx_program_prologue_first_load) - ((uint8_t*)randomx_program_prologue); | ||||
| 		code[codePos + sizeof(REX_XOR_RAX_R64)] = 0xc0 + pcfg.readReg0; | ||||
| 		code[codePos + sizeof(REX_XOR_RAX_R64) * 2 + 1] = 0xc0 + pcfg.readReg1; | ||||
| 
 | ||||
| 		codePos = prologueSize; | ||||
| 		memcpy(code + codePos - 48, &pcfg.eMask, sizeof(pcfg.eMask)); | ||||
| 		emit(REX_XOR_RAX_R64); | ||||
| 		emitByte(0xc0 + pcfg.readReg0); | ||||
| 		emit(REX_XOR_RAX_R64); | ||||
| 		emitByte(0xc0 + pcfg.readReg1); | ||||
| 		memcpy(code + codePos, codeLoopLoad, loopLoadSize); | ||||
| 		codePos += loopLoadSize; | ||||
| 		for (unsigned i = 0; i < prog.getSize(); ++i) { | ||||
|  | @ -318,7 +319,12 @@ namespace randomx { | |||
| 		emitByte(0xc0 + pcfg.readReg3); | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::generateProgramEpilogue(Program& prog) { | ||||
| 	void JitCompilerX86::generateProgramEpilogue(Program& prog, ProgramConfiguration& pcfg) { | ||||
| 		emit(REX_MOV_RR64); | ||||
| 		emitByte(0xc0 + pcfg.readReg0); | ||||
| 		emit(REX_XOR_RAX_R64); | ||||
| 		emitByte(0xc0 + pcfg.readReg1); | ||||
| 		emit((const uint8_t*)&randomx_prefetch_scratchpad, ((uint8_t*)&randomx_prefetch_scratchpad_end) - ((uint8_t*)&randomx_prefetch_scratchpad)); | ||||
| 		memcpy(code + codePos, codeLoopStore, loopStoreSize); | ||||
| 		codePos += loopStoreSize; | ||||
| 		emit(SUB_EBX); | ||||
|  |  | |||
|  | @ -73,7 +73,7 @@ namespace randomx { | |||
| 		int32_t codePos; | ||||
| 
 | ||||
| 		void generateProgramPrologue(Program&, ProgramConfiguration&); | ||||
| 		void generateProgramEpilogue(Program&); | ||||
| 		void generateProgramEpilogue(Program&, ProgramConfiguration&); | ||||
| 		void genAddressReg(Instruction&, bool); | ||||
| 		void genAddressRegDst(Instruction&); | ||||
| 		void genAddressImm(Instruction&); | ||||
|  |  | |||
|  | @ -37,7 +37,10 @@ | |||
| #define WINABI | ||||
| #endif | ||||
| 
 | ||||
| .global DECL(randomx_prefetch_scratchpad) | ||||
| .global DECL(randomx_prefetch_scratchpad_end) | ||||
| .global DECL(randomx_program_prologue) | ||||
| .global DECL(randomx_program_prologue_first_load) | ||||
| .global DECL(randomx_program_loop_begin) | ||||
| .global DECL(randomx_program_loop_load) | ||||
| .global DECL(randomx_program_start) | ||||
|  | @ -65,6 +68,16 @@ | |||
| 
 | ||||
| #define db .byte | ||||
| 
 | ||||
| DECL(randomx_prefetch_scratchpad): | ||||
| 	mov rdx, rax | ||||
| 	and eax, RANDOMX_SCRATCHPAD_MASK | ||||
| 	prefetcht0 [rsi+rax] | ||||
| 	ror rdx, 32 | ||||
| 	and edx, RANDOMX_SCRATCHPAD_MASK | ||||
| 	prefetcht0 [rsi+rdx] | ||||
| 
 | ||||
| DECL(randomx_prefetch_scratchpad_end): | ||||
| 
 | ||||
| .balign 64
 | ||||
| DECL(randomx_program_prologue): | ||||
| #if defined(WINABI) | ||||
|  | @ -75,6 +88,14 @@ DECL(randomx_program_prologue): | |||
| 	movapd xmm13, xmmword ptr [mantissaMask+rip] | ||||
| 	movapd xmm14, xmmword ptr [exp240+rip] | ||||
| 	movapd xmm15, xmmword ptr [scaleMask+rip] | ||||
| 
 | ||||
| DECL(randomx_program_prologue_first_load): | ||||
| 	xor rax, r8 | ||||
| 	xor rax, r8 | ||||
| 	mov rdx, rax | ||||
| 	and eax, RANDOMX_SCRATCHPAD_MASK | ||||
| 	ror rdx, 32 | ||||
| 	and edx, RANDOMX_SCRATCHPAD_MASK | ||||
| 	jmp DECL(randomx_program_loop_begin) | ||||
| 
 | ||||
| .balign 64
 | ||||
|  |  | |||
|  | @ -28,7 +28,10 @@ IFDEF RAX | |||
| 
 | ||||
| _RANDOMX_JITX86_STATIC SEGMENT PAGE READ EXECUTE | ||||
| 
 | ||||
| PUBLIC randomx_prefetch_scratchpad | ||||
| PUBLIC randomx_prefetch_scratchpad_end | ||||
| PUBLIC randomx_program_prologue | ||||
| PUBLIC randomx_program_prologue_first_load | ||||
| PUBLIC randomx_program_loop_begin | ||||
| PUBLIC randomx_program_loop_load | ||||
| PUBLIC randomx_program_start | ||||
|  | @ -54,15 +57,36 @@ RANDOMX_CACHE_MASK          EQU (RANDOMX_ARGON_MEMORY*16-1) | |||
| RANDOMX_ALIGN               EQU 4096 | ||||
| SUPERSCALAR_OFFSET          EQU ((((RANDOMX_ALIGN + 32 * RANDOMX_PROGRAM_SIZE) - 1) / (RANDOMX_ALIGN) + 1) * (RANDOMX_ALIGN)) | ||||
| 
 | ||||
| randomx_prefetch_scratchpad PROC | ||||
| 	mov rdx, rax | ||||
| 	and eax, RANDOMX_SCRATCHPAD_MASK | ||||
| 	prefetcht0 [rsi+rax] | ||||
| 	ror rdx, 32 | ||||
| 	and edx, RANDOMX_SCRATCHPAD_MASK | ||||
| 	prefetcht0 [rsi+rdx] | ||||
| randomx_prefetch_scratchpad ENDP | ||||
| 
 | ||||
| randomx_prefetch_scratchpad_end PROC | ||||
| randomx_prefetch_scratchpad_end ENDP | ||||
| 
 | ||||
| ALIGN 64 | ||||
| randomx_program_prologue PROC | ||||
| 	include asm/program_prologue_win64.inc | ||||
| 	movapd xmm13, xmmword ptr [mantissaMask] | ||||
| 	movapd xmm14, xmmword ptr [exp240] | ||||
| 	movapd xmm15, xmmword ptr [scaleMask] | ||||
| 	jmp randomx_program_loop_begin | ||||
| randomx_program_prologue ENDP | ||||
| 
 | ||||
| randomx_program_prologue_first_load PROC | ||||
| 	xor rax, r8 | ||||
| 	xor rax, r8 | ||||
| 	mov rdx, rax | ||||
| 	and eax, RANDOMX_SCRATCHPAD_MASK | ||||
| 	ror rdx, 32 | ||||
| 	and edx, RANDOMX_SCRATCHPAD_MASK | ||||
| 	jmp randomx_program_loop_begin | ||||
| randomx_program_prologue_first_load ENDP | ||||
| 
 | ||||
| ALIGN 64 | ||||
| 	include asm/program_xmm_constants.inc | ||||
| 
 | ||||
|  |  | |||
|  | @ -29,7 +29,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #pragma once | ||||
| 
 | ||||
| extern "C" { | ||||
| 	void randomx_prefetch_scratchpad(); | ||||
| 	void randomx_prefetch_scratchpad_end(); | ||||
| 	void randomx_program_prologue(); | ||||
| 	void randomx_program_prologue_first_load(); | ||||
| 	void randomx_program_loop_begin(); | ||||
| 	void randomx_program_loop_load(); | ||||
| 	void randomx_program_start(); | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue