mirror of
				https://git.wownero.com/wownero/RandomWOW.git
				synced 2024-08-15 00:23:14 +00:00 
			
		
		
		
	Updated JIT compiler and assembly generator for new int -> float conversion
This commit is contained in:
		
							parent
							
								
									790b382eda
								
							
						
					
					
						commit
						d9bc6cfeda
					
				
					 10 changed files with 56 additions and 49 deletions
				
			
		|  | @ -40,6 +40,8 @@ For floating point instructions, the destination can be a group F or group E reg | |||
| 
 | ||||
| Memory operands are loaded as 8-byte values from the address indicated by `src`. The 8-byte value is interpreted as two 32-bit signed integers and implicitly converted to floating point format. The lower and upper memory operands are marked as `[src][0]` and `[src][1]`. | ||||
| 
 | ||||
| Memory operands for group E registers are loaded as described above; their sign bit is then cleared and their exponent value is set to `0x30F` (which corresponds to 2<sup>-240</sup>). | ||||
| 
 | ||||
| |frequency|instruction|dst|src|operation| | ||||
| |-|-|-|-|-| | ||||
| |8/256|FSWAP_R|F+E|-|`(dst0, dst1) = (dst1, dst0)`| | ||||
|  | @ -58,8 +60,7 @@ This instruction negates the number and multiplies it by <code>2<sup>x</sup></co | |||
| The mathematical operation described above is equivalent to a bitwise XOR of the binary representation with the value of `0x81F0000000000000`. | ||||
| 
 | ||||
| #### Denormal and NaN values | ||||
| Due to restrictions on the values of the floating point registers, no operation results in `NaN`. | ||||
| `FDIV_M` can produce a denormal result. In that case, the result is set to `DBL_MIN = 2.22507385850720138309e-308`, which is the smallest positive normal number. | ||||
| Due to restrictions on the values of the floating point registers, no operation results in `NaN` or a denormal number. | ||||
| 
 | ||||
| #### Rounding | ||||
| All floating point instructions give correctly rounded results. The rounding mode depends on the value of the `fprc` register: | ||||
|  |  | |||
|  | @ -385,9 +385,9 @@ namespace RandomX { | |||
| 		instr.dst %= 4; | ||||
| 		genAddressReg(instr); | ||||
| 		asmCode << "\tcvtdq2pd xmm12, qword ptr [rsi+rax]" << std::endl; | ||||
| 		asmCode << "\tandps xmm12, xmm14" << std::endl; | ||||
| 		asmCode << "\tandps xmm12, xmm13" << std::endl; | ||||
| 		asmCode << "\torps xmm12, xmm14" << std::endl; | ||||
| 		asmCode << "\tdivpd " << regE[instr.dst] << ", xmm12" << std::endl; | ||||
| 		asmCode << "\tmaxpd " << regE[instr.dst] << ", " << dblMin << std::endl; | ||||
| 		traceflt(instr); | ||||
| 	} | ||||
| 
 | ||||
|  |  | |||
|  | @ -73,9 +73,9 @@ namespace RandomX { | |||
| 	; xmm10 -> "a2" | ||||
| 	; xmm11 -> "a3" | ||||
| 	; xmm12 -> temporary | ||||
| 	; xmm13 -> DBL_MIN | ||||
| 	; xmm14 -> absolute value mask 0x7fffffffffffffff7fffffffffffffff | ||||
| 	; xmm15 -> sign mask           0x80000000000000008000000000000000 | ||||
| 	; xmm13 -> mantissa mask    = 0x000fffffffffffff000fffffffffffff | ||||
| 	; xmm14 -> exponent 2**-240 = 0x30f000000000000030f0000000000000 | ||||
| 	; xmm15 -> scale mask       = 0x81f000000000000081f0000000000000 | ||||
| 
 | ||||
| 	*/ | ||||
| 
 | ||||
|  | @ -165,7 +165,7 @@ namespace RandomX { | |||
| 	static const uint8_t JMP = 0xe9; | ||||
| 	static const uint8_t REX_XOR_RAX_R64[] = { 0x49, 0x33 }; | ||||
| 	static const uint8_t REX_XCHG[] = { 0x4d, 0x87 }; | ||||
| 	static const uint8_t REX_ANDPS_XMM12[] = { 0x45, 0x0f, 0x54, 0xe6 }; | ||||
| 	static const uint8_t REX_ANDPS_XMM12[] = { 0x45, 0x0F, 0x54, 0xE5, 0x45, 0x0F, 0x56, 0xE6 }; | ||||
| 	static const uint8_t REX_PADD[] = { 0x66, 0x44, 0x0f }; | ||||
| 	static const uint8_t PADD_OPCODES[] = { 0xfc, 0xfd, 0xfe, 0xd4 }; | ||||
| 
 | ||||
|  | @ -556,8 +556,6 @@ namespace RandomX { | |||
| 		emit(REX_ANDPS_XMM12); | ||||
| 		emit(REX_DIVPD); | ||||
| 		emitByte(0xe4 + 8 * instr.dst); | ||||
| 		emit(REX_MAXPD); | ||||
| 		emitByte(0xe5 + 8 * instr.dst); | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_FSQRT_R(Instruction& instr) { | ||||
|  |  | |||
|  | @ -22,7 +22,11 @@ | |||
| 	cvtdq2pd xmm5, qword ptr [rcx+40] | ||||
| 	cvtdq2pd xmm6, qword ptr [rcx+48] | ||||
| 	cvtdq2pd xmm7, qword ptr [rcx+56] | ||||
| 	andps xmm4, xmm14 | ||||
| 	andps xmm5, xmm14 | ||||
| 	andps xmm6, xmm14 | ||||
| 	andps xmm7, xmm14 | ||||
| 	andps xmm4, xmm13 | ||||
| 	andps xmm5, xmm13 | ||||
| 	andps xmm6, xmm13 | ||||
| 	andps xmm7, xmm13 | ||||
| 	orps xmm4, xmm14 | ||||
| 	orps xmm5, xmm14 | ||||
| 	orps xmm6, xmm14 | ||||
| 	orps xmm7, xmm14 | ||||
|  |  | |||
|  | @ -8,10 +8,10 @@ | |||
| 	mov qword ptr [rcx+48], r14 | ||||
| 	mov qword ptr [rcx+56], r15 | ||||
| 	pop rcx | ||||
| 	mulpd xmm0, xmm4 | ||||
| 	mulpd xmm1, xmm5 | ||||
| 	mulpd xmm2, xmm6 | ||||
| 	mulpd xmm3, xmm7 | ||||
| 	xorpd xmm0, xmm4 | ||||
| 	xorpd xmm1, xmm5 | ||||
| 	xorpd xmm2, xmm6 | ||||
| 	xorpd xmm3, xmm7 | ||||
| 	movapd xmmword ptr [rcx+0], xmm0 | ||||
| 	movapd xmmword ptr [rcx+16], xmm1 | ||||
| 	movapd xmmword ptr [rcx+32], xmm2 | ||||
|  |  | |||
|  | @ -32,8 +32,8 @@ | |||
| 	movapd xmm9, xmmword ptr [rcx+88] | ||||
| 	movapd xmm10, xmmword ptr [rcx+104] | ||||
| 	movapd xmm11, xmmword ptr [rcx+120] | ||||
| 	movapd xmm13, xmmword ptr minDbl[rip] | ||||
| 	movapd xmm14, xmmword ptr absMask[rip] | ||||
| 	movapd xmm15, xmmword ptr signMask[rip] | ||||
| 	movapd xmm13, xmmword ptr mantissaMask[rip] | ||||
| 	movapd xmm14, xmmword ptr exp240[rip] | ||||
| 	movapd xmm15, xmmword ptr scaleMask[rip] | ||||
| 
 | ||||
| 	jmp DECL(randomx_program_loop_begin) | ||||
|  | @ -45,8 +45,8 @@ | |||
| 	movapd xmm9, xmmword ptr [rcx+88] | ||||
| 	movapd xmm10, xmmword ptr [rcx+104] | ||||
| 	movapd xmm11, xmmword ptr [rcx+120] | ||||
| 	movapd xmm13, xmmword ptr [minDbl] | ||||
| 	movapd xmm14, xmmword ptr [absMask] | ||||
| 	movapd xmm15, xmmword ptr [signMask] | ||||
| 	movapd xmm13, xmmword ptr [mantissaMask] | ||||
| 	movapd xmm14, xmmword ptr [exp240] | ||||
| 	movapd xmm15, xmmword ptr [scaleMask] | ||||
| 
 | ||||
| 	jmp randomx_program_loop_begin | ||||
|  | @ -1,6 +1,6 @@ | |||
| minDbl: | ||||
| 	db 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 16, 0 | ||||
| absMask: | ||||
| 	db 255, 255, 255, 255, 255, 255, 255, 127, 255, 255, 255, 255, 255, 255, 255, 127 | ||||
| signMask: | ||||
| mantissaMask: | ||||
| 	db 255, 255, 255, 255, 255, 255, 15, 0, 255, 255, 255, 255, 255, 255, 15, 0 | ||||
| exp240: | ||||
| 	db 0, 0, 0, 0, 0, 0, 240, 48, 0, 0, 0, 0, 0, 0, 240, 48 | ||||
| scaleMask: | ||||
| 	db 0, 0, 0, 0, 0, 0, 240, 129, 0, 0, 0, 0, 0, 0, 240, 129 | ||||
|  | @ -52,9 +52,9 @@ executeProgram PROC | |||
| 	; xmm10 -> "a2" | ||||
| 	; xmm11 -> "a3" | ||||
| 	; xmm12 -> temporary | ||||
| 	; xmm13 -> DBL_MIN | ||||
| 	; xmm14 -> absolute value mask | ||||
| 	; xmm15 -> sign mask | ||||
| 	; xmm13 -> mantissa mask    = 0x000fffffffffffff000fffffffffffff | ||||
| 	; xmm14 -> exponent 2**-240 = 0x30f000000000000030f0000000000000 | ||||
| 	; xmm15 -> scale mask       = 0x81f000000000000081f0000000000000 | ||||
| 
 | ||||
| 	; store callee-saved registers | ||||
| 	push rbx | ||||
|  | @ -103,18 +103,18 @@ executeProgram PROC | |||
| 	movapd xmm9, xmmword ptr [rcx+88] | ||||
| 	movapd xmm10, xmmword ptr [rcx+104] | ||||
| 	movapd xmm11, xmmword ptr [rcx+120] | ||||
| 	movapd xmm13, xmmword ptr [minDbl] | ||||
| 	movapd xmm14, xmmword ptr [absMask] | ||||
| 	movapd xmm15, xmmword ptr [signMask] | ||||
| 	movapd xmm13, xmmword ptr [mantissaMask] | ||||
| 	movapd xmm14, xmmword ptr [exp240] | ||||
| 	movapd xmm15, xmmword ptr [scaleMask] | ||||
| 
 | ||||
| 	jmp program_begin | ||||
| 
 | ||||
| ALIGN 64 | ||||
| minDbl: | ||||
| 	db 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 16, 0 | ||||
| absMask: | ||||
| 	db 255, 255, 255, 255, 255, 255, 255, 127, 255, 255, 255, 255, 255, 255, 255, 127 | ||||
| signMask: | ||||
| mantissaMask: | ||||
| 	db 255, 255, 255, 255, 255, 255, 15, 0, 255, 255, 255, 255, 255, 255, 15, 0 | ||||
| exp240: | ||||
| 	db 0, 0, 0, 0, 0, 0, 240, 48, 0, 0, 0, 0, 0, 0, 240, 48 | ||||
| scaleMask: | ||||
| 	db 0, 0, 0, 0, 0, 0, 240, 129, 0, 0, 0, 0, 0, 0, 240, 129 | ||||
| 
 | ||||
| ALIGN 64 | ||||
|  | @ -145,10 +145,14 @@ program_begin: | |||
| 	cvtdq2pd xmm5, qword ptr [rcx+40] | ||||
| 	cvtdq2pd xmm6, qword ptr [rcx+48] | ||||
| 	cvtdq2pd xmm7, qword ptr [rcx+56] | ||||
| 	andps xmm4, xmm14 | ||||
| 	andps xmm5, xmm14 | ||||
| 	andps xmm6, xmm14 | ||||
| 	andps xmm7, xmm14 | ||||
| 	andps xmm4, xmm13 | ||||
| 	andps xmm5, xmm13 | ||||
| 	andps xmm6, xmm13 | ||||
| 	andps xmm7, xmm13 | ||||
| 	orps xmm4, xmm14 | ||||
| 	orps xmm5, xmm14 | ||||
| 	orps xmm6, xmm14 | ||||
| 	orps xmm7, xmm14 | ||||
| 
 | ||||
| 	;# 256 instructions | ||||
| 	include program.inc | ||||
|  | @ -181,10 +185,10 @@ IF 1 | |||
| 	mov qword ptr [rcx+48], r14 | ||||
| 	mov qword ptr [rcx+56], r15 | ||||
| 	pop rcx | ||||
| 	mulpd xmm0, xmm4 | ||||
| 	mulpd xmm1, xmm5 | ||||
| 	mulpd xmm2, xmm6 | ||||
| 	mulpd xmm3, xmm7 | ||||
| 	xorpd xmm0, xmm4 | ||||
| 	xorpd xmm1, xmm5 | ||||
| 	xorpd xmm2, xmm6 | ||||
| 	xorpd xmm3, xmm7 | ||||
| 	movapd xmmword ptr [rcx+0], xmm0 | ||||
| 	movapd xmmword ptr [rcx+16], xmm1 | ||||
| 	movapd xmmword ptr [rcx+32], xmm2 | ||||
|  |  | |||
|  | @ -341,7 +341,7 @@ int main(int argc, char** argv) { | |||
| 		std::cout << "Calculated result: "; | ||||
| 		result.print(std::cout); | ||||
| 		if(programCount == 1000) | ||||
| 		std::cout << "Reference result:  d3ae5a9365196ed48bb98ebfc3316498e29443ea7f056ecbd272f749c6af7730" << std::endl; | ||||
| 		std::cout << "Reference result:  e1b4144293ff9ab5aa4c98f2389bb18950d8c3fd874891ac64628e028a286006" << std::endl; | ||||
| 		if (!miningMode) { | ||||
| 			std::cout << "Performance: " << 1000 * elapsed / programCount << " ms per hash" << std::endl; | ||||
| 		} | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue