mirror of
				https://git.wownero.com/wownero/RandomWOW.git
				synced 2024-08-15 00:23:14 +00:00 
			
		
		
		
	Removed FPNEG instruction
Optimized instruction frequencies
Increased the range for A registers from [1, 65536) to [1, 4294967296)
This commit is contained in:
		
							parent
							
								
									ac4462ad42
								
							
						
					
					
						commit
						a586751f6b
					
				
					 12 changed files with 738 additions and 759 deletions
				
			
		|  | @ -35,6 +35,8 @@ namespace RandomX { | |||
| 	static const char* regE[4] = { "xmm4", "xmm5", "xmm6", "xmm7" }; | ||||
| 	static const char* regA[4] = { "xmm8", "xmm9", "xmm10", "xmm11" }; | ||||
| 
 | ||||
| 	static const char* fsumInstr[4] = { "paddb", "paddw", "paddd", "paddq" }; | ||||
| 
 | ||||
| 	static const char* regA4 = "xmm12"; | ||||
| 	static const char* dblMin = "xmm13"; | ||||
| 	static const char* absMask = "xmm14"; | ||||
|  | @ -365,6 +367,7 @@ namespace RandomX { | |||
| 		instr.dst %= 4; | ||||
| 		instr.src %= 4; | ||||
| 		asmCode << "\taddpd " << regF[instr.dst] << ", " << regA[instr.src] << std::endl; | ||||
| 		//asmCode << "\t" << fsumInstr[instr.mod % 4] << " " << signMask << ", " << regF[instr.dst] << std::endl;
 | ||||
| 	} | ||||
| 
 | ||||
| 	//5 uOPs
 | ||||
|  | @ -380,6 +383,7 @@ namespace RandomX { | |||
| 		instr.dst %= 4; | ||||
| 		instr.src %= 4; | ||||
| 		asmCode << "\tsubpd " << regF[instr.dst] << ", " << regA[instr.src] << std::endl; | ||||
| 		//asmCode << "\t" << fsumInstr[instr.mod % 4] << " " << signMask << ", " << regF[instr.dst] << std::endl;
 | ||||
| 	} | ||||
| 
 | ||||
| 	//5 uOPs
 | ||||
|  | @ -391,9 +395,9 @@ namespace RandomX { | |||
| 	} | ||||
| 
 | ||||
| 	//1 uOP
 | ||||
| 	void AssemblyGeneratorX86::h_FPNEG_R(Instruction& instr, int i) { | ||||
| 	void AssemblyGeneratorX86::h_CFSUM_R(Instruction& instr, int i) { | ||||
| 		instr.dst %= 4; | ||||
| 		asmCode << "\txorps " << regF[instr.dst] << ", " << signMask << std::endl; | ||||
| 		asmCode << "\t" << fsumInstr[instr.mod % 4] << " " << signMask << ", " << regF[instr.dst] << std::endl; | ||||
| 	} | ||||
| 
 | ||||
| 	//1 uOPs
 | ||||
|  | @ -538,7 +542,7 @@ namespace RandomX { | |||
| 		INST_HANDLE(FADD_M) | ||||
| 		INST_HANDLE(FSUB_R) | ||||
| 		INST_HANDLE(FSUB_M) | ||||
| 		INST_HANDLE(FPNEG_R) | ||||
| 		INST_HANDLE(CFSUM_R) | ||||
| 
 | ||||
| 		//Floating point group E
 | ||||
| 		INST_HANDLE(FMUL_R) | ||||
|  |  | |||
|  | @ -69,7 +69,7 @@ namespace RandomX { | |||
| 		void  h_FADD_M(Instruction&, int); | ||||
| 		void  h_FSUB_R(Instruction&, int); | ||||
| 		void  h_FSUB_M(Instruction&, int); | ||||
| 		void  h_FPNEG_R(Instruction&, int); | ||||
| 		void  h_CFSUM_R(Instruction&, int); | ||||
| 		void  h_FMUL_R(Instruction&, int); | ||||
| 		void  h_FMUL_M(Instruction&, int); | ||||
| 		void  h_FDIV_R(Instruction&, int); | ||||
|  |  | |||
|  | @ -44,7 +44,7 @@ namespace RandomX { | |||
| 	} | ||||
| 
 | ||||
| 	static uint64_t getSmallPositiveFloatBits(uint64_t entropy) { | ||||
| 		auto exponent = entropy >> 60; //0..15
 | ||||
| 		auto exponent = entropy >> 59; //0..31
 | ||||
| 		auto mantissa = entropy & mantissaMask; | ||||
| 		exponent += exponentBias; | ||||
| 		exponent &= exponentMask; | ||||
|  |  | |||
|  | @ -247,9 +247,9 @@ namespace RandomX { | |||
| 		os << std::endl; | ||||
| 	} | ||||
| 
 | ||||
| 	void Instruction::h_FPNEG_R(std::ostream& os) const { | ||||
| 	void Instruction::h_CFSUM_R(std::ostream& os) const { | ||||
| 		auto dstIndex = dst % 4; | ||||
| 		os << "f" << dstIndex << std::endl; | ||||
| 		os << "f" << dstIndex << ", " << (1 << ((mod % 4) + 3)) << std::endl; | ||||
| 	} | ||||
| 
 | ||||
| 	void Instruction::h_FMUL_R(std::ostream& os) const { | ||||
|  | @ -370,7 +370,7 @@ namespace RandomX { | |||
| 		INST_NAME(FADD_M) | ||||
| 		INST_NAME(FSUB_R) | ||||
| 		INST_NAME(FSUB_M) | ||||
| 		INST_NAME(FPNEG_R) | ||||
| 		INST_NAME(CFSUM_R) | ||||
| 
 | ||||
| 		//Floating point group E
 | ||||
| 		INST_NAME(FMUL_R) | ||||
|  | @ -421,7 +421,7 @@ namespace RandomX { | |||
| 		INST_HANDLE(FADD_M) | ||||
| 		INST_HANDLE(FSUB_R) | ||||
| 		INST_HANDLE(FSUB_M) | ||||
| 		INST_HANDLE(FPNEG_R) | ||||
| 		INST_HANDLE(CFSUM_R) | ||||
| 
 | ||||
| 		//Floating point group E
 | ||||
| 		INST_HANDLE(FMUL_R) | ||||
|  |  | |||
|  | @ -54,7 +54,7 @@ namespace RandomX { | |||
| 		constexpr int FADD_M = 22; | ||||
| 		constexpr int FSUB_R = 23; | ||||
| 		constexpr int FSUB_M = 24; | ||||
| 		constexpr int FPNEG_R = 25; | ||||
| 		constexpr int CFSUM_R = 25; | ||||
| 		constexpr int FMUL_R = 26; | ||||
| 		constexpr int FMUL_M = 27; | ||||
| 		constexpr int FDIV_R = 28; | ||||
|  | @ -116,7 +116,7 @@ namespace RandomX { | |||
| 		void  h_FADD_M(std::ostream&) const; | ||||
| 		void  h_FSUB_R(std::ostream&) const; | ||||
| 		void  h_FSUB_M(std::ostream&) const; | ||||
| 		void  h_FPNEG_R(std::ostream&) const; | ||||
| 		void  h_CFSUM_R(std::ostream&) const; | ||||
| 		void  h_FMUL_R(std::ostream&) const; | ||||
| 		void  h_FMUL_M(std::ostream&) const; | ||||
| 		void  h_FDIV_R(std::ostream&) const; | ||||
|  |  | |||
|  | @ -87,7 +87,7 @@ namespace RandomX { | |||
| 	; xmm12 -> temporary | ||||
| 	; xmm13 -> DBL_MIN | ||||
| 	; xmm14 -> absolute value mask 0x7fffffffffffffff7fffffffffffffff | ||||
| 	; xmm15 -> sign mask           0x80000000000000008000000000000000 | ||||
| 	; xmm15 -> unused | ||||
| 
 | ||||
| 	*/ | ||||
| 
 | ||||
|  | @ -178,6 +178,8 @@ namespace RandomX { | |||
| 	static const uint8_t REX_XOR_RAX_R64[] = { 0x49, 0x33 }; | ||||
| 	static const uint8_t REX_XCHG[] = { 0x4d, 0x87 }; | ||||
| 	static const uint8_t REX_ANDPS_XMM12[] = { 0x41, 0x0f, 0x54, 0xe6 }; | ||||
| 	static const uint8_t REX_PADD[] = { 0x66, 0x44, 0x0f }; | ||||
| 	static const uint8_t PADD_OPCODES[] = { 0xfc, 0xfd, 0xfe, 0xd4 }; | ||||
| 
 | ||||
| 	size_t JitCompilerX86::getCodeSize() { | ||||
| 		return codePos - prologueSize; | ||||
|  | @ -615,6 +617,9 @@ namespace RandomX { | |||
| 		instr.src %= 4; | ||||
| 		emit(REX_ADDPD); | ||||
| 		emitByte(0xc0 + instr.src + 8 * instr.dst); | ||||
| 		//emit(REX_PADD);
 | ||||
| 		//emitByte(PADD_OPCODES[instr.mod % 4]);
 | ||||
| 		//emitByte(0xf8 + instr.dst);
 | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_FADD_M(Instruction& instr) { | ||||
|  | @ -630,6 +635,9 @@ namespace RandomX { | |||
| 		instr.src %= 4; | ||||
| 		emit(REX_SUBPD); | ||||
| 		emitByte(0xc0 + instr.src + 8 * instr.dst); | ||||
| 		//emit(REX_PADD);
 | ||||
| 		//emitByte(PADD_OPCODES[instr.mod % 4]);
 | ||||
| 		//emitByte(0xf8 + instr.dst);
 | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_FSUB_M(Instruction& instr) { | ||||
|  | @ -640,7 +648,7 @@ namespace RandomX { | |||
| 		emitByte(0xc4 + 8 * instr.dst); | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_FPNEG_R(Instruction& instr) { | ||||
| 	void JitCompilerX86::h_CFSUM_R(Instruction& instr) { | ||||
| 		instr.dst %= 4; | ||||
| 		emit(REX_XORPS); | ||||
| 		emitByte(0xc7 + 8 * instr.dst); | ||||
|  | @ -794,7 +802,7 @@ namespace RandomX { | |||
| 		INST_HANDLE(FADD_M) | ||||
| 		INST_HANDLE(FSUB_R) | ||||
| 		INST_HANDLE(FSUB_M) | ||||
| 		INST_HANDLE(FPNEG_R) | ||||
| 		INST_HANDLE(CFSUM_R) | ||||
| 		INST_HANDLE(FMUL_R) | ||||
| 		INST_HANDLE(FMUL_M) | ||||
| 		INST_HANDLE(FDIV_R) | ||||
|  |  | |||
|  | @ -115,7 +115,7 @@ namespace RandomX { | |||
| 		void  h_FADD_M(Instruction&); | ||||
| 		void  h_FSUB_R(Instruction&); | ||||
| 		void  h_FSUB_M(Instruction&); | ||||
| 		void  h_FPNEG_R(Instruction&); | ||||
| 		void  h_CFSUM_R(Instruction&); | ||||
| 		void  h_FMUL_R(Instruction&); | ||||
| 		void  h_FMUL_M(Instruction&); | ||||
| 		void  h_FDIV_R(Instruction&); | ||||
|  |  | |||
|  | @ -12,6 +12,10 @@ | |||
| 	mulpd xmm1, xmm5 | ||||
| 	mulpd xmm2, xmm6 | ||||
| 	mulpd xmm3, xmm7 | ||||
| 	;# xorpd xmm0, xmm15
 | ||||
| 	;# xorpd xmm1, xmm15
 | ||||
| 	;# xorpd xmm2, xmm15
 | ||||
| 	;# xorpd xmm3, xmm15
 | ||||
| 	movapd xmmword ptr [rcx+0], xmm0 | ||||
| 	movapd xmmword ptr [rcx+16], xmm1 | ||||
| 	movapd xmmword ptr [rcx+32], xmm2 | ||||
|  |  | |||
|  | @ -18,5 +18,5 @@ | |||
| 	movapd xmm11, xmmword ptr [rcx+120] | ||||
| 	movapd xmm13, xmmword ptr [minDbl] | ||||
| 	movapd xmm14, xmmword ptr [absMask] | ||||
| 	movapd xmm15, xmmword ptr [signMask] | ||||
| 	;# xorpd xmm15, xmm15
 | ||||
| 
 | ||||
|  |  | |||
|  | @ -54,7 +54,7 @@ executeProgram PROC | |||
| 	; xmm12 -> temporary | ||||
| 	; xmm13 -> DBL_MIN | ||||
| 	; xmm14 -> absolute value mask | ||||
| 	; xmm15 -> sign mask | ||||
| 	; xmm15 -> unused | ||||
| 
 | ||||
| 	; store callee-saved registers | ||||
| 	push rbx | ||||
|  | @ -104,7 +104,7 @@ executeProgram PROC | |||
| 	movapd xmm11, xmmword ptr [rcx+120] | ||||
| 	movapd xmm13, xmmword ptr [minDbl] | ||||
| 	movapd xmm14, xmmword ptr [absMask] | ||||
| 	movapd xmm15, xmmword ptr [signMask] | ||||
| 	;# xorps xmm15, xmm15 | ||||
| 
 | ||||
| 	jmp program_begin | ||||
| 
 | ||||
|  |  | |||
|  | @ -21,10 +21,10 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| 
 | ||||
| //Integer
 | ||||
| #define WT_IADD_R 12 | ||||
| #define WT_IADD_M 3 | ||||
| #define WT_IADD_RC 12 | ||||
| #define WT_IADD_M 7 | ||||
| #define WT_IADD_RC 16 | ||||
| #define WT_ISUB_R 12 | ||||
| #define WT_ISUB_M 3 | ||||
| #define WT_ISUB_M 7 | ||||
| #define WT_IMUL_9C 9 | ||||
| #define WT_IMUL_R 16 | ||||
| #define WT_IMUL_M 4 | ||||
|  | @ -35,10 +35,10 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| #define WT_IDIV_C 4 | ||||
| #define WT_ISDIV_C 4 | ||||
| #define WT_INEG_R 2 | ||||
| #define WT_IXOR_R 12 | ||||
| #define WT_IXOR_R 16 | ||||
| #define WT_IXOR_M 4 | ||||
| #define WT_IROR_R 10 | ||||
| #define WT_IROL_R 10 | ||||
| #define WT_IROR_R 8 | ||||
| #define WT_IROL_R 8 | ||||
| #define WT_ISWAP_R 4 | ||||
| 
 | ||||
| //Common floating point
 | ||||
|  | @ -49,22 +49,22 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| #define WT_FADD_M 5 | ||||
| #define WT_FSUB_R 20 | ||||
| #define WT_FSUB_M 5 | ||||
| #define WT_FPNEG_R 6 | ||||
| 
 | ||||
| //Floating point group E
 | ||||
| #define WT_FMUL_R 16 | ||||
| #define WT_FMUL_M 4 | ||||
| #define WT_FDIV_R 7 | ||||
| #define WT_FDIV_M 1 | ||||
| #define WT_FMUL_R 20 | ||||
| #define WT_FMUL_M 0 | ||||
| #define WT_FDIV_R 0 | ||||
| #define WT_FDIV_M 4 | ||||
| #define WT_FSQRT_R 6 | ||||
| 
 | ||||
| //Control
 | ||||
| #define WT_COND_R 7 | ||||
| #define WT_COND_M 1 | ||||
| #define WT_CFROUND 1 | ||||
| #define WT_CFSUM_R 0 | ||||
| 
 | ||||
| //Store
 | ||||
| #define WT_ISTORE 18 | ||||
| #define WT_ISTORE 16 | ||||
| #define WT_FSTORE 0 | ||||
| 
 | ||||
| #define WT_NOP 0 | ||||
|  | @ -74,7 +74,7 @@ WT_ISUB_M + WT_IMUL_9C + WT_IMUL_R + WT_IMUL_M + WT_IMULH_R + \ | |||
| WT_IMULH_M + WT_ISMULH_R + WT_ISMULH_M + WT_IDIV_C + WT_ISDIV_C + \ | ||||
| WT_INEG_R + WT_IXOR_R + WT_IXOR_M + WT_IROR_R + WT_IROL_R + \ | ||||
| WT_ISWAP_R + WT_FSWAP_R + WT_FADD_R + WT_FADD_M + WT_FSUB_R + WT_FSUB_M + \ | ||||
| WT_FPNEG_R + WT_FMUL_R + WT_FMUL_M + WT_FDIV_R + WT_FDIV_M + \ | ||||
| WT_CFSUM_R + WT_FMUL_R + WT_FMUL_M + WT_FDIV_R + WT_FDIV_M + \ | ||||
| WT_FSQRT_R + WT_COND_R + WT_COND_M + WT_CFROUND + WT_ISTORE + WT_FSTORE + WT_NOP; | ||||
| 
 | ||||
| static_assert(wtSum == 256, | ||||
|  |  | |||
							
								
								
									
										1419
									
								
								src/program.inc
									
										
									
									
									
								
							
							
						
						
									
										1419
									
								
								src/program.inc
									
										
									
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue