mirror of
				https://git.wownero.com/wownero/RandomWOW.git
				synced 2024-08-15 00:23:14 +00:00 
			
		
		
		
	Fixed IMUL_RCP if divisor is a power of 2
This commit is contained in:
		
							parent
							
								
									197cd90e07
								
							
						
					
					
						commit
						b1f1e1d6ad
					
				
					 5 changed files with 14 additions and 10 deletions
				
			
		|  | @ -519,7 +519,7 @@ This instruction adds the values of two registers (modulo 2<sup>64</sup>). The | |||
| These instructions output the high 64 bits of the whole 128-bit multiplication result. The result differs for signed and unsigned multiplication (IMULH is unsigned, ISMULH is signed). The variants with a register source operand perform a squaring operation if `dst` equals `src`. | ||||
| 
 | ||||
| #### 5.2.6 IMUL_RCP | ||||
| This instruction multiplies the destination register by a reciprocal of `imm32` (the immediate value is zero-extended and treated as unsigned). The reciprocal is calculated as <code>rcp = 2<sup>x</sup> / imm32</code> by choosing the largest integer `x` such that <code>rcp < 2<sup>64</sup></code>. If `imm32` equals 0, IMUL_RCP is a no-op. | ||||
| If `imm32` equals 0 or is a power of 2, IMUL_RCP is a no-op. In other cases, the instruction multiplies the destination register by a reciprocal of `imm32` (the immediate value is zero-extended and treated as unsigned). The reciprocal is calculated as <code>rcp = 2<sup>x</sup> / imm32</code> by choosing the largest integer `x` such that <code>rcp < 2<sup>64</sup></code>. | ||||
| 
 | ||||
| #### 5.2.7 INEG_R | ||||
| Performs two's complement negation of the destination register. | ||||
|  | @ -607,7 +607,7 @@ This instruction performs a conditional jump in the Program Buffer. It uses an i | |||
| A register is considered as modified by an instruction in the following cases: | ||||
| 
 | ||||
| * It is the destination register of an integer instruction except IMUL_RCP and ISWAP_R. | ||||
| * It is the destination register of IMUL_RCP and `imm32` is not zero. | ||||
| * It is the destination register of IMUL_RCP and `imm32` is neither zero nor a power of 2. | ||||
| * It is the source or the destination register of ISWAP_R and the destination and source registers are distinct. | ||||
| * The CBRANCH instruction is considered to modify all integer registers. | ||||
| 
 | ||||
|  |  | |||
|  | @ -428,9 +428,10 @@ namespace randomx { | |||
| 	} | ||||
| 
 | ||||
| 	void AssemblyGeneratorX86::h_IMUL_RCP(Instruction& instr, int i) { | ||||
| 		if (instr.getImm32() != 0) { | ||||
| 		uint64_t divisor = instr.getImm32(); | ||||
| 		if (!isPowerOf2(divisor)) { | ||||
| 			registerUsage[instr.dst].lastUsed = i; | ||||
| 			asmCode << "\tmov rax, " << randomx_reciprocal(instr.getImm32()) << std::endl; | ||||
| 			asmCode << "\tmov rax, " << randomx_reciprocal(divisor) << std::endl; | ||||
| 			asmCode << "\timul " << regR[instr.dst] << ", rax" << std::endl; | ||||
| 			traceint(instr); | ||||
| 		} | ||||
|  |  | |||
|  | @ -126,6 +126,10 @@ namespace randomx { | |||
| 		return minIndex; | ||||
| 	} | ||||
| 
 | ||||
| 	inline bool isPowerOf2(uint64_t x) { /* True when x has at most one bit set: a power of 2, or 0. */ | ||||
| 		return (x & (x - 1)) == 0; /* x & (x - 1) clears the lowest set bit; for x == 0 the AND is 0 as well, so zero also yields true. */ | ||||
| 	} | ||||
| 
 | ||||
| 	constexpr int mantissaSize = 52; | ||||
| 	constexpr int exponentSize = 11; | ||||
| 	constexpr uint64_t mantissaMask = (1ULL << mantissaSize) - 1; | ||||
|  |  | |||
|  | @ -266,8 +266,6 @@ namespace randomx { | |||
| 			SuperscalarProgram& prog = programs[j]; | ||||
| 			for (unsigned i = 0; i < prog.getSize(); ++i) { | ||||
| 				Instruction& instr = prog(i); | ||||
| 				instr.src %= RegistersCount; | ||||
| 				instr.dst %= RegistersCount; | ||||
| 				generateSuperscalarCode(instr, reciprocalCache); | ||||
| 			} | ||||
| 			emit(codeShhLoad, codeSshLoadSize); | ||||
|  | @ -614,10 +612,11 @@ namespace randomx { | |||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_IMUL_RCP(Instruction& instr, int i) { | ||||
| 		if (instr.getImm32() != 0) { | ||||
| 		uint64_t divisor = instr.getImm32(); | ||||
| 		if (!isPowerOf2(divisor)) { | ||||
| 			registerUsage[instr.dst].lastUsed = i; | ||||
| 			emit(MOV_RAX_I); | ||||
| 			emit64(randomx_reciprocal(instr.getImm32())); | ||||
| 			emit64(randomx_reciprocal(divisor)); | ||||
| 			emit(REX_IMUL_RM); | ||||
| 			emitByte(0xc0 + 8 * instr.dst); | ||||
| 		} | ||||
|  |  | |||
|  | @ -435,8 +435,8 @@ namespace randomx { | |||
| 				} break; | ||||
| 
 | ||||
| 				CASE_REP(IMUL_RCP) { | ||||
| 					uint32_t divisor = instr.getImm32(); | ||||
| 					if (divisor != 0) { | ||||
| 					uint64_t divisor = instr.getImm32(); | ||||
| 					if (!isPowerOf2(divisor)) { | ||||
| 						auto dst = instr.dst % RegistersCount; | ||||
| 						ibc.type = InstructionType::IMUL_R; | ||||
| 						ibc.idst = &r[dst]; | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue