mirror of
				https://git.wownero.com/wownero/RandomWOW.git
				synced 2024-08-15 00:23:14 +00:00 
			
		
		
		
	Merge pull request #284 from SChernykh/opt-rcp
Optimized randomx_reciprocal
This commit is contained in:
		
						commit
						7db92b73f7
					
				
					 8 changed files with 23 additions and 42 deletions
				
			
		|  | @ -445,7 +445,7 @@ namespace randomx { | |||
| 	} | ||||
| 
 | ||||
| 	void AssemblyGeneratorX86::h_IMUL_RCP(Instruction& instr, int i) { | ||||
| 		uint64_t divisor = instr.getImm32(); | ||||
| 		const uint32_t divisor = instr.getImm32(); | ||||
| 		if (!isZeroOrPowerOf2(divisor)) { | ||||
| 			registerUsage[instr.dst] = i; | ||||
| 			asmCode << "\tmov rax, " << randomx_reciprocal(divisor) << std::endl; | ||||
|  |  | |||
|  | @ -243,7 +243,7 @@ namespace randomx { | |||
| 		} | ||||
| 
 | ||||
| 		if (opcode < ceil_IMUL_RCP) { | ||||
| 			uint64_t divisor = instr.getImm32(); | ||||
| 			const uint32_t divisor = instr.getImm32(); | ||||
| 			if (!isZeroOrPowerOf2(divisor)) { | ||||
| 				auto dst = instr.dst % RegistersCount; | ||||
| 				ibc.type = InstructionType::IMUL_R; | ||||
|  |  | |||
|  | @ -686,7 +686,7 @@ void JitCompilerA64::h_ISMULH_M(Instruction& instr, uint32_t& codePos) | |||
| 
 | ||||
| void JitCompilerA64::h_IMUL_RCP(Instruction& instr, uint32_t& codePos) | ||||
| { | ||||
| 	const uint64_t divisor = instr.getImm32(); | ||||
| 	const uint32_t divisor = instr.getImm32(); | ||||
| 	if (isZeroOrPowerOf2(divisor)) | ||||
| 		return; | ||||
| 
 | ||||
|  | @ -695,22 +695,11 @@ void JitCompilerA64::h_IMUL_RCP(Instruction& instr, uint32_t& codePos) | |||
| 	constexpr uint32_t tmp_reg = 20; | ||||
| 	const uint32_t dst = IntRegMap[instr.dst]; | ||||
| 
 | ||||
| 	constexpr uint64_t N = 1ULL << 63; | ||||
| 	const uint64_t q = N / divisor; | ||||
| 	const uint64_t r = N % divisor; | ||||
| #ifdef __GNUC__ | ||||
| 	const uint64_t shift = 64 - __builtin_clzll(divisor); | ||||
| #else | ||||
| 	uint64_t shift = 32; | ||||
| 	for (uint64_t k = 1U << 31; (k & divisor) == 0; k >>= 1) | ||||
| 		--shift; | ||||
| #endif | ||||
| 
 | ||||
| 	const uint32_t literal_id = (ImulRcpLiteralsEnd - literalPos) / sizeof(uint64_t); | ||||
| 
 | ||||
| 	literalPos -= sizeof(uint64_t); | ||||
| 	const uint64_t randomx_reciprocal = (q << shift) + ((r << shift) / divisor); | ||||
| 	memcpy(code + literalPos, &randomx_reciprocal, sizeof(randomx_reciprocal)); | ||||
| 
 | ||||
| 	const uint64_t reciprocal = randomx_reciprocal_fast(divisor); | ||||
| 	memcpy(code + literalPos, &reciprocal, sizeof(reciprocal)); | ||||
| 
 | ||||
| 	if (literal_id < 12) | ||||
| 	{ | ||||
|  |  | |||
|  | @ -776,7 +776,7 @@ namespace randomx { | |||
| 	} | ||||
| 
 | ||||
| 	static void v1_IMUL_RCP(HANDLER_ARGS) { | ||||
| 		uint64_t divisor = isn.getImm32(); | ||||
| 		const uint32_t divisor = isn.getImm32(); | ||||
| 		if (!isZeroOrPowerOf2(divisor)) { | ||||
| 			state.registerUsage[isn.dst] = i; | ||||
| 			if (state.rcpCount < 4) { | ||||
|  |  | |||
|  | @ -618,7 +618,7 @@ namespace randomx { | |||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_IMUL_RCP(Instruction& instr, int i) { | ||||
| 		uint64_t divisor = instr.getImm32(); | ||||
| 		const uint32_t divisor = instr.getImm32(); | ||||
| 		if (!isZeroOrPowerOf2(divisor)) { | ||||
| 			registerUsage[instr.dst] = i; | ||||
| 			emit(MOV_RAX_I); | ||||
|  |  | |||
|  | @ -44,36 +44,28 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| 	ret | ||||
| 
 | ||||
| */ | ||||
| uint64_t randomx_reciprocal(uint64_t divisor) { | ||||
| uint64_t randomx_reciprocal(uint32_t divisor) { | ||||
| 
 | ||||
| 	assert(divisor != 0); | ||||
| 
 | ||||
| 	const uint64_t p2exp63 = 1ULL << 63; | ||||
| 	const uint64_t q = p2exp63 / divisor; | ||||
| 	const uint64_t r = p2exp63 % divisor; | ||||
| 
 | ||||
| 	uint64_t quotient = p2exp63 / divisor, remainder = p2exp63 % divisor; | ||||
| #ifdef __GNUC__ | ||||
| 	const uint32_t shift = 64 - __builtin_clzll(divisor); | ||||
| #else | ||||
| 	uint32_t shift = 32; | ||||
| 	for (uint32_t k = 1U << 31; (k & divisor) == 0; k >>= 1) | ||||
| 		--shift; | ||||
| #endif | ||||
| 
 | ||||
| 	unsigned bsr = 0; //highest set bit in divisor
 | ||||
| 
 | ||||
| 	for (uint64_t bit = divisor; bit > 0; bit >>= 1) | ||||
| 		bsr++; | ||||
| 
 | ||||
| 	for (unsigned shift = 0; shift < bsr; shift++) { | ||||
| 		if (remainder >= divisor - remainder) { | ||||
| 			quotient = quotient * 2 + 1; | ||||
| 			remainder = remainder * 2 - divisor; | ||||
| 		} | ||||
| 		else { | ||||
| 			quotient = quotient * 2; | ||||
| 			remainder = remainder * 2; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	return quotient; | ||||
| 	return (q << shift) + ((r << shift) / divisor); | ||||
| } | ||||
| 
 | ||||
| #if !RANDOMX_HAVE_FAST_RECIPROCAL | ||||
| 
 | ||||
| uint64_t randomx_reciprocal_fast(uint64_t divisor) { | ||||
| uint64_t randomx_reciprocal_fast(uint32_t divisor) { | ||||
| 	return randomx_reciprocal(divisor); | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -40,8 +40,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| extern "C" { | ||||
| #endif | ||||
| 
 | ||||
| uint64_t randomx_reciprocal(uint64_t); | ||||
| uint64_t randomx_reciprocal_fast(uint64_t); | ||||
| uint64_t randomx_reciprocal(uint32_t); | ||||
| uint64_t randomx_reciprocal_fast(uint32_t); | ||||
| 
 | ||||
| #if defined(__cplusplus) | ||||
| } | ||||
|  |  | |||
|  | @ -477,7 +477,7 @@ int analyze(randomx::Program& p) { | |||
| 		} | ||||
| 
 | ||||
| 		if (opcode < randomx::ceil_IMUL_RCP) { | ||||
| 			uint64_t divisor = instr.getImm32(); | ||||
| 			const uint32_t divisor = instr.getImm32(); | ||||
| 			if (!randomx::isZeroOrPowerOf2(divisor)) { | ||||
| 				instr.dst = instr.dst % randomx::RegistersCount; | ||||
| 				instr.opcode |= DST_INT; | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue