mirror of
				https://git.wownero.com/wownero/RandomWOW.git
				synced 2024-08-15 00:23:14 +00:00 
			
		
		
		
	Unique scratchpad addresses
This commit is contained in:
		
							parent
							
								
									2c87a058ec
								
							
						
					
					
						commit
						33a2fd021d
					
				
					 6 changed files with 287 additions and 288 deletions
				
			
		|  | @ -257,12 +257,12 @@ namespace RandomX { | |||
| 	} | ||||
| 
 | ||||
| 	void AssemblyGeneratorX86::genAddressReg(Instruction& instr, const char* reg = "eax") { | ||||
| 		asmCode << "\tmov " << reg << ", " << regR32[instr.src] << std::endl; | ||||
| 		asmCode << "\tlea " << reg << ", [" << regR32[instr.src] << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl; | ||||
| 		asmCode << "\tand " << reg << ", " << ((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask) << std::endl; | ||||
| 	} | ||||
| 
 | ||||
| 	void AssemblyGeneratorX86::genAddressRegDst(Instruction& instr, int maskAlign = 8) { | ||||
| 		asmCode << "\tmov eax" << ", " << regR32[instr.dst] << std::endl; | ||||
| 		asmCode << "\tlea eax, [" << regR32[instr.dst] << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl; | ||||
| 		asmCode << "\tand eax" << ", " << ((instr.mod % 4) ? (ScratchpadL1Mask & (-maskAlign)) : (ScratchpadL2Mask & (-maskAlign))) << std::endl; | ||||
| 	} | ||||
| 
 | ||||
|  | @ -273,7 +273,7 @@ namespace RandomX { | |||
| 	//1 uOP
 | ||||
| 	void AssemblyGeneratorX86::h_IADD_RS(Instruction& instr, int i) { | ||||
| 		registerUsage[instr.dst] = i; | ||||
| 		if(instr.dst == 5) | ||||
| 		if(instr.dst == RegisterNeedsDisplacement) | ||||
| 			asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.mod % 4)) << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl; | ||||
| 		else | ||||
| 			asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.mod % 4)) << "]" << std::endl; | ||||
|  |  | |||
|  | @ -29,11 +29,11 @@ namespace RandomX { | |||
| 	} | ||||
| 
 | ||||
| 	void Instruction::genAddressReg(std::ostream& os) const { | ||||
| 		os << ((mod % 4) ? "L1" : "L2") << "[r" << (int)src << "]"; | ||||
| 		os << ((mod % 4) ? "L1" : "L2") << "[r" << (int)src << std::showpos << (int32_t)getImm32() << std::noshowpos << "]"; | ||||
| 	} | ||||
| 
 | ||||
| 	void Instruction::genAddressRegDst(std::ostream& os) const { | ||||
| 		os << ((mod % 4) ? "L1" : "L2") << "[r" << (int)dst << "]"; | ||||
| 		os << ((mod % 4) ? "L1" : "L2") << "[r" << (int)dst << std::showpos << (int32_t)getImm32() << std::noshowpos << "]"; | ||||
| 	} | ||||
| 
 | ||||
| 	void Instruction::genAddressImm(std::ostream& os) const { | ||||
|  | @ -41,12 +41,11 @@ namespace RandomX { | |||
| 	} | ||||
| 
 | ||||
| 	void Instruction::h_IADD_RS(std::ostream& os) const { | ||||
| 		if (src != dst) { | ||||
| 			os << "r" << (int)dst << ", r" << (int)src << ", LSH " << (int)(mod % 4) << std::endl; | ||||
| 		} | ||||
| 		else { | ||||
| 			os << "r" << (int)dst << ", " << (int32_t)getImm32() << std::endl; | ||||
| 		os << "r" << (int)dst << ", r" << (int)src; | ||||
| 		if(dst == RegisterNeedsDisplacement) { | ||||
| 			os << ", " << (int32_t)getImm32(); | ||||
| 		} | ||||
| 		os << ", LSH " << (int)(mod % 4) << std::endl; | ||||
| 	} | ||||
| 
 | ||||
| 	void Instruction::h_IADD_M(std::ostream& os) const { | ||||
|  |  | |||
|  | @ -201,6 +201,7 @@ namespace RandomX { | |||
| 	static const uint8_t REX_TEST[] = { 0x49, 0xF7 }; | ||||
| 	static const uint8_t JZ[] = { 0x0f, 0x84 }; | ||||
| 	static const uint8_t RET = 0xc3; | ||||
| 	static const uint8_t LEA_32[] = { 0x67, 0x41, 0x8d }; | ||||
| 
 | ||||
| 	static const uint8_t NOP1[] = { 0x90 }; | ||||
| 	static const uint8_t NOP2[] = { 0x66, 0x90 }; | ||||
|  | @ -434,8 +435,12 @@ namespace RandomX { | |||
| 	template void JitCompilerX86::generateCode<Program>(Instruction& instr, int i); | ||||
| 
 | ||||
| 	void JitCompilerX86::genAddressReg(Instruction& instr, bool rax = true) { | ||||
| 		emit(REX_MOV_RR); | ||||
| 		emitByte((rax ? 0xc0 : 0xc8) + instr.src); | ||||
| 		emit(LEA_32); | ||||
| 		emitByte(0x80 + instr.src + (rax ? 0 : 8)); | ||||
| 		if (instr.src == RegisterNeedsSib) { | ||||
| 			emitByte(0x24); | ||||
| 		} | ||||
| 		emit32(instr.getImm32()); | ||||
| 		if (rax) | ||||
| 			emitByte(AND_EAX_I); | ||||
| 		else | ||||
|  | @ -444,8 +449,12 @@ namespace RandomX { | |||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::genAddressRegDst(Instruction& instr, bool align16 = false) { | ||||
| 		emit(REX_MOV_RR); | ||||
| 		emitByte(0xc0 + instr.dst); | ||||
| 		emit(LEA_32); | ||||
| 		emitByte(0x80 + instr.dst); | ||||
| 		if (instr.dst == RegisterNeedsSib) { | ||||
| 			emitByte(0x24); | ||||
| 		} | ||||
| 		emit32(instr.getImm32()); | ||||
| 		emitByte(AND_EAX_I); | ||||
| 		int32_t maskL1 = align16 ? ScratchpadL1Mask16 : ScratchpadL1Mask; | ||||
| 		int32_t maskL2 = align16 ? ScratchpadL2Mask16 : ScratchpadL2Mask; | ||||
|  | @ -472,12 +481,12 @@ namespace RandomX { | |||
| 			return; | ||||
| 		} | ||||
| 		emit(REX_LEA); | ||||
| 		if (instr.dst == 5) //rbp,r13 cannot be the base register without offset
 | ||||
| 		if (instr.dst == RegisterNeedsDisplacement) | ||||
| 			emitByte(0xac); | ||||
| 		else | ||||
| 			emitByte(0x04 + 8 * instr.dst); | ||||
| 		genSIB(instr.mod % 4, instr.src, instr.dst); | ||||
| 		if (instr.dst == 5) | ||||
| 		if (instr.dst == RegisterNeedsDisplacement) | ||||
| 			emit32(instr.getImm32()); | ||||
| 	} | ||||
| 
 | ||||
|  |  | |||
|  | @ -95,7 +95,8 @@ namespace RandomX { | |||
| 	constexpr int ScratchpadL3Mask = (ScratchpadL3 - 1) * 8; | ||||
| 	constexpr int ScratchpadL3Mask64 = (ScratchpadL3 / 8 - 1) * 64; | ||||
| 	constexpr int RegistersCount = 8; | ||||
| 	constexpr int LimitedAddressRegister = 5; //x86 r13 register
 | ||||
| 	constexpr int RegisterNeedsDisplacement = 5; //x86 r13 register
 | ||||
| 	constexpr int RegisterNeedsSib = 4; //x86 r12 register
 | ||||
| 
 | ||||
| 	struct Cache { | ||||
| 		uint8_t* memory; | ||||
|  |  | |||
							
								
								
									
										526
									
								
								src/program.inc
									
										
									
									
									
								
							
							
						
						
									
										526
									
								
								src/program.inc
									
										
									
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							|  | @ -495,7 +495,7 @@ namespace RandomX { | |||
| 			//   - this avoids optimizable instruction sequences such as "xor r1, r2; xor r1, r2" or "ror r, C1; ror r, C2" or "add r, C1; add r, C2"
 | ||||
| 			// * register r5 cannot be the destination of the IADD_RS instruction (limitation of the x86 lea instruction)
 | ||||
| 			for (unsigned i = 0; i < 8; ++i) { | ||||
| 				if (registers[i].latency <= cycle && (canReuse_ || i != src_) && (allowChainedMul || opGroup_ != SuperscalarInstructionType::IMUL_R || registers[i].lastOpGroup != SuperscalarInstructionType::IMUL_R) && (registers[i].lastOpGroup != opGroup_ || registers[i].lastOpPar != opGroupPar_) && (info_->getType() != SuperscalarInstructionType::IADD_RS || i != LimitedAddressRegister)) | ||||
| 				if (registers[i].latency <= cycle && (canReuse_ || i != src_) && (allowChainedMul || opGroup_ != SuperscalarInstructionType::IMUL_R || registers[i].lastOpGroup != SuperscalarInstructionType::IMUL_R) && (registers[i].lastOpGroup != opGroup_ || registers[i].lastOpPar != opGroupPar_) && (info_->getType() != SuperscalarInstructionType::IADD_RS || i != RegisterNeedsDisplacement)) | ||||
| 					availableRegisters.push_back(i); | ||||
| 			} | ||||
| 			return selectRegister(availableRegisters, gen, dst_); | ||||
|  | @ -510,8 +510,8 @@ namespace RandomX { | |||
| 			} | ||||
| 			//if there are only 2 available registers for IADD_RS and one of them is r5, select it as the source because it cannot be the destination
 | ||||
| 			if (availableRegisters.size() == 2 && info_->getType() == SuperscalarInstructionType::IADD_RS) { | ||||
| 				if (availableRegisters[0] == LimitedAddressRegister || availableRegisters[1] == LimitedAddressRegister) { | ||||
| 					opGroupPar_ = src_ = LimitedAddressRegister; | ||||
| 				if (availableRegisters[0] == RegisterNeedsDisplacement || availableRegisters[1] == RegisterNeedsDisplacement) { | ||||
| 					opGroupPar_ = src_ = RegisterNeedsDisplacement; | ||||
| 					return true; | ||||
| 				} | ||||
| 			} | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue