mirror of
				https://git.wownero.com/wownero/RandomWOW.git
				synced 2024-08-15 00:23:14 +00:00 
			
		
		
		
	Unique scratchpad addresses - interpreter
Additional writes to L3
This commit is contained in:
		
							parent
							
								
									33a2fd021d
								
							
						
					
					
						commit
						682000b1a9
					
				
					 11 changed files with 161 additions and 112 deletions
				
			
		|  | @ -76,7 +76,7 @@ namespace RandomX { | |||
| 				asmCode << "xor " << regR[instr.dst] << ", " << regR[instr.src] << std::endl; | ||||
| 				break; | ||||
| 			case RandomX::SuperscalarInstructionType::IADD_RS: | ||||
| 				asmCode << "lea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.mod % 4)) << "]" << std::endl; | ||||
| 				asmCode << "lea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.getModShift2())) << "]" << std::endl; | ||||
| 				break; | ||||
| 			case RandomX::SuperscalarInstructionType::IMUL_R: | ||||
| 				asmCode << "imul " << regR[instr.dst] << ", " << regR[instr.src] << std::endl; | ||||
|  | @ -185,7 +185,7 @@ namespace RandomX { | |||
| 				asmCode << regR[instr.dst] << " ^= " << regR[instr.src] << ";" << std::endl; | ||||
| 				break; | ||||
| 			case RandomX::SuperscalarInstructionType::IADD_RS: | ||||
| 				asmCode << regR[instr.dst] << " += " << regR[instr.src] << "*" << (1 << (instr.mod % 4)) << ";" << std::endl; | ||||
| 				asmCode << regR[instr.dst] << " += " << regR[instr.src] << "*" << (1 << (instr.getModShift2())) << ";" << std::endl; | ||||
| 				break; | ||||
| 			case RandomX::SuperscalarInstructionType::IMUL_R: | ||||
| 				asmCode << regR[instr.dst] << " *= " << regR[instr.src] << ";" << std::endl; | ||||
|  | @ -258,12 +258,19 @@ namespace RandomX { | |||
| 
 | ||||
| 	void AssemblyGeneratorX86::genAddressReg(Instruction& instr, const char* reg = "eax") { | ||||
| 		asmCode << "\tlea " << reg << ", [" << regR32[instr.src] << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl; | ||||
| 		asmCode << "\tand " << reg << ", " << ((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask) << std::endl; | ||||
| 		asmCode << "\tand " << reg << ", " << ((instr.getModMem()) ? ScratchpadL1Mask : ScratchpadL2Mask) << std::endl; | ||||
| 	} | ||||
| 
 | ||||
| 	void AssemblyGeneratorX86::genAddressRegDst(Instruction& instr, int maskAlign = 8) { | ||||
| 		asmCode << "\tlea eax, [" << regR32[instr.dst] << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl; | ||||
| 		asmCode << "\tand eax" << ", " << ((instr.mod % 4) ? (ScratchpadL1Mask & (-maskAlign)) : (ScratchpadL2Mask & (-maskAlign))) << std::endl; | ||||
| 		int mask; | ||||
| 		if (instr.getModCond()) { | ||||
| 			mask = instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask; | ||||
| 		} | ||||
| 		else { | ||||
| 			mask = ScratchpadL3Mask; | ||||
| 		} | ||||
| 		asmCode << "\tand eax" << ", " << (mask & (-maskAlign)) << std::endl; | ||||
| 	} | ||||
| 
 | ||||
| 	int32_t AssemblyGeneratorX86::genAddressImm(Instruction& instr) { | ||||
|  | @ -274,9 +281,9 @@ namespace RandomX { | |||
| 	void AssemblyGeneratorX86::h_IADD_RS(Instruction& instr, int i) { | ||||
| 		registerUsage[instr.dst] = i; | ||||
| 		if(instr.dst == RegisterNeedsDisplacement) | ||||
| 			asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.mod % 4)) << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl; | ||||
| 			asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.getModShift2())) << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl; | ||||
| 		else | ||||
| 			asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.mod % 4)) << "]" << std::endl; | ||||
| 			asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.getModShift2())) << "]" << std::endl; | ||||
| 		traceint(instr); | ||||
| 	} | ||||
| 
 | ||||
|  | @ -607,7 +614,7 @@ namespace RandomX { | |||
| 	} | ||||
| 
 | ||||
| 	static inline const char* condition(Instruction& instr) { | ||||
| 		switch ((instr.mod >> 2) & 7) | ||||
| 		switch (instr.getModCond()) | ||||
| 		{ | ||||
| 			case 0: | ||||
| 				return "be"; | ||||
|  | @ -631,7 +638,7 @@ namespace RandomX { | |||
| 	} | ||||
| 
 | ||||
| 	void AssemblyGeneratorX86::handleCondition(Instruction& instr, int i) { | ||||
| 		const int shift = (instr.mod >> 5); | ||||
| 		const int shift = instr.getModShift3(); | ||||
| 		const int conditionMask = ((1 << RANDOMX_CONDITION_BITS) - 1) << shift; | ||||
| 		int reg = getConditionRegister(); | ||||
| 		int target = registerUsage[reg] + 1; | ||||
|  | @ -647,7 +654,7 @@ namespace RandomX { | |||
| 	//4 uOPs
 | ||||
| 	void AssemblyGeneratorX86::h_COND_R(Instruction& instr, int i) { | ||||
| 		handleCondition(instr, i); | ||||
| 		asmCode << "\txor rcx, rcx" << std::endl; | ||||
| 		asmCode << "\txor ecx, ecx" << std::endl; | ||||
| 		asmCode << "\tcmp " << regR32[instr.src] << ", " << (int32_t)instr.getImm32() << std::endl; | ||||
| 		asmCode << "\tset" << condition(instr) << " cl" << std::endl; | ||||
| 		asmCode << "\tadd " << regR[instr.dst] << ", rcx" << std::endl; | ||||
|  | @ -657,7 +664,7 @@ namespace RandomX { | |||
| 	//6 uOPs
 | ||||
| 	void AssemblyGeneratorX86::h_COND_M(Instruction& instr, int i) { | ||||
| 		handleCondition(instr, i); | ||||
| 		asmCode << "\txor rcx, rcx" << std::endl; | ||||
| 		asmCode << "\txor ecx, ecx" << std::endl; | ||||
| 		genAddressReg(instr); | ||||
| 		asmCode << "\tcmp dword ptr [rsi+rax], " << (int32_t)instr.getImm32() << std::endl; | ||||
| 		asmCode << "\tset" << condition(instr) << " cl" << std::endl; | ||||
|  |  | |||
|  | @ -33,7 +33,11 @@ namespace RandomX { | |||
| 	} | ||||
| 
 | ||||
| 	void Instruction::genAddressRegDst(std::ostream& os) const { | ||||
| 		os << ((mod % 4) ? "L1" : "L2") << "[r" << (int)dst << std::showpos << (int32_t)getImm32() << std::noshowpos << "]"; | ||||
| 		if (getModCond()) | ||||
| 			os << ((mod % 4) ? "L1" : "L2"); | ||||
| 		else | ||||
| 			os << "L3"; | ||||
| 		os << "[r" << (int)dst << std::showpos << (int32_t)getImm32() << std::noshowpos << "]"; | ||||
| 	} | ||||
| 
 | ||||
| 	void Instruction::genAddressImm(std::ostream& os) const { | ||||
|  |  | |||
|  | @ -74,7 +74,6 @@ namespace RandomX { | |||
| 		uint8_t opcode; | ||||
| 		uint8_t dst; | ||||
| 		uint8_t src; | ||||
| 		uint8_t mod; | ||||
| 		uint32_t getImm32() const { | ||||
| 			return load32(&imm32); | ||||
| 		} | ||||
|  | @ -88,7 +87,23 @@ namespace RandomX { | |||
| 			i.print(os); | ||||
| 			return os; | ||||
| 		} | ||||
| 		int getModMem() const { | ||||
| 			return mod % 4; | ||||
| 		} | ||||
| 		int getModCond() const { | ||||
| 			return (mod >> 2) & 7; | ||||
| 		} | ||||
| 		int getModShift3() const { | ||||
| 			return mod >> 5; | ||||
| 		} | ||||
| 		int getModShift2() const { | ||||
| 			return mod >> 6; | ||||
| 		} | ||||
| 		void setMod(uint8_t val) { | ||||
| 			mod = val; | ||||
| 		} | ||||
| 	private: | ||||
| 		uint8_t mod; | ||||
| 		uint32_t imm32; | ||||
| 		void print(std::ostream&) const; | ||||
| 		static const char* names[256]; | ||||
|  |  | |||
|  | @ -45,6 +45,8 @@ constexpr bool fpuCheck = false; | |||
| 
 | ||||
| namespace RandomX { | ||||
| 
 | ||||
| 	static int_reg_t Zero = 0; | ||||
| 
 | ||||
| 	template<bool superscalar> | ||||
| 	void InterpretedVirtualMachine<superscalar>::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]) { | ||||
| 		mem.ds = ds; | ||||
|  | @ -108,6 +110,12 @@ namespace RandomX { | |||
| 		return std::fpclassify(x) == FP_SUBNORMAL; | ||||
| 	} | ||||
| 
 | ||||
| 	template<bool superscalar> | ||||
| 	FORCE_INLINE void* InterpretedVirtualMachine<superscalar>::getScratchpadAddress(InstructionByteCode& ibc) { | ||||
| 		uint32_t addr = (*ibc.isrc + ibc.imm) & ibc.memMask; | ||||
| 		return scratchpad + addr; | ||||
| 	} | ||||
| 
 | ||||
| 	 template<bool superscalar> | ||||
| 	 FORCE_INLINE void InterpretedVirtualMachine<superscalar>::executeBytecode(int& ic, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) { | ||||
| 		auto& ibc = byteCode[ic]; | ||||
|  | @ -120,7 +128,7 @@ namespace RandomX { | |||
| 			} break; | ||||
| 
 | ||||
| 			case InstructionType::IADD_M: { | ||||
| 				*ibc.idst += load64(scratchpad + (*ibc.isrc & ibc.memMask)); | ||||
| 				*ibc.idst += load64(getScratchpadAddress(ibc)); | ||||
| 			} break; | ||||
| 
 | ||||
| 			case InstructionType::IADD_RC: { | ||||
|  | @ -132,7 +140,7 @@ namespace RandomX { | |||
| 			} break; | ||||
| 
 | ||||
| 			case InstructionType::ISUB_M: { | ||||
| 				*ibc.idst -= load64(scratchpad + (*ibc.isrc & ibc.memMask)); | ||||
| 				*ibc.idst -= load64(getScratchpadAddress(ibc)); | ||||
| 			} break; | ||||
| 
 | ||||
| 			case InstructionType::IMUL_9C: { | ||||
|  | @ -144,7 +152,7 @@ namespace RandomX { | |||
| 			} break; | ||||
| 
 | ||||
| 			case InstructionType::IMUL_M: { | ||||
| 				*ibc.idst *= load64(scratchpad + (*ibc.isrc & ibc.memMask)); | ||||
| 				*ibc.idst *= load64(getScratchpadAddress(ibc)); | ||||
| 			} break; | ||||
| 
 | ||||
| 			case InstructionType::IMULH_R: { | ||||
|  | @ -152,7 +160,7 @@ namespace RandomX { | |||
| 			} break; | ||||
| 
 | ||||
| 			case InstructionType::IMULH_M: { | ||||
| 				*ibc.idst = mulh(*ibc.idst, load64(scratchpad + (*ibc.isrc & ibc.memMask))); | ||||
| 				*ibc.idst = mulh(*ibc.idst, load64(getScratchpadAddress(ibc))); | ||||
| 			} break; | ||||
| 
 | ||||
| 			case InstructionType::ISMULH_R: { | ||||
|  | @ -160,7 +168,7 @@ namespace RandomX { | |||
| 			} break; | ||||
| 
 | ||||
| 			case InstructionType::ISMULH_M: { | ||||
| 				*ibc.idst = smulh(unsigned64ToSigned2sCompl(*ibc.idst), unsigned64ToSigned2sCompl(load64(scratchpad + (*ibc.isrc & ibc.memMask)))); | ||||
| 				*ibc.idst = smulh(unsigned64ToSigned2sCompl(*ibc.idst), unsigned64ToSigned2sCompl(load64(getScratchpadAddress(ibc)))); | ||||
| 			} break; | ||||
| 
 | ||||
| 			case InstructionType::INEG_R: { | ||||
|  | @ -172,7 +180,7 @@ namespace RandomX { | |||
| 			} break; | ||||
| 
 | ||||
| 			case InstructionType::IXOR_M: { | ||||
| 				*ibc.idst ^= load64(scratchpad + (*ibc.isrc & ibc.memMask)); | ||||
| 				*ibc.idst ^= load64(getScratchpadAddress(ibc)); | ||||
| 			} break; | ||||
| 
 | ||||
| 			case InstructionType::IROR_R: { | ||||
|  | @ -198,7 +206,7 @@ namespace RandomX { | |||
| 			} break; | ||||
| 
 | ||||
| 			case InstructionType::FADD_M: { | ||||
| 				__m128d fsrc = load_cvt_i32x2(scratchpad + (*ibc.isrc & ibc.memMask)); | ||||
| 				__m128d fsrc = load_cvt_i32x2(getScratchpadAddress(ibc)); | ||||
| 				*ibc.fdst = _mm_add_pd(*ibc.fdst, fsrc); | ||||
| 			} break; | ||||
| 
 | ||||
|  | @ -207,7 +215,7 @@ namespace RandomX { | |||
| 			} break; | ||||
| 
 | ||||
| 			case InstructionType::FSUB_M: { | ||||
| 				__m128d fsrc = load_cvt_i32x2(scratchpad + (*ibc.isrc & ibc.memMask)); | ||||
| 				__m128d fsrc = load_cvt_i32x2(getScratchpadAddress(ibc)); | ||||
| 				*ibc.fdst = _mm_sub_pd(*ibc.fdst, fsrc); | ||||
| 			} break; | ||||
| 
 | ||||
|  | @ -221,7 +229,7 @@ namespace RandomX { | |||
| 			} break; | ||||
| 
 | ||||
| 			case InstructionType::FDIV_M: { | ||||
| 				__m128d fsrc = ieee_set_exponent<-240>(load_cvt_i32x2(scratchpad + (*ibc.isrc & ibc.memMask))); | ||||
| 				__m128d fsrc = ieee_set_exponent<-240>(load_cvt_i32x2(getScratchpadAddress(ibc))); | ||||
| 				*ibc.fdst = _mm_div_pd(*ibc.fdst, fsrc); | ||||
| 			} break; | ||||
| 
 | ||||
|  | @ -262,7 +270,7 @@ namespace RandomX { | |||
| 				count_JUMP_not_taken++; | ||||
| #endif | ||||
| #endif | ||||
| 				*ibc.idst += condition(ibc.condition, load64(scratchpad + (*ibc.isrc & ibc.memMask)), ibc.imm) ? 1 : 0; | ||||
| 				*ibc.idst += condition(ibc.condition, load64(getScratchpadAddress(ibc)), ibc.imm) ? 1 : 0; | ||||
| 			} break; | ||||
| 
 | ||||
| 			case InstructionType::CFROUND: { | ||||
|  | @ -270,7 +278,7 @@ namespace RandomX { | |||
| 			} break; | ||||
| 
 | ||||
| 			case InstructionType::ISTORE: { | ||||
| 				store64(scratchpad + (*ibc.idst & ibc.memMask), *ibc.isrc); | ||||
| 				store64(scratchpad + ((*ibc.idst + ibc.imm) & ibc.memMask), *ibc.isrc); | ||||
| 			} break; | ||||
| 
 | ||||
| 			case InstructionType::NOP: { | ||||
|  | @ -486,7 +494,7 @@ namespace RandomX { | |||
| 				r[instr.dst] ^= r[instr.src]; | ||||
| 				break; | ||||
| 			case RandomX::SuperscalarInstructionType::IADD_RS: | ||||
| 				r[instr.dst] += r[instr.src] << (instr.mod % 4); | ||||
| 				r[instr.dst] += r[instr.src] << instr.getModShift2(); | ||||
| 				break; | ||||
| 			case RandomX::SuperscalarInstructionType::IMUL_R: | ||||
| 				r[instr.dst] *= r[instr.src]; | ||||
|  | @ -585,14 +593,14 @@ namespace RandomX { | |||
| 					auto src = instr.src % RegistersCount; | ||||
| 					ibc.type = InstructionType::IADD_RS; | ||||
| 					ibc.idst = &r[dst]; | ||||
| 					if (dst != 5) { | ||||
| 					if (dst != RegisterNeedsDisplacement) { | ||||
| 						ibc.isrc = &r[src]; | ||||
| 						ibc.shift = instr.mod % 4; | ||||
| 						ibc.shift = instr.getModShift2(); | ||||
| 						ibc.imm = 0; | ||||
| 					} | ||||
| 					else { | ||||
| 						ibc.isrc = &r[src]; | ||||
| 						ibc.shift = instr.mod % 4; | ||||
| 						ibc.shift = instr.getModShift2(); | ||||
| 						ibc.imm = signExtend2sCompl(instr.getImm32()); | ||||
| 					} | ||||
| 					registerUsage[instr.dst] = i; | ||||
|  | @ -603,13 +611,13 @@ namespace RandomX { | |||
| 					auto src = instr.src % RegistersCount; | ||||
| 					ibc.type = InstructionType::IADD_M; | ||||
| 					ibc.idst = &r[dst]; | ||||
| 					ibc.imm = signExtend2sCompl(instr.getImm32()); | ||||
| 					if (instr.src != instr.dst) { | ||||
| 						ibc.isrc = &r[src]; | ||||
| 						ibc.memMask = ((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask); | ||||
| 						ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); | ||||
| 					} | ||||
| 					else { | ||||
| 						ibc.imm = instr.getImm32(); | ||||
| 						ibc.isrc = &ibc.imm; | ||||
| 						ibc.isrc = &Zero; | ||||
| 						ibc.memMask = ScratchpadL3Mask; | ||||
| 					} | ||||
| 					registerUsage[instr.dst] = i; | ||||
|  | @ -645,13 +653,13 @@ namespace RandomX { | |||
| 					auto src = instr.src % RegistersCount; | ||||
| 					ibc.type = InstructionType::ISUB_M; | ||||
| 					ibc.idst = &r[dst]; | ||||
| 					ibc.imm = signExtend2sCompl(instr.getImm32()); | ||||
| 					if (instr.src != instr.dst) { | ||||
| 						ibc.isrc = &r[src]; | ||||
| 						ibc.memMask = ((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask); | ||||
| 						ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); | ||||
| 					} | ||||
| 					else { | ||||
| 						ibc.imm = instr.getImm32(); | ||||
| 						ibc.isrc = &ibc.imm; | ||||
| 						ibc.isrc = &Zero; | ||||
| 						ibc.memMask = ScratchpadL3Mask; | ||||
| 					} | ||||
| 					registerUsage[instr.dst] = i; | ||||
|  | @ -685,13 +693,13 @@ namespace RandomX { | |||
| 					auto src = instr.src % RegistersCount; | ||||
| 					ibc.type = InstructionType::IMUL_M; | ||||
| 					ibc.idst = &r[dst]; | ||||
| 					ibc.imm = signExtend2sCompl(instr.getImm32()); | ||||
| 					if (instr.src != instr.dst) { | ||||
| 						ibc.isrc = &r[src]; | ||||
| 						ibc.memMask = ((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask); | ||||
| 						ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); | ||||
| 					} | ||||
| 					else { | ||||
| 						ibc.imm = instr.getImm32(); | ||||
| 						ibc.isrc = &ibc.imm; | ||||
| 						ibc.isrc = &Zero; | ||||
| 						ibc.memMask = ScratchpadL3Mask; | ||||
| 					} | ||||
| 					registerUsage[instr.dst] = i; | ||||
|  | @ -711,13 +719,13 @@ namespace RandomX { | |||
| 					auto src = instr.src % RegistersCount; | ||||
| 					ibc.type = InstructionType::IMULH_M; | ||||
| 					ibc.idst = &r[dst]; | ||||
| 					ibc.imm = signExtend2sCompl(instr.getImm32()); | ||||
| 					if (instr.src != instr.dst) { | ||||
| 						ibc.isrc = &r[src]; | ||||
| 						ibc.memMask = ((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask); | ||||
| 						ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); | ||||
| 					} | ||||
| 					else { | ||||
| 						ibc.imm = instr.getImm32(); | ||||
| 						ibc.isrc = &ibc.imm; | ||||
| 						ibc.isrc = &Zero; | ||||
| 						ibc.memMask = ScratchpadL3Mask; | ||||
| 					} | ||||
| 					registerUsage[instr.dst] = i; | ||||
|  | @ -737,13 +745,13 @@ namespace RandomX { | |||
| 					auto src = instr.src % RegistersCount; | ||||
| 					ibc.type = InstructionType::ISMULH_M; | ||||
| 					ibc.idst = &r[dst]; | ||||
| 					ibc.imm = signExtend2sCompl(instr.getImm32()); | ||||
| 					if (instr.src != instr.dst) { | ||||
| 						ibc.isrc = &r[src]; | ||||
| 						ibc.memMask = ((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask); | ||||
| 						ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); | ||||
| 					} | ||||
| 					else { | ||||
| 						ibc.imm = instr.getImm32(); | ||||
| 						ibc.isrc = &ibc.imm; | ||||
| 						ibc.isrc = &Zero; | ||||
| 						ibc.memMask = ScratchpadL3Mask; | ||||
| 					} | ||||
| 					registerUsage[instr.dst] = i; | ||||
|  | @ -791,13 +799,13 @@ namespace RandomX { | |||
| 					auto src = instr.src % RegistersCount; | ||||
| 					ibc.type = InstructionType::IXOR_M; | ||||
| 					ibc.idst = &r[dst]; | ||||
| 					ibc.imm = signExtend2sCompl(instr.getImm32()); | ||||
| 					if (instr.src != instr.dst) { | ||||
| 						ibc.isrc = &r[src]; | ||||
| 						ibc.memMask = ((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask); | ||||
| 						ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); | ||||
| 					} | ||||
| 					else { | ||||
| 						ibc.imm = instr.getImm32(); | ||||
| 						ibc.isrc = &ibc.imm; | ||||
| 						ibc.isrc = &Zero; | ||||
| 						ibc.memMask = ScratchpadL3Mask; | ||||
| 					} | ||||
| 					registerUsage[instr.dst] = i; | ||||
|  | @ -871,7 +879,8 @@ namespace RandomX { | |||
| 					ibc.type = InstructionType::FADD_M; | ||||
| 					ibc.fdst = &f[dst]; | ||||
| 					ibc.isrc = &r[src]; | ||||
| 					ibc.memMask = ((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask); | ||||
| 					ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); | ||||
| 					ibc.imm = signExtend2sCompl(instr.getImm32()); | ||||
| 				} break; | ||||
| 
 | ||||
| 				CASE_REP(FSUB_R) { | ||||
|  | @ -888,7 +897,8 @@ namespace RandomX { | |||
| 					ibc.type = InstructionType::FSUB_M; | ||||
| 					ibc.fdst = &f[dst]; | ||||
| 					ibc.isrc = &r[src]; | ||||
| 					ibc.memMask = ((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask); | ||||
| 					ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); | ||||
| 					ibc.imm = signExtend2sCompl(instr.getImm32()); | ||||
| 				} break; | ||||
| 
 | ||||
| 				CASE_REP(FSCAL_R) { | ||||
|  | @ -911,7 +921,8 @@ namespace RandomX { | |||
| 					ibc.type = InstructionType::FDIV_M; | ||||
| 					ibc.fdst = &e[dst]; | ||||
| 					ibc.isrc = &r[src]; | ||||
| 					ibc.memMask = ((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask); | ||||
| 					ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); | ||||
| 					ibc.imm = signExtend2sCompl(instr.getImm32()); | ||||
| 				} break; | ||||
| 
 | ||||
| 				CASE_REP(FSQRT_R) { | ||||
|  | @ -926,12 +937,12 @@ namespace RandomX { | |||
| 					ibc.type = InstructionType::COND_R; | ||||
| 					ibc.idst = &r[dst]; | ||||
| 					ibc.isrc = &r[src]; | ||||
| 					ibc.condition = (instr.mod >> 2) & 7; | ||||
| 					ibc.condition = instr.getModCond(); | ||||
| 					ibc.imm = instr.getImm32(); | ||||
| 					//jump condition
 | ||||
| 					int reg = getConditionRegister(registerUsage); | ||||
| 					ibc.target = registerUsage[reg]; | ||||
| 					ibc.shift = (instr.mod >> 5); | ||||
| 					ibc.shift = instr.getModShift3(); | ||||
| 					ibc.creg = &r[reg]; | ||||
| 					for (unsigned j = 0; j < 8; ++j) { //mark all registers as used
 | ||||
| 						registerUsage[j] = i; | ||||
|  | @ -944,13 +955,13 @@ namespace RandomX { | |||
| 					ibc.type = InstructionType::COND_M; | ||||
| 					ibc.idst = &r[dst]; | ||||
| 					ibc.isrc = &r[src]; | ||||
| 					ibc.condition = (instr.mod >> 2) & 7; | ||||
| 					ibc.condition = instr.getModCond(); | ||||
| 					ibc.imm = instr.getImm32(); | ||||
| 					ibc.memMask = ((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask); | ||||
| 					ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); | ||||
| 					//jump condition
 | ||||
| 					int reg = getConditionRegister(registerUsage); | ||||
| 					ibc.target = registerUsage[reg]; | ||||
| 					ibc.shift = (instr.mod >> 5); | ||||
| 					ibc.shift = instr.getModShift3(); | ||||
| 					ibc.creg = &r[reg]; | ||||
| 					for (unsigned j = 0; j < 8; ++j) { //mark all registers as used
 | ||||
| 						registerUsage[j] = i; | ||||
|  | @ -970,7 +981,11 @@ namespace RandomX { | |||
| 					ibc.type = InstructionType::ISTORE; | ||||
| 					ibc.idst = &r[dst]; | ||||
| 					ibc.isrc = &r[src]; | ||||
| 					ibc.memMask = ((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask); | ||||
| 					ibc.imm = signExtend2sCompl(instr.getImm32()); | ||||
| 					if (instr.getModCond()) | ||||
| 						ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); | ||||
| 					else | ||||
| 						ibc.memMask = ScratchpadL3Mask; | ||||
| 				} break; | ||||
| 
 | ||||
| 				CASE_REP(NOP) { | ||||
|  |  | |||
|  | @ -132,5 +132,6 @@ namespace RandomX { | |||
| 		void executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]); | ||||
| 		void executeBytecode(int& i, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]); | ||||
| 		void executeSuperscalar(uint32_t blockNumber, int_reg_t(&r)[8]); | ||||
| 		void* getScratchpadAddress(InstructionByteCode& ibc); | ||||
| 	}; | ||||
| } | ||||
|  | @ -182,7 +182,7 @@ namespace RandomX { | |||
| 	static const uint8_t SQRTPD[] = { 0x66, 0x0f, 0x51 }; | ||||
| 	static const uint8_t AND_OR_MOV_LDMXCSR[] = { 0x25, 0x00, 0x60, 0x00, 0x00, 0x0D, 0xC0, 0x9F, 0x00, 0x00, 0x89, 0x44, 0x24, 0xF8, 0x0F, 0xAE, 0x54, 0x24, 0xF8 }; | ||||
| 	static const uint8_t ROL_RAX[] = { 0x48, 0xc1, 0xc0 }; | ||||
| 	static const uint8_t XOR_RCX_RCX[] = { 0x48, 0x33, 0xC9 }; | ||||
| 	static const uint8_t XOR_ECX_ECX[] = { 0x33, 0xC9 }; | ||||
| 	static const uint8_t REX_CMP_R32I[] = { 0x41, 0x81 }; | ||||
| 	static const uint8_t REX_CMP_M32I[] = { 0x81, 0x3c, 0x06 }; | ||||
| 	static const uint8_t MOVAPD[] = { 0x66, 0x0f, 0x29 }; | ||||
|  | @ -202,6 +202,7 @@ namespace RandomX { | |||
| 	static const uint8_t JZ[] = { 0x0f, 0x84 }; | ||||
| 	static const uint8_t RET = 0xc3; | ||||
| 	static const uint8_t LEA_32[] = { 0x67, 0x41, 0x8d }; | ||||
| 	static const uint8_t MOVNTI[] = { 0x4c, 0x0f, 0xc3 }; | ||||
| 
 | ||||
| 	static const uint8_t NOP1[] = { 0x90 }; | ||||
| 	static const uint8_t NOP2[] = { 0x66, 0x90 }; | ||||
|  | @ -360,7 +361,7 @@ namespace RandomX { | |||
| 		case RandomX::SuperscalarInstructionType::IADD_RS: | ||||
| 			emit(REX_LEA); | ||||
| 			emitByte(0x04 + 8 * instr.dst); | ||||
| 			genSIB(instr.mod % 4, instr.src, instr.dst); | ||||
| 			genSIB(instr.getModShift2(), instr.src, instr.dst); | ||||
| 			break; | ||||
| 		case RandomX::SuperscalarInstructionType::IMUL_R: | ||||
| 			emit(REX_IMUL_RR); | ||||
|  | @ -445,7 +446,7 @@ namespace RandomX { | |||
| 			emitByte(AND_EAX_I); | ||||
| 		else | ||||
| 			emit(AND_ECX_I); | ||||
| 		emit32((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask); | ||||
| 		emit32(instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::genAddressRegDst(Instruction& instr, bool align16 = false) { | ||||
|  | @ -456,9 +457,14 @@ namespace RandomX { | |||
| 		} | ||||
| 		emit32(instr.getImm32()); | ||||
| 		emitByte(AND_EAX_I); | ||||
| 		int32_t maskL1 = align16 ? ScratchpadL1Mask16 : ScratchpadL1Mask; | ||||
| 		int32_t maskL2 = align16 ? ScratchpadL2Mask16 : ScratchpadL2Mask; | ||||
| 		emit32((instr.mod % 4) ? maskL1 : maskL2); | ||||
| 		if (instr.getModCond()) { | ||||
| 			int32_t maskL1 = align16 ? ScratchpadL1Mask16 : ScratchpadL1Mask; | ||||
| 			int32_t maskL2 = align16 ? ScratchpadL2Mask16 : ScratchpadL2Mask; | ||||
| 			emit32(instr.getModMem() ? maskL1 : maskL2); | ||||
| 		} | ||||
| 		else { | ||||
| 			emit32(ScratchpadL3Mask); | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::genAddressImm(Instruction& instr) { | ||||
|  | @ -485,7 +491,7 @@ namespace RandomX { | |||
| 			emitByte(0xac); | ||||
| 		else | ||||
| 			emitByte(0x04 + 8 * instr.dst); | ||||
| 		genSIB(instr.mod % 4, instr.src, instr.dst); | ||||
| 		genSIB(instr.getModShift2(), instr.src, instr.dst); | ||||
| 		if (instr.dst == RegisterNeedsDisplacement) | ||||
| 			emit32(instr.getImm32()); | ||||
| 	} | ||||
|  | @ -880,7 +886,7 @@ namespace RandomX { | |||
| 	} | ||||
| 
 | ||||
| 	static inline uint8_t jumpCondition(Instruction& instr, bool invert = false) { | ||||
| 		switch (((instr.mod >> 2) & 7) ^ invert) | ||||
| 		switch (instr.getModCond() ^ invert) | ||||
| 		{ | ||||
| 		case 0: | ||||
| 			return 0x76; //jbe
 | ||||
|  | @ -902,7 +908,7 @@ namespace RandomX { | |||
| 	} | ||||
| 
 | ||||
| 	static inline uint8_t condition(Instruction& instr) { | ||||
| 		switch ((instr.mod >> 2) & 7) | ||||
| 		switch (instr.getModCond()) | ||||
| 		{ | ||||
| 			case 0: | ||||
| 				return 0x96; //setbe
 | ||||
|  | @ -938,7 +944,7 @@ namespace RandomX { | |||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::handleCondition(Instruction& instr, int i) { | ||||
| 		const int shift = (instr.mod >> 5); | ||||
| 		const int shift = instr.getModShift3(); | ||||
| 		const int conditionMask = ((1 << RANDOMX_CONDITION_BITS) - 1) << shift; | ||||
| 		int reg = getConditionRegister(); | ||||
| 		int target = registerUsage[reg] + 1; | ||||
|  | @ -973,7 +979,7 @@ namespace RandomX { | |||
| 			emit(NOP3); | ||||
| 			return; | ||||
| 		} | ||||
| 		emit(XOR_RCX_RCX); | ||||
| 		emit(XOR_ECX_ECX); | ||||
| 		emit(REX_CMP_R32I); | ||||
| 		emitByte(0xf8 + instr.src); | ||||
| 		emit32(instr.getImm32()); | ||||
|  | @ -988,7 +994,7 @@ namespace RandomX { | |||
| #ifdef RANDOMX_JUMP | ||||
| 		handleCondition(instr, i); | ||||
| #endif | ||||
| 		emit(XOR_RCX_RCX); | ||||
| 		emit(XOR_ECX_ECX); | ||||
| 		genAddressReg(instr); | ||||
| 		emit(REX_CMP_M32I); | ||||
| 		emit32(instr.getImm32()); | ||||
|  | @ -1001,7 +1007,10 @@ namespace RandomX { | |||
| 
 | ||||
| 	void JitCompilerX86::h_ISTORE(Instruction& instr, int i) { | ||||
| 		genAddressRegDst(instr); | ||||
| 		emit(REX_MOV_MR); | ||||
| 		//if (instr.getModCond())
 | ||||
| 			emit(REX_MOV_MR); | ||||
| 		//else
 | ||||
| 		//	emit(MOVNTI);
 | ||||
| 		emitByte(0x04 + 8 * instr.src); | ||||
| 		emitByte(0x06); | ||||
| 	} | ||||
|  |  | |||
|  | @ -92,8 +92,8 @@ Total sum of frequencies must be 256 | |||
| #define RANDOMX_FREQ_ISMULH_M       1 | ||||
| #define RANDOMX_FREQ_IMUL_RCP       8 | ||||
| #define RANDOMX_FREQ_INEG_R         2 | ||||
| #define RANDOMX_FREQ_IXOR_R        16 | ||||
| #define RANDOMX_FREQ_IXOR_M         4 | ||||
| #define RANDOMX_FREQ_IXOR_R        15 | ||||
| #define RANDOMX_FREQ_IXOR_M         5 | ||||
| #define RANDOMX_FREQ_IROR_R        10 | ||||
| #define RANDOMX_FREQ_IROL_R         0 | ||||
| #define RANDOMX_FREQ_ISWAP_R        4 | ||||
|  | @ -108,8 +108,8 @@ Total sum of frequencies must be 256 | |||
| #define RANDOMX_FREQ_FDIV_M         4 | ||||
| #define RANDOMX_FREQ_FSQRT_R        6 | ||||
| 
 | ||||
| #define RANDOMX_FREQ_COND_R         7 | ||||
| #define RANDOMX_FREQ_COND_M         1 | ||||
| #define RANDOMX_FREQ_COND_R         8 | ||||
| #define RANDOMX_FREQ_COND_M         0 | ||||
| #define RANDOMX_FREQ_CFROUND        1 | ||||
| #define RANDOMX_FREQ_ISTORE        16 | ||||
| 
 | ||||
|  |  | |||
|  | @ -98,6 +98,7 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| #define REPCASE32(x) REPCASE31(x) case __COUNTER__: | ||||
| #define REPCASE64(x) REPCASE32(x) REPCASE32(x) | ||||
| #define REPCASE128(x) REPCASE64(x) REPCASE64(x) | ||||
| #define REPCASE256(x) REPCASE128(x) REPCASE128(x) | ||||
| #define REPCASENX(x,N) REPCASE##N(x) | ||||
| #define REPCASEN(x,N) REPCASENX(x,N) | ||||
| #define CASE_REP(x) REPCASEN(x, WT(x)) | ||||
|  | @ -176,6 +176,7 @@ void mine(RandomX::VirtualMachine* vm, std::atomic<uint32_t>& atomicNonce, Atomi | |||
| 		store32(noncePtr, nonce); | ||||
| 		blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0); | ||||
| 		fillAes1Rx4<softAes>((void*)hash, RANDOMX_SCRATCHPAD_L3, scratchpad); | ||||
| 		//dump((char*)scratchpad, RANDOMX_SCRATCHPAD_L3, "spad-before.txt");
 | ||||
| 		vm->resetRoundingMode(); | ||||
| 		vm->setScratchpad(scratchpad); | ||||
| 		for (int chain = 0; chain < RANDOMX_PROGRAM_COUNT - 1; ++chain) { | ||||
|  | @ -194,7 +195,7 @@ void mine(RandomX::VirtualMachine* vm, std::atomic<uint32_t>& atomicNonce, Atomi | |||
| 			} | ||||
| 		}*/ | ||||
| 		vm->getResult<softAes>(scratchpad, RANDOMX_SCRATCHPAD_L3, hash); | ||||
| 		//dump((char*)scratchpad, RANDOMX_SCRATCHPAD_L3, "spad.txt");
 | ||||
| 		//dump((char*)scratchpad, RANDOMX_SCRATCHPAD_L3, "spad-after.txt");
 | ||||
| 		result.xorWith(hash); | ||||
| 		if (RandomX::trace) { | ||||
| 			std::cout << "Nonce: " << nonce << " "; | ||||
|  |  | |||
|  | @ -56,7 +56,7 @@ randomx_isn_14: | |||
| 	sqrtpd xmm6, xmm6 | ||||
| randomx_isn_15: | ||||
| 	; IADD_RS r6, r2, LSH 1 | ||||
| 	lea r14, [r14+r10*2] | ||||
| 	lea r14, [r14+r10*8] | ||||
| randomx_isn_16: | ||||
| 	; FSUB_M f2, L1[r1-1890725713] | ||||
| 	lea eax, [r9d-1890725713] | ||||
|  | @ -68,9 +68,9 @@ randomx_isn_17: | |||
| 	mov ecx, r11d | ||||
| 	ror r12, cl | ||||
| randomx_isn_18: | ||||
| 	; ISTORE L1[r4+1297827817], r4 | ||||
| 	; ISTORE L3[r4+1297827817], r4 | ||||
| 	lea eax, [r12d+1297827817] | ||||
| 	and eax, 16376 | ||||
| 	and eax, 2097144 | ||||
| 	mov qword ptr [rsi+rax], r12 | ||||
| randomx_isn_19: | ||||
| 	; FMUL_R e1, a2 | ||||
|  | @ -145,7 +145,7 @@ randomx_isn_35: | |||
| 	imul r14, 835132161 | ||||
| randomx_isn_36: | ||||
| 	; IADD_RS r3, r4, LSH 2 | ||||
| 	lea r11, [r11+r12*4] | ||||
| 	lea r11, [r11+r12*2] | ||||
| randomx_isn_37: | ||||
| 	; ISUB_M r6, L2[r4+1885029796] | ||||
| 	lea eax, [r12d+1885029796] | ||||
|  | @ -173,13 +173,13 @@ randomx_isn_44: | |||
| 	; FADD_R f1, a2 | ||||
| 	addpd xmm1, xmm10 | ||||
| randomx_isn_45: | ||||
| 	; ISTORE L1[r0+1805562386], r5 | ||||
| 	; ISTORE L3[r0+1805562386], r5 | ||||
| 	lea eax, [r8d+1805562386] | ||||
| 	and eax, 16376 | ||||
| 	and eax, 2097144 | ||||
| 	mov qword ptr [rsi+rax], r13 | ||||
| randomx_isn_46: | ||||
| 	; IADD_RS r0, r7, LSH 0 | ||||
| 	lea r8, [r8+r15*1] | ||||
| 	lea r8, [r8+r15*8] | ||||
| randomx_isn_47: | ||||
| 	; IXOR_R r5, r2 | ||||
| 	xor r13, r10 | ||||
|  | @ -227,7 +227,7 @@ randomx_isn_57: | |||
| 	imul r13, r9 | ||||
| randomx_isn_58: | ||||
| 	; IADD_RS r5, r1, -999103579, LSH 0 | ||||
| 	lea r13, [r13+r9*1-999103579] | ||||
| 	lea r13, [r13+r9*8-999103579] | ||||
| randomx_isn_59: | ||||
| 	; FMUL_R e2, a2 | ||||
| 	mulpd xmm6, xmm10 | ||||
|  | @ -237,7 +237,7 @@ randomx_isn_60: | |||
| 	ror r10, cl | ||||
| randomx_isn_61: | ||||
| 	; IADD_RS r0, r3, LSH 1 | ||||
| 	lea r8, [r8+r11*2] | ||||
| 	lea r8, [r8+r11*1] | ||||
| randomx_isn_62: | ||||
| 	; FSQRT_R e3 | ||||
| 	sqrtpd xmm7, xmm7 | ||||
|  | @ -260,7 +260,7 @@ randomx_isn_66: | |||
| 	sub r12, 841292629 | ||||
| randomx_isn_67: | ||||
| 	; IADD_RS r4, r6, LSH 2 | ||||
| 	lea r12, [r12+r14*4] | ||||
| 	lea r12, [r12+r14*1] | ||||
| randomx_isn_68: | ||||
| 	; FSUB_M f3, L1[r4+613549729] | ||||
| 	lea eax, [r12d+613549729] | ||||
|  | @ -315,9 +315,9 @@ randomx_isn_79: | |||
| 	; IADD_RS r3, r1, LSH 1 | ||||
| 	lea r11, [r11+r9*2] | ||||
| randomx_isn_80: | ||||
| 	; ISTORE L1[r2+1885666804], r4 | ||||
| 	; ISTORE L3[r2+1885666804], r4 | ||||
| 	lea eax, [r10d+1885666804] | ||||
| 	and eax, 16376 | ||||
| 	and eax, 2097144 | ||||
| 	mov qword ptr [rsi+rax], r12 | ||||
| randomx_isn_81: | ||||
| 	; IMULH_R r3, r0 | ||||
|  | @ -348,14 +348,12 @@ randomx_isn_88: | |||
| 	; IMUL_R r1, r3 | ||||
| 	imul r9, r11 | ||||
| randomx_isn_89: | ||||
| 	; COND_M r2, no(L1[r0-122257389], -122257389), LSH 6 | ||||
| 	; COND_R r2, no(r0, -122257389), LSH 6 | ||||
| 	add r8, 64 | ||||
| 	test r8, 8128 | ||||
| 	jz randomx_isn_75 | ||||
| 	xor rcx, rcx | ||||
| 	lea eax, [r8d-122257389] | ||||
| 	and eax, 16376 | ||||
| 	cmp dword ptr [rsi+rax], -122257389 | ||||
| 	cmp r8d, -122257389 | ||||
| 	setno cl | ||||
| 	add r10, rcx | ||||
| randomx_isn_90: | ||||
|  | @ -429,7 +427,7 @@ randomx_isn_107: | |||
| 	mov r14, rdx | ||||
| randomx_isn_108: | ||||
| 	; IADD_RS r7, r0, LSH 1 | ||||
| 	lea r15, [r15+r8*2] | ||||
| 	lea r15, [r15+r8*4] | ||||
| randomx_isn_109: | ||||
| 	; IMUL_R r6, r5 | ||||
| 	imul r14, r13 | ||||
|  | @ -444,13 +442,13 @@ randomx_isn_111: | |||
| 	addpd xmm2, xmm12 | ||||
| randomx_isn_112: | ||||
| 	; IADD_RS r0, r3, LSH 0 | ||||
| 	lea r8, [r8+r11*1] | ||||
| 	lea r8, [r8+r11*2] | ||||
| randomx_isn_113: | ||||
| 	; IADD_RS r3, r4, LSH 1 | ||||
| 	lea r11, [r11+r12*2] | ||||
| randomx_isn_114: | ||||
| 	; IADD_RS r2, r4, LSH 2 | ||||
| 	lea r10, [r10+r12*4] | ||||
| 	lea r10, [r10+r12*8] | ||||
| randomx_isn_115: | ||||
| 	; IMUL_M r7, L1[r2-106928748] | ||||
| 	lea eax, [r10d-106928748] | ||||
|  | @ -464,7 +462,7 @@ randomx_isn_117: | |||
| 	subpd xmm2, xmm10 | ||||
| randomx_isn_118: | ||||
| 	; IADD_RS r2, r2, LSH 0 | ||||
| 	lea r10, [r10+r10*1] | ||||
| 	lea r10, [r10+r10*2] | ||||
| randomx_isn_119: | ||||
| 	; ISUB_R r7, -342152774 | ||||
| 	sub r15, -342152774 | ||||
|  | @ -473,7 +471,7 @@ randomx_isn_120: | |||
| 	lea r12, [r12+r9*2] | ||||
| randomx_isn_121: | ||||
| 	; IADD_RS r4, r7, LSH 2 | ||||
| 	lea r12, [r12+r15*4] | ||||
| 	lea r12, [r12+r15*1] | ||||
| randomx_isn_122: | ||||
| 	; FSUB_R f0, a1 | ||||
| 	subpd xmm0, xmm9 | ||||
|  | @ -504,7 +502,7 @@ randomx_isn_128: | |||
| 	subpd xmm3, xmm9 | ||||
| randomx_isn_129: | ||||
| 	; IADD_RS r1, r2, LSH 2 | ||||
| 	lea r9, [r9+r10*4] | ||||
| 	lea r9, [r9+r10*2] | ||||
| randomx_isn_130: | ||||
| 	; FSUB_R f1, a1 | ||||
| 	subpd xmm1, xmm9 | ||||
|  | @ -531,7 +529,7 @@ randomx_isn_136: | |||
| 	sub r11, r14 | ||||
| randomx_isn_137: | ||||
| 	; IADD_RS r4, r1, LSH 0 | ||||
| 	lea r12, [r12+r9*1] | ||||
| 	lea r12, [r12+r9*8] | ||||
| randomx_isn_138: | ||||
| 	; ISTORE L1[r0+56684410], r0 | ||||
| 	lea eax, [r8d+56684410] | ||||
|  | @ -573,10 +571,10 @@ randomx_isn_145: | |||
| 	sub r13, r11 | ||||
| randomx_isn_146: | ||||
| 	; IADD_RS r0, r3, LSH 1 | ||||
| 	lea r8, [r8+r11*2] | ||||
| 	lea r8, [r8+r11*4] | ||||
| randomx_isn_147: | ||||
| 	; IADD_RS r1, r3, LSH 1 | ||||
| 	lea r9, [r9+r11*2] | ||||
| 	lea r9, [r9+r11*1] | ||||
| randomx_isn_148: | ||||
| 	; FSQRT_R e1 | ||||
| 	sqrtpd xmm5, xmm5 | ||||
|  | @ -624,7 +622,7 @@ randomx_isn_158: | |||
| 	mov qword ptr [rsi+rax], r12 | ||||
| randomx_isn_159: | ||||
| 	; IADD_RS r7, r2, LSH 3 | ||||
| 	lea r15, [r15+r10*8] | ||||
| 	lea r15, [r15+r10*4] | ||||
| randomx_isn_160: | ||||
| 	; IMUL_RCP r7, 2040763167 | ||||
| 	mov rax, 9705702723791900149 | ||||
|  | @ -716,7 +714,7 @@ randomx_isn_182: | |||
| 	mulpd xmm6, xmm10 | ||||
| randomx_isn_183: | ||||
| 	; IADD_RS r6, r2, LSH 0 | ||||
| 	lea r14, [r14+r10*1] | ||||
| 	lea r14, [r14+r10*8] | ||||
| randomx_isn_184: | ||||
| 	; FADD_R f2, a3 | ||||
| 	addpd xmm2, xmm11 | ||||
|  | @ -728,7 +726,7 @@ randomx_isn_186: | |||
| 	xorps xmm3, xmm15 | ||||
| randomx_isn_187: | ||||
| 	; IADD_RS r6, r6, LSH 3 | ||||
| 	lea r14, [r14+r14*8] | ||||
| 	lea r14, [r14+r14*4] | ||||
| randomx_isn_188: | ||||
| 	; FSCAL_R f2 | ||||
| 	xorps xmm2, xmm15 | ||||
|  | @ -781,7 +779,7 @@ randomx_isn_199: | |||
| 	subpd xmm3, xmm11 | ||||
| randomx_isn_200: | ||||
| 	; IADD_RS r2, r5, LSH 2 | ||||
| 	lea r10, [r10+r13*4] | ||||
| 	lea r10, [r10+r13*1] | ||||
| randomx_isn_201: | ||||
| 	; ISUB_M r6, L2[r3+376384700] | ||||
| 	lea eax, [r11d+376384700] | ||||
|  | @ -811,7 +809,7 @@ randomx_isn_207: | |||
| 	xorps xmm1, xmm15 | ||||
| randomx_isn_208: | ||||
| 	; IADD_RS r6, r3, LSH 1 | ||||
| 	lea r14, [r14+r11*2] | ||||
| 	lea r14, [r14+r11*1] | ||||
| randomx_isn_209: | ||||
| 	; FSUB_M f0, L1[r4-557177119] | ||||
| 	lea eax, [r12d-557177119] | ||||
|  | @ -874,7 +872,7 @@ randomx_isn_223: | |||
| 	xorps xmm2, xmm15 | ||||
| randomx_isn_224: | ||||
| 	; IADD_RS r5, r4, 312567979, LSH 1 | ||||
| 	lea r13, [r13+r12*2+312567979] | ||||
| 	lea r13, [r13+r12*4+312567979] | ||||
| randomx_isn_225: | ||||
| 	; ISTORE L2[r2+260885699], r1 | ||||
| 	lea eax, [r10d+260885699] | ||||
|  | @ -899,7 +897,7 @@ randomx_isn_229: | |||
| 	xchg r8, r14 | ||||
| randomx_isn_230: | ||||
| 	; IADD_RS r2, r7, LSH 2 | ||||
| 	lea r10, [r10+r15*4] | ||||
| 	lea r10, [r10+r15*1] | ||||
| randomx_isn_231: | ||||
| 	; FMUL_R e1, a0 | ||||
| 	mulpd xmm5, xmm8 | ||||
|  | @ -925,7 +923,7 @@ randomx_isn_237: | |||
| 	subpd xmm1, xmm11 | ||||
| randomx_isn_238: | ||||
| 	; IADD_RS r4, r2, LSH 1 | ||||
| 	lea r12, [r12+r10*2] | ||||
| 	lea r12, [r12+r10*4] | ||||
| randomx_isn_239: | ||||
| 	; IMUL_RCP r7, 3065786637 | ||||
| 	mov rax, 12921343181238534701 | ||||
|  | @ -958,14 +956,12 @@ randomx_isn_246: | |||
| 	and eax, 262136 | ||||
| 	sub r15, qword ptr [rsi+rax] | ||||
| randomx_isn_247: | ||||
| 	; COND_M r2, be(L1[r5-8545330], -8545330), LSH 2 | ||||
| 	; COND_R r2, be(r5, -8545330), LSH 2 | ||||
| 	add r9, 4 | ||||
| 	test r9, 508 | ||||
| 	jz randomx_isn_223 | ||||
| 	xor rcx, rcx | ||||
| 	lea eax, [r13d-8545330] | ||||
| 	and eax, 16376 | ||||
| 	cmp dword ptr [rsi+rax], -8545330 | ||||
| 	cmp r13d, -8545330 | ||||
| 	setbe cl | ||||
| 	add r10, rcx | ||||
| randomx_isn_248: | ||||
|  | @ -981,13 +977,13 @@ randomx_isn_250: | |||
| 	addpd xmm3, xmm8 | ||||
| randomx_isn_251: | ||||
| 	; IADD_RS r0, r0, LSH 0 | ||||
| 	lea r8, [r8+r8*1] | ||||
| 	lea r8, [r8+r8*4] | ||||
| randomx_isn_252: | ||||
| 	; ISUB_R r4, r2 | ||||
| 	sub r12, r10 | ||||
| randomx_isn_253: | ||||
| 	; IADD_RS r5, r4, 256175395, LSH 0 | ||||
| 	lea r13, [r13+r12*1+256175395] | ||||
| 	lea r13, [r13+r12*4+256175395] | ||||
| randomx_isn_254: | ||||
| 	; IADD_RS r6, r7, LSH 2 | ||||
| 	lea r14, [r14+r15*4] | ||||
|  |  | |||
|  | @ -348,7 +348,7 @@ namespace RandomX { | |||
| 			instr.opcode = getType(); | ||||
| 			instr.dst = dst_; | ||||
| 			instr.src = src_ >= 0 ? src_ : dst_; | ||||
| 			instr.mod = mod_; | ||||
| 			instr.setMod(mod_); | ||||
| 			instr.setImm32(imm32_); | ||||
| 		} | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue