mirror of
				https://git.wownero.com/wownero/RandomWOW.git
				synced 2024-08-15 00:23:14 +00:00 
			
		
		
		
	Merge branch 'feature/branches' into dev
Conflicts: src/JitCompilerX86.cpp src/JitCompilerX86.hpp src/main.cpp
This commit is contained in:
		
						commit
						4c1ae951de
					
				
					 11 changed files with 1300 additions and 808 deletions
				
			
		|  | @ -18,6 +18,7 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| */ | ||||
| //#define TRACE
 | ||||
| 
 | ||||
| #include <climits> | ||||
| #include "AssemblyGeneratorX86.hpp" | ||||
| #include "common.hpp" | ||||
| #include "reciprocal.h" | ||||
|  | @ -45,9 +46,25 @@ namespace RandomX { | |||
| 	static const char* regDatasetAddr = "rdi"; | ||||
| 	static const char* regScratchpadAddr = "rsi"; | ||||
| 
 | ||||
| 	int AssemblyGeneratorX86::getConditionRegister() { | ||||
| 		int min = INT_MAX; | ||||
| 		int minIndex; | ||||
| 		for (unsigned i = 0; i < 8; ++i) { | ||||
| 			if (registerUsage[i] < min) { | ||||
| 				min = registerUsage[i]; | ||||
| 				minIndex = i; | ||||
| 			} | ||||
| 		} | ||||
| 		return minIndex; | ||||
| 	} | ||||
| 
 | ||||
| 	void AssemblyGeneratorX86::generateProgram(Program& prog) { | ||||
| 		for (unsigned i = 0; i < 8; ++i) { | ||||
| 			registerUsage[i] = -1; | ||||
| 		} | ||||
| 		asmCode.str(std::string()); //clear
 | ||||
| 		for (unsigned i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) { | ||||
| 			asmCode << "randomx_isn_" << i << ":" << std::endl; | ||||
| 			Instruction& instr = prog(i); | ||||
| 			instr.src %= RegistersCount; | ||||
| 			instr.dst %= RegistersCount; | ||||
|  | @ -96,6 +113,7 @@ namespace RandomX { | |||
| 
 | ||||
| 	//1 uOP
 | ||||
| 	void AssemblyGeneratorX86::h_IADD_R(Instruction& instr, int i) { | ||||
| 		registerUsage[instr.dst] = i; | ||||
| 		if (instr.src != instr.dst) { | ||||
| 			asmCode << "\tadd " << regR[instr.dst] << ", " << regR[instr.src] << std::endl; | ||||
| 		} | ||||
|  | @ -107,6 +125,7 @@ namespace RandomX { | |||
| 
 | ||||
| 	//2.75 uOP
 | ||||
| 	void AssemblyGeneratorX86::h_IADD_M(Instruction& instr, int i) { | ||||
| 		registerUsage[instr.dst] = i; | ||||
| 		if (instr.src != instr.dst) { | ||||
| 			genAddressReg(instr); | ||||
| 			asmCode << "\tadd " << regR[instr.dst] << ", qword ptr [rsi+rax]" << std::endl; | ||||
|  | @ -119,12 +138,14 @@ namespace RandomX { | |||
| 
 | ||||
| 	//1 uOP
 | ||||
| 	void AssemblyGeneratorX86::h_IADD_RC(Instruction& instr, int i) { | ||||
| 		registerUsage[instr.dst] = i; | ||||
| 		asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl; | ||||
| 		traceint(instr); | ||||
| 	} | ||||
| 
 | ||||
| 	//1 uOP
 | ||||
| 	void AssemblyGeneratorX86::h_ISUB_R(Instruction& instr, int i) { | ||||
| 		registerUsage[instr.dst] = i; | ||||
| 		if (instr.src != instr.dst) { | ||||
| 			asmCode << "\tsub " << regR[instr.dst] << ", " << regR[instr.src] << std::endl; | ||||
| 		} | ||||
|  | @ -136,6 +157,7 @@ namespace RandomX { | |||
| 
 | ||||
| 	//2.75 uOP
 | ||||
| 	void AssemblyGeneratorX86::h_ISUB_M(Instruction& instr, int i) { | ||||
| 		registerUsage[instr.dst] = i; | ||||
| 		if (instr.src != instr.dst) { | ||||
| 			genAddressReg(instr); | ||||
| 			asmCode << "\tsub " << regR[instr.dst] << ", qword ptr [rsi+rax]" << std::endl; | ||||
|  | @ -148,12 +170,14 @@ namespace RandomX { | |||
| 
 | ||||
| 	//1 uOP
 | ||||
| 	void AssemblyGeneratorX86::h_IMUL_9C(Instruction& instr, int i) { | ||||
| 		registerUsage[instr.dst] = i; | ||||
| 		asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.dst] << "*8" << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl; | ||||
| 		traceint(instr); | ||||
| 	} | ||||
| 
 | ||||
| 	//1 uOP
 | ||||
| 	void AssemblyGeneratorX86::h_IMUL_R(Instruction& instr, int i) { | ||||
| 		registerUsage[instr.dst] = i; | ||||
| 		if (instr.src != instr.dst) { | ||||
| 			asmCode << "\timul " << regR[instr.dst] << ", " << regR[instr.src] << std::endl; | ||||
| 		} | ||||
|  | @ -165,6 +189,7 @@ namespace RandomX { | |||
| 
 | ||||
| 	//2.75 uOP
 | ||||
| 	void AssemblyGeneratorX86::h_IMUL_M(Instruction& instr, int i) { | ||||
| 		registerUsage[instr.dst] = i; | ||||
| 		if (instr.src != instr.dst) { | ||||
| 			genAddressReg(instr); | ||||
| 			asmCode << "\timul " << regR[instr.dst] << ", qword ptr [rsi+rax]" << std::endl; | ||||
|  | @ -177,6 +202,7 @@ namespace RandomX { | |||
| 
 | ||||
| 	//4 uOPs
 | ||||
| 	void AssemblyGeneratorX86::h_IMULH_R(Instruction& instr, int i) { | ||||
| 		registerUsage[instr.dst] = i; | ||||
| 		asmCode << "\tmov rax, " << regR[instr.dst] << std::endl; | ||||
| 		asmCode << "\tmul " << regR[instr.src] << std::endl; | ||||
| 		asmCode << "\tmov " << regR[instr.dst] << ", rdx" << std::endl; | ||||
|  | @ -185,6 +211,7 @@ namespace RandomX { | |||
| 
 | ||||
| 	//5.75 uOPs
 | ||||
| 	void AssemblyGeneratorX86::h_IMULH_M(Instruction& instr, int i) { | ||||
| 		registerUsage[instr.dst] = i; | ||||
| 		if (instr.src != instr.dst) { | ||||
| 			genAddressReg(instr, "ecx"); | ||||
| 			asmCode << "\tmov rax, " << regR[instr.dst] << std::endl; | ||||
|  | @ -200,6 +227,7 @@ namespace RandomX { | |||
| 
 | ||||
| 	//4 uOPs
 | ||||
| 	void AssemblyGeneratorX86::h_ISMULH_R(Instruction& instr, int i) { | ||||
| 		registerUsage[instr.dst] = i; | ||||
| 		asmCode << "\tmov rax, " << regR[instr.dst] << std::endl; | ||||
| 		asmCode << "\timul " << regR[instr.src] << std::endl; | ||||
| 		asmCode << "\tmov " << regR[instr.dst] << ", rdx" << std::endl; | ||||
|  | @ -208,6 +236,7 @@ namespace RandomX { | |||
| 
 | ||||
| 	//5.75 uOPs
 | ||||
| 	void AssemblyGeneratorX86::h_ISMULH_M(Instruction& instr, int i) { | ||||
| 		registerUsage[instr.dst] = i; | ||||
| 		if (instr.src != instr.dst) { | ||||
| 			genAddressReg(instr, "ecx"); | ||||
| 			asmCode << "\tmov rax, " << regR[instr.dst] << std::endl; | ||||
|  | @ -223,12 +252,14 @@ namespace RandomX { | |||
| 
 | ||||
| 	//1 uOP
 | ||||
| 	void AssemblyGeneratorX86::h_INEG_R(Instruction& instr, int i) { | ||||
| 		registerUsage[instr.dst] = i; | ||||
| 		asmCode << "\tneg " << regR[instr.dst] << std::endl; | ||||
| 		traceint(instr); | ||||
| 	} | ||||
| 
 | ||||
| 	//1 uOP
 | ||||
| 	void AssemblyGeneratorX86::h_IXOR_R(Instruction& instr, int i) { | ||||
| 		registerUsage[instr.dst] = i; | ||||
| 		if (instr.src != instr.dst) { | ||||
| 			asmCode << "\txor " << regR[instr.dst] << ", " << regR[instr.src] << std::endl; | ||||
| 		} | ||||
|  | @ -240,6 +271,7 @@ namespace RandomX { | |||
| 
 | ||||
| 	//2.75 uOP
 | ||||
| 	void AssemblyGeneratorX86::h_IXOR_M(Instruction& instr, int i) { | ||||
| 		registerUsage[instr.dst] = i; | ||||
| 		if (instr.src != instr.dst) { | ||||
| 			genAddressReg(instr); | ||||
| 			asmCode << "\txor " << regR[instr.dst] << ", qword ptr [rsi+rax]" << std::endl; | ||||
|  | @ -252,6 +284,7 @@ namespace RandomX { | |||
| 
 | ||||
| 	//1.75 uOPs
 | ||||
| 	void AssemblyGeneratorX86::h_IROR_R(Instruction& instr, int i) { | ||||
| 		registerUsage[instr.dst] = i; | ||||
| 		if (instr.src != instr.dst) { | ||||
| 			asmCode << "\tmov ecx, " << regR32[instr.src] << std::endl; | ||||
| 			asmCode << "\tror " << regR[instr.dst] << ", cl" << std::endl; | ||||
|  | @ -264,6 +297,7 @@ namespace RandomX { | |||
| 
 | ||||
| 	//1.75 uOPs
 | ||||
| 	void AssemblyGeneratorX86::h_IROL_R(Instruction& instr, int i) { | ||||
| 		registerUsage[instr.dst] = i; | ||||
| 		if (instr.src != instr.dst) { | ||||
| 			asmCode << "\tmov ecx, " << regR32[instr.src] << std::endl; | ||||
| 			asmCode << "\trol " << regR[instr.dst] << ", cl" << std::endl; | ||||
|  | @ -277,6 +311,7 @@ namespace RandomX { | |||
| 	//2 uOPs
 | ||||
| 	void AssemblyGeneratorX86::h_IMUL_RCP(Instruction& instr, int i) { | ||||
| 		if (instr.getImm32() != 0) { | ||||
| 			registerUsage[instr.dst] = i; | ||||
| 			uint32_t divisor = instr.getImm32(); | ||||
| 			asmCode << "\tmov rax, " << reciprocal(instr.getImm32()) << std::endl; | ||||
| 			asmCode << "\timul " << regR[instr.dst] << ", rax" << std::endl; | ||||
|  | @ -295,6 +330,9 @@ namespace RandomX { | |||
| 	//2 uOPs
 | ||||
| 	void AssemblyGeneratorX86::h_ISWAP_R(Instruction& instr, int i) { | ||||
| 		if (instr.src != instr.dst) { | ||||
| 			//std::swap(registerUsage[instr.dst], registerUsage[instr.src]);
 | ||||
| 			registerUsage[instr.dst] = i; | ||||
| 			registerUsage[instr.src] = i; | ||||
| 			asmCode << "\txchg " << regR[instr.dst] << ", " << regR[instr.src] << std::endl; | ||||
| 			traceint(instr); | ||||
| 		} | ||||
|  | @ -435,8 +473,23 @@ namespace RandomX { | |||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	void AssemblyGeneratorX86::handleCondition(Instruction& instr, int i) { | ||||
| 		const int shift = (instr.mod >> 5); | ||||
| 		const int conditionMask = ((1 << RANDOMX_CONDITION_BITS) - 1) << shift; | ||||
| 		int reg = getConditionRegister(); | ||||
| 		int target = registerUsage[reg] + 1; | ||||
| 		registerUsage[reg] = i; | ||||
| 		asmCode << "\tadd " << regR[reg] << ", " << (1 << shift) << std::endl; | ||||
| 		asmCode << "\ttest " << regR[reg] << ", " << conditionMask << std::endl; | ||||
| 		asmCode << "\tjz randomx_isn_" << target << std::endl; | ||||
| 		for (unsigned j = 0; j < 8; ++j) { //mark all registers as used
 | ||||
| 			registerUsage[j] = i; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	//4 uOPs
 | ||||
| 	void AssemblyGeneratorX86::h_COND_R(Instruction& instr, int i) { | ||||
| 		handleCondition(instr, i); | ||||
| 		asmCode << "\txor ecx, ecx" << std::endl; | ||||
| 		asmCode << "\tcmp " << regR32[instr.src] << ", " << (int32_t)instr.getImm32() << std::endl; | ||||
| 		asmCode << "\tset" << condition(instr) << " cl" << std::endl; | ||||
|  | @ -446,6 +499,7 @@ namespace RandomX { | |||
| 
 | ||||
| 	//6 uOPs
 | ||||
| 	void AssemblyGeneratorX86::h_COND_M(Instruction& instr, int i) { | ||||
| 		handleCondition(instr, i); | ||||
| 		asmCode << "\txor ecx, ecx" << std::endl; | ||||
| 		genAddressReg(instr); | ||||
| 		asmCode << "\tcmp dword ptr [rsi+rax], " << (int32_t)instr.getImm32() << std::endl; | ||||
|  |  | |||
|  | @ -38,10 +38,13 @@ namespace RandomX { | |||
| 	private: | ||||
| 		static InstructionGenerator engine[256]; | ||||
| 		std::stringstream asmCode; | ||||
| 		int registerUsage[8]; | ||||
| 
 | ||||
| 		void genAddressReg(Instruction&, const char*); | ||||
| 		void genAddressRegDst(Instruction&, int); | ||||
| 		int32_t genAddressImm(Instruction&); | ||||
| 		int getConditionRegister(); | ||||
| 		void handleCondition(Instruction&, int); | ||||
| 
 | ||||
| 		void generateCode(Instruction&, int); | ||||
| 
 | ||||
|  |  | |||
|  | @ -302,13 +302,13 @@ namespace RandomX { | |||
| 	} | ||||
| 
 | ||||
| 	void Instruction::h_COND_R(std::ostream& os) const { | ||||
| 		os << "r" << (int)dst << ", " << condition((mod >> 2) & 7) << "(r" << (int)src << ", " << (int32_t)getImm32() << ")" << std::endl; | ||||
| 		os << "r" << (int)dst << ", " << condition((mod >> 2) & 7) << "(r" << (int)src << ", " << (int32_t)getImm32() << "), " << (int)(mod >> 5) << std::endl; | ||||
| 	} | ||||
| 
 | ||||
| 	void Instruction::h_COND_M(std::ostream& os) const { | ||||
| 		os << "r" << (int)dst << ", " << condition((mod >> 2) & 7) << "("; | ||||
| 		genAddressReg(os); | ||||
| 		os << ", " << (int32_t)getImm32() << ")" << std::endl; | ||||
| 		os << ", " << (int32_t)getImm32() << "), " << (int)(mod >> 5) << std::endl; | ||||
| 	} | ||||
| 
 | ||||
| 	void  Instruction::h_ISTORE(std::ostream& os) const { | ||||
|  |  | |||
|  | @ -18,6 +18,7 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| */ | ||||
| //#define TRACE
 | ||||
| //#define FPUCHECK
 | ||||
| #define RANDOMX_JUMP | ||||
| #include "InterpretedVirtualMachine.hpp" | ||||
| #include "dataset.hpp" | ||||
| #include "Cache.hpp" | ||||
|  | @ -45,25 +46,12 @@ constexpr bool fpuCheck = false; | |||
| namespace RandomX { | ||||
| 
 | ||||
| 	InterpretedVirtualMachine::~InterpretedVirtualMachine() { | ||||
| 		if (asyncWorker) { | ||||
| 			delete mem.ds.asyncWorker; | ||||
| 		} | ||||
| 
 | ||||
| 	} | ||||
| 
 | ||||
| 	void InterpretedVirtualMachine::setDataset(dataset_t ds, uint64_t size) { | ||||
| 		if (asyncWorker) { | ||||
| 			if (softAes) { | ||||
| 				mem.ds.asyncWorker = new LightClientAsyncWorker(ds.cache); | ||||
| 			} | ||||
| 			else { | ||||
| 				mem.ds.asyncWorker = new LightClientAsyncWorker(ds.cache); | ||||
| 			} | ||||
| 			readDataset = &datasetReadLightAsync; | ||||
| 		} | ||||
| 		else { | ||||
| 			mem.ds = ds; | ||||
| 			readDataset = &datasetReadLight; | ||||
| 		} | ||||
| 		mem.ds = ds; | ||||
| 		readDataset = &datasetReadLight; | ||||
| 		datasetRange = (size - RANDOMX_DATASET_SIZE + CacheLineSize) / CacheLineSize; | ||||
| 	} | ||||
| 
 | ||||
|  | @ -75,14 +63,10 @@ namespace RandomX { | |||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	template<int N> | ||||
| 	void InterpretedVirtualMachine::executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) { | ||||
| 		executeBytecode(N, r, f, e, a); | ||||
| 		executeBytecode<N + 1>(r, f, e, a); | ||||
| 	} | ||||
| 
 | ||||
| 	template<> | ||||
| 	void InterpretedVirtualMachine::executeBytecode<RANDOMX_PROGRAM_SIZE>(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) { | ||||
| 		for (int ic = 0; ic < RANDOMX_PROGRAM_SIZE; ++ic) { | ||||
| 			executeBytecode(ic, r, f, e, a); | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	static void print(int_reg_t r) { | ||||
|  | @ -114,8 +98,9 @@ namespace RandomX { | |||
| 		return std::fpclassify(x) == FP_SUBNORMAL; | ||||
| 	} | ||||
| 
 | ||||
| 	FORCE_INLINE void InterpretedVirtualMachine::executeBytecode(int i, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) { | ||||
| 		auto& ibc = byteCode[i]; | ||||
| 	FORCE_INLINE void InterpretedVirtualMachine::executeBytecode(int& ic, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) { | ||||
| 		auto& ibc = byteCode[ic]; | ||||
| 		if (trace) std::cout << std::dec << std::setw(3) << ic << " " << program(ic); | ||||
| 		//if(trace) printState(r, f, e, a);
 | ||||
| 		switch (ibc.type) | ||||
| 		{ | ||||
|  | @ -234,10 +219,38 @@ namespace RandomX { | |||
| 			} break; | ||||
| 
 | ||||
| 			case InstructionType::COND_R: { | ||||
| #ifdef RANDOMX_JUMP | ||||
| 				*ibc.creg += (1 << ibc.shift); | ||||
| 				const uint64_t conditionMask = ((1ULL << RANDOMX_CONDITION_BITS) - 1) << ibc.shift; | ||||
| 				if ((*ibc.creg & conditionMask) == 0) { | ||||
| #ifdef STATS | ||||
| 					count_JUMP_taken++; | ||||
| #endif | ||||
| 					ic = ibc.target; | ||||
| 					break; | ||||
| 				} | ||||
| #ifdef STATS | ||||
| 				count_JUMP_not_taken++; | ||||
| #endif | ||||
| #endif | ||||
| 				*ibc.idst += condition(ibc.condition, *ibc.isrc, ibc.imm) ? 1 : 0; | ||||
| 			} break; | ||||
| 
 | ||||
| 			case InstructionType::COND_M: { | ||||
| #ifdef RANDOMX_JUMP | ||||
| 				*ibc.creg += (1uLL << ibc.shift); | ||||
| 				const uint64_t conditionMask = ((1ULL << RANDOMX_CONDITION_BITS) - 1) << ibc.shift; | ||||
| 				if ((*ibc.creg & conditionMask) == 0) { | ||||
| #ifdef STATS | ||||
| 					count_JUMP_taken++; | ||||
| #endif | ||||
| 					ic = ibc.target; | ||||
| 					break; | ||||
| 				} | ||||
| #ifdef STATS | ||||
| 				count_JUMP_not_taken++; | ||||
| #endif | ||||
| #endif | ||||
| 				*ibc.idst += condition(ibc.condition, load64(scratchpad + (*ibc.isrc & ibc.memMask)), ibc.imm) ? 1 : 0; | ||||
| 			} break; | ||||
| 
 | ||||
|  | @ -257,7 +270,6 @@ namespace RandomX { | |||
| 				UNREACHABLE; | ||||
| 		} | ||||
| 		if (trace) { | ||||
| 			std::cout << program(i); | ||||
| 			if(ibc.type < 20 || ibc.type == 31 || ibc.type == 32) | ||||
| 				print(*ibc.idst); | ||||
| 			else //if(ibc.type >= 20 && ibc.type <= 30)
 | ||||
|  | @ -334,28 +346,15 @@ namespace RandomX { | |||
| 				std::cout << "-----------------------------------" << std::endl; | ||||
| 			} | ||||
| 
 | ||||
| 			executeBytecode<0>(r, f, e, a); | ||||
| 			executeBytecode(r, f, e, a); | ||||
| 
 | ||||
| 			if (asyncWorker) { | ||||
| 				ILightClientAsyncWorker* aw = mem.ds.asyncWorker; | ||||
| 				const uint64_t* datasetLine = aw->getBlock(datasetBase + mem.ma); | ||||
| 				for (int i = 0; i < RegistersCount; ++i) | ||||
| 					r[i] ^= datasetLine[i]; | ||||
| 				mem.mx ^= r[readReg2] ^ r[readReg3]; | ||||
| 				mem.mx &= CacheLineAlignMask; //align to cache line
 | ||||
| 				std::swap(mem.mx, mem.ma); | ||||
| 				aw->prepareBlock(datasetBase + mem.ma); | ||||
| 			} | ||||
| 			else { | ||||
| 				mem.mx ^= r[readReg2] ^ r[readReg3]; | ||||
| 				//mem.mx &= CacheLineAlignMask;
 | ||||
| 				Cache& cache = mem.ds.cache; | ||||
| 				uint64_t datasetLine[CacheLineSize / sizeof(uint64_t)]; | ||||
| 				initBlock(cache, (uint8_t*)datasetLine, datasetBase + mem.ma / CacheLineSize, RANDOMX_CACHE_ACCESSES / 8); | ||||
| 				for (int i = 0; i < RegistersCount; ++i) | ||||
| 					r[i] ^= datasetLine[i]; | ||||
| 				std::swap(mem.mx, mem.ma); | ||||
| 			} | ||||
| 			mem.mx ^= r[readReg2] ^ r[readReg3]; | ||||
| 			Cache& cache = mem.ds.cache; | ||||
| 			uint64_t datasetLine[CacheLineSize / sizeof(uint64_t)]; | ||||
| 			initBlock(cache, (uint8_t*)datasetLine, datasetBase + mem.ma / CacheLineSize, RANDOMX_CACHE_ACCESSES / 8); | ||||
| 			for (int i = 0; i < RegistersCount; ++i) | ||||
| 				r[i] ^= datasetLine[i]; | ||||
| 			std::swap(mem.mx, mem.ma); | ||||
| 
 | ||||
| 			if (trace) { | ||||
| 				std::cout << "iteration " << std::dec << ic << std::endl; | ||||
|  | @ -419,9 +418,25 @@ namespace RandomX { | |||
| 		_mm_store_pd(&reg.e[3].lo, e[3]); | ||||
| 	} | ||||
| 
 | ||||
| 	static int getConditionRegister(int(®isterUsage)[8]) { | ||||
| 		int min = INT_MAX; | ||||
| 		int minIndex; | ||||
| 		for (unsigned i = 0; i < 8; ++i) { | ||||
| 			if (registerUsage[i] < min) { | ||||
| 				min = registerUsage[i]; | ||||
| 				minIndex = i; | ||||
| 			} | ||||
| 		} | ||||
| 		return minIndex; | ||||
| 	} | ||||
| 
 | ||||
| #include "instructionWeights.hpp" | ||||
| 
 | ||||
| 	void InterpretedVirtualMachine::precompileProgram(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) { | ||||
| 		int registerUsage[8]; | ||||
| 		for (unsigned i = 0; i < 8; ++i) { | ||||
| 			registerUsage[i] = -1; | ||||
| 		} | ||||
| 		for (unsigned i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) { | ||||
| 			auto& instr = program(i); | ||||
| 			auto& ibc = byteCode[i]; | ||||
|  | @ -438,6 +453,7 @@ namespace RandomX { | |||
| 						ibc.imm = signExtend2sCompl(instr.getImm32()); | ||||
| 						ibc.isrc = &ibc.imm; | ||||
| 					} | ||||
| 					registerUsage[instr.dst] = i; | ||||
| 				} break; | ||||
| 
 | ||||
| 				CASE_REP(IADD_M) { | ||||
|  | @ -454,6 +470,7 @@ namespace RandomX { | |||
| 						ibc.isrc = &ibc.imm; | ||||
| 						ibc.memMask = ScratchpadL3Mask; | ||||
| 					} | ||||
| 					registerUsage[instr.dst] = i; | ||||
| 				} break; | ||||
| 
 | ||||
| 				CASE_REP(IADD_RC) { | ||||
|  | @ -463,6 +480,7 @@ namespace RandomX { | |||
| 					ibc.idst = &r[dst]; | ||||
| 					ibc.isrc = &r[src]; | ||||
| 					ibc.imm = signExtend2sCompl(instr.getImm32()); | ||||
| 					registerUsage[instr.dst] = i; | ||||
| 				} break; | ||||
| 
 | ||||
| 				CASE_REP(ISUB_R) { | ||||
|  | @ -477,6 +495,7 @@ namespace RandomX { | |||
| 						ibc.imm = signExtend2sCompl(instr.getImm32()); | ||||
| 						ibc.isrc = &ibc.imm; | ||||
| 					} | ||||
| 					registerUsage[instr.dst] = i; | ||||
| 				} break; | ||||
| 
 | ||||
| 				CASE_REP(ISUB_M) { | ||||
|  | @ -493,6 +512,7 @@ namespace RandomX { | |||
| 						ibc.isrc = &ibc.imm; | ||||
| 						ibc.memMask = ScratchpadL3Mask; | ||||
| 					} | ||||
| 					registerUsage[instr.dst] = i; | ||||
| 				} break; | ||||
| 
 | ||||
| 				CASE_REP(IMUL_9C) { | ||||
|  | @ -500,6 +520,7 @@ namespace RandomX { | |||
| 					ibc.type = InstructionType::IMUL_9C; | ||||
| 					ibc.idst = &r[dst]; | ||||
| 					ibc.imm = signExtend2sCompl(instr.getImm32()); | ||||
| 					registerUsage[instr.dst] = i; | ||||
| 				} break; | ||||
| 
 | ||||
| 				CASE_REP(IMUL_R) { | ||||
|  | @ -514,6 +535,7 @@ namespace RandomX { | |||
| 						ibc.imm = signExtend2sCompl(instr.getImm32()); | ||||
| 						ibc.isrc = &ibc.imm; | ||||
| 					} | ||||
| 					registerUsage[instr.dst] = i; | ||||
| 				} break; | ||||
| 
 | ||||
| 				CASE_REP(IMUL_M) { | ||||
|  | @ -530,6 +552,7 @@ namespace RandomX { | |||
| 						ibc.isrc = &ibc.imm; | ||||
| 						ibc.memMask = ScratchpadL3Mask; | ||||
| 					} | ||||
| 					registerUsage[instr.dst] = i; | ||||
| 				} break; | ||||
| 
 | ||||
| 				CASE_REP(IMULH_R) { | ||||
|  | @ -538,6 +561,7 @@ namespace RandomX { | |||
| 					ibc.type = InstructionType::IMULH_R; | ||||
| 					ibc.idst = &r[dst]; | ||||
| 					ibc.isrc = &r[src]; | ||||
| 					registerUsage[instr.dst] = i; | ||||
| 				} break; | ||||
| 
 | ||||
| 				CASE_REP(IMULH_M) { | ||||
|  | @ -554,6 +578,7 @@ namespace RandomX { | |||
| 						ibc.isrc = &ibc.imm; | ||||
| 						ibc.memMask = ScratchpadL3Mask; | ||||
| 					} | ||||
| 					registerUsage[instr.dst] = i; | ||||
| 				} break; | ||||
| 
 | ||||
| 				CASE_REP(ISMULH_R) { | ||||
|  | @ -562,6 +587,7 @@ namespace RandomX { | |||
| 					ibc.type = InstructionType::ISMULH_R; | ||||
| 					ibc.idst = &r[dst]; | ||||
| 					ibc.isrc = &r[src]; | ||||
| 					registerUsage[instr.dst] = i; | ||||
| 				} break; | ||||
| 
 | ||||
| 				CASE_REP(ISMULH_M) { | ||||
|  | @ -578,6 +604,7 @@ namespace RandomX { | |||
| 						ibc.isrc = &ibc.imm; | ||||
| 						ibc.memMask = ScratchpadL3Mask; | ||||
| 					} | ||||
| 					registerUsage[instr.dst] = i; | ||||
| 				} break; | ||||
| 
 | ||||
| 				CASE_REP(IMUL_RCP) { | ||||
|  | @ -588,6 +615,7 @@ namespace RandomX { | |||
| 						ibc.idst = &r[dst]; | ||||
| 						ibc.imm = reciprocal(divisor); | ||||
| 						ibc.isrc = &ibc.imm; | ||||
| 						registerUsage[instr.dst] = i; | ||||
| 					} | ||||
| 					else { | ||||
| 						ibc.type = InstructionType::NOP; | ||||
|  | @ -598,6 +626,7 @@ namespace RandomX { | |||
| 					auto dst = instr.dst % RegistersCount; | ||||
| 					ibc.type = InstructionType::INEG_R; | ||||
| 					ibc.idst = &r[dst]; | ||||
| 					registerUsage[instr.dst] = i; | ||||
| 				} break; | ||||
| 
 | ||||
| 				CASE_REP(IXOR_R) { | ||||
|  | @ -612,6 +641,7 @@ namespace RandomX { | |||
| 						ibc.imm = signExtend2sCompl(instr.getImm32()); | ||||
| 						ibc.isrc = &ibc.imm; | ||||
| 					} | ||||
| 					registerUsage[instr.dst] = i; | ||||
| 				} break; | ||||
| 
 | ||||
| 				CASE_REP(IXOR_M) { | ||||
|  | @ -628,6 +658,7 @@ namespace RandomX { | |||
| 						ibc.isrc = &ibc.imm; | ||||
| 						ibc.memMask = ScratchpadL3Mask; | ||||
| 					} | ||||
| 					registerUsage[instr.dst] = i; | ||||
| 				} break; | ||||
| 
 | ||||
| 				CASE_REP(IROR_R) { | ||||
|  | @ -642,6 +673,7 @@ namespace RandomX { | |||
| 						ibc.imm = instr.getImm32(); | ||||
| 						ibc.isrc = &ibc.imm; | ||||
| 					} | ||||
| 					registerUsage[instr.dst] = i; | ||||
| 				} break; | ||||
| 
 | ||||
| 				CASE_REP(IROL_R) { | ||||
|  | @ -656,6 +688,7 @@ namespace RandomX { | |||
| 						ibc.imm = instr.getImm32(); | ||||
| 						ibc.isrc = &ibc.imm; | ||||
| 					} | ||||
| 					registerUsage[instr.dst] = i; | ||||
| 				} break; | ||||
| 
 | ||||
| 				CASE_REP(ISWAP_R) { | ||||
|  | @ -665,6 +698,8 @@ namespace RandomX { | |||
| 						ibc.idst = &r[dst]; | ||||
| 						ibc.isrc = &r[src]; | ||||
| 						ibc.type = InstructionType::ISWAP_R; | ||||
| 						registerUsage[instr.dst] = i; | ||||
| 						registerUsage[instr.src] = i; | ||||
| 					} | ||||
| 					else { | ||||
| 						ibc.type = InstructionType::NOP; | ||||
|  | @ -751,6 +786,14 @@ namespace RandomX { | |||
| 					ibc.isrc = &r[src]; | ||||
| 					ibc.condition = (instr.mod >> 2) & 7; | ||||
| 					ibc.imm = instr.getImm32(); | ||||
| 					//jump condition
 | ||||
| 					int reg = getConditionRegister(registerUsage); | ||||
| 					ibc.target = registerUsage[reg]; | ||||
| 					ibc.shift = (instr.mod >> 5); | ||||
| 					ibc.creg = &r[reg]; | ||||
| 					for (unsigned j = 0; j < 8; ++j) { //mark all registers as used
 | ||||
| 						registerUsage[j] = i; | ||||
| 					} | ||||
| 				} break; | ||||
| 
 | ||||
| 				CASE_REP(COND_M) { | ||||
|  | @ -762,6 +805,14 @@ namespace RandomX { | |||
| 					ibc.condition = (instr.mod >> 2) & 7; | ||||
| 					ibc.imm = instr.getImm32(); | ||||
| 					ibc.memMask = ((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask); | ||||
| 					//jump condition
 | ||||
| 					int reg = getConditionRegister(registerUsage); | ||||
| 					ibc.target = registerUsage[reg]; | ||||
| 					ibc.shift = (instr.mod >> 5); | ||||
| 					ibc.creg = &r[reg]; | ||||
| 					for (unsigned j = 0; j < 8; ++j) { //mark all registers as used
 | ||||
| 						registerUsage[j] = i; | ||||
| 					} | ||||
| 				} break; | ||||
| 
 | ||||
| 				CASE_REP(CFROUND) { | ||||
|  |  | |||
|  | @ -52,9 +52,12 @@ namespace RandomX { | |||
| 			uint64_t imm; | ||||
| 			int64_t simm; | ||||
| 		}; | ||||
| 		uint32_t condition; | ||||
| 		int_reg_t* creg; | ||||
| 		uint16_t condition; | ||||
| 		int16_t target; | ||||
| 		uint32_t memMask; | ||||
| 		uint32_t type; | ||||
| 		uint16_t type; | ||||
| 		uint16_t shift; | ||||
| 	}; | ||||
| 
 | ||||
| 	constexpr int asedwfagdewsa = sizeof(InstructionByteCode); | ||||
|  | @ -70,7 +73,7 @@ namespace RandomX { | |||
| 		void operator delete(void* ptr) { | ||||
| 			_mm_free(ptr); | ||||
| 		} | ||||
| 		InterpretedVirtualMachine(bool soft, bool async) : softAes(soft), asyncWorker(async) {} | ||||
| 		InterpretedVirtualMachine(bool soft) : softAes(soft) {} | ||||
| 		~InterpretedVirtualMachine(); | ||||
| 		void setDataset(dataset_t ds, uint64_t size) override; | ||||
| 		void initialize() override; | ||||
|  | @ -78,7 +81,7 @@ namespace RandomX { | |||
| 	private: | ||||
| 		static InstructionHandler engine[256]; | ||||
| 		DatasetReadFunc readDataset; | ||||
| 		bool softAes, asyncWorker; | ||||
| 		bool softAes; | ||||
| 		InstructionByteCode byteCode[RANDOMX_PROGRAM_SIZE]; | ||||
| 		 | ||||
| #ifdef STATS | ||||
|  | @ -112,17 +115,13 @@ namespace RandomX { | |||
| 		int count_FPROUND = 0; | ||||
| 		int count_JUMP_taken = 0; | ||||
| 		int count_JUMP_not_taken = 0; | ||||
| 		int count_CALL_taken = 0; | ||||
| 		int count_CALL_not_taken = 0; | ||||
| 		int count_RET_stack_empty = 0; | ||||
| 		int count_RET_taken = 0; | ||||
| 		int count_jump_taken[8] = { 0 }; | ||||
| 		int count_jump_not_taken[8] = { 0 }; | ||||
| 		int count_max_stack = 0; | ||||
| 		int count_retdepth = 0; | ||||
| 		int count_retdepth_max = 0; | ||||
| 		int count_endstack = 0; | ||||
| 		int count_instructions[ProgramLength] = { 0 }; | ||||
| 		int count_instructions[RANDOMX_PROGRAM_SIZE] = { 0 }; | ||||
| 		int count_FADD_nop = 0; | ||||
| 		int count_FADD_nop2 = 0; | ||||
| 		int count_FSUB_nop = 0; | ||||
|  | @ -132,8 +131,7 @@ namespace RandomX { | |||
| 		int datasetAccess[256] = { 0 }; | ||||
| #endif | ||||
| 		void precompileProgram(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]); | ||||
| 		template<int N> | ||||
| 		void executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]); | ||||
| 		void executeBytecode(int i, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]); | ||||
| 		void executeBytecode(int& i, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]); | ||||
| 	}; | ||||
| } | ||||
|  | @ -18,12 +18,15 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| */ | ||||
| 
 | ||||
| #include <cstring> | ||||
| #include <climits> | ||||
| #include <stdexcept> | ||||
| #include "JitCompilerX86.hpp" | ||||
| #include "Program.hpp" | ||||
| #include "reciprocal.h" | ||||
| #include "virtualMemory.hpp" | ||||
| 
 | ||||
| #define RANDOMX_JUMP | ||||
| 
 | ||||
| namespace RandomX { | ||||
| 
 | ||||
| #if !defined(_M_X64) && !defined(__x86_64__) | ||||
|  | @ -174,6 +177,9 @@ namespace RandomX { | |||
| 	static const uint8_t REX_PADD[] = { 0x66, 0x44, 0x0f }; | ||||
| 	static const uint8_t PADD_OPCODES[] = { 0xfc, 0xfd, 0xfe, 0xd4 }; | ||||
| 	static const uint8_t CALL = 0xe8; | ||||
| 	static const uint8_t REX_ADD_I[] = { 0x49, 0x81 }; | ||||
| 	static const uint8_t REX_TEST[] = { 0x49, 0xF7 }; | ||||
| 	static const uint8_t JZ[] = { 0x0f, 0x84 }; | ||||
| 
 | ||||
| 	size_t JitCompilerX86::getCodeSize() { | ||||
| 		return codePos - prologueSize; | ||||
|  | @ -203,6 +209,12 @@ namespace RandomX { | |||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::generateProgramPrologue(Program& prog) { | ||||
| #ifdef RANDOMX_JUMP | ||||
| 		instructionOffsets.clear(); | ||||
| 		for (unsigned i = 0; i < 8; ++i) { | ||||
| 			registerUsage[i] = -1; | ||||
| 		} | ||||
| #endif | ||||
| 		auto addressRegisters = prog.getEntropy(12); | ||||
| 		uint32_t readReg0 = 0 + (addressRegisters & 1); | ||||
| 		addressRegisters >>= 1; | ||||
|  | @ -222,7 +234,7 @@ namespace RandomX { | |||
| 			Instruction& instr = prog(i); | ||||
| 			instr.src %= RegistersCount; | ||||
| 			instr.dst %= RegistersCount; | ||||
| 			generateCode(instr); | ||||
| 			generateCode(instr, i); | ||||
| 		} | ||||
| 		emit(REX_MOV_RR); | ||||
| 		emitByte(0xc0 + readReg2); | ||||
|  | @ -241,9 +253,12 @@ namespace RandomX { | |||
| 		emitByte(0x90); | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::generateCode(Instruction& instr) { | ||||
| 	void JitCompilerX86::generateCode(Instruction& instr, int i) { | ||||
| #ifdef RANDOMX_JUMP | ||||
| 		instructionOffsets.push_back(codePos); | ||||
| #endif | ||||
| 		auto generator = engine[instr.opcode]; | ||||
| 		(this->*generator)(instr); | ||||
| 		(this->*generator)(instr, i); | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::genAddressReg(Instruction& instr, bool rax = true) { | ||||
|  | @ -269,7 +284,8 @@ namespace RandomX { | |||
| 		emit32(instr.getImm32() & ScratchpadL3Mask); | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_IADD_R(Instruction& instr) { | ||||
| 	void JitCompilerX86::h_IADD_R(Instruction& instr, int i) { | ||||
| 		registerUsage[instr.dst] = i; | ||||
| 		if (instr.src != instr.dst) { | ||||
| 			emit(REX_ADD_RR); | ||||
| 			emitByte(0xc0 + 8 * instr.dst + instr.src); | ||||
|  | @ -281,7 +297,8 @@ namespace RandomX { | |||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_IADD_M(Instruction& instr) { | ||||
| 	void JitCompilerX86::h_IADD_M(Instruction& instr, int i) { | ||||
| 		registerUsage[instr.dst] = i; | ||||
| 		if (instr.src != instr.dst) { | ||||
| 			genAddressReg(instr); | ||||
| 			emit(REX_ADD_RM); | ||||
|  | @ -299,14 +316,16 @@ namespace RandomX { | |||
| 		emitByte((scale << 6) | (index << 3) | base); | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_IADD_RC(Instruction& instr) { | ||||
| 	void JitCompilerX86::h_IADD_RC(Instruction& instr, int i) { | ||||
| 		registerUsage[instr.dst] = i; | ||||
| 		emit(REX_LEA); | ||||
| 		emitByte(0x84 + 8 * instr.dst); | ||||
| 		genSIB(0, instr.src, instr.dst); | ||||
| 		emit32(instr.getImm32()); | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_ISUB_R(Instruction& instr) { | ||||
| 	void JitCompilerX86::h_ISUB_R(Instruction& instr, int i) { | ||||
| 		registerUsage[instr.dst] = i; | ||||
| 		if (instr.src != instr.dst) { | ||||
| 			emit(REX_SUB_RR); | ||||
| 			emitByte(0xc0 + 8 * instr.dst + instr.src); | ||||
|  | @ -318,7 +337,8 @@ namespace RandomX { | |||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_ISUB_M(Instruction& instr) { | ||||
| 	void JitCompilerX86::h_ISUB_M(Instruction& instr, int i) { | ||||
| 		registerUsage[instr.dst] = i; | ||||
| 		if (instr.src != instr.dst) { | ||||
| 			genAddressReg(instr); | ||||
| 			emit(REX_SUB_RM); | ||||
|  | @ -332,14 +352,16 @@ namespace RandomX { | |||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_IMUL_9C(Instruction& instr) { | ||||
| 	//IMUL_9C: emits REX_LEA, an operand byte carrying dst in bits 3-5, a SIB byte with
 | ||||
| 	//scale field 3 and dst as both index and base, and finally the 32-bit immediate.
 | ||||
| 	void JitCompilerX86::h_IMUL_9C(Instruction& instr, int i) { | ||||
| 		const auto operandByte = 0x84 + 8 * instr.dst; | ||||
| 		registerUsage[instr.dst] = i; //remember i as the latest instruction writing dst
 | ||||
| 		emit(REX_LEA); | ||||
| 		emitByte(operandByte); | ||||
| 		genSIB(3, instr.dst, instr.dst); | ||||
| 		emit32(instr.getImm32()); | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_IMUL_R(Instruction& instr) { | ||||
| 	void JitCompilerX86::h_IMUL_R(Instruction& instr, int i) { | ||||
| 		registerUsage[instr.dst] = i; | ||||
| 		if (instr.src != instr.dst) { | ||||
| 			emit(REX_IMUL_RR); | ||||
| 			emitByte(0xc0 + 8 * instr.dst + instr.src); | ||||
|  | @ -351,7 +373,8 @@ namespace RandomX { | |||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_IMUL_M(Instruction& instr) { | ||||
| 	void JitCompilerX86::h_IMUL_M(Instruction& instr, int i) { | ||||
| 		registerUsage[instr.dst] = i; | ||||
| 		if (instr.src != instr.dst) { | ||||
| 			genAddressReg(instr); | ||||
| 			emit(REX_IMUL_RM); | ||||
|  | @ -365,7 +388,8 @@ namespace RandomX { | |||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_IMULH_R(Instruction& instr) { | ||||
| 	void JitCompilerX86::h_IMULH_R(Instruction& instr, int i) { | ||||
| 		registerUsage[instr.dst] = i; | ||||
| 		emit(REX_MOV_RR64); | ||||
| 		emitByte(0xc0 + instr.dst); | ||||
| 		emit(REX_MUL_R); | ||||
|  | @ -374,7 +398,8 @@ namespace RandomX { | |||
| 		emitByte(0xc2 + 8 * instr.dst); | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_IMULH_M(Instruction& instr) { | ||||
| 	void JitCompilerX86::h_IMULH_M(Instruction& instr, int i) { | ||||
| 		registerUsage[instr.dst] = i; | ||||
| 		if (instr.src != instr.dst) { | ||||
| 			genAddressReg(instr, false); | ||||
| 			emit(REX_MOV_RR64); | ||||
|  | @ -392,7 +417,8 @@ namespace RandomX { | |||
| 		emitByte(0xc2 + 8 * instr.dst); | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_ISMULH_R(Instruction& instr) { | ||||
| 	void JitCompilerX86::h_ISMULH_R(Instruction& instr, int i) { | ||||
| 		registerUsage[instr.dst] = i; | ||||
| 		emit(REX_MOV_RR64); | ||||
| 		emitByte(0xc0 + instr.dst); | ||||
| 		emit(REX_MUL_R); | ||||
|  | @ -401,7 +427,8 @@ namespace RandomX { | |||
| 		emitByte(0xc2 + 8 * instr.dst); | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_ISMULH_M(Instruction& instr) { | ||||
| 	void JitCompilerX86::h_ISMULH_M(Instruction& instr, int i) { | ||||
| 		registerUsage[instr.dst] = i; | ||||
| 		if (instr.src != instr.dst) { | ||||
| 			genAddressReg(instr, false); | ||||
| 			emit(REX_MOV_RR64); | ||||
|  | @ -419,8 +446,9 @@ namespace RandomX { | |||
| 		emitByte(0xc2 + 8 * instr.dst); | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_IMUL_RCP(Instruction& instr) { | ||||
| 	void JitCompilerX86::h_IMUL_RCP(Instruction& instr, int i) { | ||||
| 		if (instr.getImm32() != 0) { | ||||
| 			registerUsage[instr.dst] = i; | ||||
| 			emit(MOV_RAX_I); | ||||
| 			emit64(reciprocal(instr.getImm32())); | ||||
| 			emit(REX_IMUL_RM); | ||||
|  | @ -428,16 +456,18 @@ namespace RandomX { | |||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_ISDIV_C(Instruction& instr) { | ||||
| 	void JitCompilerX86::h_ISDIV_C(Instruction& instr, int i) { //empty handler: emits no bytes and does not touch registerUsage. NOTE(review): confirm the no-op is intended
 | ||||
| 
 | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_INEG_R(Instruction& instr) { | ||||
| 	//INEG_R: records the write to dst, then emits REX_NEG followed by an operand byte
 | ||||
| 	//that carries dst in its low three bits.
 | ||||
| 	void JitCompilerX86::h_INEG_R(Instruction& instr, int i) { | ||||
| 		const auto operandByte = 0xd8 + instr.dst; | ||||
| 		registerUsage[instr.dst] = i; | ||||
| 		emit(REX_NEG); | ||||
| 		emitByte(operandByte); | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_IXOR_R(Instruction& instr) { | ||||
| 	void JitCompilerX86::h_IXOR_R(Instruction& instr, int i) { | ||||
| 		registerUsage[instr.dst] = i; | ||||
| 		if (instr.src != instr.dst) { | ||||
| 			emit(REX_XOR_RR); | ||||
| 			emitByte(0xc0 + 8 * instr.dst + instr.src); | ||||
|  | @ -449,7 +479,8 @@ namespace RandomX { | |||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_IXOR_M(Instruction& instr) { | ||||
| 	void JitCompilerX86::h_IXOR_M(Instruction& instr, int i) { | ||||
| 		registerUsage[instr.dst] = i; | ||||
| 		if (instr.src != instr.dst) { | ||||
| 			genAddressReg(instr); | ||||
| 			emit(REX_XOR_RM); | ||||
|  | @ -463,7 +494,8 @@ namespace RandomX { | |||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_IROR_R(Instruction& instr) { | ||||
| 	void JitCompilerX86::h_IROR_R(Instruction& instr, int i) { | ||||
| 		registerUsage[instr.dst] = i; | ||||
| 		if (instr.src != instr.dst) { | ||||
| 			emit(REX_MOV_RR); | ||||
| 			emitByte(0xc8 + instr.src); | ||||
|  | @ -477,7 +509,8 @@ namespace RandomX { | |||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_IROL_R(Instruction& instr) { | ||||
| 	void JitCompilerX86::h_IROL_R(Instruction& instr, int i) { | ||||
| 		registerUsage[instr.dst] = i; | ||||
| 		if (instr.src != instr.dst) { | ||||
| 			emit(REX_MOV_RR); | ||||
| 			emitByte(0xc8 + instr.src); | ||||
|  | @ -491,20 +524,22 @@ namespace RandomX { | |||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_ISWAP_R(Instruction& instr) { | ||||
| 	//ISWAP_R: when src and dst differ, marks both registers as written by instruction i and
 | ||||
| 	//emits REX_XCHG with src in the low bits and dst in bits 3-5 of the operand byte.
 | ||||
| 	//Nothing is emitted or recorded when src equals dst.
 | ||||
| 	void JitCompilerX86::h_ISWAP_R(Instruction& instr, int i) { | ||||
| 		if (instr.src == instr.dst) { | ||||
| 			return; | ||||
| 		} | ||||
| 		const auto operandByte = 0xc0 + instr.src + 8 * instr.dst; | ||||
| 		registerUsage[instr.dst] = i; | ||||
| 		registerUsage[instr.src] = i; | ||||
| 		emit(REX_XCHG); | ||||
| 		emitByte(operandByte); | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_FSWAP_R(Instruction& instr) { | ||||
| 	//FSWAP_R: emits SHUFPD, an operand byte and a final byte with value 1.
 | ||||
| 	//Multiplying dst by 9 (8 + 1) writes dst into both 3-bit register fields of the operand byte.
 | ||||
| 	void JitCompilerX86::h_FSWAP_R(Instruction& instr, int i) { | ||||
| 		const auto operandByte = 0xc0 + 9 * instr.dst; | ||||
| 		const uint8_t selector = 1; | ||||
| 		emit(SHUFPD); | ||||
| 		emitByte(operandByte); | ||||
| 		emitByte(selector); | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_FADD_R(Instruction& instr) { | ||||
| 	void JitCompilerX86::h_FADD_R(Instruction& instr, int i) { | ||||
| 		instr.dst %= 4; | ||||
| 		instr.src %= 4; | ||||
| 		emit(REX_ADDPD); | ||||
|  | @ -514,7 +549,7 @@ namespace RandomX { | |||
| 		//emitByte(0xf8 + instr.dst);
 | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_FADD_M(Instruction& instr) { | ||||
| 	void JitCompilerX86::h_FADD_M(Instruction& instr, int i) { | ||||
| 		instr.dst %= 4; | ||||
| 		genAddressReg(instr); | ||||
| 		emit(REX_CVTDQ2PD_XMM12); | ||||
|  | @ -522,7 +557,7 @@ namespace RandomX { | |||
| 		emitByte(0xc4 + 8 * instr.dst); | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_FSUB_R(Instruction& instr) { | ||||
| 	void JitCompilerX86::h_FSUB_R(Instruction& instr, int i) { | ||||
| 		instr.dst %= 4; | ||||
| 		instr.src %= 4; | ||||
| 		emit(REX_SUBPD); | ||||
|  | @ -532,7 +567,7 @@ namespace RandomX { | |||
| 		//emitByte(0xf8 + instr.dst);
 | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_FSUB_M(Instruction& instr) { | ||||
| 	void JitCompilerX86::h_FSUB_M(Instruction& instr, int i) { | ||||
| 		instr.dst %= 4; | ||||
| 		genAddressReg(instr); | ||||
| 		emit(REX_CVTDQ2PD_XMM12); | ||||
|  | @ -540,20 +575,20 @@ namespace RandomX { | |||
| 		emitByte(0xc4 + 8 * instr.dst); | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_FSCAL_R(Instruction& instr) { | ||||
| 	//FSCAL_R: reduces instr.dst modulo 4 in place, then emits REX_XORPS and an operand byte
 | ||||
| 	//with the reduced dst in bits 3-5.
 | ||||
| 	void JitCompilerX86::h_FSCAL_R(Instruction& instr, int i) { | ||||
| 		instr.dst %= 4; | ||||
| 		const auto operandByte = 0xc7 + 8 * instr.dst; | ||||
| 		emit(REX_XORPS); | ||||
| 		emitByte(operandByte); | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_FMUL_R(Instruction& instr) { | ||||
| 	//FMUL_R: reduces instr.dst and instr.src modulo 4 in place, then emits REX_MULPD and an
 | ||||
| 	//operand byte with src in the low bits and dst in bits 3-5.
 | ||||
| 	void JitCompilerX86::h_FMUL_R(Instruction& instr, int i) { | ||||
| 		instr.src %= 4; | ||||
| 		instr.dst %= 4; | ||||
| 		const auto operandByte = 0xe0 + instr.src + 8 * instr.dst; | ||||
| 		emit(REX_MULPD); | ||||
| 		emitByte(operandByte); | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_FMUL_M(Instruction& instr) { | ||||
| 	void JitCompilerX86::h_FMUL_M(Instruction& instr, int i) { | ||||
| 		instr.dst %= 4; | ||||
| 		genAddressReg(instr); | ||||
| 		emit(REX_CVTDQ2PD_XMM12); | ||||
|  | @ -564,7 +599,7 @@ namespace RandomX { | |||
| 		emitByte(0xe5 + 8 * instr.dst); | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_FDIV_R(Instruction& instr) { | ||||
| 	void JitCompilerX86::h_FDIV_R(Instruction& instr, int i) { | ||||
| 		instr.dst %= 4; | ||||
| 		instr.src %= 4; | ||||
| 		emit(REX_DIVPD); | ||||
|  | @ -573,7 +608,7 @@ namespace RandomX { | |||
| 		emitByte(0xe5 + 8 * instr.dst); | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_FDIV_M(Instruction& instr) { | ||||
| 	void JitCompilerX86::h_FDIV_M(Instruction& instr, int i) { | ||||
| 		instr.dst %= 4; | ||||
| 		genAddressReg(instr); | ||||
| 		emit(REX_CVTDQ2PD_XMM12); | ||||
|  | @ -582,13 +617,13 @@ namespace RandomX { | |||
| 		emitByte(0xe4 + 8 * instr.dst); | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_FSQRT_R(Instruction& instr) { | ||||
| 	//FSQRT_R: reduces instr.dst modulo 4 in place, then emits SQRTPD and one operand byte.
 | ||||
| 	//Multiplying dst by 9 (8 + 1) advances both 3-bit register fields of the operand byte together.
 | ||||
| 	void JitCompilerX86::h_FSQRT_R(Instruction& instr, int i) { | ||||
| 		instr.dst %= 4; | ||||
| 		const auto operandByte = 0xe4 + 9 * instr.dst; | ||||
| 		emit(SQRTPD); | ||||
| 		emitByte(operandByte); | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_CFROUND(Instruction& instr) { | ||||
| 	void JitCompilerX86::h_CFROUND(Instruction& instr, int i) { | ||||
| 		emit(REX_MOV_RR64); | ||||
| 		emitByte(0xc0 + instr.src);	 | ||||
| 		int rotate = (13 - (instr.getImm32() & 63)) & 63; | ||||
|  | @ -599,6 +634,28 @@ namespace RandomX { | |||
| 		emit(AND_OR_MOV_LDMXCSR); | ||||
| 	} | ||||
| 
 | ||||
| 	//Maps the 3-bit condition selector stored in bits 2-4 of instr.mod to a one-byte x86 Jcc opcode.
 | ||||
| 	//When invert is true the lowest selector bit is flipped, which selects the other opcode of each pair.
 | ||||
| 	//A lookup table is used instead of a switch so that every path returns a value; the index is
 | ||||
| 	//always in the range 0-7 because it is masked with 7 before the single-bit flip.
 | ||||
| 	static inline uint8_t jumpCondition(Instruction& instr, bool invert = false) { | ||||
| 		static const uint8_t opcodes[8] = { | ||||
| 			0x76, //jbe
 | ||||
| 			0x77, //ja
 | ||||
| 			0x78, //js
 | ||||
| 			0x79, //jns
 | ||||
| 			0x70, //jo
 | ||||
| 			0x71, //jno
 | ||||
| 			0x7c, //jl
 | ||||
| 			0x7d, //jge
 | ||||
| 		}; | ||||
| 		return opcodes[((instr.mod >> 2) & 7) ^ invert]; | ||||
| 	} | ||||
| 
 | ||||
| 	static inline uint8_t condition(Instruction& instr) { | ||||
| 		switch ((instr.mod >> 2) & 7) | ||||
| 		{ | ||||
|  | @ -623,7 +680,40 @@ namespace RandomX { | |||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_COND_R(Instruction& instr) { | ||||
| 	//Returns the index (0-7) of the entry of registerUsage holding the smallest value.
 | ||||
| 	//On ties the lowest index wins, because only a strictly smaller value replaces the candidate.
 | ||||
| 	//The scan starts from index 0, so the result is always a valid index (the previous version
 | ||||
| 	//returned an uninitialized variable if no entry compared below INT_MAX).
 | ||||
| 	int JitCompilerX86::getConditionRegister() { | ||||
| 		int minIndex = 0; | ||||
| 		for (int candidate = 1; candidate < 8; ++candidate) { | ||||
| 			if (registerUsage[candidate] < registerUsage[minIndex]) { | ||||
| 				minIndex = candidate; | ||||
| 			} | ||||
| 		} | ||||
| 		return minIndex; | ||||
| 	} | ||||
| 
 | ||||
| 	//Emits the conditional jump sequence used by the COND handlers when RANDOMX_JUMP is defined.
 | ||||
| 	//The register chosen by getConditionRegister() gets 1 << shift added (REX_ADD_I), is tested
 | ||||
| 	//against RANDOMX_CONDITION_BITS mask bits starting at bit shift (REX_TEST), and JZ branches to
 | ||||
| 	//the code offset recorded for instruction registerUsage[reg] + 1.
 | ||||
| 	//Afterwards every register is marked as last used by instruction i.
 | ||||
| 	void JitCompilerX86::handleCondition(Instruction& instr, int i) { | ||||
| 		const int shift = (instr.mod >> 5); | ||||
| 		const int increment = 1 << shift; | ||||
| 		const int conditionMask = ((1 << RANDOMX_CONDITION_BITS) - 1) << shift; | ||||
| 		const int reg = getConditionRegister(); | ||||
| 		const int target = registerUsage[reg] + 1; | ||||
| 		const auto regByte = 0xc0 + reg; | ||||
| 		emit(REX_ADD_I); | ||||
| 		emitByte(regByte); | ||||
| 		emit32(increment); | ||||
| 		emit(REX_TEST); | ||||
| 		emitByte(regByte); | ||||
| 		emit32(conditionMask); | ||||
| 		emit(JZ); | ||||
| 		//the displacement is relative to the position right after the 4 bytes written next
 | ||||
| 		emit32(instructionOffsets[target] - (codePos + 4)); | ||||
| 		for (int& lastWrite : registerUsage) { //mark all registers as used
 | ||||
| 			lastWrite = i; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_COND_R(Instruction& instr, int i) { | ||||
| #ifdef RANDOMX_JUMP | ||||
| 		handleCondition(instr, i); | ||||
| #endif | ||||
| 		emit(XOR_ECX_ECX); | ||||
| 		emit(REX_CMP_R32I); | ||||
| 		emitByte(0xf8 + instr.src); | ||||
|  | @ -635,7 +725,10 @@ namespace RandomX { | |||
| 		emitByte(0xc1 + 8 * instr.dst); | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_COND_M(Instruction& instr) { | ||||
| 	void JitCompilerX86::h_COND_M(Instruction& instr, int i) { | ||||
| #ifdef RANDOMX_JUMP | ||||
| 		handleCondition(instr, i); | ||||
| #endif | ||||
| 		emit(XOR_ECX_ECX); | ||||
| 		genAddressReg(instr); | ||||
| 		emit(REX_CMP_M32I); | ||||
|  | @ -647,21 +740,21 @@ namespace RandomX { | |||
| 		emitByte(0xc1 + 8 * instr.dst); | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_ISTORE(Instruction& instr) { | ||||
| 	//ISTORE: calls genAddressRegDst for the instruction, then emits REX_MOV_MR followed by
 | ||||
| 	//an operand byte with src in bits 3-5 and a constant 0x06 byte.
 | ||||
| 	void JitCompilerX86::h_ISTORE(Instruction& instr, int i) { | ||||
| 		const uint8_t trailingByte = 0x06; | ||||
| 		genAddressRegDst(instr); | ||||
| 		emit(REX_MOV_MR); | ||||
| 		const auto operandByte = 0x04 + 8 * instr.src; | ||||
| 		emitByte(operandByte); | ||||
| 		emitByte(trailingByte); | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_FSTORE(Instruction& instr) { | ||||
| 	//FSTORE: calls genAddressRegDst with its second argument set to true, then emits MOVAPD
 | ||||
| 	//followed by an operand byte with src in bits 3-5 and a constant 0x06 byte.
 | ||||
| 	void JitCompilerX86::h_FSTORE(Instruction& instr, int i) { | ||||
| 		const uint8_t trailingByte = 0x06; | ||||
| 		genAddressRegDst(instr, true); | ||||
| 		emit(MOVAPD); | ||||
| 		const auto operandByte = 0x04 + 8 * instr.src; | ||||
| 		emitByte(operandByte); | ||||
| 		emitByte(trailingByte); | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_NOP(Instruction& instr) { | ||||
| 	//NOP: emits the single byte 0x90; neither instr nor i is used.
 | ||||
| 	void JitCompilerX86::h_NOP(Instruction& instr, int i) { | ||||
| 		const uint8_t nopByte = 0x90; | ||||
| 		emitByte(nopByte); | ||||
| 	} | ||||
| 
 | ||||
|  |  | |||
|  | @ -29,7 +29,7 @@ namespace RandomX { | |||
| 	class Program; | ||||
| 	class JitCompilerX86; | ||||
| 
 | ||||
| 	typedef void(JitCompilerX86::*InstructionGeneratorX86)(Instruction&); | ||||
| 	typedef void(JitCompilerX86::*InstructionGeneratorX86)(Instruction&, int); | ||||
| 
 | ||||
| 	constexpr uint32_t CodeSize = 64 * 1024; | ||||
| 
 | ||||
|  | @ -47,17 +47,21 @@ namespace RandomX { | |||
| 		size_t getCodeSize(); | ||||
| 	private: | ||||
| 		static InstructionGeneratorX86 engine[256]; | ||||
| 		std::vector<int32_t> instructionOffsets; | ||||
| 		int registerUsage[8]; | ||||
| 		uint8_t* code; | ||||
| 		int32_t codePos; | ||||
| 
 | ||||
| 		void generateProgramPrologue(Program&); | ||||
| 		void generateProgramEpilogue(Program&); | ||||
| 		int getConditionRegister(); | ||||
| 		void genAddressReg(Instruction&, bool); | ||||
| 		void genAddressRegDst(Instruction&, bool); | ||||
| 		void genAddressImm(Instruction&); | ||||
| 		void genSIB(int scale, int index, int base); | ||||
| 
 | ||||
| 		void generateCode(Instruction&); | ||||
| 		void handleCondition(Instruction&, int); | ||||
| 		void generateCode(Instruction&, int); | ||||
| 
 | ||||
| 		void emitByte(uint8_t val) { | ||||
| 			code[codePos] = val; | ||||
|  | @ -92,43 +96,43 @@ namespace RandomX { | |||
| 			codePos += N; | ||||
| 		} | ||||
| 
 | ||||
| 		void  h_IADD_R(Instruction&); | ||||
| 		void  h_IADD_M(Instruction&); | ||||
| 		void  h_IADD_RC(Instruction&); | ||||
| 		void  h_ISUB_R(Instruction&); | ||||
| 		void  h_ISUB_M(Instruction&); | ||||
| 		void  h_IMUL_9C(Instruction&); | ||||
| 		void  h_IMUL_R(Instruction&); | ||||
| 		void  h_IMUL_M(Instruction&); | ||||
| 		void  h_IMULH_R(Instruction&); | ||||
| 		void  h_IMULH_M(Instruction&); | ||||
| 		void  h_ISMULH_R(Instruction&); | ||||
| 		void  h_ISMULH_M(Instruction&); | ||||
| 		void  h_IMUL_RCP(Instruction&); | ||||
| 		void  h_ISDIV_C(Instruction&); | ||||
| 		void  h_INEG_R(Instruction&); | ||||
| 		void  h_IXOR_R(Instruction&); | ||||
| 		void  h_IXOR_M(Instruction&); | ||||
| 		void  h_IROR_R(Instruction&); | ||||
| 		void  h_IROL_R(Instruction&); | ||||
| 		void  h_ISWAP_R(Instruction&); | ||||
| 		void  h_FSWAP_R(Instruction&); | ||||
| 		void  h_FADD_R(Instruction&); | ||||
| 		void  h_FADD_M(Instruction&); | ||||
| 		void  h_FSUB_R(Instruction&); | ||||
| 		void  h_FSUB_M(Instruction&); | ||||
| 		void  h_FSCAL_R(Instruction&); | ||||
| 		void  h_FMUL_R(Instruction&); | ||||
| 		void  h_FMUL_M(Instruction&); | ||||
| 		void  h_FDIV_R(Instruction&); | ||||
| 		void  h_FDIV_M(Instruction&); | ||||
| 		void  h_FSQRT_R(Instruction&); | ||||
| 		void  h_COND_R(Instruction&); | ||||
| 		void  h_COND_M(Instruction&); | ||||
| 		void  h_CFROUND(Instruction&); | ||||
| 		void  h_ISTORE(Instruction&); | ||||
| 		void  h_FSTORE(Instruction&); | ||||
| 		void  h_NOP(Instruction&); | ||||
| 		void  h_IADD_R(Instruction&, int); | ||||
| 		void  h_IADD_M(Instruction&, int); | ||||
| 		void  h_IADD_RC(Instruction&, int); | ||||
| 		void  h_ISUB_R(Instruction&, int); | ||||
| 		void  h_ISUB_M(Instruction&, int); | ||||
| 		void  h_IMUL_9C(Instruction&, int); | ||||
| 		void  h_IMUL_R(Instruction&, int); | ||||
| 		void  h_IMUL_M(Instruction&, int); | ||||
| 		void  h_IMULH_R(Instruction&, int); | ||||
| 		void  h_IMULH_M(Instruction&, int); | ||||
| 		void  h_ISMULH_R(Instruction&, int); | ||||
| 		void  h_ISMULH_M(Instruction&, int); | ||||
| 		void  h_IMUL_RCP(Instruction&, int); | ||||
| 		void  h_ISDIV_C(Instruction&, int); | ||||
| 		void  h_INEG_R(Instruction&, int); | ||||
| 		void  h_IXOR_R(Instruction&, int); | ||||
| 		void  h_IXOR_M(Instruction&, int); | ||||
| 		void  h_IROR_R(Instruction&, int); | ||||
| 		void  h_IROL_R(Instruction&, int); | ||||
| 		void  h_ISWAP_R(Instruction&, int); | ||||
| 		void  h_FSWAP_R(Instruction&, int); | ||||
| 		void  h_FADD_R(Instruction&, int); | ||||
| 		void  h_FADD_M(Instruction&, int); | ||||
| 		void  h_FSUB_R(Instruction&, int); | ||||
| 		void  h_FSUB_M(Instruction&, int); | ||||
| 		void  h_FSCAL_R(Instruction&, int); | ||||
| 		void  h_FMUL_R(Instruction&, int); | ||||
| 		void  h_FMUL_M(Instruction&, int); | ||||
| 		void  h_FDIV_R(Instruction&, int); | ||||
| 		void  h_FDIV_M(Instruction&, int); | ||||
| 		void  h_FSQRT_R(Instruction&, int); | ||||
| 		void  h_COND_R(Instruction&, int); | ||||
| 		void  h_COND_M(Instruction&, int); | ||||
| 		void  h_CFROUND(Instruction&, int); | ||||
| 		void  h_ISTORE(Instruction&, int); | ||||
| 		void  h_FSTORE(Instruction&, int); | ||||
| 		void  h_NOP(Instruction&, int); | ||||
| 	}; | ||||
| 
 | ||||
| } | ||||
|  | @ -67,6 +67,9 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| //Scratchpad L1 size in bytes. Must be a power of two and less than or equal to RANDOMX_SCRATCHPAD_L2.
 | ||||
| #define RANDOMX_SCRATCHPAD_L1      (16 * 1024) | ||||
| 
 | ||||
| //How many register bits must be zero for a jump condition to be triggered
 | ||||
| #define RANDOMX_CONDITION_BITS     7 | ||||
| 
 | ||||
| /*
 | ||||
| Instruction frequencies (per 256 opcodes) | ||||
| Total sum of frequencies must be 256 | ||||
|  |  | |||
|  | @ -54,6 +54,7 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| #define REP32(x) REP31(x) x, | ||||
| #define REP33(x) REP32(x) x, | ||||
| #define REP40(x) REP32(x) REP8(x) | ||||
| #define REP64(x) REP32(x) REP32(x) | ||||
| #define REP128(x) REP32(x) REP32(x) REP32(x) REP32(x) | ||||
| #define REP232(x) REP128(x) REP40(x) REP40(x) REP24(x) | ||||
| #define REP256(x) REP128(x) REP128(x) | ||||
|  | @ -95,6 +96,8 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| #define REPCASE30(x) REPCASE29(x) case __COUNTER__: | ||||
| #define REPCASE31(x) REPCASE30(x) case __COUNTER__: | ||||
| #define REPCASE32(x) REPCASE31(x) case __COUNTER__: | ||||
| #define REPCASE64(x) REPCASE32(x) REPCASE32(x) | ||||
| #define REPCASE128(x) REPCASE64(x) REPCASE64(x) | ||||
| #define REPCASENX(x,N) REPCASE##N(x) | ||||
| #define REPCASEN(x,N) REPCASENX(x,N) | ||||
| #define CASE_REP(x) REPCASEN(x, WT(x)) | ||||
|  | @ -303,7 +303,7 @@ int main(int argc, char** argv) { | |||
| 				if (jit) | ||||
| 					vm = new RandomX::CompiledLightVirtualMachine(); | ||||
| 				else | ||||
| 					vm = new RandomX::InterpretedVirtualMachine(softAes, async); | ||||
| 					vm = new RandomX::InterpretedVirtualMachine(softAes); | ||||
| 			} | ||||
| 			vm->setDataset(dataset, datasetSize); | ||||
| 			vms.push_back(vm); | ||||
|  | @ -340,7 +340,7 @@ int main(int argc, char** argv) { | |||
| 		std::cout << "Calculated result: "; | ||||
| 		result.print(std::cout); | ||||
| 		if(programCount == 1000) | ||||
| 		std::cout << "Reference result:  84f37cc43cb21eabf1d5b9def462060cd24218290678dd80a8ea2f663892629e" << std::endl; | ||||
| 		std::cout << "Reference result:  9e636a04a2517f37d8ed40b67a7051e02a7577e878fbba5c4352996b2c653f90" << std::endl; | ||||
| 		if (!miningMode) { | ||||
| 			std::cout << "Performance: " << 1000 * elapsed / programCount << " ms per hash" << std::endl; | ||||
| 		} | ||||
|  |  | |||
							
								
								
									
										1617
									
								
								src/program.inc
									
										
									
									
									
								
							
							
						
						
									
										1617
									
								
								src/program.inc
									
										
									
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue