mirror of
				https://git.wownero.com/wownero/RandomWOW.git
				synced 2024-08-15 00:23:14 +00:00 
			
		
		
		
	Full-width mantissa for group E registers and FDIV_M
This commit is contained in:
		
							parent
							
								
									d43c7db416
								
							
						
					
					
						commit
						67046a9f38
					
				
					 11 changed files with 60 additions and 39 deletions
				
			
		|  | @ -38,7 +38,7 @@ namespace RandomX { | |||
// CompiledLightVirtualMachine<superscalar>::initialize(): calls VirtualMachine::initialize() and
// then asks the JIT compiler to generate the light program for `program`.
// NOTE(review): this is a rendered diff hunk, so the two generateProgramLight calls below look like
// the removed and the added form of a single statement (the second one also passes `config`) -
// confirm against the repository before treating both as live code.
| 	template<bool superscalar> | ||||
| 	void CompiledLightVirtualMachine<superscalar>::initialize() { | ||||
| 		VirtualMachine::initialize(); | ||||
| 		compiler.generateProgramLight<superscalar>(program); | ||||
| 		compiler.generateProgramLight<superscalar>(program, config); | ||||
| 		//mem.ds.dataset.memory = datasetBasePtr + (datasetBase * CacheLineSize);
 | ||||
| 	} | ||||
| 
 | ||||
|  |  | |||
|  | @ -37,7 +37,7 @@ namespace RandomX { | |||
| 
 | ||||
// CompiledVirtualMachine::initialize(): calls VirtualMachine::initialize(), has the JIT compiler
// generate the program, then points mem.ds.dataset.memory at
// datasetBasePtr + datasetBase * CacheLineSize.
// NOTE(review): this is a rendered diff hunk, so the two generateProgram calls below look like the
// removed and the added form of a single statement (the second one also passes `config`) -
// confirm against the repository before treating both as live code.
| 	void CompiledVirtualMachine::initialize() { | ||||
| 		VirtualMachine::initialize(); | ||||
| 		compiler.generateProgram(program); | ||||
| 		compiler.generateProgram(program, config); | ||||
| 		mem.ds.dataset.memory = datasetBasePtr + (datasetBase * CacheLineSize); | ||||
| 	} | ||||
| 
 | ||||
|  |  | |||
|  | @ -116,6 +116,16 @@ namespace RandomX { | |||
| 		return scratchpad + addr; | ||||
| 	} | ||||
| 
 | ||||
| 	template<bool superscalar> | ||||
| 	FORCE_INLINE __m128d InterpretedVirtualMachine<superscalar>::maskRegisterExponentMantissa(__m128d x) { | ||||
| 		constexpr uint64_t mantissaMask64 = (1ULL << 52) - 1; | ||||
| 		const __m128d mantissaMask = _mm_castsi128_pd(_mm_set_epi64x(mantissaMask64, mantissaMask64)); | ||||
| 		const __m128d exponentMask = _mm_load_pd((const double*)&config.eMask); | ||||
| 		x = _mm_and_pd(x, mantissaMask); | ||||
| 		x = _mm_or_pd(x, exponentMask); | ||||
| 		return x; | ||||
| 	} | ||||
| 
 | ||||
| 	 template<bool superscalar> | ||||
| 	 FORCE_INLINE void InterpretedVirtualMachine<superscalar>::executeBytecode(int& ic, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) { | ||||
| 		auto& ibc = byteCode[ic]; | ||||
|  | @ -229,7 +239,7 @@ namespace RandomX { | |||
| 			} break; | ||||
| 
 | ||||
| 			case InstructionType::FDIV_M: { | ||||
| 				__m128d fsrc = ieee_set_exponent<-240>(load_cvt_i32x2(getScratchpadAddress(ibc))); | ||||
| 				__m128d fsrc = maskRegisterExponentMantissa(load_cvt_i32x2(getScratchpadAddress(ibc))); | ||||
| 				*ibc.fdst = _mm_div_pd(*ibc.fdst, fsrc); | ||||
| 			} break; | ||||
| 
 | ||||
|  | @ -326,7 +336,7 @@ namespace RandomX { | |||
| 		uint32_t spAddr1 = mem.ma; | ||||
| 
 | ||||
| 		if (trace) { | ||||
| 			std::cout << "execute (reg: r" << readReg0 << ", r" << readReg1 << ", r" << readReg2 << ", r" << readReg3 << ")" << std::endl; | ||||
| 			std::cout << "execute (reg: r" << config.readReg0 << ", r" << config.readReg1 << ", r" << config.readReg2 << ", r" << config.readReg3 << ")" << std::endl; | ||||
| 			std::cout << "spAddr " << std::hex << std::setw(8) << std::setfill('0') << spAddr1 << " / " << std::setw(8) << std::setfill('0') << spAddr0 << std::endl; | ||||
| 			std::cout << "ma/mx " << std::hex << std::setw(8) << std::setfill('0') << mem.ma << std::setw(8) << std::setfill('0') << mem.mx << std::endl; | ||||
| 			printState(r, f, e, a); | ||||
|  | @ -334,7 +344,7 @@ namespace RandomX { | |||
| 
 | ||||
| 		for(unsigned ic = 0; ic < RANDOMX_PROGRAM_ITERATIONS; ++ic) { | ||||
| 			//std::cout << "Iteration " << iter << std::endl;
 | ||||
| 			uint64_t spMix = r[readReg0] ^ r[readReg1]; | ||||
| 			uint64_t spMix = r[config.readReg0] ^ r[config.readReg1]; | ||||
| 			spAddr0 ^= spMix; | ||||
| 			spAddr0 &= ScratchpadL3Mask64; | ||||
| 			spAddr1 ^= spMix >> 32; | ||||
|  | @ -353,10 +363,10 @@ namespace RandomX { | |||
| 			f[1] = load_cvt_i32x2(scratchpad + spAddr1 + 8); | ||||
| 			f[2] = load_cvt_i32x2(scratchpad + spAddr1 + 16); | ||||
| 			f[3] = load_cvt_i32x2(scratchpad + spAddr1 + 24); | ||||
| 			e[0] = ieee_set_exponent<-240>(load_cvt_i32x2(scratchpad + spAddr1 + 32)); | ||||
| 			e[1] = ieee_set_exponent<-240>(load_cvt_i32x2(scratchpad + spAddr1 + 40)); | ||||
| 			e[2] = ieee_set_exponent<-240>(load_cvt_i32x2(scratchpad + spAddr1 + 48)); | ||||
| 			e[3] = ieee_set_exponent<-240>(load_cvt_i32x2(scratchpad + spAddr1 + 56)); | ||||
| 			e[0] = maskRegisterExponentMantissa(load_cvt_i32x2(scratchpad + spAddr1 + 32)); | ||||
| 			e[1] = maskRegisterExponentMantissa(load_cvt_i32x2(scratchpad + spAddr1 + 40)); | ||||
| 			e[2] = maskRegisterExponentMantissa(load_cvt_i32x2(scratchpad + spAddr1 + 48)); | ||||
| 			e[3] = maskRegisterExponentMantissa(load_cvt_i32x2(scratchpad + spAddr1 + 56)); | ||||
| 
 | ||||
| 			if (trace) { | ||||
| 				std::cout << "iteration " << std::dec << ic << std::endl; | ||||
|  | @ -368,7 +378,7 @@ namespace RandomX { | |||
| 
 | ||||
| 			executeBytecode(r, f, e, a); | ||||
| 
 | ||||
| 			mem.mx ^= r[readReg2] ^ r[readReg3]; | ||||
| 			mem.mx ^= r[config.readReg2] ^ r[config.readReg3]; | ||||
| 			mem.mx &= CacheLineAlignMask; | ||||
| 			if (superscalar) { | ||||
| 				executeSuperscalar(datasetBase + mem.ma / CacheLineSize, r); | ||||
|  |  | |||
|  | @ -133,5 +133,6 @@ namespace RandomX { | |||
| 		void executeBytecode(int& i, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]); | ||||
| 		void executeSuperscalar(uint32_t blockNumber, int_reg_t(&r)[8]); | ||||
| 		void* getScratchpadAddress(InstructionByteCode& ibc); | ||||
| 		__m128d maskRegisterExponentMantissa(__m128d); | ||||
| 	}; | ||||
| } | ||||
|  | @ -24,6 +24,7 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| #include "Program.hpp" | ||||
| #include "reciprocal.h" | ||||
| #include "virtualMemory.hpp" | ||||
| #include "intrinPortable.h" | ||||
| 
 | ||||
| #define RANDOMX_JUMP | ||||
| 
 | ||||
|  | @ -230,20 +231,20 @@ namespace RandomX { | |||
| 		freePagedMemory(code, CodeSize); | ||||
| 	} | ||||
| 
 | ||||
// JitCompilerX86::generateProgram: emits a complete program into the code buffer.
// Order of work: generateProgramPrologue, then the codeReadDataset blob (readDatasetSize bytes)
// is copied to code + codePos and codePos is advanced past it, then generateProgramEpilogue.
// NOTE(review): this is a rendered diff hunk - the one-parameter signature and the one-argument
// generateProgramPrologue call look like the removed lines, the versions taking
// ProgramConfiguration& pcfg like the added ones. Confirm against the repository.
| 	void JitCompilerX86::generateProgram(Program& prog) { | ||||
| 		generateProgramPrologue(prog); | ||||
| 	void JitCompilerX86::generateProgram(Program& prog, ProgramConfiguration& pcfg) { | ||||
| 		generateProgramPrologue(prog, pcfg); | ||||
| 		memcpy(code + codePos, codeReadDataset, readDatasetSize); | ||||
| 		codePos += readDatasetSize; | ||||
| 		generateProgramEpilogue(prog); | ||||
| 	} | ||||
| 
 | ||||
| 	template<bool superscalar> | ||||
| 	void JitCompilerX86::generateProgramLight(Program& prog) { | ||||
| 	void JitCompilerX86::generateProgramLight(Program& prog, ProgramConfiguration& pcfg) { | ||||
| 		if (RANDOMX_CACHE_ACCESSES != 8) | ||||
| 			throw std::runtime_error("JIT compiler: Unsupported value of RANDOMX_CACHE_ACCESSES"); | ||||
| 		if (RANDOMX_ARGON_GROWTH != 0) | ||||
| 			throw std::runtime_error("JIT compiler: Unsupported value of RANDOMX_ARGON_GROWTH"); | ||||
| 		generateProgramPrologue(prog); | ||||
| 		generateProgramPrologue(prog, pcfg); | ||||
| 		if (superscalar) { | ||||
| 			emit(codeReadDatasetLightSshInit, readDatasetLightInitSize); | ||||
| 			emitByte(CALL); | ||||
|  | @ -259,8 +260,8 @@ namespace RandomX { | |||
| 		generateProgramEpilogue(prog); | ||||
| 	} | ||||
| 
 | ||||
// Explicit instantiations of JitCompilerX86::generateProgramLight for superscalar = true and false.
// NOTE(review): this is a rendered diff hunk - the first pair (Program& only) looks like the removed
// lines and the second pair (with ProgramConfiguration&) like the added ones. Confirm against the repository.
| 	template void JitCompilerX86::generateProgramLight<true>(Program& prog); | ||||
| 	template void JitCompilerX86::generateProgramLight<false>(Program& prog); | ||||
| 	template void JitCompilerX86::generateProgramLight<true>(Program& prog, ProgramConfiguration& pcfg); | ||||
| 	template void JitCompilerX86::generateProgramLight<false>(Program& prog, ProgramConfiguration& pcfg); | ||||
| 
 | ||||
| 	template<size_t N> | ||||
| 	void JitCompilerX86::generateSuperScalarHash(SuperscalarProgram(&programs)[N]) { | ||||
|  | @ -298,33 +299,26 @@ namespace RandomX { | |||
| 		memcpy(code, codeDatasetInit, datasetInitSize); | ||||
| 	} | ||||
| 
 | ||||
// JitCompilerX86::generateProgramPrologue: emits the first part of a generated program; callers
// append further code afterwards (generateProgram appends the dataset read and the epilogue).
// Steps visible here: reset the jump bookkeeping (RANDOMX_JUMP builds only), set codePos to
// prologueSize, copy pcfg.eMask into the code buffer, emit two REX_XOR_RAX_R64 sequences selecting
// readReg0 / readReg1, append the codeLoopLoad blob, generate the program body with generateCode,
// and finally emit REX_MOV_RR / REX_XOR_EAX selecting readReg2 / readReg3.
// NOTE(review): this is a rendered diff hunk, so old and new lines are interleaved: the
// one-parameter signature, the local readReg0..readReg3 derivation from prog.getEntropy(12) and
// the emitByte calls using those locals look like the removed side; the lines using `pcfg` look
// like the added side. Confirm against the repository.
| 	void JitCompilerX86::generateProgramPrologue(Program& prog) { | ||||
| 	void JitCompilerX86::generateProgramPrologue(Program& prog, ProgramConfiguration& pcfg) { | ||||
| #ifdef RANDOMX_JUMP | ||||
| 		instructionOffsets.clear(); | ||||
| 		for (unsigned i = 0; i < 8; ++i) { | ||||
| 			registerUsage[i] = -1; | ||||
| 		} | ||||
| #endif | ||||
| 		auto addressRegisters = prog.getEntropy(12); | ||||
| 		uint32_t readReg0 = 0 + (addressRegisters & 1); | ||||
| 		addressRegisters >>= 1; | ||||
| 		uint32_t readReg1 = 2 + (addressRegisters & 1); | ||||
| 		addressRegisters >>= 1; | ||||
| 		uint32_t readReg2 = 4 + (addressRegisters & 1); | ||||
| 		addressRegisters >>= 1; | ||||
| 		uint32_t readReg3 = 6 + (addressRegisters & 1); | ||||
| 		codePos = prologueSize; | ||||
		// Copies sizeof(pcfg.eMask) bytes of the mask to offset prologueSize - 48 in the code buffer.
		// NOTE(review): presumably a data slot inside the prologue template that the generated code
		// reads the group E mask from - confirm the offset 48 against the prologue assembly.
| 		memcpy(code + codePos - 48, &pcfg.eMask, sizeof(pcfg.eMask)); | ||||
		// Each emitted opcode is followed by one byte 0xc0 + register index
		// (presumably a register-direct ModRM byte - confirm against the opcode tables).
| 		emit(REX_XOR_RAX_R64); | ||||
| 		emitByte(0xc0 + readReg0); | ||||
| 		emitByte(0xc0 + pcfg.readReg0); | ||||
| 		emit(REX_XOR_RAX_R64); | ||||
| 		emitByte(0xc0 + readReg1); | ||||
| 		emitByte(0xc0 + pcfg.readReg1); | ||||
| 		memcpy(code + codePos, codeLoopLoad, loopLoadSize); | ||||
| 		codePos += loopLoadSize; | ||||
| 		generateCode(prog); | ||||
| 		emit(REX_MOV_RR); | ||||
| 		emitByte(0xc0 + readReg2); | ||||
| 		emitByte(0xc0 + pcfg.readReg2); | ||||
| 		emit(REX_XOR_EAX); | ||||
| 		emitByte(0xc0 + readReg3); | ||||
| 		emitByte(0xc0 + pcfg.readReg3); | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::generateProgramEpilogue(Program& prog) { | ||||
|  |  | |||
|  | @ -27,6 +27,7 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| namespace RandomX { | ||||
| 
 | ||||
// Forward declarations of the types named in the JitCompilerX86 member-function signatures.
class Program;
// Declared with `struct` because ProgramConfiguration is defined as a struct; a mismatched
// class-key is legal C++ but triggers MSVC warning C4099 and clang's -Wmismatched-tags.
struct ProgramConfiguration;
class SuperscalarProgram;
class JitCompilerX86;
| 
 | ||||
|  | @ -38,9 +39,9 @@ namespace RandomX { | |||
| 	public: | ||||
| 		JitCompilerX86(); | ||||
| 		~JitCompilerX86(); | ||||
| 		void generateProgram(Program&); | ||||
| 		void generateProgram(Program&, ProgramConfiguration&); | ||||
| 		template<bool superscalar> | ||||
| 		void generateProgramLight(Program&); | ||||
| 		void generateProgramLight(Program&, ProgramConfiguration&); | ||||
| 		template<size_t N> | ||||
| 		void generateSuperScalarHash(SuperscalarProgram (&programs)[N]); | ||||
| 		ProgramFunc getProgramFunc() { | ||||
|  | @ -73,7 +74,7 @@ namespace RandomX { | |||
| 
 | ||||
| 		void generateDatasetInitCode(); | ||||
| 
 | ||||
| 		void generateProgramPrologue(Program&); | ||||
| 		void generateProgramPrologue(Program&, ProgramConfiguration&); | ||||
| 		void generateProgramEpilogue(Program&); | ||||
| 		int getConditionRegister(); | ||||
| 		void genAddressReg(Instruction&, bool); | ||||
|  |  | |||
|  | @ -27,6 +27,11 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| 
 | ||||
| namespace RandomX { | ||||
| 
 | ||||
/// Per-program settings that VirtualMachine::initialize derives from the program's entropy and
/// that both the interpreter and the JIT compiler consume.
struct ProgramConfiguration {
	/// Bit pattern OR-ed into each 64-bit lane of a group E value: the exponent mask for -240
	/// combined with 22 low bits taken from program entropy (one word per lane).
	/// The 16-byte alignment is part of the type because the interpreter reads this array with
	/// the aligned load _mm_load_pd; size and member offsets are the same as without alignas.
	alignas(16) uint64_t eMask[2];
	/// Integer register indices (0 or 1, and 2 or 3) whose XOR is mixed into the scratchpad addresses.
	uint32_t readReg0;
	uint32_t readReg1;
	/// Integer register indices (4 or 5, and 6 or 7) whose XOR is mixed into mem.mx.
	uint32_t readReg2;
	uint32_t readReg3;
};
| 
 | ||||
| 	class Program { | ||||
| 	public: | ||||
| 		Instruction& operator()(int pc) { | ||||
|  |  | |||
|  | @ -77,14 +77,18 @@ namespace RandomX { | |||
| 		mem.ma = program.getEntropy(8) & CacheLineAlignMask; | ||||
| 		mem.mx = program.getEntropy(10); | ||||
| 		auto addressRegisters = program.getEntropy(12); | ||||
| 		readReg0 = 0 + (addressRegisters & 1); | ||||
| 		config.readReg0 = 0 + (addressRegisters & 1); | ||||
| 		addressRegisters >>= 1; | ||||
| 		readReg1 = 2 + (addressRegisters & 1); | ||||
| 		config.readReg1 = 2 + (addressRegisters & 1); | ||||
| 		addressRegisters >>= 1; | ||||
| 		readReg2 = 4 + (addressRegisters & 1); | ||||
| 		config.readReg2 = 4 + (addressRegisters & 1); | ||||
| 		addressRegisters >>= 1; | ||||
| 		readReg3 = 6 + (addressRegisters & 1); | ||||
| 		datasetBase = program.getEntropy(14) % datasetRange; | ||||
| 		config.readReg3 = 6 + (addressRegisters & 1); | ||||
| 		datasetBase = program.getEntropy(13) % datasetRange; | ||||
| 		constexpr uint64_t mask22bit = (1ULL << 22) - 1; | ||||
| 		constexpr uint64_t maskExp240 = ieee_get_exponent_mask<-240>(); | ||||
| 		store64(&config.eMask[0], (program.getEntropy(14) & mask22bit) | maskExp240); | ||||
| 		store64(&config.eMask[1], (program.getEntropy(15) & mask22bit) | maskExp240); | ||||
| 	} | ||||
| 
 | ||||
| 	template<bool softAes> | ||||
|  |  | |||
|  | @ -46,9 +46,9 @@ namespace RandomX { | |||
| 	protected: | ||||
| 		alignas(64) Program program; | ||||
| 		alignas(64) RegisterFile reg; | ||||
| 		alignas(16) ProgramConfiguration config; | ||||
| 		MemoryRegisters mem; | ||||
| 		uint8_t* scratchpad; | ||||
| 		uint32_t readReg0, readReg1, readReg2, readReg3; | ||||
| 		uint32_t datasetRange; | ||||
| 		uint32_t datasetBase; | ||||
| 	}; | ||||
|  |  | |||
|  | @ -311,6 +311,12 @@ inline __m128d load_cvt_i32x2(const void* addr) { | |||
| 	return _mm_cvtepi32_pd(ix); | ||||
| } | ||||
| 
 | ||||
/// Returns the exponent field of a double whose unbiased exponent is E, positioned for OR-ing
/// into the 64-bit representation: (E + 1023) shifted left by 52 bits.
///
/// @tparam E  unbiased exponent; must satisfy -1023 < E < 1024 so that the biased value is in
///            1..2046. Larger values would produce the all-ones exponent or spill past the
///            11-bit field, so they are rejected at compile time.
/// @return    the biased exponent in bits 52..62, all other bits zero
template<int E>
constexpr uint64_t ieee_get_exponent_mask() {
	static_assert(E > -1023, "Invalid exponent value");
	static_assert(E < 1024, "Invalid exponent value");
	// Widen before adding the bias so the arithmetic never depends on int/unsigned conversions.
	return static_cast<uint64_t>(static_cast<int64_t>(E) + 1023) << 52;
}
| 
 | ||||
| template<int E> | ||||
| __m128d ieee_set_exponent(__m128d x) { | ||||
| 	static_assert(E > -1023, "Invalid exponent value"); | ||||
|  |  | |||
|  | @ -396,7 +396,7 @@ int main(int argc, char** argv) { | |||
| 		std::cout << "Calculated result: "; | ||||
| 		result.print(std::cout); | ||||
| 		if(!legacy && programCount == 1000) | ||||
| 		std::cout << "Reference result:  af72d8069bd95ef04b414d3a83772c7bd2df454940bad15ae0b48543aeef8ab2" << std::endl; | ||||
| 		std::cout << "Reference result:  630ad3bc7f44fe8386462d7b671fa2a1167d3e062bfb9a2967f64832760cfedb" << std::endl; | ||||
| 		if (!miningMode) { | ||||
| 			std::cout << "Performance: " << 1000 * elapsed / programCount << " ms per hash" << std::endl; | ||||
| 		} | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue