mirror of
				https://git.wownero.com/wownero/RandomWOW.git
				synced 2024-08-15 00:23:14 +00:00 
			
		
		
		
	SuperscalarHash interpreter
Linux assembly code
This commit is contained in:
		
							parent
							
								
									b4c02051fa
								
							
						
					
					
						commit
						2132e5fef5
					
				
					 11 changed files with 310 additions and 74 deletions
				
			
		
							
								
								
									
										5
									
								
								makefile
									
										
									
									
									
								
							
							
						
						
									
										5
									
								
								makefile
									
										
									
									
									
								
							|  | @ -9,7 +9,7 @@ OBJDIR=obj | |||
| LDFLAGS=-lpthread | ||||
| CPPSRC=src/argon2_core.c src/Cache.cpp src/divideByConstantCodegen.c src/Instruction.cpp src/JitCompilerX86.cpp src/Program.cpp src/VirtualMachine.cpp src/argon2_ref.c src/CompiledVirtualMachine.cpp src/executeProgram-linux.cpp src/instructionsPortable.cpp src/LightClientAsyncWorker.cpp src/softAes.cpp src/virtualMemory.cpp src/AssemblyGeneratorX86.cpp  src/dataset.cpp src/hashAes1Rx4.cpp src/InterpretedVirtualMachine.cpp src/main.cpp src/TestAluFpu.cpp src/blake2/blake2b.c | ||||
| TOBJS=$(addprefix $(OBJDIR)/,instructionsPortable.o TestAluFpu.o) | ||||
| ROBJS=$(addprefix $(OBJDIR)/,argon2_core.o argon2_ref.o AssemblyGeneratorX86.o blake2b.o CompiledVirtualMachine.o CompiledLightVirtualMachine.o dataset.o JitCompilerX86.o instructionsPortable.o Instruction.o InterpretedVirtualMachine.o main.o Program.o softAes.o VirtualMachine.o Cache.o virtualMemory.o reciprocal.o LightClientAsyncWorker.o hashAes1Rx4.o) | ||||
| ROBJS=$(addprefix $(OBJDIR)/,argon2_core.o argon2_ref.o AssemblyGeneratorX86.o blake2b.o CompiledVirtualMachine.o CompiledLightVirtualMachine.o dataset.o JitCompilerX86.o instructionsPortable.o Instruction.o InterpretedVirtualMachine.o main.o softAes.o VirtualMachine.o Cache.o virtualMemory.o reciprocal.o LightClientAsyncWorker.o hashAes1Rx4.o LightProgramGenerator.o) | ||||
| ifeq ($(PLATFORM),amd64) | ||||
|     ROBJS += $(OBJDIR)/JitCompilerX86-static.o $(OBJDIR)/squareHash.o | ||||
|     CXXFLAGS += -maes | ||||
|  | @ -99,6 +99,9 @@ $(OBJDIR)/InterpretedVirtualMachine.o: $(addprefix $(SRCDIR)/,InterpretedVirtual | |||
| 
 | ||||
| $(OBJDIR)/LightClientAsyncWorker.o: $(addprefix $(SRCDIR)/,LightClientAsyncWorker.cpp LightClientAsyncWorker.hpp common.hpp) | $(OBJDIR) | ||||
| 	$(CXX) $(CXXFLAGS) -c $(SRCDIR)/LightClientAsyncWorker.cpp -o $@ | ||||
| 
 | ||||
| $(OBJDIR)/LightProgramGenerator.o: $(addprefix $(SRCDIR)/,LightProgramGenerator.cpp LightProgramGenerator.hpp Program.hpp blake2/blake2.h blake2/endian.h configuration.h) | $(OBJDIR) | ||||
| 	$(CXX) $(CXXFLAGS) -c $(SRCDIR)/LightProgramGenerator.cpp -o $@ | ||||
|    | ||||
| $(OBJDIR)/main.o: $(addprefix $(SRCDIR)/,main.cpp InterpretedVirtualMachine.hpp Stopwatch.hpp blake2/blake2.h VirtualMachine.hpp common.hpp blake2/endian.h Program.hpp Instruction.hpp intrinPortable.h CompiledVirtualMachine.hpp JitCompilerX86.hpp AssemblyGeneratorX86.hpp dataset.hpp Cache.hpp virtualMemory.hpp hashAes1Rx4.hpp softAes.h configuration.h) | $(OBJDIR) | ||||
| 	$(CXX) $(CXXFLAGS) -c $(SRCDIR)/main.cpp -o $@ | ||||
|  |  | |||
|  | @ -30,7 +30,7 @@ namespace RandomX { | |||
| 	typedef void(Instruction::*InstructionVisualizer)(std::ostream&) const; | ||||
| 
 | ||||
| 	namespace InstructionType { | ||||
| 		constexpr int IADD_R = 0; | ||||
| 		constexpr int IADD_RS = 0; | ||||
| 		constexpr int IADD_M = 1; | ||||
| 		constexpr int IADD_RC = 2; | ||||
| 		constexpr int ISUB_R = 3; | ||||
|  |  | |||
|  | @ -36,6 +36,7 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| #ifdef STATS | ||||
| #include <algorithm> | ||||
| #endif | ||||
| #include "LightProgramGenerator.hpp" | ||||
| 
 | ||||
| #ifdef FPUCHECK | ||||
| constexpr bool fpuCheck = true; | ||||
|  | @ -45,17 +46,20 @@ constexpr bool fpuCheck = false; | |||
| 
 | ||||
| namespace RandomX { | ||||
| 
 | ||||
| 	InterpretedVirtualMachine::~InterpretedVirtualMachine() { | ||||
| 
 | ||||
| 	} | ||||
| 
 | ||||
| 	void InterpretedVirtualMachine::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) { | ||||
// Attaches a dataset/cache to the interpreter.
//   ds       - stored into mem.ds.
//   size     - used to derive datasetRange (in units of CacheLineSize).
//   programs - forwarded to precompileSuperscalar() when the template
//              parameter superscalar is true; otherwise unused here.
// The read function is always set to datasetReadLight.
| 	template<bool superscalar> | ||||
| 	void InterpretedVirtualMachine<superscalar>::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) { | ||||
| 		mem.ds = ds; | ||||
| 		readDataset = &datasetReadLight; | ||||
		// NOTE(review): size is unsigned, so this presumably assumes
		// size + CacheLineSize >= RANDOMX_DATASET_SIZE - confirm with callers.
| 		datasetRange = (size - RANDOMX_DATASET_SIZE + CacheLineSize) / CacheLineSize; | ||||
		// superscalar is a compile-time template argument, so this branch is
		// fixed per instantiation.
| 		if(superscalar) | ||||
| 			precompileSuperscalar(programs); | ||||
| 	} | ||||
| 
 | ||||
| 	void InterpretedVirtualMachine::initialize() { | ||||
| 	template void InterpretedVirtualMachine<true>::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]); | ||||
| 	template void InterpretedVirtualMachine<false>::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]); | ||||
| 
 | ||||
| 	template<bool superscalar> | ||||
| 	void InterpretedVirtualMachine<superscalar>::initialize() { | ||||
| 		VirtualMachine::initialize(); | ||||
| 		for (unsigned i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) { | ||||
| 			program(i).src %= RegistersCount; | ||||
|  | @ -63,12 +67,19 @@ namespace RandomX { | |||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	void InterpretedVirtualMachine::executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) { | ||||
| 	template void InterpretedVirtualMachine<true>::initialize(); | ||||
| 	template void InterpretedVirtualMachine<false>::initialize(); | ||||
| 
 | ||||
// Runs the bytecode once by calling the single-instruction overload for
// ic = 0 .. RANDOMX_PROGRAM_SIZE - 1.
//   r, f, e, a - register files passed through unchanged to that overload.
| 	template<bool superscalar> | ||||
| 	void InterpretedVirtualMachine<superscalar>::executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) { | ||||
| 		for (int ic = 0; ic < RANDOMX_PROGRAM_SIZE; ++ic) { | ||||
			// The overload takes ic as int&, so it is able to change the loop
			// counter (presumably for jumps - TODO confirm in its body).
| 			executeBytecode(ic, r, f, e, a); | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	template void InterpretedVirtualMachine<true>::executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]); | ||||
| 	template void InterpretedVirtualMachine<false>::executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]); | ||||
| 
 | ||||
| 	static void print(int_reg_t r) { | ||||
| 		std::cout << std::hex << std::setw(16) << std::setfill('0') << r << std::endl; | ||||
| 	} | ||||
|  | @ -98,14 +109,15 @@ namespace RandomX { | |||
| 		return std::fpclassify(x) == FP_SUBNORMAL; | ||||
| 	} | ||||
| 
 | ||||
| 	FORCE_INLINE void InterpretedVirtualMachine::executeBytecode(int& ic, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) { | ||||
| 	 template<bool superscalar> | ||||
| 	 FORCE_INLINE void InterpretedVirtualMachine<superscalar>::executeBytecode(int& ic, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) { | ||||
| 		auto& ibc = byteCode[ic]; | ||||
| 		if (trace) std::cout << std::dec << std::setw(3) << ic << " " << program(ic); | ||||
| 		//if(trace) printState(r, f, e, a);
 | ||||
| 		switch (ibc.type) | ||||
| 		{ | ||||
| 			case InstructionType::IADD_R: { | ||||
| 				*ibc.idst += *ibc.isrc; | ||||
| 			case InstructionType::IADD_RS: { | ||||
| 				*ibc.idst += (*ibc.isrc << ibc.shift) + ibc.imm; | ||||
| 			} break; | ||||
| 
 | ||||
| 			case InstructionType::IADD_M: { | ||||
|  | @ -289,7 +301,8 @@ namespace RandomX { | |||
| #endif | ||||
| 	} | ||||
| 
 | ||||
| 	void InterpretedVirtualMachine::execute() { | ||||
| 	template<bool superscalar> | ||||
| 	void InterpretedVirtualMachine<superscalar>::execute() { | ||||
| 		int_reg_t r[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; | ||||
| 		__m128d f[4]; | ||||
| 		__m128d e[4]; | ||||
|  | @ -350,11 +363,16 @@ namespace RandomX { | |||
| 
 | ||||
| 			mem.mx ^= r[readReg2] ^ r[readReg3]; | ||||
| 			mem.mx &= CacheLineAlignMask; | ||||
| 			Cache& cache = mem.ds.cache; | ||||
| 			uint64_t datasetLine[CacheLineSize / sizeof(uint64_t)]; | ||||
| 			initBlock(cache, (uint8_t*)datasetLine, datasetBase + mem.ma / CacheLineSize, RANDOMX_CACHE_ACCESSES / 8); | ||||
| 			for (int i = 0; i < RegistersCount; ++i) | ||||
| 				r[i] ^= datasetLine[i]; | ||||
| 			if (superscalar) { | ||||
| 				executeSuperscalar(datasetBase + mem.ma / CacheLineSize, r); | ||||
| 			} | ||||
| 			else { | ||||
| 				Cache& cache = mem.ds.cache; | ||||
| 				uint64_t datasetLine[CacheLineSize / sizeof(uint64_t)]; | ||||
| 				initBlock(cache, (uint8_t*)datasetLine, datasetBase + mem.ma / CacheLineSize, RANDOMX_CACHE_ACCESSES / 8); | ||||
| 				for (int i = 0; i < RegistersCount; ++i) | ||||
| 					r[i] ^= datasetLine[i]; | ||||
| 			} | ||||
| 			std::swap(mem.mx, mem.ma); | ||||
| 
 | ||||
| 			if (trace) { | ||||
|  | @ -419,6 +437,9 @@ namespace RandomX { | |||
| 		_mm_store_pd(&reg.e[3].lo, e[3]); | ||||
| 	} | ||||
| 
 | ||||
| 	template void InterpretedVirtualMachine<true>::execute(); | ||||
| 	template void InterpretedVirtualMachine<false>::execute(); | ||||
| 
 | ||||
| 	static int getConditionRegister(int(&registerUsage)[8]) { | ||||
| 		int min = INT_MAX; | ||||
| 		int minIndex; | ||||
|  | @ -431,9 +452,118 @@ namespace RandomX { | |||
| 		return minIndex; | ||||
| 	} | ||||
| 
 | ||||
| 	constexpr uint64_t superscalarMul0 = 6364136223846793005ULL; | ||||
| 	constexpr uint64_t superscalarAdd1 = 9298410992540426048ULL; | ||||
| 	constexpr uint64_t superscalarAdd2 = 12065312585734608966ULL; | ||||
| 	constexpr uint64_t superscalarAdd3 = 9306329213124610396ULL; | ||||
| 	constexpr uint64_t superscalarAdd4 = 5281919268842080866ULL; | ||||
| 	constexpr uint64_t superscalarAdd5 = 10536153434571861004ULL; | ||||
| 	constexpr uint64_t superscalarAdd6 = 3398623926847679864ULL; | ||||
| 	constexpr uint64_t superscalarAdd7 = 9549104520008361294ULL; | ||||
| 
 | ||||
// Returns a pointer into cache.memory at the CacheLineSize-sized line
// selected by registerValue.
//   registerValue - value reduced to a line index; executeSuperscalar passes
//                   the block number first and a register value afterwards.
//   cache         - supplies memory and, in the growth case, size.
// With RANDOMX_ARGON_GROWTH == 0 the index is taken with a compile-time AND
// mask; otherwise it is taken modulo cache.size / CacheLineSize.
| 	static uint8_t* getMixBlock(uint64_t registerValue, Cache& cache) { | ||||
| 		uint8_t* mixBlock; | ||||
| 		if (RANDOMX_ARGON_GROWTH == 0) { | ||||
			// NOTE(review): the AND equals a modulo only if
			// RANDOMX_ARGON_MEMORY * ArgonBlockSize / CacheLineSize is a
			// power of two - confirm against the configuration.
| 			constexpr uint32_t mask = (RANDOMX_ARGON_MEMORY * ArgonBlockSize / CacheLineSize - 1); | ||||
| 			mixBlock = cache.memory + (registerValue & mask) * CacheLineSize; | ||||
| 		} | ||||
| 		else { | ||||
			// NOTE(review): the line count is narrowed to uint32_t; assumes
			// cache.size / CacheLineSize fits in 32 bits - confirm.
| 			const uint32_t modulus = cache.size / CacheLineSize; | ||||
| 			mixBlock = cache.memory + (registerValue % modulus) * CacheLineSize; | ||||
| 		} | ||||
| 		return mixBlock; | ||||
| 	} | ||||
| 
 | ||||
// Interprets the RANDOMX_CACHE_ACCESSES programs held in superScalarPrograms
// for one block number and XORs the eight resulting values into r.
//   blockNumber - seeds rl[0] and selects the first cache line.
//   r           - integer registers, modified in place (r[q] ^= rl[q]).
// Per program: a cache line is chosen with getMixBlock(), the program is
// executed on the local registers rl, the chosen line is XORed into rl, and
// the program's address register supplies the value for the next lookup.
| 	template<bool superscalar> | ||||
| 	void InterpretedVirtualMachine<superscalar>::executeSuperscalar(uint32_t blockNumber, int_reg_t(&r)[8]) { | ||||
| 		int_reg_t rl[8]; | ||||
| 		uint8_t* mixBlock; | ||||
| 		uint64_t registerValue = blockNumber; | ||||
		// NOTE(review): blockNumber is uint32_t, so blockNumber + 1 is formed
		// in 32 bits before the 64-bit multiply; the assembly version
		// (randomx_sshash_init) uses a 64-bit lea r8, [rbx+1]. The two only
		// differ for blockNumber == 0xFFFFFFFF - confirm that is unreachable.
| 		rl[0] = (blockNumber + 1) * superscalarMul0; | ||||
		// The remaining registers are rl[0] XORed with the superscalarAddN
		// constants (named "Add", but combined with XOR).
| 		rl[1] = rl[0] ^ superscalarAdd1; | ||||
| 		rl[2] = rl[0] ^ superscalarAdd2; | ||||
| 		rl[3] = rl[0] ^ superscalarAdd3; | ||||
| 		rl[4] = rl[0] ^ superscalarAdd4; | ||||
| 		rl[5] = rl[0] ^ superscalarAdd5; | ||||
| 		rl[6] = rl[0] ^ superscalarAdd6; | ||||
| 		rl[7] = rl[0] ^ superscalarAdd7; | ||||
| 		Cache& cache = mem.ds.cache; | ||||
| 		for (unsigned i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) { | ||||
			// The cache line address is fixed before the program runs; the
			// line itself is consumed only after the inner loop.
| 			mixBlock = getMixBlock(registerValue, cache); | ||||
| 			LightProgram& prog = superScalarPrograms[i]; | ||||
| 			for (unsigned j = 0; j < prog.getSize(); ++j) { | ||||
| 				Instruction& instr = prog(j); | ||||
| 				switch (instr.opcode) | ||||
| 				{ | ||||
| 					case RandomX::LightInstructionType::ISUB_R: | ||||
| 						rl[instr.dst] -= rl[instr.src]; | ||||
| 						break; | ||||
| 					case RandomX::LightInstructionType::IXOR_R: | ||||
| 						rl[instr.dst] ^= rl[instr.src]; | ||||
| 						break; | ||||
					// Source is shifted left by instr.mod % 4 (0..3) before the add.
| 					case RandomX::LightInstructionType::IADD_RS: | ||||
| 						rl[instr.dst] += rl[instr.src] << (instr.mod % 4); | ||||
| 						break; | ||||
| 					case RandomX::LightInstructionType::IMUL_R: | ||||
| 						rl[instr.dst] *= rl[instr.src]; | ||||
| 						break; | ||||
| 					case RandomX::LightInstructionType::IROR_C: | ||||
| 						rl[instr.dst] = rotr(rl[instr.dst], instr.getImm32()); | ||||
| 						break; | ||||
					// The C7/C8/C9 variants are interpreted identically here.
| 					case RandomX::LightInstructionType::IADD_C7: | ||||
| 					case RandomX::LightInstructionType::IADD_C8: | ||||
| 					case RandomX::LightInstructionType::IADD_C9: | ||||
| 						rl[instr.dst] += signExtend2sCompl(instr.getImm32()); | ||||
| 						break; | ||||
| 					case RandomX::LightInstructionType::IXOR_C7: | ||||
| 					case RandomX::LightInstructionType::IXOR_C8: | ||||
| 					case RandomX::LightInstructionType::IXOR_C9: | ||||
| 						rl[instr.dst] ^= signExtend2sCompl(instr.getImm32()); | ||||
| 						break; | ||||
| 					case RandomX::LightInstructionType::IMULH_R: | ||||
| 						rl[instr.dst] = mulh(rl[instr.dst], rl[instr.src]); | ||||
| 						break; | ||||
| 					case RandomX::LightInstructionType::ISMULH_R: | ||||
| 						rl[instr.dst] = smulh(rl[instr.dst], rl[instr.src]); | ||||
| 						break; | ||||
					// The immediate is an index into reciprocals, as written by
					// precompileSuperscalar(), not the original divisor.
| 					case RandomX::LightInstructionType::IMUL_RCP: | ||||
| 						rl[instr.dst] *= reciprocals[instr.getImm32()]; | ||||
| 						break; | ||||
| 					default: | ||||
| 						UNREACHABLE; | ||||
| 				} | ||||
| 			} | ||||
| 			 | ||||
			// Mix the 64 bytes of the selected cache line into the registers.
| 			for(unsigned q = 0; q < 8; ++q) | ||||
| 				rl[q] ^= load64(mixBlock + 8 * q); | ||||
| 
 | ||||
			// The next cache line is addressed by this program's address register.
| 			registerValue = rl[prog.getAddressRegister()]; | ||||
| 		} | ||||
| 
 | ||||
| 		for (unsigned q = 0; q < 8; ++q) | ||||
| 			r[q] ^= rl[q]; | ||||
| 	} | ||||
| 
 | ||||
// Copies the given programs into superScalarPrograms and prepares them for
// executeSuperscalar(): for every IMUL_RCP instruction the reciprocal of its
// 32-bit immediate is computed once, appended to reciprocals, and the
// immediate is replaced by the index of that entry.
//   programs - source programs; only the member copy is rewritten.
| 	template<bool superscalar> | ||||
| 	void InterpretedVirtualMachine<superscalar>::precompileSuperscalar(LightProgram* programs) { | ||||
		// NOTE(review): a raw memcpy of sizeof(superScalarPrograms) bytes assumes
		// LightProgram is trivially copyable and that programs points to at
		// least RANDOMX_CACHE_ACCESSES elements - confirm.
| 		memcpy(superScalarPrograms, programs, sizeof(superScalarPrograms)); | ||||
		// Drop entries from any previous call so indices start at 0 again.
| 		reciprocals.clear(); | ||||
| 		for (unsigned i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) { | ||||
| 			for (unsigned j = 0; j < superScalarPrograms[i].getSize(); ++j) { | ||||
| 				Instruction& instr = superScalarPrograms[i](j); | ||||
| 				if (instr.opcode == LightInstructionType::IMUL_RCP) { | ||||
| 					auto rcp = reciprocal(instr.getImm32()); | ||||
					// size() before push_back is the index the new entry will get.
| 					instr.setImm32(reciprocals.size()); | ||||
| 					reciprocals.push_back(rcp); | ||||
| 				}	 | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| #include "instructionWeights.hpp" | ||||
| 
 | ||||
| 	void InterpretedVirtualMachine::precompileProgram(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) { | ||||
| 	template<bool superscalar> | ||||
| 	void InterpretedVirtualMachine<superscalar>::precompileProgram(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) { | ||||
| 		int registerUsage[8]; | ||||
| 		for (unsigned i = 0; i < 8; ++i) { | ||||
| 			registerUsage[i] = -1; | ||||
|  | @ -445,14 +575,17 @@ namespace RandomX { | |||
| 				CASE_REP(IADD_RS) { | ||||
| 					auto dst = instr.dst % RegistersCount; | ||||
| 					auto src = instr.src % RegistersCount; | ||||
| 					ibc.type = InstructionType::IADD_R; | ||||
| 					ibc.type = InstructionType::IADD_RS; | ||||
| 					ibc.idst = &r[dst]; | ||||
| 					if (src != dst) { | ||||
| 					if (dst != 5) { | ||||
| 						ibc.isrc = &r[src]; | ||||
| 						ibc.shift = instr.mod % 4; | ||||
| 						ibc.imm = 0; | ||||
| 					} | ||||
| 					else { | ||||
| 						ibc.isrc = &r[src]; | ||||
| 						ibc.shift = instr.mod % 4; | ||||
| 						ibc.imm = signExtend2sCompl(instr.getImm32()); | ||||
| 						ibc.isrc = &ibc.imm; | ||||
| 					} | ||||
| 					registerUsage[instr.dst] = i; | ||||
| 				} break; | ||||
|  |  | |||
|  | @ -23,23 +23,17 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| #include "VirtualMachine.hpp" | ||||
| #include "Program.hpp" | ||||
| #include "intrinPortable.h" | ||||
| #include <vector> | ||||
| 
 | ||||
| namespace RandomX { | ||||
| 
 | ||||
| 	class ITransform { | ||||
| 	public: | ||||
| 		virtual int32_t apply(int32_t) const = 0; | ||||
| 		virtual const char* getName() const = 0; | ||||
| 		virtual std::ostream& printAsm(std::ostream&) const = 0; | ||||
| 		virtual std::ostream& printCxx(std::ostream&) const = 0; | ||||
| 	}; | ||||
| 
 | ||||
| 	struct InstructionByteCode; | ||||
| 	class InterpretedVirtualMachine; | ||||
| 	template<bool superscalar> class InterpretedVirtualMachine; | ||||
| 
 | ||||
| 	typedef void(InterpretedVirtualMachine::*InstructionHandler)(Instruction&); | ||||
| 	template<bool superscalar> | ||||
| 	using InstructionHandler = void(InterpretedVirtualMachine<superscalar>::*)(Instruction&); | ||||
| 
 | ||||
| 	struct alignas(8) InstructionByteCode { | ||||
| 	struct InstructionByteCode { | ||||
| 		union { | ||||
| 			int_reg_t* idst; | ||||
| 			__m128d* fdst; | ||||
|  | @ -62,6 +56,7 @@ namespace RandomX { | |||
| 
 | ||||
| 	constexpr int asedwfagdewsa = sizeof(InstructionByteCode); | ||||
| 
 | ||||
| 	template<bool superscalar> | ||||
| 	class InterpretedVirtualMachine : public VirtualMachine { | ||||
| 	public: | ||||
| 		void* operator new(size_t size) { | ||||
|  | @ -74,16 +69,17 @@ namespace RandomX { | |||
| 			_mm_free(ptr); | ||||
| 		} | ||||
| 		InterpretedVirtualMachine(bool soft) : softAes(soft) {} | ||||
| 		~InterpretedVirtualMachine(); | ||||
| 		~InterpretedVirtualMachine() {} | ||||
| 		void setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) override; | ||||
| 		void initialize() override; | ||||
| 		void execute() override; | ||||
| 	private: | ||||
| 		static InstructionHandler engine[256]; | ||||
| 		static InstructionHandler<superscalar> engine[256]; | ||||
| 		DatasetReadFunc readDataset; | ||||
| 		bool softAes; | ||||
| 		InstructionByteCode byteCode[RANDOMX_PROGRAM_SIZE]; | ||||
| 		 | ||||
| 		std::vector<uint64_t> reciprocals; | ||||
| 		alignas(64) LightProgram superScalarPrograms[RANDOMX_CACHE_ACCESSES]; | ||||
| #ifdef STATS | ||||
| 		int count_ADD_64 = 0; | ||||
| 		int count_ADD_32 = 0; | ||||
|  | @ -131,7 +127,9 @@ namespace RandomX { | |||
| 		int datasetAccess[256] = { 0 }; | ||||
| #endif | ||||
| 		void precompileProgram(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]); | ||||
| 		void precompileSuperscalar(LightProgram*); | ||||
| 		void executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]); | ||||
| 		void executeBytecode(int& i, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]); | ||||
| 		void executeSuperscalar(uint32_t blockNumber, int_reg_t(&r)[8]); | ||||
| 	}; | ||||
| } | ||||
|  | @ -32,10 +32,18 @@ | |||
| .global DECL(randomx_program_start) | ||||
| .global DECL(randomx_program_read_dataset) | ||||
| .global DECL(randomx_program_read_dataset_light) | ||||
| .global DECL(randomx_program_read_dataset_sshash_init) | ||||
| .global DECL(randomx_program_read_dataset_sshash_fin) | ||||
| .global DECL(randomx_program_read_dataset_light_sub) | ||||
| .global DECL(randomx_dataset_init) | ||||
| .global DECL(randomx_program_loop_store) | ||||
| .global DECL(randomx_program_loop_end) | ||||
| .global DECL(randomx_program_read_dataset_light_sub) | ||||
| .global DECL(randomx_program_epilogue) | ||||
| .global DECL(randomx_sshash_load) | ||||
| .global DECL(randomx_sshash_prefetch) | ||||
| .global DECL(randomx_sshash_end) | ||||
| .global DECL(randomx_sshash_init) | ||||
| .global DECL(randomx_program_end) | ||||
| 
 | ||||
| #define db .byte | ||||
|  | @ -63,6 +71,12 @@ DECL(randomx_program_read_dataset): | |||
| DECL(randomx_program_read_dataset_light): | ||||
| 	#include "asm/program_read_dataset_light.inc" | ||||
| 
 | ||||
| DECL(randomx_program_read_dataset_sshash_init): | ||||
| 	#include "asm/program_read_dataset_sshash_init.inc" | ||||
| 
 | ||||
| DECL(randomx_program_read_dataset_sshash_fin): | ||||
| 	#include "asm/program_read_dataset_sshash_fin.inc" | ||||
| 
 | ||||
| DECL(randomx_program_loop_store): | ||||
| 	#include "asm/program_loop_store.inc" | ||||
| 
 | ||||
|  | @ -75,10 +89,84 @@ DECL(randomx_program_read_dataset_light_sub): | |||
| squareHashSub: | ||||
| 	#include "asm/squareHash.inc" | ||||
| 
 | ||||
| .balign 64
 | ||||
;# randomx_dataset_init: writes consecutive 64-byte blocks to the dataset.
;# Inputs (see the register notes below): rdi = cache, rsi = dataset write
;# pointer, rdx = first block index, rcx = max. block index.
;# rbx, rbp and r12-r15 are pushed on entry and popped before ret.
| DECL(randomx_dataset_init): | ||||
| 	push rbx | ||||
| 	push rbp | ||||
| 	push r12 | ||||
| 	push r13 | ||||
| 	push r14 | ||||
| 	push r15 | ||||
| 	;# cache in rdi
 | ||||
| 	;# dataset in rsi
 | ||||
| 	mov rbp, rdx  ;# block index
 | ||||
| 	push rcx      ;# max. block index
 | ||||
;# The loop test is at the bottom, so the body runs at least once.
| init_block_loop: | ||||
| 	prefetchw byte ptr [rsi] | ||||
| 	mov rbx, rbp | ||||
;# Hand-encoded call rel32. The displacement is relative to call_offset (the
;# address following the call), so the target is
;# DECL(randomx_dataset_init) + 32768.
;# NOTE(review): presumably generated SuperscalarHash code is placed at that
;# offset and returns the block in r8-r15 - confirm against the JIT compiler.
| 	.byte 232 ;# 0xE8 = call
 | ||||
| 	;# .set CALL_LOC, 
 | ||||
| 	.int 32768 - (call_offset - DECL(randomx_dataset_init)) | ||||
| call_offset: | ||||
;# Store r8-r15 as the 64-byte block at rsi.
| 	mov qword ptr [rsi+0], r8 | ||||
| 	mov qword ptr [rsi+8], r9 | ||||
| 	mov qword ptr [rsi+16], r10 | ||||
| 	mov qword ptr [rsi+24], r11 | ||||
| 	mov qword ptr [rsi+32], r12 | ||||
| 	mov qword ptr [rsi+40], r13 | ||||
| 	mov qword ptr [rsi+48], r14 | ||||
| 	mov qword ptr [rsi+56], r15 | ||||
| 	add rbp, 1 | ||||
| 	add rsi, 64 | ||||
;# [rsp] holds the max. block index pushed from rcx above.
| 	cmp rbp, qword ptr [rsp] | ||||
| 	jb init_block_loop | ||||
;# Discard the saved max. block index, then restore the saved registers.
| 	pop rcx | ||||
| 	pop r15 | ||||
| 	pop r14 | ||||
| 	pop r13 | ||||
| 	pop r12 | ||||
| 	pop rbp | ||||
| 	pop rbx | ||||
| 	ret | ||||
| 
 | ||||
| .balign 64
 | ||||
| DECL(randomx_program_epilogue): | ||||
| 	#include "asm/program_epilogue_linux.inc" | ||||
| 
 | ||||
| .balign 64
 | ||||
| DECL(randomx_sshash_load): | ||||
| 	#include "asm/program_sshash_load.inc" | ||||
| 
 | ||||
| DECL(randomx_sshash_prefetch): | ||||
| 	#include "asm/program_sshash_prefetch.inc" | ||||
| 
 | ||||
| DECL(randomx_sshash_end): | ||||
| 	nop | ||||
| 
 | ||||
| .balign 64
 | ||||
;# randomx_sshash_init: sets up r8-r15 from the block number in rbx.
;#   r8 = (rbx + 1) * r0_mul
;#   rN = rK_add XOR r8 for r9..r15 (r1_add..r7_add)
;# This mirrors the rl[] setup in the C++ executeSuperscalar(), which uses the
;# same constants (superscalarMul0, superscalarAdd1..7).
| DECL(randomx_sshash_init): | ||||
| 	lea r8, [rbx+1] | ||||
;# The prefetch snippet is included between the lea and the multiply.
| 	#include "asm/program_sshash_prefetch.inc" | ||||
| 	imul r8, qword ptr r0_mul[rip] | ||||
| 	mov r9, qword ptr r1_add[rip] | ||||
| 	xor r9, r8 | ||||
| 	mov r10, qword ptr r2_add[rip] | ||||
| 	xor r10, r8 | ||||
| 	mov r11, qword ptr r3_add[rip] | ||||
| 	xor r11, r8 | ||||
| 	mov r12, qword ptr r4_add[rip] | ||||
| 	xor r12, r8 | ||||
| 	mov r13, qword ptr r5_add[rip] | ||||
| 	xor r13, r8 | ||||
| 	mov r14, qword ptr r6_add[rip] | ||||
| 	xor r14, r8 | ||||
| 	mov r15, qword ptr r7_add[rip] | ||||
| 	xor r15, r8 | ||||
;# NOTE(review): randomx_program_end is only a nop label at the end of this
;# file; presumably this jump is a template that the JIT retargets - confirm.
| 	jmp DECL(randomx_program_end) | ||||
| 
 | ||||
| .balign 64
 | ||||
| 	#include "asm/program_sshash_constants.inc" | ||||
| 	 | ||||
| .balign 64
 | ||||
| DECL(randomx_program_end): | ||||
| 	nop | ||||
|  |  | |||
|  | @ -68,35 +68,11 @@ randomx_program_read_dataset_light PROC | |||
| randomx_program_read_dataset_light ENDP | ||||
| 
 | ||||
| randomx_program_read_dataset_sshash_init PROC | ||||
| 	sub rsp, 72 | ||||
| 	mov qword ptr [rsp+64], rbx | ||||
| 	mov qword ptr [rsp+56], r8 | ||||
| 	mov qword ptr [rsp+48], r9 | ||||
| 	mov qword ptr [rsp+40], r10 | ||||
| 	mov qword ptr [rsp+32], r11 | ||||
| 	mov qword ptr [rsp+24], r12 | ||||
| 	mov qword ptr [rsp+16], r13 | ||||
| 	mov qword ptr [rsp+8], r14 | ||||
| 	mov qword ptr [rsp+0], r15 | ||||
| 	xor rbp, rax                       ;# modify "mx" | ||||
| 	ror rbp, 32                        ;# swap "ma" and "mx" | ||||
| 	mov ebx, ebp                       ;# ebx = ma | ||||
| 	and ebx, 2147483584                ;# align "ma" to the start of a cache line | ||||
| 	shr ebx, 6                         ;# ebx = Dataset block number | ||||
| 	;# call 32768 | ||||
| 	include asm/program_read_dataset_sshash_init.inc | ||||
| randomx_program_read_dataset_sshash_init ENDP | ||||
| 
 | ||||
| randomx_program_read_dataset_sshash_fin PROC | ||||
| 	mov rbx, qword ptr [rsp+64] | ||||
| 	xor r8, qword ptr [rsp+56] | ||||
| 	xor r9, qword ptr [rsp+48] | ||||
| 	xor r10, qword ptr [rsp+40] | ||||
| 	xor r11, qword ptr [rsp+32] | ||||
| 	xor r12, qword ptr [rsp+24] | ||||
| 	xor r13, qword ptr [rsp+16] | ||||
| 	xor r14, qword ptr [rsp+8] | ||||
| 	xor r15, qword ptr [rsp+0] | ||||
| 	add rsp, 72 | ||||
| 	include asm/program_read_dataset_sshash_fin.inc | ||||
| randomx_program_read_dataset_sshash_fin ENDP | ||||
| 
 | ||||
| randomx_program_loop_store PROC | ||||
|  |  | |||
|  | @ -17,10 +17,11 @@ You should have received a copy of the GNU General Public License | |||
| along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 | ||||
| */ | ||||
| 
 | ||||
| #include <stddef.h> | ||||
| #include "blake2/blake2.h" | ||||
| #include "configuration.h" | ||||
| #include "Program.hpp" | ||||
| #include "blake2/endian.h"; | ||||
| #include "blake2/endian.h" | ||||
| #include <iostream> | ||||
| #include <vector> | ||||
| #include <algorithm> | ||||
|  | @ -793,7 +794,7 @@ namespace RandomX { | |||
| 				mop.setCycle(scheduleCycle); | ||||
| 				if (scheduleCycle < 0) { | ||||
| 					if (TRACE) std::cout << "; Failed at cycle " << cycle << std::endl; | ||||
| 					return DBL_MIN; | ||||
| 					return 0; | ||||
| 				} | ||||
| 
 | ||||
| 				if (instrIndex == currentInstruction.getInfo().getSrcOp()) { | ||||
|  |  | |||
							
								
								
									
										10
									
								
								src/asm/program_read_dataset_sshash_fin.inc
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										10
									
								
								src/asm/program_read_dataset_sshash_fin.inc
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,10 @@ | |||
| 	mov rbx, qword ptr [rsp+64] | ||||
| 	xor r8, qword ptr [rsp+56] | ||||
| 	xor r9, qword ptr [rsp+48] | ||||
| 	xor r10, qword ptr [rsp+40] | ||||
| 	xor r11, qword ptr [rsp+32] | ||||
| 	xor r12, qword ptr [rsp+24] | ||||
| 	xor r13, qword ptr [rsp+16] | ||||
| 	xor r14, qword ptr [rsp+8] | ||||
| 	xor r15, qword ptr [rsp+0] | ||||
| 	add rsp, 72 | ||||
							
								
								
									
										16
									
								
								src/asm/program_read_dataset_sshash_init.inc
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										16
									
								
								src/asm/program_read_dataset_sshash_init.inc
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,16 @@ | |||
| 	sub rsp, 72 | ||||
| 	mov qword ptr [rsp+64], rbx | ||||
| 	mov qword ptr [rsp+56], r8 | ||||
| 	mov qword ptr [rsp+48], r9 | ||||
| 	mov qword ptr [rsp+40], r10 | ||||
| 	mov qword ptr [rsp+32], r11 | ||||
| 	mov qword ptr [rsp+24], r12 | ||||
| 	mov qword ptr [rsp+16], r13 | ||||
| 	mov qword ptr [rsp+8], r14 | ||||
| 	mov qword ptr [rsp+0], r15 | ||||
| 	xor rbp, rax                       ;# modify "mx"
 | ||||
| 	ror rbp, 32                        ;# swap "ma" and "mx"
 | ||||
| 	mov ebx, ebp                       ;# ebx = ma
 | ||||
| 	and ebx, 2147483584                ;# align "ma" to the start of a cache line
 | ||||
| 	shr ebx, 6                         ;# ebx = Dataset block number
 | ||||
| 	;# call 32768
 | ||||
|  | @ -1,16 +1,24 @@ | |||
| r0_mul: ;# 6364136223846793005
 | ||||
| r0_mul: | ||||
| 	;#/ 6364136223846793005
 | ||||
| 	db 45, 127, 149, 76, 45, 244, 81, 88 | ||||
| r1_add: ;# 9298410992540426048
 | ||||
| r1_add: | ||||
| 	;#/ 9298410992540426048
 | ||||
| 	db 64, 159, 245, 89, 136, 151, 10, 129 | ||||
| r2_add: ;# 12065312585734608966
 | ||||
| r2_add: | ||||
| 	;#/ 12065312585734608966
 | ||||
| 	db 70, 216, 194, 56, 223, 153, 112, 167 | ||||
| r3_add: ;# 9306329213124610396
 | ||||
| r3_add: | ||||
| 	;#/ 9306329213124610396
 | ||||
| 	db 92, 9, 34, 191, 28, 185, 38, 129 | ||||
| r4_add: ;# 5281919268842080866
 | ||||
| r4_add: | ||||
| 	;#/ 5281919268842080866
 | ||||
| 	db 98, 138, 159, 23, 151, 37, 77, 73 | ||||
| r5_add: ;# 10536153434571861004
 | ||||
| r5_add: | ||||
| 	;#/ 10536153434571861004
 | ||||
| 	db 12, 236, 170, 206, 185, 239, 55, 146 | ||||
| r6_add: ;# 3398623926847679864
 | ||||
| r6_add: | ||||
| 	;#/ 3398623926847679864
 | ||||
| 	db 120, 45, 230, 108, 116, 86, 42, 47 | ||||
| r7_add: ;# 9549104520008361294
 | ||||
| r7_add: | ||||
| 	;#/ 9549104520008361294
 | ||||
| 	db 78, 229, 44, 182, 247, 59, 133, 132 | ||||
|  | @ -301,6 +301,7 @@ int main(int argc, char** argv) { | |||
| 				RandomX::JitCompilerX86 jit86; | ||||
| 				jit86.generateSuperScalarHash(programs); | ||||
| 				jit86.getDatasetInitFunc()(cache.memory, dataset.dataset.memory, 0, datasetBlockCount); | ||||
| 			//dump((const char*)dataset.dataset.memory, RANDOMX_DATASET_SIZE, "dataset.dat");
 | ||||
| 			} | ||||
| 			else { | ||||
| 				if (initThreadCount > 1) { | ||||
|  | @ -331,10 +332,12 @@ int main(int argc, char** argv) { | |||
| 			else { | ||||
| 				if (jit && useSuperscalar) | ||||
| 					vm = new RandomX::CompiledLightVirtualMachine<true>(); | ||||
| 				else if(jit) | ||||
| 				else if (jit) | ||||
| 					vm = new RandomX::CompiledLightVirtualMachine<false>(); | ||||
| 				else if (useSuperscalar) | ||||
| 					vm = new RandomX::InterpretedVirtualMachine<true>(softAes); | ||||
| 				else | ||||
| 					vm = new RandomX::InterpretedVirtualMachine(softAes); | ||||
| 					vm = new RandomX::InterpretedVirtualMachine<false>(softAes); | ||||
| 			} | ||||
| 			vm->setDataset(dataset, datasetSize, programs); | ||||
| 			vms.push_back(vm); | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue