mirror of
				https://git.wownero.com/wownero/RandomWOW.git
				synced 2024-08-15 00:23:14 +00:00 
			
		
		
		
	Reworked SuperscalarHash instruction set
ASM and C code generator for SuperscalarHash. Support for SuperscalarHash in the light mode.
This commit is contained in:
		
							parent
							
								
									6e3136b37f
								
							
						
					
					
						commit
						b4c02051fa
					
				
					 16 changed files with 505 additions and 212 deletions
				
			
		|  | @ -23,6 +23,7 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| #include "common.hpp" | ||||
| #include "reciprocal.h" | ||||
| #include "Program.hpp" | ||||
| #include "./LightProgramGenerator.hpp" | ||||
| 
 | ||||
| namespace RandomX { | ||||
| 
 | ||||
|  | @ -46,6 +47,179 @@ namespace RandomX { | |||
| 	static const char* regDatasetAddr = "rdi"; | ||||
| 	static const char* regScratchpadAddr = "rsi"; | ||||
| 
 | ||||
| 	void AssemblyGeneratorX86::generateProgram(Program& prog) { | ||||
| 		for (unsigned i = 0; i < 8; ++i) { | ||||
| 			registerUsage[i] = -1; | ||||
| 		} | ||||
| 		asmCode.str(std::string()); //clear
 | ||||
| 		for (unsigned i = 0; i < prog.getSize(); ++i) { | ||||
| 			asmCode << "randomx_isn_" << i << ":" << std::endl; | ||||
| 			Instruction& instr = prog(i); | ||||
| 			instr.src %= RegistersCount; | ||||
| 			instr.dst %= RegistersCount; | ||||
| 			generateCode(instr, i); | ||||
| 			//asmCode << std::endl;
 | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	void AssemblyGeneratorX86::generateAsm(LightProgram& prog) { | ||||
| 		asmCode.str(std::string()); //clear
 | ||||
| 		asmCode << "ALIGN 16" << std::endl; | ||||
| 		for (unsigned i = 0; i < prog.getSize(); ++i) { | ||||
| 			Instruction& instr = prog(i); | ||||
| 			switch (instr.opcode) | ||||
| 			{ | ||||
| 			case RandomX::LightInstructionType::ISUB_R: | ||||
| 				asmCode << "sub " << regR[instr.dst] << ", " << regR[instr.src] << std::endl; | ||||
| 				break; | ||||
| 			case RandomX::LightInstructionType::IXOR_R: | ||||
| 				asmCode << "xor " << regR[instr.dst] << ", " << regR[instr.src] << std::endl; | ||||
| 				break; | ||||
| 			case RandomX::LightInstructionType::IADD_RS: | ||||
| 				asmCode << "lea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.mod % 4)) << "]" << std::endl; | ||||
| 				break; | ||||
| 			case RandomX::LightInstructionType::IMUL_R: | ||||
| 				asmCode << "imul " << regR[instr.dst] << ", " << regR[instr.src] << std::endl; | ||||
| 				break; | ||||
| 			case RandomX::LightInstructionType::IROR_C: | ||||
| 				asmCode << "ror " << regR[instr.dst] << ", " << instr.getImm32() << std::endl; | ||||
| 				break; | ||||
| 			case RandomX::LightInstructionType::IADD_C7: | ||||
| 				asmCode << "add " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl; | ||||
| 				break; | ||||
| 			case RandomX::LightInstructionType::IXOR_C7: | ||||
| 				asmCode << "xor " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl; | ||||
| 				break; | ||||
| 			case RandomX::LightInstructionType::IADD_C8: | ||||
| 				asmCode << "add " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl; | ||||
| 				asmCode << "nop" << std::endl; | ||||
| 				break; | ||||
| 			case RandomX::LightInstructionType::IXOR_C8: | ||||
| 				asmCode << "xor " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl; | ||||
| 				asmCode << "nop" << std::endl; | ||||
| 				break; | ||||
| 			case RandomX::LightInstructionType::IADD_C9: | ||||
| 				asmCode << "add " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl; | ||||
| 				asmCode << "xchg ax, ax ;nop" << std::endl; | ||||
| 				break; | ||||
| 			case RandomX::LightInstructionType::IXOR_C9: | ||||
| 				asmCode << "xor " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl; | ||||
| 				asmCode << "xchg ax, ax ;nop" << std::endl; | ||||
| 				break; | ||||
| 			case RandomX::LightInstructionType::IMULH_R: | ||||
| 				asmCode << "mov rax, " << regR[instr.dst] << std::endl; | ||||
| 				asmCode << "mul " << regR[instr.src] << std::endl; | ||||
| 				asmCode << "mov " << regR[instr.dst] << ", rdx" << std::endl; | ||||
| 				break; | ||||
| 			case RandomX::LightInstructionType::ISMULH_R: | ||||
| 				asmCode << "mov rax, " << regR[instr.dst] << std::endl; | ||||
| 				asmCode << "imul " << regR[instr.src] << std::endl; | ||||
| 				asmCode << "mov " << regR[instr.dst] << ", rdx" << std::endl; | ||||
| 				break; | ||||
| 			case RandomX::LightInstructionType::IMUL_RCP: | ||||
| 				asmCode << "mov rax, " << (int64_t)reciprocal(instr.getImm32()) << std::endl; | ||||
| 				asmCode << "imul " << regR[instr.dst] << ", rax" << std::endl; | ||||
| 				break; | ||||
| 			default: | ||||
| 				UNREACHABLE; | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	void AssemblyGeneratorX86::generateC(LightProgram& prog) { | ||||
| 		asmCode.str(std::string()); //clear
 | ||||
| 		asmCode << "#include <stdint.h>" << std::endl; | ||||
| 		asmCode << "#if defined(__SIZEOF_INT128__)" << std::endl; | ||||
| 		asmCode << "	static inline uint64_t mulh(uint64_t a, uint64_t b) {" << std::endl; | ||||
| 		asmCode << "		return ((unsigned __int128)a * b) >> 64;" << std::endl; | ||||
| 		asmCode << "	}" << std::endl; | ||||
| 		asmCode << "	static inline int64_t smulh(int64_t a, int64_t b) {" << std::endl; | ||||
| 		asmCode << "		return ((__int128)a * b) >> 64;" << std::endl; | ||||
| 		asmCode << "	}" << std::endl; | ||||
| 		asmCode << "	#define HAVE_MULH" << std::endl; | ||||
| 		asmCode << "	#define HAVE_SMULH" << std::endl; | ||||
| 		asmCode << "#endif" << std::endl; | ||||
| 		asmCode << "#if defined(_MSC_VER)" << std::endl; | ||||
| 		asmCode << "	#define HAS_VALUE(X) X ## 0" << std::endl; | ||||
| 		asmCode << "	#define EVAL_DEFINE(X) HAS_VALUE(X)" << std::endl; | ||||
| 		asmCode << "	#include <intrin.h>" << std::endl; | ||||
| 		asmCode << "	#include <stdlib.h>" << std::endl; | ||||
| 		asmCode << "	static __inline uint64_t rotr(uint64_t x , int c) {" << std::endl; | ||||
| 		asmCode << "		return _rotr64(x, c);" << std::endl; | ||||
| 		asmCode << "	}" << std::endl; | ||||
| 		asmCode << "	#define HAVE_ROTR" << std::endl; | ||||
| 		asmCode << "	#if EVAL_DEFINE(__MACHINEARM64_X64(1))" << std::endl; | ||||
| 		asmCode << "		static __inline uint64_t mulh(uint64_t a, uint64_t b) {" << std::endl; | ||||
| 		asmCode << "			return __umulh(a, b);" << std::endl; | ||||
| 		asmCode << "		}" << std::endl; | ||||
| 		asmCode << "		#define HAVE_MULH" << std::endl; | ||||
| 		asmCode << "	#endif" << std::endl; | ||||
| 		asmCode << "	#if EVAL_DEFINE(__MACHINEX64(1))" << std::endl; | ||||
| 		asmCode << "		static __inline int64_t smulh(int64_t a, int64_t b) {" << std::endl; | ||||
| 		asmCode << "			int64_t hi;" << std::endl; | ||||
| 		asmCode << "			_mul128(a, b, &hi);" << std::endl; | ||||
| 		asmCode << "			return hi;" << std::endl; | ||||
| 		asmCode << "		}" << std::endl; | ||||
| 		asmCode << "		#define HAVE_SMULH" << std::endl; | ||||
| 		asmCode << "	#endif" << std::endl; | ||||
| 		asmCode << "#endif" << std::endl; | ||||
| 		asmCode << "#ifndef HAVE_ROTR" << std::endl; | ||||
| 		asmCode << "	static inline uint64_t rotr(uint64_t a, int b) {" << std::endl; | ||||
| 		asmCode << "		return (a >> b) | (a << (64 - b));" << std::endl; | ||||
| 		asmCode << "	}" << std::endl; | ||||
| 		asmCode << "	#define HAVE_ROTR" << std::endl; | ||||
| 		asmCode << "#endif" << std::endl; | ||||
| 		asmCode << "#if !defined(HAVE_MULH) || !defined(HAVE_SMULH) || !defined(HAVE_ROTR)" << std::endl; | ||||
| 		asmCode << "	#error \"Required functions are not defined\"" << std::endl; | ||||
| 		asmCode << "#endif" << std::endl; | ||||
| 		asmCode << "void superScalar(uint64_t r[8]) {" << std::endl; | ||||
| 		asmCode << "uint64_t r8 = r[0], r9 = r[1], r10 = r[2], r11 = r[3], r12 = r[4], r13 = r[5], r14 = r[6], r15 = r[7];" << std::endl; | ||||
| 		for (unsigned i = 0; i < prog.getSize(); ++i) { | ||||
| 			Instruction& instr = prog(i); | ||||
| 			switch (instr.opcode) | ||||
| 			{ | ||||
| 			case RandomX::LightInstructionType::ISUB_R: | ||||
| 				asmCode << regR[instr.dst] << " -= " << regR[instr.src] << ";" << std::endl; | ||||
| 				break; | ||||
| 			case RandomX::LightInstructionType::IXOR_R: | ||||
| 				asmCode << regR[instr.dst] << " ^= " << regR[instr.src] << ";" << std::endl; | ||||
| 				break; | ||||
| 			case RandomX::LightInstructionType::IADD_RS: | ||||
| 				asmCode << regR[instr.dst] << " += " << regR[instr.src] << "*" << (1 << (instr.mod % 4)) << ";" << std::endl; | ||||
| 				break; | ||||
| 			case RandomX::LightInstructionType::IMUL_R: | ||||
| 				asmCode << regR[instr.dst] << " *= " << regR[instr.src] << ";" << std::endl; | ||||
| 				break; | ||||
| 			case RandomX::LightInstructionType::IROR_C: | ||||
| 				asmCode << regR[instr.dst] << " = rotr(" << regR[instr.dst] << ", " << instr.getImm32() << ");" << std::endl; | ||||
| 				break; | ||||
| 			case RandomX::LightInstructionType::IADD_C7: | ||||
| 			case RandomX::LightInstructionType::IADD_C8: | ||||
| 			case RandomX::LightInstructionType::IADD_C9: | ||||
| 				asmCode << regR[instr.dst] << " += " << (int32_t)instr.getImm32() << ";" << std::endl; | ||||
| 				break; | ||||
| 			case RandomX::LightInstructionType::IXOR_C7: | ||||
| 			case RandomX::LightInstructionType::IXOR_C8: | ||||
| 			case RandomX::LightInstructionType::IXOR_C9: | ||||
| 				asmCode << regR[instr.dst] << " ^= " << (int32_t)instr.getImm32() << ";" << std::endl; | ||||
| 				break; | ||||
| 			case RandomX::LightInstructionType::IMULH_R: | ||||
| 				asmCode << regR[instr.dst] << " = mulh(" << regR[instr.dst] << ", " << regR[instr.src] << ");" << std::endl; | ||||
| 				break; | ||||
| 			case RandomX::LightInstructionType::ISMULH_R: | ||||
| 				asmCode << regR[instr.dst] << " = smulh(" << regR[instr.dst] << ", " << regR[instr.src] << ");" << std::endl; | ||||
| 				break; | ||||
| 			case RandomX::LightInstructionType::IMUL_RCP: | ||||
| 				asmCode << regR[instr.dst] << " *= " << (int64_t)reciprocal(instr.getImm32()) << ";" << std::endl; | ||||
| 				break; | ||||
| 			default: | ||||
| 				UNREACHABLE; | ||||
| 			} | ||||
| 		} | ||||
| 		asmCode << "r[0] = r8; r[1] = r9; r[2] = r10; r[3] = r11; r[4] = r12; r[5] = r13; r[6] = r14; r[7] = r15;" << std::endl; | ||||
| 		asmCode << "}" << std::endl; | ||||
| 	} | ||||
| 
 | ||||
| 	int AssemblyGeneratorX86::getConditionRegister() { | ||||
| 		int min = INT_MAX; | ||||
| 		int minIndex; | ||||
|  |  | |||
|  | @ -27,27 +27,16 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| namespace RandomX { | ||||
| 
 | ||||
| 	class Program; | ||||
| 	class LightProgram; | ||||
| 	class AssemblyGeneratorX86; | ||||
| 
 | ||||
| 	typedef void(AssemblyGeneratorX86::*InstructionGenerator)(Instruction&, int); | ||||
| 
 | ||||
| 	class AssemblyGeneratorX86 { | ||||
| 	public: | ||||
| 		template<class P> | ||||
| 		void generateProgram(P& prog) { | ||||
| 			for (unsigned i = 0; i < 8; ++i) { | ||||
| 				registerUsage[i] = -1; | ||||
| 			} | ||||
| 			asmCode.str(std::string()); //clear
 | ||||
| 			for (unsigned i = 0; i < prog.getSize(); ++i) { | ||||
| 				asmCode << "randomx_isn_" << i << ":" << std::endl; | ||||
| 				Instruction& instr = prog(i); | ||||
| 				instr.src %= RegistersCount; | ||||
| 				instr.dst %= RegistersCount; | ||||
| 				generateCode(instr, i); | ||||
| 				//asmCode << std::endl;
 | ||||
| 			} | ||||
| 		} | ||||
| 		void generateProgram(Program& prog); | ||||
| 		void generateAsm(LightProgram& prog); | ||||
| 		void generateC(LightProgram& prog); | ||||
| 		void printCode(std::ostream& os) { | ||||
| 			os << asmCode.rdbuf(); | ||||
| 		} | ||||
|  |  | |||
|  | @ -23,18 +23,25 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| 
 | ||||
| namespace RandomX { | ||||
| 
 | ||||
| 	CompiledLightVirtualMachine::CompiledLightVirtualMachine() { | ||||
| 	} | ||||
| 
 | ||||
| 	void CompiledLightVirtualMachine::setDataset(dataset_t ds, uint64_t size) { | ||||
| 	template<bool superscalar> | ||||
| 	void CompiledLightVirtualMachine<superscalar>::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) { | ||||
| 		mem.ds = ds; | ||||
| 		datasetRange = (size - RANDOMX_DATASET_SIZE + CacheLineSize) / CacheLineSize; | ||||
| 		if(superscalar) | ||||
| 			compiler.generateSuperScalarHash(programs); | ||||
| 		//datasetBasePtr = ds.dataset.memory;
 | ||||
| 	} | ||||
| 
 | ||||
| 	void CompiledLightVirtualMachine::initialize() { | ||||
| 	template void CompiledLightVirtualMachine<true>::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]); | ||||
| 	template void CompiledLightVirtualMachine<false>::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]); | ||||
| 
 | ||||
| 	template<bool superscalar> | ||||
| 	void CompiledLightVirtualMachine<superscalar>::initialize() { | ||||
| 		VirtualMachine::initialize(); | ||||
| 		compiler.generateProgramLight(program); | ||||
| 		compiler.generateProgramLight<superscalar>(program); | ||||
| 		//mem.ds.dataset.memory = datasetBasePtr + (datasetBase * CacheLineSize);
 | ||||
| 	} | ||||
| 
 | ||||
| 	template void CompiledLightVirtualMachine<true>::initialize(); | ||||
| 	template void CompiledLightVirtualMachine<false>::initialize(); | ||||
| } | ||||
|  | @ -26,6 +26,7 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| 
 | ||||
| namespace RandomX { | ||||
| 
 | ||||
| 	template<bool superscalar> | ||||
| 	class CompiledLightVirtualMachine : public CompiledVirtualMachine { | ||||
| 	public: | ||||
| 		void* operator new(size_t size) { | ||||
|  | @ -37,8 +38,8 @@ namespace RandomX { | |||
| 		void operator delete(void* ptr) { | ||||
| 			_mm_free(ptr); | ||||
| 		} | ||||
| 		CompiledLightVirtualMachine(); | ||||
| 		void setDataset(dataset_t ds, uint64_t size) override; | ||||
| 		CompiledLightVirtualMachine() {} | ||||
| 		void setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) override; | ||||
| 		void initialize() override; | ||||
| 	}; | ||||
| } | ||||
|  | @ -29,7 +29,7 @@ namespace RandomX { | |||
| 	CompiledVirtualMachine::CompiledVirtualMachine() { | ||||
| 	} | ||||
| 
 | ||||
| 	void CompiledVirtualMachine::setDataset(dataset_t ds, uint64_t size) { | ||||
| 	void CompiledVirtualMachine::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) { | ||||
| 		mem.ds = ds; | ||||
| 		datasetRange = (size - RANDOMX_DATASET_SIZE + CacheLineSize) / CacheLineSize; | ||||
| 		datasetBasePtr = ds.dataset.memory; | ||||
|  |  | |||
|  | @ -42,7 +42,7 @@ namespace RandomX { | |||
| 			_mm_free(ptr); | ||||
| 		} | ||||
| 		CompiledVirtualMachine(); | ||||
| 		void setDataset(dataset_t ds, uint64_t size) override; | ||||
| 		void setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) override; | ||||
| 		void initialize() override; | ||||
| 		virtual void execute() override; | ||||
| 		void* getProgram() { | ||||
|  |  | |||
|  | @ -49,7 +49,7 @@ namespace RandomX { | |||
| 
 | ||||
| 	} | ||||
| 
 | ||||
| 	void InterpretedVirtualMachine::setDataset(dataset_t ds, uint64_t size) { | ||||
| 	void InterpretedVirtualMachine::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) { | ||||
| 		mem.ds = ds; | ||||
| 		readDataset = &datasetReadLight; | ||||
| 		datasetRange = (size - RANDOMX_DATASET_SIZE + CacheLineSize) / CacheLineSize; | ||||
|  |  | |||
|  | @ -75,7 +75,7 @@ namespace RandomX { | |||
| 		} | ||||
| 		InterpretedVirtualMachine(bool soft) : softAes(soft) {} | ||||
| 		~InterpretedVirtualMachine(); | ||||
| 		void setDataset(dataset_t ds, uint64_t size) override; | ||||
| 		void setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) override; | ||||
| 		void initialize() override; | ||||
| 		void execute() override; | ||||
| 	private: | ||||
|  |  | |||
|  | @ -25,6 +25,8 @@ PUBLIC randomx_program_loop_load | |||
| PUBLIC randomx_program_start | ||||
| PUBLIC randomx_program_read_dataset | ||||
| PUBLIC randomx_program_read_dataset_light | ||||
| PUBLIC randomx_program_read_dataset_sshash_init | ||||
| PUBLIC randomx_program_read_dataset_sshash_fin | ||||
| PUBLIC randomx_program_read_dataset_light_sub | ||||
| PUBLIC randomx_dataset_init | ||||
| PUBLIC randomx_program_loop_store | ||||
|  | @ -65,6 +67,38 @@ randomx_program_read_dataset_light PROC | |||
| 	include asm/program_read_dataset_light.inc | ||||
| randomx_program_read_dataset_light ENDP | ||||
| 
 | ||||
| randomx_program_read_dataset_sshash_init PROC | ||||
| 	sub rsp, 72 | |||| ;# reserve nine 8-byte stack slots for rbx and r8-r15
| 	mov qword ptr [rsp+64], rbx | ||||
| 	mov qword ptr [rsp+56], r8 | ||||
| 	mov qword ptr [rsp+48], r9 | ||||
| 	mov qword ptr [rsp+40], r10 | ||||
| 	mov qword ptr [rsp+32], r11 | ||||
| 	mov qword ptr [rsp+24], r12 | ||||
| 	mov qword ptr [rsp+16], r13 | ||||
| 	mov qword ptr [rsp+8], r14 | ||||
| 	mov qword ptr [rsp+0], r15 | ||||
| 	xor rbp, rax                       ;# modify "mx" | ||||
| 	ror rbp, 32                        ;# swap "ma" and "mx" | ||||
| 	mov ebx, ebp                       ;# ebx = ma | ||||
| 	and ebx, 2147483584                ;# align "ma" to the start of a cache line | ||||
| 	shr ebx, 6                         ;# ebx = Dataset block number | ||||
| 	;# call 32768 (placeholder only: the JIT compiler emits the real CALL to the SuperscalarHash code right after this fragment) | ||||
| randomx_program_read_dataset_sshash_init ENDP | ||||
| 
 | ||||
| randomx_program_read_dataset_sshash_fin PROC | |||| ;# the JIT compiler copies this fragment after the CALL that follows randomx_program_read_dataset_sshash_init
| 	mov rbx, qword ptr [rsp+64] | |||| ;# restore the rbx value stored by the init fragment
| 	xor r8, qword ptr [rsp+56] | |||| ;# r8-r15: xor the current register with the copy the init fragment stored on the stack
| 	xor r9, qword ptr [rsp+48] | ||||
| 	xor r10, qword ptr [rsp+40] | ||||
| 	xor r11, qword ptr [rsp+32] | ||||
| 	xor r12, qword ptr [rsp+24] | ||||
| 	xor r13, qword ptr [rsp+16] | ||||
| 	xor r14, qword ptr [rsp+8] | ||||
| 	xor r15, qword ptr [rsp+0] | ||||
| 	add rsp, 72 | |||| ;# release the 72 bytes reserved by the init fragment
| randomx_program_read_dataset_sshash_fin ENDP | ||||
| 
 | ||||
| randomx_program_loop_store PROC | ||||
| 	include asm/program_loop_store.inc | ||||
| randomx_program_loop_store ENDP | ||||
|  |  | |||
|  | @ -24,6 +24,8 @@ extern "C" { | |||
| 	void randomx_program_start(); | ||||
| 	void randomx_program_read_dataset(); | ||||
| 	void randomx_program_read_dataset_light(); | ||||
| 	void randomx_program_read_dataset_sshash_init(); | ||||
| 	void randomx_program_read_dataset_sshash_fin(); | ||||
| 	void randomx_program_loop_store(); | ||||
| 	void randomx_program_loop_end(); | ||||
| 	void randomx_program_read_dataset_light_sub(); | ||||
|  |  | |||
|  | @ -87,6 +87,7 @@ namespace RandomX { | |||
| 	*/ | ||||
| 
 | ||||
| #include "JitCompilerX86-static.hpp" | ||||
| #include "LightProgramGenerator.hpp" | ||||
| 
 | ||||
| #define NOP_TEST true | ||||
| 
 | ||||
|  | @ -96,6 +97,8 @@ namespace RandomX { | |||
| 	const uint8_t* codeProgamStart = (uint8_t*)&randomx_program_start; | ||||
| 	const uint8_t* codeReadDataset = (uint8_t*)&randomx_program_read_dataset; | ||||
| 	const uint8_t* codeReadDatasetLight = (uint8_t*)&randomx_program_read_dataset_light; | ||||
| 	const uint8_t* codeReadDatasetLightSshInit = (uint8_t*)&randomx_program_read_dataset_sshash_init; | ||||
| 	const uint8_t* codeReadDatasetLightSshFin = (uint8_t*)&randomx_program_read_dataset_sshash_fin; | ||||
| 	const uint8_t* codeDatasetInit = (uint8_t*)&randomx_dataset_init; | ||||
| 	const uint8_t* codeLoopStore = (uint8_t*)&randomx_program_loop_store; | ||||
| 	const uint8_t* codeLoopEnd = (uint8_t*)&randomx_program_loop_end; | ||||
|  | @ -110,7 +113,9 @@ namespace RandomX { | |||
| 	const int32_t prologueSize = codeLoopBegin - codePrologue; | ||||
| 	const int32_t loopLoadSize = codeProgamStart - codeLoopLoad; | ||||
| 	const int32_t readDatasetSize = codeReadDatasetLight - codeReadDataset; | ||||
| 	const int32_t readDatasetLightSize = codeLoopStore - codeReadDatasetLight; | ||||
| 	const int32_t readDatasetLightSize = codeReadDatasetLightSshInit - codeReadDatasetLight; | ||||
| 	const int32_t readDatasetLightInitSize = codeReadDatasetLightSshFin - codeReadDatasetLightSshInit; | ||||
| 	const int32_t readDatasetLightFinSize = codeLoopStore - codeReadDatasetLightSshFin; | ||||
| 	const int32_t loopStoreSize = codeLoopEnd - codeLoopStore; | ||||
| 	const int32_t readDatasetLightSubSize = codeDatasetInit - codeReadDatasetLightSub; | ||||
| 	const int32_t datasetInitSize = codeEpilogue - codeDatasetInit; | ||||
|  | @ -199,7 +204,7 @@ namespace RandomX { | |||
| 
 | ||||
| 	static const uint8_t NOP1[] = { 0x90 }; | ||||
| 	static const uint8_t NOP2[] = { 0x66, 0x90 }; | ||||
| 	static const uint8_t NOP3[] = { 0x0F, 0x1F, 0x00 }; | ||||
| 	static const uint8_t NOP3[] = { 0x66, 0x66, 0x90 }; | ||||
| 	static const uint8_t NOP4[] = { 0x0F, 0x1F, 0x40, 0x00 }; | ||||
| 	static const uint8_t NOP5[] = { 0x0F, 0x1F, 0x44, 0x00, 0x00 }; | ||||
| 	static const uint8_t NOP6[] = { 0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00 }; | ||||
|  | @ -230,19 +235,31 @@ namespace RandomX { | |||
| 		generateProgramEpilogue(prog); | ||||
| 	} | ||||
| 
 | ||||
| 	template<bool superscalar> | ||||
| 	void JitCompilerX86::generateProgramLight(Program& prog) { | ||||
| 		if (RANDOMX_CACHE_ACCESSES != 8) | ||||
| 			throw std::runtime_error("JIT compiler: Unsupported value of RANDOMX_CACHE_ACCESSES"); | ||||
| 		if (RANDOMX_ARGON_GROWTH != 0) | ||||
| 			throw std::runtime_error("JIT compiler: Unsupported value of RANDOMX_ARGON_GROWTH"); | ||||
| 		generateProgramPrologue(prog); | ||||
| 		memcpy(code + codePos, codeReadDatasetLight, readDatasetLightSize); | ||||
| 		codePos += readDatasetLightSize; | ||||
| 		emitByte(CALL); | ||||
| 		emit32(readDatasetLightSubOffset - (codePos + 4)); | ||||
| 		if (superscalar) { | ||||
| 			emit(codeReadDatasetLightSshInit, readDatasetLightInitSize); | ||||
| 			emitByte(CALL); | ||||
| 			emit32(superScalarHashOffset - (codePos + 4)); | ||||
| 			emit(codeReadDatasetLightSshFin, readDatasetLightFinSize); | ||||
| 		} | ||||
| 		else { | ||||
| 			memcpy(code + codePos, codeReadDatasetLight, readDatasetLightSize); | ||||
| 			codePos += readDatasetLightSize; | ||||
| 			emitByte(CALL); | ||||
| 			emit32(readDatasetLightSubOffset - (codePos + 4)); | ||||
| 		} | ||||
| 		generateProgramEpilogue(prog); | ||||
| 	} | ||||
| 
 | ||||
| 	template void JitCompilerX86::generateProgramLight<true>(Program& prog); | ||||
| 	template void JitCompilerX86::generateProgramLight<false>(Program& prog); | ||||
| 
 | ||||
| 	template<size_t N> | ||||
| 	void JitCompilerX86::generateSuperScalarHash(LightProgram(&programs)[N]) { | ||||
| 		memcpy(code + superScalarHashOffset, codeShhInit, codeSshInitSize); | ||||
|  | @ -253,7 +270,7 @@ namespace RandomX { | |||
| 				Instruction& instr = prog(i); | ||||
| 				instr.src %= RegistersCount; | ||||
| 				instr.dst %= RegistersCount; | ||||
| 				generateCode(instr, i); | ||||
| 				generateCode<LightProgram>(instr, i); | ||||
| 			} | ||||
| 			emit(codeShhLoad, codeSshLoadSize); | ||||
| 			if (j < N - 1) { | ||||
|  | @ -318,6 +335,7 @@ namespace RandomX { | |||
| 		emit32(epilogueOffset - codePos - 4); | ||||
| 	} | ||||
| 
 | ||||
| 	template<class P> | ||||
| 	void JitCompilerX86::generateCode(Instruction& instr, int i) { | ||||
| #ifdef RANDOMX_JUMP | ||||
| 		instructionOffsets.push_back(codePos); | ||||
|  | @ -326,6 +344,95 @@ namespace RandomX { | |||
| 		(this->*generator)(instr, i); | ||||
| 	} | ||||
| 
 | ||||
| 	template<> | ||||
| 	void JitCompilerX86::generateCode<LightProgram>(Instruction& instr, int i) { | ||||
| 		switch (instr.opcode) | ||||
| 		{ | ||||
| 		case RandomX::LightInstructionType::ISUB_R: | ||||
| 			emit(REX_SUB_RR); | ||||
| 			emitByte(0xc0 + 8 * instr.dst + instr.src); | ||||
| 			break; | ||||
| 		case RandomX::LightInstructionType::IXOR_R: | ||||
| 			emit(REX_XOR_RR); | ||||
| 			emitByte(0xc0 + 8 * instr.dst + instr.src); | ||||
| 			break; | ||||
| 		case RandomX::LightInstructionType::IADD_RS: | ||||
| 			emit(REX_LEA); | ||||
| 			emitByte(0x04 + 8 * instr.dst); | ||||
| 			genSIB(instr.mod % 4, instr.src, instr.dst); | ||||
| 			break; | ||||
| 		case RandomX::LightInstructionType::IMUL_R: | ||||
| 			emit(REX_IMUL_RR); | ||||
| 			emitByte(0xc0 + 8 * instr.dst + instr.src); | ||||
| 			break; | ||||
| 		case RandomX::LightInstructionType::IROR_C: | ||||
| 			emit(REX_ROT_I8); | ||||
| 			emitByte(0xc8 + instr.dst); | ||||
| 			emitByte(instr.getImm32() & 63); | ||||
| 			break; | ||||
| 		case RandomX::LightInstructionType::IADD_C7: | ||||
| 			emit(REX_81); | ||||
| 			emitByte(0xc0 + instr.dst); | ||||
| 			emit32(instr.getImm32()); | ||||
| 			break; | ||||
| 		case RandomX::LightInstructionType::IXOR_C7: | ||||
| 			emit(REX_XOR_RI); | ||||
| 			emitByte(0xf0 + instr.dst); | ||||
| 			emit32(instr.getImm32()); | ||||
| 			break; | ||||
| 		case RandomX::LightInstructionType::IADD_C8: | ||||
| 			emit(REX_81); | ||||
| 			emitByte(0xc0 + instr.dst); | ||||
| 			emit32(instr.getImm32()); | ||||
| 			emit(NOP1); | ||||
| 			break; | ||||
| 		case RandomX::LightInstructionType::IXOR_C8: | ||||
| 			emit(REX_XOR_RI); | ||||
| 			emitByte(0xf0 + instr.dst); | ||||
| 			emit32(instr.getImm32()); | ||||
| 			emit(NOP1); | ||||
| 			break; | ||||
| 		case RandomX::LightInstructionType::IADD_C9: | ||||
| 			emit(REX_81); | ||||
| 			emitByte(0xc0 + instr.dst); | ||||
| 			emit32(instr.getImm32()); | ||||
| 			emit(NOP2); | ||||
| 			break; | ||||
| 		case RandomX::LightInstructionType::IXOR_C9: | ||||
| 			emit(REX_XOR_RI); | ||||
| 			emitByte(0xf0 + instr.dst); | ||||
| 			emit32(instr.getImm32()); | ||||
| 			emit(NOP2); | ||||
| 			break; | ||||
| 		case RandomX::LightInstructionType::IMULH_R: | ||||
| 			emit(REX_MOV_RR64); | ||||
| 			emitByte(0xc0 + instr.dst); | ||||
| 			emit(REX_MUL_R); | ||||
| 			emitByte(0xe0 + instr.src); | ||||
| 			emit(REX_MOV_R64R); | ||||
| 			emitByte(0xc2 + 8 * instr.dst); | ||||
| 			break; | ||||
| 		case RandomX::LightInstructionType::ISMULH_R: | ||||
| 			emit(REX_MOV_RR64); | ||||
| 			emitByte(0xc0 + instr.dst); | ||||
| 			emit(REX_MUL_R); | ||||
| 			emitByte(0xe8 + instr.src); | ||||
| 			emit(REX_MOV_R64R); | ||||
| 			emitByte(0xc2 + 8 * instr.dst); | ||||
| 			break; | ||||
| 		case RandomX::LightInstructionType::IMUL_RCP: | ||||
| 			emit(MOV_RAX_I); | ||||
| 			emit64(reciprocal(instr.getImm32())); | ||||
| 			emit(REX_IMUL_RM); | ||||
| 			emitByte(0xc0 + 8 * instr.dst); | ||||
| 			break; | ||||
| 		default: | ||||
| 			UNREACHABLE; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	template void JitCompilerX86::generateCode<Program>(Instruction& instr, int i); | ||||
| 
 | ||||
| 	void JitCompilerX86::genAddressReg(Instruction& instr, bool rax = true) { | ||||
| 		emit(REX_MOV_RR); | ||||
| 		emitByte((rax ? 0xc0 : 0xc8) + instr.src); | ||||
|  |  | |||
|  | @ -39,6 +39,7 @@ namespace RandomX { | |||
| 		JitCompilerX86(); | ||||
| 		~JitCompilerX86(); | ||||
| 		void generateProgram(Program&); | ||||
| 		template<bool superscalar> | ||||
| 		void generateProgramLight(Program&); | ||||
| 		template<size_t N> | ||||
| 		void generateSuperScalarHash(LightProgram (&programs)[N]); | ||||
|  | @ -66,7 +67,7 @@ namespace RandomX { | |||
| 				Instruction& instr = prog(i); | ||||
| 				instr.src %= RegistersCount; | ||||
| 				instr.dst %= RegistersCount; | ||||
| 				generateCode(instr, i); | ||||
| 				generateCode<P>(instr, i); | ||||
| 			} | ||||
| 		} | ||||
| 
 | ||||
|  | @ -81,6 +82,8 @@ namespace RandomX { | |||
| 		void genSIB(int scale, int index, int base); | ||||
| 
 | ||||
| 		void handleCondition(Instruction&, int); | ||||
| 
 | ||||
| 		template<class P> | ||||
| 		void generateCode(Instruction&, int); | ||||
| 
 | ||||
| 		void emitByte(uint8_t val) { | ||||
|  |  | |||
|  | @ -29,23 +29,6 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| #include "LightProgramGenerator.hpp" | ||||
| 
 | ||||
| namespace RandomX { | ||||
|                                             //                             Intel Ivy Bridge reference
 | ||||
| 	namespace LightInstructionType {        //uOPs (decode)   execution ports         latency       code size
 | ||||
| 		constexpr int IADD_RS = 0;          //1               p01                     1               4
 | ||||
| 		constexpr int ISUB_R = 1;           //1               p015                    1               3
 | ||||
| 		constexpr int ISUB_C = 2;           //1               p015                    3               7
 | ||||
| 		constexpr int IMUL_R = 3;           //1               p1                      3               4
 | ||||
| 		constexpr int IMUL_C = 4;           //1               p1                      3               7
 | ||||
| 		constexpr int IMULH_R = 5;          //1+2+1           0+(p1,p5)+0             3               3+3+3
 | ||||
| 		constexpr int ISMULH_R = 6;         //1+2+1           0+(p1,p5)+0             3               3+3+3
 | ||||
| 		constexpr int IMUL_RCP = 7;         //1+1             p015+p1                 4              10+4
 | ||||
| 		constexpr int IXOR_R = 8;           //1               p015                    1               3
 | ||||
| 		constexpr int IXOR_C = 9;           //1               p015                    1               7
 | ||||
| 		constexpr int IROR_R = 10;          //1+2             0+(p0,p5)               1               3+3
 | ||||
| 		constexpr int IROR_C = 11;          //1               p05                     1               4
 | ||||
| 		constexpr int COND_R = 12;          //1+1+1+1+1+1     p015+p5+0+p015+p05+p015 3               7+13+3+7+3+3
 | ||||
| 		constexpr int COUNT = 13; | ||||
| 	} | ||||
| 
 | ||||
| 	namespace LightInstructionOpcode { | ||||
| 		constexpr int IADD_RS = 0; | ||||
|  | @ -62,7 +45,7 @@ namespace RandomX { | |||
| 	} | ||||
| 
 | ||||
| 	static bool isMul(int type) { | ||||
| 		return type == LightInstructionType::IMUL_R || type == LightInstructionType::IMUL_C || type == LightInstructionType::IMULH_R || type == LightInstructionType::ISMULH_R || type == LightInstructionType::IMUL_RCP; | ||||
| 		return type == LightInstructionType::IMUL_R || type == LightInstructionType::IMULH_R || type == LightInstructionType::ISMULH_R || type == LightInstructionType::IMUL_RCP; | ||||
| 	} | ||||
| 
 | ||||
| 	const int lightInstructionOpcode[] = { | ||||
|  | @ -289,19 +272,20 @@ namespace RandomX { | |||
| 		int getSrcOp() const { | ||||
| 			return srcOp_; | ||||
| 		} | ||||
| 		static const LightInstructionInfo IADD_RS; | ||||
| 		static const LightInstructionInfo ISUB_R; | ||||
| 		static const LightInstructionInfo ISUB_C; | ||||
| 		static const LightInstructionInfo IXOR_R; | ||||
| 		static const LightInstructionInfo IADD_RS; | ||||
| 		static const LightInstructionInfo IMUL_R; | ||||
| 		static const LightInstructionInfo IMUL_C; | ||||
| 		static const LightInstructionInfo IROR_C; | ||||
| 		static const LightInstructionInfo IADD_C7; | ||||
| 		static const LightInstructionInfo IXOR_C7; | ||||
| 		static const LightInstructionInfo IADD_C8; | ||||
| 		static const LightInstructionInfo IXOR_C8; | ||||
| 		static const LightInstructionInfo IADD_C9; | ||||
| 		static const LightInstructionInfo IXOR_C9; | ||||
| 		static const LightInstructionInfo IMULH_R; | ||||
| 		static const LightInstructionInfo ISMULH_R; | ||||
| 		static const LightInstructionInfo IMUL_RCP; | ||||
| 		static const LightInstructionInfo IXOR_R; | ||||
| 		static const LightInstructionInfo IXOR_C; | ||||
| 		static const LightInstructionInfo IROR_R; | ||||
| 		static const LightInstructionInfo IROR_C; | ||||
| 		static const LightInstructionInfo COND_R; | ||||
| 		static const LightInstructionInfo NOP; | ||||
| 	private: | ||||
| 		const char* name_; | ||||
|  | @ -316,28 +300,31 @@ namespace RandomX { | |||
| 			: name_(name), type_(-1), latency_(0) {} | ||||
| 	}; | ||||
| 
 | ||||
| 	const LightInstructionInfo LightInstructionInfo::IADD_RS = LightInstructionInfo("IADD_RS", LightInstructionType::IADD_RS, MacroOp::Lea_sib, 0); | ||||
| 	const LightInstructionInfo LightInstructionInfo::ISUB_R = LightInstructionInfo("ISUB_R", LightInstructionType::ISUB_R, MacroOp::Sub_rr, 0); | ||||
| 	const LightInstructionInfo LightInstructionInfo::ISUB_C = LightInstructionInfo("ISUB_C", LightInstructionType::ISUB_C, MacroOp::Sub_ri, -1); | ||||
| 	const LightInstructionInfo LightInstructionInfo::IXOR_R = LightInstructionInfo("IXOR_R", LightInstructionType::IXOR_R, MacroOp::Xor_rr, 0); | ||||
| 	const LightInstructionInfo LightInstructionInfo::IADD_RS = LightInstructionInfo("IADD_RS", LightInstructionType::IADD_RS, MacroOp::Lea_sib, 0); | ||||
| 	const LightInstructionInfo LightInstructionInfo::IMUL_R = LightInstructionInfo("IMUL_R", LightInstructionType::IMUL_R, MacroOp::Imul_rr, 0); | ||||
| 	const LightInstructionInfo LightInstructionInfo::IMUL_C = LightInstructionInfo("IMUL_C", LightInstructionType::IMUL_C, MacroOp::Imul_rri, -1); | ||||
| 	const LightInstructionInfo LightInstructionInfo::IROR_C = LightInstructionInfo("IROR_C", LightInstructionType::IROR_C, MacroOp::Ror_ri, -1); | ||||
| 
 | ||||
| 	const LightInstructionInfo LightInstructionInfo::IADD_C7 = LightInstructionInfo("IADD_C7", LightInstructionType::IADD_C7, MacroOp::Add_ri, -1); | ||||
| 	const LightInstructionInfo LightInstructionInfo::IXOR_C7 = LightInstructionInfo("IXOR_C7", LightInstructionType::IXOR_C7, MacroOp::Xor_ri, -1); | ||||
| 	const LightInstructionInfo LightInstructionInfo::IADD_C8 = LightInstructionInfo("IADD_C8", LightInstructionType::IADD_C8, MacroOp::Add_ri, -1); | ||||
| 	const LightInstructionInfo LightInstructionInfo::IXOR_C8 = LightInstructionInfo("IXOR_C8", LightInstructionType::IXOR_C8, MacroOp::Xor_ri, -1); | ||||
| 	const LightInstructionInfo LightInstructionInfo::IADD_C9 = LightInstructionInfo("IADD_C9", LightInstructionType::IADD_C9, MacroOp::Add_ri, -1); | ||||
| 	const LightInstructionInfo LightInstructionInfo::IXOR_C9 = LightInstructionInfo("IXOR_C9", LightInstructionType::IXOR_C9, MacroOp::Xor_ri, -1); | ||||
| 
 | ||||
| 	const LightInstructionInfo LightInstructionInfo::IMULH_R = LightInstructionInfo("IMULH_R", LightInstructionType::IMULH_R, IMULH_R_ops_array, 1, 0, 1); | ||||
| 	const LightInstructionInfo LightInstructionInfo::ISMULH_R = LightInstructionInfo("ISMULH_R", LightInstructionType::ISMULH_R, ISMULH_R_ops_array, 1, 0, 1); | ||||
| 	const LightInstructionInfo LightInstructionInfo::IMUL_RCP = LightInstructionInfo("IMUL_RCP", LightInstructionType::IMUL_RCP, IMUL_RCP_ops_array, 1, 1, -1); | ||||
| 	const LightInstructionInfo LightInstructionInfo::IXOR_R = LightInstructionInfo("IXOR_R", LightInstructionType::IXOR_R, MacroOp::Xor_rr, 0); | ||||
| 	const LightInstructionInfo LightInstructionInfo::IXOR_C = LightInstructionInfo("IXOR_C", LightInstructionType::IXOR_C, MacroOp::Xor_ri, -1); | ||||
| 	const LightInstructionInfo LightInstructionInfo::IROR_R = LightInstructionInfo("IROR_R", LightInstructionType::IROR_R, IROR_R_ops_array, 1, 1, 0); | ||||
| 	const LightInstructionInfo LightInstructionInfo::IROR_C = LightInstructionInfo("IROR_C", LightInstructionType::IROR_C, MacroOp::Ror_ri, -1); | ||||
| 	const LightInstructionInfo LightInstructionInfo::COND_R = LightInstructionInfo("COND_R", LightInstructionType::COND_R, COND_R_ops_array, 5, 5, 3); | ||||
| 	 | ||||
| 	const LightInstructionInfo LightInstructionInfo::NOP = LightInstructionInfo("NOP"); | ||||
| 
 | ||||
| 	const int buffer0[] = { 3, 3, 10 }; | ||||
| 	const int buffer0[] = { 4, 8, 4 }; | ||||
| 	const int buffer1[] = { 7, 3, 3, 3 }; | ||||
| 	const int buffer2[] = { 3, 3, 3, 7 }; | ||||
| 	const int buffer2[] = { 3, 7, 3, 3 }; | ||||
| 	const int buffer3[] = { 4, 9, 3 }; | ||||
| 	const int buffer4[] = { 4, 4, 4, 4 }; | ||||
| 	const int buffer5[] = { 3, 7, 3, 3 }; | ||||
| 	const int buffer6[] = { 3, 3, 7, 3 }; | ||||
| 	const int buffer7[] = { 13, 3 }; | ||||
| 	const int buffer5[] = { 3, 3, 10 }; | ||||
| 
 | ||||
| 	class DecoderBuffer { | ||||
| 	public: | ||||
|  | @ -360,16 +347,10 @@ namespace RandomX { | |||
| 		const DecoderBuffer* fetchNext(int instrType, int cycle, int mulCount, Blake2Generator& gen) const { | ||||
| 			if (instrType == LightInstructionType::IMULH_R || instrType == LightInstructionType::ISMULH_R) | ||||
| 				return &decodeBuffer3310; //2-1-1 decode
 | ||||
| 			if (mulCount < cycle) | ||||
| 				return &decodeBuffer4444_mul; | ||||
| 			if (index_ == 0) { | ||||
| 				return &decodeBuffer4444; //IMUL_RCP end
 | ||||
| 			} | ||||
| 			/*if (index_ == 2) {
 | ||||
| 				return &decodeBuffer133; //COND_R middle
 | ||||
| 			}*/ | ||||
| 			if (index_ == 7) { | ||||
| 				return &decodeBuffer7333; //COND_R end
 | ||||
| 			if (mulCount < cycle + 1) | ||||
| 				return &decodeBuffer4444; | ||||
| 			if (index_ == 5) { //IMUL_RCP end
 | ||||
| 				return (gen.getByte() & 1) ? &decodeBuffer484 : &decodeBuffer493; | ||||
| 			} | ||||
| 			return fetchNextDefault(gen); | ||||
| 		} | ||||
|  | @ -379,49 +360,40 @@ namespace RandomX { | |||
| 		const int* counts_; | ||||
| 		int opsCount_; | ||||
| 		DecoderBuffer() : index_(-1) {} | ||||
| 		static const DecoderBuffer decodeBuffer3310; | ||||
| 		static const DecoderBuffer decodeBuffer484; | ||||
| 		static const DecoderBuffer decodeBuffer7333; | ||||
| 		static const DecoderBuffer decodeBuffer3337; | ||||
| 		static const DecoderBuffer decodeBuffer4444; | ||||
| 		static const DecoderBuffer decodeBuffer4444_mul; | ||||
| 		static const DecoderBuffer decodeBuffer3733; | ||||
| 		static const DecoderBuffer decodeBuffer3373; | ||||
| 		static const DecoderBuffer decodeBuffer133; | ||||
| 		static const DecoderBuffer* decodeBuffers[7]; | ||||
| 		static const DecoderBuffer decodeBuffer493; | ||||
| 		static const DecoderBuffer decodeBuffer4444; | ||||
| 		static const DecoderBuffer decodeBuffer3310; | ||||
| 		static const DecoderBuffer* decodeBuffers[4]; | ||||
| 		const DecoderBuffer* fetchNextDefault(Blake2Generator& gen) const { | ||||
| 			int select; | ||||
| 			//do {
 | ||||
| 				select = gen.getByte() & 3; | ||||
| 			//} while (select == 7);
 | ||||
| 			return decodeBuffers[select]; | ||||
| 			return decodeBuffers[gen.getByte() & 3]; | ||||
| 		} | ||||
| 	}; | ||||
| 
 | ||||
| 	const DecoderBuffer DecoderBuffer::decodeBuffer3310 = DecoderBuffer("3,3,10", 0, buffer0); | ||||
| 	const DecoderBuffer DecoderBuffer::decodeBuffer484 = DecoderBuffer("4,8,4", 0, buffer0); | ||||
| 	const DecoderBuffer DecoderBuffer::decodeBuffer7333 = DecoderBuffer("7,3,3,3", 1, buffer1); | ||||
| 	const DecoderBuffer DecoderBuffer::decodeBuffer3337 = DecoderBuffer("3,3,3,7", 2, buffer2); | ||||
| 	const DecoderBuffer DecoderBuffer::decodeBuffer4444_mul = DecoderBuffer("4,4,4,4-MUL", 3, buffer4); | ||||
| 	const DecoderBuffer DecoderBuffer::decodeBuffer3733 = DecoderBuffer("3,7,3,3", 2, buffer2); | ||||
| 	const DecoderBuffer DecoderBuffer::decodeBuffer493 = DecoderBuffer("4,9,3", 3, buffer3); | ||||
| 	const DecoderBuffer DecoderBuffer::decodeBuffer4444 = DecoderBuffer("4,4,4,4", 4, buffer4); | ||||
| 	 | ||||
| 	const DecoderBuffer DecoderBuffer::decodeBuffer3733 = DecoderBuffer("3,7,3,3", 5, buffer5); | ||||
| 	const DecoderBuffer DecoderBuffer::decodeBuffer3373 = DecoderBuffer("3,3,7,3", 6, buffer6); | ||||
| 	const DecoderBuffer DecoderBuffer::decodeBuffer133 = DecoderBuffer("13,3", 7, buffer7); | ||||
| 	const DecoderBuffer DecoderBuffer::decodeBuffer3310 = DecoderBuffer("3,3,10", 5, buffer5); | ||||
| 
 | ||||
| 	const DecoderBuffer* DecoderBuffer::decodeBuffers[7] = { | ||||
| 			&DecoderBuffer::decodeBuffer3310, | ||||
| 			&DecoderBuffer::decodeBuffer3337, | ||||
| 	const DecoderBuffer* DecoderBuffer::decodeBuffers[4] = { | ||||
| 			&DecoderBuffer::decodeBuffer484, | ||||
| 			&DecoderBuffer::decodeBuffer7333, | ||||
| 			&DecoderBuffer::decodeBuffer3733, | ||||
| 			&DecoderBuffer::decodeBuffer3373, | ||||
| 			&DecoderBuffer::decodeBuffer493, | ||||
| 	}; | ||||
| 
 | ||||
| 	const DecoderBuffer DecoderBuffer::Default = DecoderBuffer(); | ||||
| 
 | ||||
| 	const LightInstructionInfo* slot_3[]  = { &LightInstructionInfo::ISUB_R, &LightInstructionInfo::IXOR_R }; | ||||
| 	const LightInstructionInfo* slot_3L[] = { &LightInstructionInfo::ISUB_R, &LightInstructionInfo::IXOR_R, &LightInstructionInfo::IMULH_R, &LightInstructionInfo::ISMULH_R }; | ||||
| 	const LightInstructionInfo* slot_3C[] = { &LightInstructionInfo::ISUB_R, &LightInstructionInfo::IXOR_R, &LightInstructionInfo::IROR_R, &LightInstructionInfo::IXOR_R }; | ||||
| 	const LightInstructionInfo* slot_4[]  = { &LightInstructionInfo::IROR_C, &LightInstructionInfo::IADD_RS }; | ||||
| 	const LightInstructionInfo* slot_7[]  = { &LightInstructionInfo::IXOR_C, &LightInstructionInfo::ISUB_C }; | ||||
| 	const LightInstructionInfo* slot_7L   = &LightInstructionInfo::COND_R; | ||||
| 	const LightInstructionInfo* slot_7[]  = { &LightInstructionInfo::IXOR_C7, &LightInstructionInfo::IADD_C7 }; | ||||
| 	const LightInstructionInfo* slot_8[] = { &LightInstructionInfo::IXOR_C8, &LightInstructionInfo::IADD_C8 }; | ||||
| 	const LightInstructionInfo* slot_9[] = { &LightInstructionInfo::IXOR_C9, &LightInstructionInfo::IADD_C9 }; | ||||
| 	const LightInstructionInfo* slot_10   = &LightInstructionInfo::IMUL_RCP; | ||||
| 
 | ||||
| 	static bool selectRegister(std::vector<int>& availableRegisters, Blake2Generator& gen, int& reg) { | ||||
|  | @ -443,7 +415,7 @@ namespace RandomX { | |||
| 	class LightInstruction { | ||||
| 	public: | ||||
| 		void toInstr(Instruction& instr) { | ||||
| 			instr.opcode = lightInstructionOpcode[getType()]; | ||||
| 			instr.opcode = getType(); | ||||
| 			instr.dst = dst_; | ||||
| 			instr.src = src_ >= 0 ? src_ : dst_; | ||||
| 			instr.mod = mod_; | ||||
|  | @ -457,28 +429,22 @@ namespace RandomX { | |||
| 				if (isLast) { | ||||
| 					return create(slot_3L[gen.getByte() & 3], gen); | ||||
| 				} | ||||
| 				else if (false && isFirst && fetchType == 0) { | ||||
| 					return create(slot_3C[gen.getByte() & 3], gen); | ||||
| 				} | ||||
| 				else { | ||||
| 					return create(slot_3[gen.getByte() & 1], gen); | ||||
| 				} | ||||
| 			case 4: | ||||
| 				if (fetchType == 3 && !isLast) { | ||||
| 				if (fetchType == 4 && !isLast) { | ||||
| 					return create(&LightInstructionInfo::IMUL_R, gen); | ||||
| 				} | ||||
| 				else { | ||||
| 					return create(slot_4[gen.getByte() & 1], gen); | ||||
| 				} | ||||
| 			case 7: | ||||
| 				if (false && isLast) { | ||||
| 					return create(slot_7L, gen); | ||||
| 				} | ||||
| 				if (false && isFirst) { | ||||
| 					return create(&LightInstructionInfo::IMUL_C, gen); | ||||
| 				} else { | ||||
| 					return create(slot_7[gen.getByte() & 1], gen); | ||||
| 				} | ||||
| 				return create(slot_7[gen.getByte() & 1], gen); | ||||
| 			case 8: | ||||
| 				return create(slot_8[gen.getByte() & 1], gen); | ||||
| 			case 9: | ||||
| 				return create(slot_9[gen.getByte() & 1], gen); | ||||
| 			case 10: | ||||
| 				return create(slot_10, gen); | ||||
| 			default: | ||||
|  | @ -490,13 +456,6 @@ namespace RandomX { | |||
| 			LightInstruction li(info); | ||||
| 			switch (info->getType()) | ||||
| 			{ | ||||
| 			case LightInstructionType::IADD_RS: { | ||||
| 				li.mod_ = gen.getByte(); | ||||
| 				li.imm32_ = 0; | ||||
| 				li.opGroup_ = LightInstructionType::IADD_RS; | ||||
| 				li.groupParIsSource_ = true; | ||||
| 			} break; | ||||
| 
 | ||||
| 			case LightInstructionType::ISUB_R: { | ||||
| 				li.mod_ = 0; | ||||
| 				li.imm32_ = 0; | ||||
|  | @ -504,24 +463,51 @@ namespace RandomX { | |||
| 				li.groupParIsSource_ = true; | ||||
| 			} break; | ||||
| 
 | ||||
| 			case LightInstructionType::ISUB_C: { | ||||
| 			case LightInstructionType::IXOR_R: { | ||||
| 				li.mod_ = 0; | ||||
| 				li.imm32_ = gen.getInt32(); | ||||
| 				li.opGroup_ = LightInstructionType::ISUB_C; | ||||
| 				li.opGroupPar_ = -1; | ||||
| 				li.imm32_ = 0; | ||||
| 				li.opGroup_ = LightInstructionType::IXOR_R; | ||||
| 				li.groupParIsSource_ = true; | ||||
| 			} break; | ||||
| 
 | ||||
| 			case LightInstructionType::IADD_RS: { | ||||
| 				li.mod_ = gen.getByte(); | ||||
| 				li.imm32_ = 0; | ||||
| 				li.opGroup_ = LightInstructionType::IADD_RS; | ||||
| 				li.groupParIsSource_ = true; | ||||
| 			} break; | ||||
| 
 | ||||
| 			case LightInstructionType::IMUL_R: { | ||||
| 				li.mod_ = 0; | ||||
| 				li.imm32_ = 0; | ||||
| 				li.opGroup_ = LightInstructionType::IMUL_R; | ||||
| 				li.opGroupPar_ = gen.getInt32(); | ||||
| 				li.opGroupPar_ = -1; //TODO
 | ||||
| 			} break; | ||||
| 
 | ||||
| 			case LightInstructionType::IMUL_C: { | ||||
| 			case LightInstructionType::IROR_C: { | ||||
| 				li.mod_ = 0; | ||||
| 				do { | ||||
| 					li.imm32_ = gen.getByte() & 63; | ||||
| 				} while (li.imm32_ == 0); | ||||
| 				li.opGroup_ = LightInstructionType::IROR_C; | ||||
| 				li.opGroupPar_ = -1; | ||||
| 			} break; | ||||
| 
 | ||||
| 			case LightInstructionType::IADD_C7: | ||||
| 			case LightInstructionType::IADD_C8: | ||||
| 			case LightInstructionType::IADD_C9: { | ||||
| 				li.mod_ = 0; | ||||
| 				li.imm32_ = gen.getInt32(); | ||||
| 				li.opGroup_ = LightInstructionType::IMUL_C; | ||||
| 				li.opGroup_ = LightInstructionType::IADD_C7; | ||||
| 				li.opGroupPar_ = -1; | ||||
| 			} break; | ||||
| 
 | ||||
| 			case LightInstructionType::IXOR_C7: | ||||
| 			case LightInstructionType::IXOR_C8: | ||||
| 			case LightInstructionType::IXOR_C9: { | ||||
| 				li.mod_ = 0; | ||||
| 				li.imm32_ = gen.getInt32(); | ||||
| 				li.opGroup_ = LightInstructionType::IXOR_C7; | ||||
| 				li.opGroupPar_ = -1; | ||||
| 			} break; | ||||
| 
 | ||||
|  | @ -542,50 +528,14 @@ namespace RandomX { | |||
| 			} break; | ||||
| 
 | ||||
| 			case LightInstructionType::IMUL_RCP: { | ||||
| 				li.mod_ = 0; | ||||
| 				li.imm32_ = gen.getInt32(); | ||||
| 				li.opGroup_ = LightInstructionType::IMUL_C; | ||||
| 				li.opGroupPar_ = -1; | ||||
| 			} break; | ||||
| 
 | ||||
| 			case LightInstructionType::IXOR_R: { | ||||
| 				li.mod_ = 0; | ||||
| 				li.imm32_ = 0; | ||||
| 				li.opGroup_ = LightInstructionType::IXOR_R; | ||||
| 				li.groupParIsSource_ = true; | ||||
| 			} break; | ||||
| 
 | ||||
| 			case LightInstructionType::IXOR_C: { | ||||
| 				li.mod_ = 0; | ||||
| 				li.imm32_ = gen.getInt32(); | ||||
| 				li.opGroup_ = LightInstructionType::IXOR_R; | ||||
| 				li.opGroupPar_ = -1; | ||||
| 			} break; | ||||
| 
 | ||||
| 			case LightInstructionType::IROR_R: { | ||||
| 				li.mod_ = 0; | ||||
| 				li.imm32_ = 0; | ||||
| 				li.opGroup_ = LightInstructionType::IROR_R; | ||||
| 				li.opGroupPar_ = -1; | ||||
| 			} break; | ||||
| 
 | ||||
| 			case LightInstructionType::IROR_C: { | ||||
| 				li.mod_ = 0; | ||||
| 				do { | ||||
| 					li.imm32_ = gen.getByte(); | ||||
| 				} while ((li.imm32_ & 63) == 0); | ||||
| 				li.opGroup_ = LightInstructionType::IROR_R; | ||||
| 					li.imm32_ = gen.getInt32(); | ||||
| 				} while ((li.imm32_ & (li.imm32_ - 1)) == 0); | ||||
| 				li.opGroup_ = LightInstructionType::IMUL_RCP; | ||||
| 				li.opGroupPar_ = -1; | ||||
| 			} break; | ||||
| 
 | ||||
| 			case LightInstructionType::COND_R: { | ||||
| 				li.canReuse_ = true; | ||||
| 				li.mod_ = gen.getByte(); | ||||
| 				li.imm32_ = gen.getInt32(); | ||||
| 				li.opGroup_ = LightInstructionType::COND_R; | ||||
| 				li.opGroupPar_ = li.imm32_; | ||||
| 			} break; | ||||
| 
 | ||||
| 			default: | ||||
| 				break; | ||||
| 			} | ||||
|  | @ -675,8 +625,10 @@ namespace RandomX { | |||
| 	constexpr int CYCLE_MAP_SIZE = RANDOMX_LPROG_LATENCY + 3; | ||||
| #ifndef _DEBUG | ||||
| 	constexpr bool TRACE = false; | ||||
| 	constexpr bool INFO = false; | ||||
| #else | ||||
| 	constexpr bool TRACE = true; | ||||
| 	constexpr bool INFO = true; | ||||
| #endif | ||||
| 
 | ||||
| 	static int blakeCounter = 0; | ||||
|  | @ -806,6 +758,7 @@ namespace RandomX { | |||
| 		int codeSize = 0; | ||||
| 		int macroOpCount = 0; | ||||
| 		int cycle = 0; | ||||
| 		int fetchCycle = 0; | ||||
| 		int depCycle = 0; | ||||
| 		int retireCycle = 0; | ||||
| 		int mopIndex = 0; | ||||
|  | @ -816,7 +769,7 @@ namespace RandomX { | |||
| 		constexpr int MAX_ATTEMPTS = 4; | ||||
| 
 | ||||
| 		while(!portsSaturated) { | ||||
| 			fetchLine = fetchLine->fetchNext(currentInstruction.getType(), cycle, mulCount, gen); | ||||
| 			fetchLine = fetchLine->fetchNext(currentInstruction.getType(), fetchCycle++, mulCount, gen); | ||||
| 			if (TRACE) std::cout << "; ------------- fetch cycle " << cycle << " (" << fetchLine->getName() << ")" << std::endl; | ||||
| 
 | ||||
| 			mopIndex = 0; | ||||
|  | @ -833,7 +786,6 @@ namespace RandomX { | |||
| 				MacroOp& mop = currentInstruction.getInfo().getOp(instrIndex); | ||||
| 				if (fetchLine->getCounts()[mopIndex] != mop.getSize()) { | ||||
| 					if (TRACE) std::cout << "ERROR instruction " << mop.getName() << " doesn't fit into slot of size " << fetchLine->getCounts()[mopIndex] << std::endl; | ||||
| 					return DBL_MIN; | ||||
| 				} | ||||
| 				 | ||||
| 				if (TRACE) std::cout << mop.getName() << " "; | ||||
|  | @ -899,8 +851,8 @@ namespace RandomX { | |||
| 			++cycle; | ||||
| 		} | ||||
| 
 | ||||
| 		std::cout << "; ALU port utilization:" << std::endl; | ||||
| 		std::cout << "; (* = in use, _ = idle)" << std::endl; | ||||
| 		if(INFO) std::cout << "; ALU port utilization:" << std::endl; | ||||
| 		if (INFO) std::cout << "; (* = in use, _ = idle)" << std::endl; | ||||
| 
 | ||||
| 		int portCycles = 0; | ||||
| 		/*for (int i = 0; i < CYCLE_MAP_SIZE; ++i) {
 | ||||
|  | @ -914,13 +866,13 @@ namespace RandomX { | |||
| 
 | ||||
| 		double ipc = (macroOpCount / (double)retireCycle); | ||||
| 
 | ||||
| 		std::cout << "; code size " << codeSize << " bytes" << std::endl; | ||||
| 		std::cout << "; x86 macro-ops: " << macroOpCount << std::endl; | ||||
| 		std::cout << "; RandomX instructions: " << outIndex << std::endl; | ||||
| 		std::cout << "; Execution time: " << retireCycle << " cycles" << std::endl; | ||||
| 		std::cout << "; IPC = " << ipc << std::endl; | ||||
| 		std::cout << "; Port-cycles: " << portCycles << std::endl; | ||||
| 		std::cout << "; Multiplications: " << mulCount << std::endl; | ||||
| 		if (INFO) std::cout << "; code size " << codeSize << " bytes" << std::endl; | ||||
| 		if (INFO) std::cout << "; x86 macro-ops: " << macroOpCount << std::endl; | ||||
| 		if (INFO) std::cout << "; RandomX instructions: " << outIndex << std::endl; | ||||
| 		if (INFO) std::cout << "; Execution time: " << retireCycle << " cycles" << std::endl; | ||||
| 		if (INFO) std::cout << "; IPC = " << ipc << std::endl; | ||||
| 		if (INFO) std::cout << "; Port-cycles: " << portCycles << std::endl; | ||||
| 		if (INFO) std::cout << "; Multiplications: " << mulCount << std::endl; | ||||
| 
 | ||||
| 		int asicLatency[8]; | ||||
| 		memset(asicLatency, 0, sizeof(asicLatency)); | ||||
|  | @ -942,19 +894,21 @@ namespace RandomX { | |||
| 			} | ||||
| 		} | ||||
| 
 | ||||
| 		std::cout << "; ASIC latency: " << asicLatencyFinal << std::endl; | ||||
| 		if (INFO) std::cout << "; ASIC latency: " << asicLatencyFinal << std::endl; | ||||
| 
 | ||||
| 		std::cout << "; ASIC latency:" << std::endl; | ||||
| 		for (int i = 0; i < 8; ++i) { | ||||
| 			std::cout << ";  r" << i << " = " << asicLatency[i] << std::endl; | ||||
| 		} | ||||
| 		std::cout << "; CPU latency:" << std::endl; | ||||
| 		for (int i = 0; i < 8; ++i) { | ||||
| 			std::cout << ";  r" << i << " = " << registers[i].latency << std::endl; | ||||
| 		if (INFO) { | ||||
| 			std::cout << "; ASIC latency:" << std::endl; | ||||
| 			for (int i = 0; i < 8; ++i) { | ||||
| 				std::cout << ";  r" << i << " = " << asicLatency[i] << std::endl; | ||||
| 			} | ||||
| 			if (INFO) std::cout << "; CPU latency:" << std::endl; | ||||
| 			for (int i = 0; i < 8; ++i) { | ||||
| 				std::cout << ";  r" << i << " = " << registers[i].latency << std::endl; | ||||
| 			} | ||||
| 		} | ||||
| 
 | ||||
| 		prog.setSize(outIndex); | ||||
| 		prog.setAddressRegister(addressReg); | ||||
| 		return addressReg; | ||||
| 		return outIndex; | ||||
| 	} | ||||
| } | ||||
|  | @ -21,6 +21,27 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| 
 | ||||
| namespace RandomX { | ||||
| 
 | ||||
| 	//                             Intel Ivy Bridge reference
 | ||||
| 	namespace LightInstructionType {        //uOPs (decode)   execution ports         latency       code size
 | ||||
| 		constexpr int ISUB_R = 0;           //1               p015                    1               3
 | ||||
| 		constexpr int IXOR_R = 1;           //1               p015                    1               3
 | ||||
| 		constexpr int IADD_RS = 2;          //1               p01                     1               4
 | ||||
| 		constexpr int IMUL_R = 3;           //1               p1                      3               4
 | ||||
| 		constexpr int IROR_C = 4;           //1               p05                     1               4
 | ||||
| 		constexpr int IADD_C7 = 5;          //1               p015                    1               7
 | ||||
| 		constexpr int IXOR_C7 = 6;          //1               p015                    1               7
 | ||||
| 		constexpr int IADD_C8 = 7;          //1+0             p015                    1               8
 | ||||
| 		constexpr int IXOR_C8 = 8;          //1+0             p015                    1               8
 | ||||
| 		constexpr int IADD_C9 = 9;          //1+0             p015                    1               9
 | ||||
| 		constexpr int IXOR_C9 = 10;         //1+0             p015                    1               9
 | ||||
| 		constexpr int IMULH_R = 11;         //1+2+1           0+(p1,p5)+0             3               3+3+3
 | ||||
| 		constexpr int ISMULH_R = 12;        //1+2+1           0+(p1,p5)+0             3               3+3+3
 | ||||
| 		constexpr int IMUL_RCP = 13;        //1+1             p015+p1                 4              10+4
 | ||||
| 
 | ||||
| 		constexpr int COUNT = 14; | ||||
| 		constexpr int INVALID = -1; | ||||
| 	} | ||||
| 
 | ||||
| 	class Blake2Generator { | ||||
| 	public: | ||||
| 		Blake2Generator(const void* seed, int nonce); | ||||
|  |  | |||
|  | @ -24,13 +24,11 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| 
 | ||||
| namespace RandomX { | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| 	class VirtualMachine { | ||||
| 	public: | ||||
| 		VirtualMachine(); | ||||
| 		virtual ~VirtualMachine() {} | ||||
| 		virtual void setDataset(dataset_t ds, uint64_t size) = 0; | ||||
| 		virtual void setDataset(dataset_t ds, uint64_t size, LightProgram (&programs)[RANDOMX_CACHE_ACCESSES]) = 0; | ||||
| 		void setScratchpad(void* ptr) { | ||||
| 			scratchpad = (uint8_t*)ptr; | ||||
| 		} | ||||
|  |  | |||
							
								
								
									
										29
									
								
								src/main.cpp
									
										
									
									
									
								
							
							
						
						
									
										29
									
								
								src/main.cpp
									
										
									
									
									
								
							|  | @ -205,7 +205,7 @@ void mine(RandomX::VirtualMachine* vm, std::atomic<uint32_t>& atomicNonce, Atomi | |||
| } | ||||
| 
 | ||||
| int main(int argc, char** argv) { | ||||
| 	bool softAes, genAsm, miningMode, verificationMode, help, largePages, async, genNative, jit, genLight, useSuperscalar; | ||||
| 	bool softAes, genAsm, miningMode, verificationMode, help, largePages, async, genNative, jit, genSuperscalar, useSuperscalar; | ||||
| 	int programCount, threadCount, initThreadCount, epoch; | ||||
| 
 | ||||
| 	readOption("--softAes", argc, argv, softAes); | ||||
|  | @ -220,15 +220,15 @@ int main(int argc, char** argv) { | |||
| 	readOption("--jit", argc, argv, jit); | ||||
| 	readOption("--genNative", argc, argv, genNative); | ||||
| 	readOption("--help", argc, argv, help); | ||||
| 	readOption("--genLight", argc, argv, genLight); | ||||
| 	readOption("--genSuperscalar", argc, argv, genSuperscalar); | ||||
| 	readOption("--useSuperscalar", argc, argv, useSuperscalar); | ||||
| 
 | ||||
| 	if (genLight) { | ||||
| 	if (genSuperscalar) { | ||||
| 		RandomX::LightProgram p; | ||||
| 		RandomX::Blake2Generator gen(seed, programCount); | ||||
| 		RandomX::generateLightProg2(p, gen); | ||||
| 		RandomX::AssemblyGeneratorX86 asmX86; | ||||
| 		asmX86.generateProgram(p); | ||||
| 		asmX86.generateAsm(p); | ||||
| 		//std::ofstream file("lightProg2.asm");
 | ||||
| 		asmX86.printCode(std::cout); | ||||
| 		return 0; | ||||
|  | @ -266,6 +266,7 @@ int main(int argc, char** argv) { | |||
| 	const uint64_t cacheSize = (RANDOMX_ARGON_MEMORY + RANDOMX_ARGON_GROWTH * epoch) * RandomX::ArgonBlockSize; | ||||
| 	const uint64_t datasetSize = (RANDOMX_DATASET_SIZE + RANDOMX_DS_GROWTH * epoch); | ||||
| 	dataset.cache.size = cacheSize; | ||||
| 	RandomX::LightProgram programs[RANDOMX_CACHE_ACCESSES]; | ||||
| 
 | ||||
| 	std::cout << "RandomX - " << (miningMode ? "mining" : "verification") << " mode" << std::endl; | ||||
| 
 | ||||
|  | @ -282,6 +283,12 @@ int main(int argc, char** argv) { | |||
| 			outputHex(std::cout, (char*)dataset.cache.memory, sizeof(__m128i)); | ||||
| 			std::cout << std::endl; | ||||
| 		} | ||||
| 		if (useSuperscalar) { | ||||
| 			RandomX::Blake2Generator gen(seed, programCount); | ||||
| 			for (int i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) { | ||||
| 				RandomX::generateLightProg2(programs[i], gen); | ||||
| 			} | ||||
| 		} | ||||
| 		if (!miningMode) { | ||||
| 			std::cout << "Cache (" << cacheSize << " bytes) initialized in " << sw.getElapsed() << " s" << std::endl; | ||||
| 		} | ||||
|  | @ -291,11 +298,6 @@ int main(int argc, char** argv) { | |||
| 			RandomX::datasetAlloc(dataset, largePages); | ||||
| 			const uint64_t datasetBlockCount = datasetSize / RandomX::CacheLineSize; | ||||
| 			if (useSuperscalar) { | ||||
| 				RandomX::Blake2Generator gen(seed, programCount); | ||||
| 				RandomX::LightProgram programs[RANDOMX_CACHE_ACCESSES]; | ||||
| 				for (int i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) { | ||||
| 					RandomX::generateLightProg2(programs[i], gen); | ||||
| 				} | ||||
| 				RandomX::JitCompilerX86 jit86; | ||||
| 				jit86.generateSuperScalarHash(programs); | ||||
| 				jit86.getDatasetInitFunc()(cache.memory, dataset.dataset.memory, 0, datasetBlockCount); | ||||
|  | @ -320,7 +322,6 @@ int main(int argc, char** argv) { | |||
| 			threads.clear(); | ||||
| 			std::cout << "Dataset (" << datasetSize << " bytes) initialized in " << sw.getElapsed() << " s" << std::endl; | ||||
| 		} | ||||
| 		return 0; | ||||
| 		std::cout << "Initializing " << threadCount << " virtual machine(s) ..." << std::endl; | ||||
| 		for (int i = 0; i < threadCount; ++i) { | ||||
| 			RandomX::VirtualMachine* vm; | ||||
|  | @ -328,12 +329,14 @@ int main(int argc, char** argv) { | |||
| 				vm = new RandomX::CompiledVirtualMachine(); | ||||
| 			} | ||||
| 			else { | ||||
| 				if (jit) | ||||
| 					vm = new RandomX::CompiledLightVirtualMachine(); | ||||
| 				if (jit && useSuperscalar) | ||||
| 					vm = new RandomX::CompiledLightVirtualMachine<true>(); | ||||
| 				else if(jit) | ||||
| 					vm = new RandomX::CompiledLightVirtualMachine<false>(); | ||||
| 				else | ||||
| 					vm = new RandomX::InterpretedVirtualMachine(softAes); | ||||
| 			} | ||||
| 			vm->setDataset(dataset, datasetSize); | ||||
| 			vm->setDataset(dataset, datasetSize, programs); | ||||
| 			vms.push_back(vm); | ||||
| 		} | ||||
| 		uint8_t* scratchpadMem; | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue