mirror of
				https://git.wownero.com/wownero/RandomWOW.git
				synced 2024-08-15 00:23:14 +00:00 
			
		
		
		
	Code cleanup & refactoring
This commit is contained in:
		
							parent
							
								
									22a3aa8d79
								
							
						
					
					
						commit
						7f6bdd9a52
					
				
					 22 changed files with 261 additions and 535 deletions
				
			
		
							
								
								
									
										4
									
								
								.gitignore
									
										
									
									
										vendored
									
									
								
							
							
						
						
									
										4
									
								
								.gitignore
									
										
									
									
										vendored
									
									
								
							|  | @ -3,4 +3,6 @@ obj/ | |||
| *.user | ||||
| *.suo | ||||
| .vs | ||||
| x64 | ||||
| x64/ | ||||
| Release/ | ||||
| Debug/ | ||||
							
								
								
									
										6
									
								
								makefile
									
										
									
									
									
								
							
							
						
						
									
										6
									
								
								makefile
									
										
									
									
									
								
							|  | @ -3,7 +3,7 @@ | |||
| AR=gcc-ar | ||||
| PLATFORM=$(shell uname -m) | ||||
| CXXFLAGS=-std=c++11 | ||||
| CCFLAGS= | ||||
| CCFLAGS=-std=c99 | ||||
| ARFLAGS=rcs | ||||
| BINDIR=bin | ||||
| SRCDIR=src | ||||
|  | @ -80,7 +80,8 @@ $(OBJDIR)/dataset.o: $(SRCDIR)/dataset.cpp $(SRCDIR)/common.hpp $(SRCDIR)/blake2 | |||
|  $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h $(SRCDIR)/dataset.hpp \
 | ||||
|  $(SRCDIR)/superscalar_program.hpp $(SRCDIR)/instruction.hpp $(SRCDIR)/jit_compiler_x86.hpp \
 | ||||
|  $(SRCDIR)/allocator.hpp $(SRCDIR)/virtual_memory.hpp $(SRCDIR)/superscalar.hpp \
 | ||||
|  $(SRCDIR)/blake2_generator.hpp $(SRCDIR)/reciprocal.h $(SRCDIR)/argon2.h $(SRCDIR)/argon2_core.h | ||||
|  $(SRCDIR)/blake2_generator.hpp $(SRCDIR)/reciprocal.h $(SRCDIR)/argon2.h $(SRCDIR)/argon2_core.h \
 | ||||
|  $(SRCDIR)/intrin_portable.h | ||||
| $(OBJDIR)/jit_compiler_x86.o: $(SRCDIR)/jit_compiler_x86.cpp $(SRCDIR)/jit_compiler_x86.hpp \ | ||||
|  $(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h \
 | ||||
|  $(SRCDIR)/jit_compiler_x86_static.hpp $(SRCDIR)/superscalar.hpp \
 | ||||
|  | @ -90,7 +91,6 @@ $(OBJDIR)/jit_compiler_x86.o: $(SRCDIR)/jit_compiler_x86.cpp $(SRCDIR)/jit_compi | |||
| $(OBJDIR)/jit_compiler_x86_static.o: $(SRCDIR)/jit_compiler_x86_static.S \ | ||||
|  $(SRCDIR)/asm/program_prologue_linux.inc $(SRCDIR)/asm/program_xmm_constants.inc \
 | ||||
|  $(SRCDIR)/asm/program_loop_load.inc $(SRCDIR)/asm/program_read_dataset.inc \
 | ||||
|  $(SRCDIR)/asm/program_read_dataset_light.inc \
 | ||||
|  $(SRCDIR)/asm/program_read_dataset_sshash_init.inc \
 | ||||
|  $(SRCDIR)/asm/program_read_dataset_sshash_fin.inc \
 | ||||
|  $(SRCDIR)/asm/program_loop_store.inc $(SRCDIR)/asm/program_epilogue_linux.inc \
 | ||||
|  |  | |||
|  | @ -1,5 +0,0 @@ | |||
| 	xor rbp, rax                       ;# modify "mx"
 | ||||
| 	ror rbp, 32                        ;# swap "ma" and "mx"
 | ||||
| 	mov ecx, ebp                       ;# ecx = ma
 | ||||
| 	and ecx, 2147483584                ;# align "ma" to the start of a cache line
 | ||||
| 	shr ecx, 6                         ;# ecx = Dataset block number
 | ||||
|  | @ -27,12 +27,12 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| 
 | ||||
| namespace randomx { | ||||
| 
 | ||||
| 	static const char* regR[8] = { "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" }; | ||||
| 	static const char* regR32[8] = { "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d" }; | ||||
| 	static const char* regFE[8] = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" }; | ||||
| 	static const char* regF[4] = { "xmm0", "xmm1", "xmm2", "xmm3" }; | ||||
| 	static const char* regE[4] = { "xmm4", "xmm5", "xmm6", "xmm7" }; | ||||
| 	static const char* regA[4] = { "xmm8", "xmm9", "xmm10", "xmm11" }; | ||||
| 	static const char* regR[] = { "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" }; | ||||
| 	static const char* regR32[] = { "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d" }; | ||||
| 	static const char* regFE[] = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" }; | ||||
| 	static const char* regF[] = { "xmm0", "xmm1", "xmm2", "xmm3" }; | ||||
| 	static const char* regE[] = { "xmm4", "xmm5", "xmm6", "xmm7" }; | ||||
| 	static const char* regA[] = { "xmm8", "xmm9", "xmm10", "xmm11" }; | ||||
| 
 | ||||
| 	static const char* tempRegx = "xmm12"; | ||||
| 	static const char* mantissaMask = "xmm13"; | ||||
|  | @ -49,7 +49,9 @@ namespace randomx { | |||
| 		} | ||||
| 		asmCode.str(std::string()); //clear
 | ||||
| 		for (unsigned i = 0; i < prog.getSize(); ++i) { | ||||
| #if RANDOMX_JUMP | ||||
| 			asmCode << "randomx_isn_" << i << ":" << std::endl; | ||||
| #endif | ||||
| 			Instruction& instr = prog(i); | ||||
| 			instr.src %= RegistersCount; | ||||
| 			instr.dst %= RegistersCount; | ||||
|  | @ -469,14 +471,14 @@ namespace randomx { | |||
| 	} | ||||
| 
 | ||||
| 	void AssemblyGeneratorX86::h_FADD_R(Instruction& instr, int i) { | ||||
| 		instr.dst %= 4; | ||||
| 		instr.src %= 4; | ||||
| 		instr.dst %= RegisterCountFlt; | ||||
| 		instr.src %= RegisterCountFlt; | ||||
| 		asmCode << "\taddpd " << regF[instr.dst] << ", " << regA[instr.src] << std::endl; | ||||
| 		traceflt(instr); | ||||
| 	} | ||||
| 
 | ||||
| 	void AssemblyGeneratorX86::h_FADD_M(Instruction& instr, int i) { | ||||
| 		instr.dst %= 4; | ||||
| 		instr.dst %= RegisterCountFlt; | ||||
| 		genAddressReg(instr); | ||||
| 		asmCode << "\tcvtdq2pd " << tempRegx << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl; | ||||
| 		asmCode << "\taddpd " << regF[instr.dst] << ", " << tempRegx << std::endl; | ||||
|  | @ -484,14 +486,14 @@ namespace randomx { | |||
| 	} | ||||
| 
 | ||||
| 	void AssemblyGeneratorX86::h_FSUB_R(Instruction& instr, int i) { | ||||
| 		instr.dst %= 4; | ||||
| 		instr.src %= 4; | ||||
| 		instr.dst %= RegisterCountFlt; | ||||
| 		instr.src %= RegisterCountFlt; | ||||
| 		asmCode << "\tsubpd " << regF[instr.dst] << ", " << regA[instr.src] << std::endl; | ||||
| 		traceflt(instr); | ||||
| 	} | ||||
| 
 | ||||
| 	void AssemblyGeneratorX86::h_FSUB_M(Instruction& instr, int i) { | ||||
| 		instr.dst %= 4; | ||||
| 		instr.dst %= RegisterCountFlt; | ||||
| 		genAddressReg(instr); | ||||
| 		asmCode << "\tcvtdq2pd " << tempRegx << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl; | ||||
| 		asmCode << "\tsubpd " << regF[instr.dst] << ", " << tempRegx << std::endl; | ||||
|  | @ -499,20 +501,20 @@ namespace randomx { | |||
| 	} | ||||
| 
 | ||||
| 	void AssemblyGeneratorX86::h_FSCAL_R(Instruction& instr, int i) { | ||||
| 		instr.dst %= 4; | ||||
| 		instr.dst %= RegisterCountFlt; | ||||
| 		asmCode << "\txorps " << regF[instr.dst] << ", " << scaleMask << std::endl; | ||||
| 		traceflt(instr); | ||||
| 	} | ||||
| 
 | ||||
| 	void AssemblyGeneratorX86::h_FMUL_R(Instruction& instr, int i) { | ||||
| 		instr.dst %= 4; | ||||
| 		instr.src %= 4; | ||||
| 		instr.dst %= RegisterCountFlt; | ||||
| 		instr.src %= RegisterCountFlt; | ||||
| 		asmCode << "\tmulpd " << regE[instr.dst] << ", " << regA[instr.src] << std::endl; | ||||
| 		traceflt(instr); | ||||
| 	} | ||||
| 
 | ||||
| 	void AssemblyGeneratorX86::h_FDIV_M(Instruction& instr, int i) { | ||||
| 		instr.dst %= 4; | ||||
| 		instr.dst %= RegisterCountFlt; | ||||
| 		genAddressReg(instr); | ||||
| 		asmCode << "\tcvtdq2pd " << tempRegx << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl; | ||||
| 		asmCode << "\tandps " << tempRegx << ", " << mantissaMask << std::endl; | ||||
|  | @ -522,7 +524,7 @@ namespace randomx { | |||
| 	} | ||||
| 
 | ||||
| 	void AssemblyGeneratorX86::h_FSQRT_R(Instruction& instr, int i) { | ||||
| 		instr.dst %= 4; | ||||
| 		instr.dst %= RegisterCountFlt; | ||||
| 		asmCode << "\tsqrtpd " << regE[instr.dst] << ", " << regE[instr.dst] << std::endl; | ||||
| 		traceflt(instr); | ||||
| 	}	 | ||||
|  | @ -566,7 +568,7 @@ namespace randomx { | |||
| 
 | ||||
| 	void AssemblyGeneratorX86::handleCondition(Instruction& instr, int i) { | ||||
| 		const int shift = instr.getModShift(); | ||||
| 		const int conditionMask = ((1 << RANDOMX_CONDITION_BITS) - 1) << shift; | ||||
| 		const int conditionMask = ((1 << RANDOMX_JUMP_BITS) - 1) << shift; | ||||
| 		int reg = getConditionRegister(); | ||||
| 		int target = registerUsage[reg] + 1; | ||||
| 		registerUsage[reg] = i; | ||||
|  | @ -579,7 +581,9 @@ namespace randomx { | |||
| 	} | ||||
| 
 | ||||
| 	void AssemblyGeneratorX86::h_COND_R(Instruction& instr, int i) { | ||||
| #if RANDOMX_JUMP | ||||
| 		handleCondition(instr, i); | ||||
| #endif | ||||
| 		asmCode << "\txor ecx, ecx" << std::endl; | ||||
| 		asmCode << "\tcmp " << regR32[instr.src] << ", " << (int32_t)instr.getImm32() << std::endl; | ||||
| 		asmCode << "\tset" << condition(instr) << " cl" << std::endl; | ||||
|  | @ -602,7 +606,6 @@ namespace randomx { | |||
| #define INST_HANDLE(x) REPN(&AssemblyGeneratorX86::h_##x, WT(x)) | ||||
| 
 | ||||
| 	InstructionGenerator AssemblyGeneratorX86::engine[256] = { | ||||
| 		//Integer
 | ||||
| 		INST_HANDLE(IADD_RS) | ||||
| 		INST_HANDLE(IADD_M) | ||||
| 		INST_HANDLE(ISUB_R) | ||||
|  | @ -620,27 +623,18 @@ namespace randomx { | |||
| 		INST_HANDLE(IROR_R) | ||||
| 		INST_HANDLE(IROL_R) | ||||
| 		INST_HANDLE(ISWAP_R) | ||||
| 
 | ||||
| 		//Common floating point
 | ||||
| 		INST_HANDLE(FSWAP_R) | ||||
| 
 | ||||
| 		//Floating point group F
 | ||||
| 		INST_HANDLE(FADD_R) | ||||
| 		INST_HANDLE(FADD_M) | ||||
| 		INST_HANDLE(FSUB_R) | ||||
| 		INST_HANDLE(FSUB_M) | ||||
| 		INST_HANDLE(FSCAL_R) | ||||
| 
 | ||||
| 		//Floating point group E
 | ||||
| 		INST_HANDLE(FMUL_R) | ||||
| 		INST_HANDLE(FDIV_M) | ||||
| 		INST_HANDLE(FSQRT_R) | ||||
| 
 | ||||
| 		//Control
 | ||||
| 		INST_HANDLE(COND_R) | ||||
| 		INST_HANDLE(CFROUND) | ||||
| 		INST_HANDLE(ISTORE) | ||||
| 
 | ||||
| 		INST_HANDLE(NOP) | ||||
| 	}; | ||||
| } | ||||
|  | @ -19,6 +19,7 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include "common.hpp" | ||||
| #include <sstream> | ||||
| 
 | ||||
| namespace randomx { | ||||
|  | @ -48,40 +49,40 @@ namespace randomx { | |||
| 		void traceint(Instruction&); | ||||
| 		void traceflt(Instruction&); | ||||
| 		void tracenop(Instruction&); | ||||
| 		void  h_IADD_RS(Instruction&, int); | ||||
| 		void  h_IADD_M(Instruction&, int); | ||||
| 		void  h_ISUB_R(Instruction&, int); | ||||
| 		void  h_ISUB_M(Instruction&, int); | ||||
| 		void  h_IMUL_R(Instruction&, int); | ||||
| 		void  h_IMUL_M(Instruction&, int); | ||||
| 		void  h_IMULH_R(Instruction&, int); | ||||
| 		void  h_IMULH_M(Instruction&, int); | ||||
| 		void  h_ISMULH_R(Instruction&, int); | ||||
| 		void  h_ISMULH_M(Instruction&, int); | ||||
| 		void  h_IMUL_RCP(Instruction&, int); | ||||
| 		void  h_ISDIV_C(Instruction&, int); | ||||
| 		void  h_INEG_R(Instruction&, int); | ||||
| 		void  h_IXOR_R(Instruction&, int); | ||||
| 		void  h_IXOR_M(Instruction&, int); | ||||
| 		void  h_IROR_R(Instruction&, int); | ||||
| 		void  h_IROL_R(Instruction&, int); | ||||
| 		void  h_ISWAP_R(Instruction&, int); | ||||
| 		void  h_FSWAP_R(Instruction&, int); | ||||
| 		void  h_FADD_R(Instruction&, int); | ||||
| 		void  h_FADD_M(Instruction&, int); | ||||
| 		void  h_FSUB_R(Instruction&, int); | ||||
| 		void  h_FSUB_M(Instruction&, int); | ||||
| 		void  h_FSCAL_R(Instruction&, int); | ||||
| 		void  h_FMUL_R(Instruction&, int); | ||||
| 		void  h_FDIV_M(Instruction&, int); | ||||
| 		void  h_FSQRT_R(Instruction&, int); | ||||
| 		void  h_COND_R(Instruction&, int); | ||||
| 		void  h_CFROUND(Instruction&, int); | ||||
| 		void  h_ISTORE(Instruction&, int); | ||||
| 		void  h_NOP(Instruction&, int); | ||||
| 		void h_IADD_RS(Instruction&, int); | ||||
| 		void h_IADD_M(Instruction&, int); | ||||
| 		void h_ISUB_R(Instruction&, int); | ||||
| 		void h_ISUB_M(Instruction&, int); | ||||
| 		void h_IMUL_R(Instruction&, int); | ||||
| 		void h_IMUL_M(Instruction&, int); | ||||
| 		void h_IMULH_R(Instruction&, int); | ||||
| 		void h_IMULH_M(Instruction&, int); | ||||
| 		void h_ISMULH_R(Instruction&, int); | ||||
| 		void h_ISMULH_M(Instruction&, int); | ||||
| 		void h_IMUL_RCP(Instruction&, int); | ||||
| 		void h_ISDIV_C(Instruction&, int); | ||||
| 		void h_INEG_R(Instruction&, int); | ||||
| 		void h_IXOR_R(Instruction&, int); | ||||
| 		void h_IXOR_M(Instruction&, int); | ||||
| 		void h_IROR_R(Instruction&, int); | ||||
| 		void h_IROL_R(Instruction&, int); | ||||
| 		void h_ISWAP_R(Instruction&, int); | ||||
| 		void h_FSWAP_R(Instruction&, int); | ||||
| 		void h_FADD_R(Instruction&, int); | ||||
| 		void h_FADD_M(Instruction&, int); | ||||
| 		void h_FSUB_R(Instruction&, int); | ||||
| 		void h_FSUB_M(Instruction&, int); | ||||
| 		void h_FSCAL_R(Instruction&, int); | ||||
| 		void h_FMUL_R(Instruction&, int); | ||||
| 		void h_FDIV_M(Instruction&, int); | ||||
| 		void h_FSQRT_R(Instruction&, int); | ||||
| 		void h_COND_R(Instruction&, int); | ||||
| 		void h_CFROUND(Instruction&, int); | ||||
| 		void h_ISTORE(Instruction&, int); | ||||
| 		void h_NOP(Instruction&, int); | ||||
| 
 | ||||
| 		static InstructionGenerator engine[256]; | ||||
| 		std::stringstream asmCode; | ||||
| 		int registerUsage[8]; | ||||
| 		int registerUsage[RegistersCount]; | ||||
| 	}; | ||||
| } | ||||
|  | @ -51,8 +51,6 @@ namespace randomx { | |||
| 
 | ||||
| 	static_assert(wtSum == 256,	"Sum of instruction frequencies must be 256."); | ||||
| 
 | ||||
| 	using addr_t = uint32_t; | ||||
| 
 | ||||
| 	constexpr int ArgonBlockSize = 1024; | ||||
| 	constexpr int ArgonSaltSize = sizeof(RANDOMX_ARGON_SALT) - 1; | ||||
| 	constexpr int CacheLineSize = RANDOMX_DATASET_ITEM_SIZE; | ||||
|  | @ -78,6 +76,10 @@ namespace randomx { | |||
| #endif | ||||
| #endif | ||||
| 
 | ||||
| #define RANDOMX_JUMP (RANDOMX_JUMP_BITS > 0) | ||||
| 
 | ||||
| 	using addr_t = uint32_t; | ||||
| 
 | ||||
| 	using int_reg_t = uint64_t; | ||||
| 
 | ||||
| 	struct fpu_reg_t { | ||||
|  | @ -95,6 +97,7 @@ namespace randomx { | |||
| 	constexpr int ScratchpadL3Mask = (ScratchpadL3 - 1) * 8; | ||||
| 	constexpr int ScratchpadL3Mask64 = (ScratchpadL3 / 8 - 1) * 64; | ||||
| 	constexpr int RegistersCount = 8; | ||||
| 	constexpr int RegisterCountFlt = RegistersCount / 2; | ||||
| 	constexpr int RegisterNeedsDisplacement = 5; //x86 r13 register
 | ||||
| 	constexpr int RegisterNeedsSib = 4; //x86 r12 register
 | ||||
| 
 | ||||
|  | @ -118,5 +121,3 @@ namespace randomx { | |||
| 	typedef void(*CacheDeallocFunc)(randomx_cache*); | ||||
| 	typedef void(*CacheInitializeFunc)(randomx_cache*, const void*, size_t); | ||||
| } | ||||
| 
 | ||||
| std::ostream& operator<<(std::ostream& os, const randomx::RegisterFile& rf); | ||||
|  |  | |||
|  | @ -34,7 +34,10 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| //Number of random Cache accesses per Dataset item. Minimum is 2.
 | ||||
| #define RANDOMX_CACHE_ACCESSES     8 | ||||
| 
 | ||||
| //Target latency for SuperscalarHash (in cycles of the reference CPU).
 | ||||
| #define RANDOMX_SUPERSCALAR_LATENCY   170 | ||||
| 
 | ||||
| //The maximum size of a SuperscalarHash program (number of instructions).
 | ||||
| #define RANDOMX_SUPERSCALAR_MAX_SIZE  512 | ||||
| 
 | ||||
| //Dataset base size in bytes. Must be a power of 2.
 | ||||
|  | @ -61,8 +64,8 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| //Scratchpad L1 size in bytes. Must be a power of two and less than or equal to RANDOMX_SCRATCHPAD_L2.
 | ||||
| #define RANDOMX_SCRATCHPAD_L1      (16 * 1024) | ||||
| 
 | ||||
| //How many register bits must be zero for a jump condition to be triggered
 | ||||
| #define RANDOMX_CONDITION_BITS     7 | ||||
| //How many register bits must be zero for a jump condition to be triggered. If set to 0, jumps are disabled.
 | ||||
| #define RANDOMX_JUMP_BITS          7 | ||||
| 
 | ||||
| /*
 | ||||
| Instruction frequencies (per 256 opcodes) | ||||
|  |  | |||
|  | @ -39,6 +39,8 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| #include "blake2/endian.h" | ||||
| #include "argon2.h" | ||||
| #include "argon2_core.h" | ||||
| #include "jit_compiler_x86.hpp" | ||||
| #include "intrin_portable.h" | ||||
| 
 | ||||
| static_assert(RANDOMX_ARGON_MEMORY % (RANDOMX_ARGON_LANES * ARGON2_SYNC_POINTS) == 0, "RANDOMX_ARGON_MEMORY - invalid value"); | ||||
| static_assert(ARGON2_BLOCK_SIZE == randomx::ArgonBlockSize, "Unpexpected value of ARGON2_BLOCK_SIZE"); | ||||
|  | @ -146,6 +148,7 @@ namespace randomx { | |||
| 		rl[7] = rl[0] ^ superscalarAdd7; | ||||
| 		for (unsigned i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) { | ||||
| 			mixBlock = getMixBlock(registerValue, cache->memory); | ||||
| 			PREFETCHNTA(mixBlock); | ||||
| 			SuperscalarProgram& prog = cache->programs[i]; | ||||
| 
 | ||||
| 			executeSuperscalar(rl, prog, &cache->reciprocalCache); | ||||
|  |  | |||
|  | @ -24,7 +24,6 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| #include <type_traits> | ||||
| #include "common.hpp" | ||||
| #include "superscalar_program.hpp" | ||||
| #include "jit_compiler_x86.hpp" | ||||
| #include "allocator.hpp" | ||||
| 
 | ||||
| /* Global scope for C binding */ | ||||
|  | @ -33,6 +32,10 @@ struct randomx_dataset { | |||
| 	randomx::DatasetDeallocFunc dealloc; | ||||
| }; | ||||
| 
 | ||||
| namespace randomx { | ||||
| 	class JitCompilerX86; | ||||
| } | ||||
| 
 | ||||
| /* Global scope for C binding */ | ||||
| struct randomx_cache { | ||||
| 	uint8_t* memory = nullptr; | ||||
|  |  | |||
|  | @ -29,12 +29,12 @@ namespace randomx { | |||
| 	} | ||||
| 
 | ||||
| 	void Instruction::genAddressReg(std::ostream& os) const { | ||||
| 		os << ((mod % 4) ? "L1" : "L2") << "[r" << (int)src << std::showpos << (int32_t)getImm32() << std::noshowpos << "]"; | ||||
| 		os << (getModMem() ? "L1" : "L2") << "[r" << (int)src << std::showpos << (int32_t)getImm32() << std::noshowpos << "]"; | ||||
| 	} | ||||
| 
 | ||||
| 	void Instruction::genAddressRegDst(std::ostream& os) const { | ||||
| 		if (getModCond()) | ||||
| 			os << ((mod % 4) ? "L1" : "L2"); | ||||
| 			os << (getModMem() ? "L1" : "L2"); | ||||
| 		else | ||||
| 			os << "L3"; | ||||
| 		os << "[r" << (int)dst << std::showpos << (int32_t)getImm32() << std::noshowpos << "]"; | ||||
|  | @ -49,7 +49,7 @@ namespace randomx { | |||
| 		if(dst == RegisterNeedsDisplacement) { | ||||
| 			os << ", " << (int32_t)getImm32(); | ||||
| 		} | ||||
| 		os << ", LSH " << (int)(mod % 4) << std::endl; | ||||
| 		os << ", LSH " << (int)getModMem() << std::endl; | ||||
| 	} | ||||
| 
 | ||||
| 	void Instruction::h_IADD_M(std::ostream& os) const { | ||||
|  | @ -65,7 +65,6 @@ namespace randomx { | |||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	//1 uOP
 | ||||
| 	void Instruction::h_ISUB_R(std::ostream& os) const { | ||||
| 		if (src != dst) { | ||||
| 			os << "r" << (int)dst << ", r" << (int)src << std::endl; | ||||
|  | @ -197,57 +196,57 @@ namespace randomx { | |||
| 	} | ||||
| 
 | ||||
| 	void Instruction::h_FSWAP_R(std::ostream& os) const { | ||||
| 		const char reg = (dst >= 4) ? 'e' : 'f'; | ||||
| 		auto dstIndex = dst % 4; | ||||
| 		const char reg = (dst >= RegisterCountFlt) ? 'e' : 'f'; | ||||
| 		auto dstIndex = dst % RegisterCountFlt; | ||||
| 		os << reg << dstIndex << std::endl; | ||||
| 	} | ||||
| 
 | ||||
| 	void Instruction::h_FADD_R(std::ostream& os) const { | ||||
| 		auto dstIndex = dst % 4; | ||||
| 		auto srcIndex = src % 4; | ||||
| 		auto dstIndex = dst % RegisterCountFlt; | ||||
| 		auto srcIndex = src % RegisterCountFlt; | ||||
| 		os << "f" << dstIndex << ", a" << srcIndex << std::endl; | ||||
| 	} | ||||
| 
 | ||||
| 	void Instruction::h_FADD_M(std::ostream& os) const { | ||||
| 		auto dstIndex = dst % 4; | ||||
| 		auto dstIndex = dst % RegisterCountFlt; | ||||
| 		os << "f" << dstIndex << ", "; | ||||
| 		genAddressReg(os); | ||||
| 		os << std::endl; | ||||
| 	} | ||||
| 
 | ||||
| 	void Instruction::h_FSUB_R(std::ostream& os) const { | ||||
| 		auto dstIndex = dst % 4; | ||||
| 		auto srcIndex = src % 4; | ||||
| 		auto dstIndex = dst % RegisterCountFlt; | ||||
| 		auto srcIndex = src % RegisterCountFlt; | ||||
| 		os << "f" << dstIndex << ", a" << srcIndex << std::endl; | ||||
| 	} | ||||
| 
 | ||||
| 	void Instruction::h_FSUB_M(std::ostream& os) const { | ||||
| 		auto dstIndex = dst % 4; | ||||
| 		auto dstIndex = dst % RegisterCountFlt; | ||||
| 		os << "f" << dstIndex << ", "; | ||||
| 		genAddressReg(os); | ||||
| 		os << std::endl; | ||||
| 	} | ||||
| 
 | ||||
| 	void Instruction::h_FSCAL_R(std::ostream& os) const { | ||||
| 		auto dstIndex = dst % 4; | ||||
| 		auto dstIndex = dst % RegisterCountFlt; | ||||
| 		os << "f" << dstIndex << std::endl; | ||||
| 	} | ||||
| 
 | ||||
| 	void Instruction::h_FMUL_R(std::ostream& os) const { | ||||
| 		auto dstIndex = dst % 4; | ||||
| 		auto srcIndex = src % 4; | ||||
| 		auto dstIndex = dst % RegisterCountFlt; | ||||
| 		auto srcIndex = src % RegisterCountFlt; | ||||
| 		os << "e" << dstIndex << ", a" << srcIndex << std::endl; | ||||
| 	} | ||||
| 
 | ||||
| 	void Instruction::h_FDIV_M(std::ostream& os) const { | ||||
| 		auto dstIndex = dst % 4; | ||||
| 		auto dstIndex = dst % RegisterCountFlt; | ||||
| 		os << "e" << dstIndex << ", "; | ||||
| 		genAddressReg(os); | ||||
| 		os << std::endl; | ||||
| 	} | ||||
| 
 | ||||
| 	void Instruction::h_FSQRT_R(std::ostream& os) const { | ||||
| 		auto dstIndex = dst % 4; | ||||
| 		auto dstIndex = dst % RegisterCountFlt; | ||||
| 		os << "e" << dstIndex << std::endl; | ||||
| 	} | ||||
| 
 | ||||
|  | @ -280,7 +279,7 @@ namespace randomx { | |||
| 	} | ||||
| 
 | ||||
| 	void Instruction::h_COND_R(std::ostream& os) const { | ||||
| 		os << "r" << (int)dst << ", " << condition((mod >> 2) & 7) << "(r" << (int)src << ", " << (int32_t)getImm32() << "), LSH " << (int)(mod >> 5) << std::endl; | ||||
| 		os << "r" << (int)dst << ", " << condition(getModCond()) << "(r" << (int)src << ", " << (int32_t)getImm32() << "), LSH " << (int)(getModShift()) << std::endl; | ||||
| 	} | ||||
| 
 | ||||
| 	void  Instruction::h_ISTORE(std::ostream& os) const { | ||||
|  | @ -297,7 +296,6 @@ namespace randomx { | |||
| #define INST_HANDLE(x) REPN(&Instruction::h_##x, WT(x)) | ||||
| 
 | ||||
| 	const char* Instruction::names[256] = { | ||||
| 		//Integer
 | ||||
| 		INST_NAME(IADD_RS) | ||||
| 		INST_NAME(IADD_M) | ||||
| 		INST_NAME(ISUB_R) | ||||
|  | @ -314,33 +312,22 @@ namespace randomx { | |||
| 		INST_NAME(IXOR_M) | ||||
| 		INST_NAME(IROR_R) | ||||
| 		INST_NAME(ISWAP_R) | ||||
| 
 | ||||
| 		//Common floating point
 | ||||
| 		INST_NAME(FSWAP_R) | ||||
| 
 | ||||
| 		//Floating point group F
 | ||||
| 		INST_NAME(FADD_R) | ||||
| 		INST_NAME(FADD_M) | ||||
| 		INST_NAME(FSUB_R) | ||||
| 		INST_NAME(FSUB_M) | ||||
| 		INST_NAME(FSCAL_R) | ||||
| 
 | ||||
| 		//Floating point group E
 | ||||
| 		INST_NAME(FMUL_R) | ||||
| 		INST_NAME(FDIV_M) | ||||
| 		INST_NAME(FSQRT_R) | ||||
| 
 | ||||
| 		//Control
 | ||||
| 		INST_NAME(COND_R) | ||||
| 		INST_NAME(CFROUND) | ||||
| 
 | ||||
| 		INST_NAME(ISTORE) | ||||
| 
 | ||||
| 		INST_NAME(NOP) | ||||
| 	}; | ||||
| 
 | ||||
| 	InstructionFormatter Instruction::engine[256] = { | ||||
| 		//Integer
 | ||||
| 		INST_HANDLE(IADD_RS) | ||||
| 		INST_HANDLE(IADD_M) | ||||
| 		INST_HANDLE(ISUB_R) | ||||
|  | @ -358,22 +345,15 @@ namespace randomx { | |||
| 		INST_HANDLE(IROR_R) | ||||
| 		INST_HANDLE(IROL_R) | ||||
| 		INST_HANDLE(ISWAP_R) | ||||
| 
 | ||||
| 		//Common floating point
 | ||||
| 		INST_HANDLE(FSWAP_R) | ||||
| 
 | ||||
| 		//Floating point group F
 | ||||
| 		INST_HANDLE(FADD_R) | ||||
| 		INST_HANDLE(FADD_M) | ||||
| 		INST_HANDLE(FSUB_R) | ||||
| 		INST_HANDLE(FSUB_M) | ||||
| 		INST_HANDLE(FSCAL_R) | ||||
| 
 | ||||
| 		//Floating point group E
 | ||||
| 		INST_HANDLE(FMUL_R) | ||||
| 		INST_HANDLE(FDIV_M) | ||||
| 		INST_HANDLE(FSQRT_R) | ||||
| 
 | ||||
| 		INST_HANDLE(COND_R) | ||||
| 		INST_HANDLE(CFROUND) | ||||
| 		INST_HANDLE(ISTORE) | ||||
|  |  | |||
|  | @ -103,36 +103,36 @@ namespace randomx { | |||
| 		void genAddressReg(std::ostream& os) const; | ||||
| 		void genAddressImm(std::ostream& os) const; | ||||
| 		void genAddressRegDst(std::ostream&) const; | ||||
| 		void  h_IADD_RS(std::ostream&) const; | ||||
| 		void  h_IADD_M(std::ostream&) const; | ||||
| 		void  h_ISUB_R(std::ostream&) const; | ||||
| 		void  h_ISUB_M(std::ostream&) const; | ||||
| 		void  h_IMUL_R(std::ostream&) const; | ||||
| 		void  h_IMUL_M(std::ostream&) const; | ||||
| 		void  h_IMULH_R(std::ostream&) const; | ||||
| 		void  h_IMULH_M(std::ostream&) const; | ||||
| 		void  h_ISMULH_R(std::ostream&) const; | ||||
| 		void  h_ISMULH_M(std::ostream&) const; | ||||
| 		void  h_IMUL_RCP(std::ostream&) const; | ||||
| 		void  h_INEG_R(std::ostream&) const; | ||||
| 		void  h_IXOR_R(std::ostream&) const; | ||||
| 		void  h_IXOR_M(std::ostream&) const; | ||||
| 		void  h_IROR_R(std::ostream&) const; | ||||
| 		void  h_IROL_R(std::ostream&) const; | ||||
| 		void  h_ISWAP_R(std::ostream&) const; | ||||
| 		void  h_FSWAP_R(std::ostream&) const; | ||||
| 		void  h_FADD_R(std::ostream&) const; | ||||
| 		void  h_FADD_M(std::ostream&) const; | ||||
| 		void  h_FSUB_R(std::ostream&) const; | ||||
| 		void  h_FSUB_M(std::ostream&) const; | ||||
| 		void  h_FSCAL_R(std::ostream&) const; | ||||
| 		void  h_FMUL_R(std::ostream&) const; | ||||
| 		void  h_FDIV_M(std::ostream&) const; | ||||
| 		void  h_FSQRT_R(std::ostream&) const; | ||||
| 		void  h_COND_R(std::ostream&) const; | ||||
| 		void  h_CFROUND(std::ostream&) const; | ||||
| 		void  h_ISTORE(std::ostream&) const; | ||||
| 		void  h_NOP(std::ostream&) const; | ||||
| 		void h_IADD_RS(std::ostream&) const; | ||||
| 		void h_IADD_M(std::ostream&) const; | ||||
| 		void h_ISUB_R(std::ostream&) const; | ||||
| 		void h_ISUB_M(std::ostream&) const; | ||||
| 		void h_IMUL_R(std::ostream&) const; | ||||
| 		void h_IMUL_M(std::ostream&) const; | ||||
| 		void h_IMULH_R(std::ostream&) const; | ||||
| 		void h_IMULH_M(std::ostream&) const; | ||||
| 		void h_ISMULH_R(std::ostream&) const; | ||||
| 		void h_ISMULH_M(std::ostream&) const; | ||||
| 		void h_IMUL_RCP(std::ostream&) const; | ||||
| 		void h_INEG_R(std::ostream&) const; | ||||
| 		void h_IXOR_R(std::ostream&) const; | ||||
| 		void h_IXOR_M(std::ostream&) const; | ||||
| 		void h_IROR_R(std::ostream&) const; | ||||
| 		void h_IROL_R(std::ostream&) const; | ||||
| 		void h_ISWAP_R(std::ostream&) const; | ||||
| 		void h_FSWAP_R(std::ostream&) const; | ||||
| 		void h_FADD_R(std::ostream&) const; | ||||
| 		void h_FADD_M(std::ostream&) const; | ||||
| 		void h_FSUB_R(std::ostream&) const; | ||||
| 		void h_FSUB_M(std::ostream&) const; | ||||
| 		void h_FSCAL_R(std::ostream&) const; | ||||
| 		void h_FMUL_R(std::ostream&) const; | ||||
| 		void h_FDIV_M(std::ostream&) const; | ||||
| 		void h_FSQRT_R(std::ostream&) const; | ||||
| 		void h_COND_R(std::ostream&) const; | ||||
| 		void h_CFROUND(std::ostream&) const; | ||||
| 		void h_ISTORE(std::ostream&) const; | ||||
| 		void h_NOP(std::ostream&) const; | ||||
| 	}; | ||||
| 
 | ||||
| 	static_assert(sizeof(Instruction) == 8, "Invalid size of struct randomx::Instruction"); | ||||
|  |  | |||
|  | @ -318,18 +318,6 @@ constexpr uint64_t ieee_get_exponent_mask() { | |||
| 	return (uint64_t)(E + 1023U) << 52; | ||||
| } | ||||
| 
 | ||||
| template<int E> | ||||
| __m128d ieee_set_exponent(__m128d x) { | ||||
| 	static_assert(E > -1023, "Invalid exponent value"); | ||||
| 	constexpr uint64_t mantissaMask64 = (1ULL << 52) - 1; | ||||
| 	const __m128d mantissaMask = _mm_castsi128_pd(_mm_set_epi64x(mantissaMask64, mantissaMask64)); | ||||
| 	constexpr uint64_t exponent64 = (uint64_t)(E + 1023U) << 52; | ||||
| 	const __m128d exponentMask = _mm_castsi128_pd(_mm_set_epi64x(exponent64, exponent64)); | ||||
| 	x = _mm_and_pd(x, mantissaMask); | ||||
| 	x = _mm_or_pd(x, exponentMask); | ||||
| 	return x; | ||||
| } | ||||
| 
 | ||||
| double loadDoublePortable(const void* addr); | ||||
| uint64_t mulh(uint64_t, uint64_t); | ||||
| int64_t smulh(int64_t, int64_t); | ||||
|  |  | |||
|  | @ -20,8 +20,6 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| #include <stdexcept> | ||||
| #include "jit_compiler_x86.hpp" | ||||
| 
 | ||||
| #define RANDOMX_JUMP | ||||
| 
 | ||||
| #if !defined(_M_X64) && !defined(__x86_64__) | ||||
| namespace randomx { | ||||
| 
 | ||||
|  | @ -113,7 +111,6 @@ namespace randomx { | |||
| 	const uint8_t* codeLoopLoad = (uint8_t*)&randomx_program_loop_load; | ||||
| 	const uint8_t* codeProgamStart = (uint8_t*)&randomx_program_start; | ||||
| 	const uint8_t* codeReadDataset = (uint8_t*)&randomx_program_read_dataset; | ||||
| 	const uint8_t* codeReadDatasetLight = (uint8_t*)&randomx_program_read_dataset_light; | ||||
| 	const uint8_t* codeReadDatasetLightSshInit = (uint8_t*)&randomx_program_read_dataset_sshash_init; | ||||
| 	const uint8_t* codeReadDatasetLightSshFin = (uint8_t*)&randomx_program_read_dataset_sshash_fin; | ||||
| 	const uint8_t* codeDatasetInit = (uint8_t*)&randomx_dataset_init; | ||||
|  | @ -128,8 +125,7 @@ namespace randomx { | |||
| 
 | ||||
| 	const int32_t prologueSize = codeLoopBegin - codePrologue; | ||||
| 	const int32_t loopLoadSize = codeProgamStart - codeLoopLoad; | ||||
| 	const int32_t readDatasetSize = codeReadDatasetLight - codeReadDataset; | ||||
| 	const int32_t readDatasetLightSize = codeReadDatasetLightSshInit - codeReadDatasetLight; | ||||
| 	const int32_t readDatasetSize = codeReadDatasetLightSshInit - codeReadDataset; | ||||
| 	const int32_t readDatasetLightInitSize = codeReadDatasetLightSshFin - codeReadDatasetLightSshInit; | ||||
| 	const int32_t readDatasetLightFinSize = codeLoopStore - codeReadDatasetLightSshFin; | ||||
| 	const int32_t loopStoreSize = codeLoopEnd - codeLoopStore; | ||||
|  | @ -299,7 +295,7 @@ namespace randomx { | |||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::generateProgramPrologue(Program& prog, ProgramConfiguration& pcfg) { | ||||
| #ifdef RANDOMX_JUMP | ||||
| #if RANDOMX_JUMP | ||||
| 		instructionOffsets.clear(); | ||||
| 		for (unsigned i = 0; i < 8; ++i) { | ||||
| 			registerUsage[i] = -1; | ||||
|  | @ -336,7 +332,7 @@ namespace randomx { | |||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::generateCode(Instruction& instr, int i) { | ||||
| #ifdef RANDOMX_JUMP | ||||
| #if RANDOMX_JUMP | ||||
| 		instructionOffsets.push_back(codePos); | ||||
| #endif | ||||
| 		auto generator = engine[instr.opcode]; | ||||
|  | @ -467,15 +463,6 @@ namespace randomx { | |||
| 
 | ||||
| 	void JitCompilerX86::h_IADD_RS(Instruction& instr, int i) { | ||||
| 		registerUsage[instr.dst] = i; | ||||
| 		/*if (instr.src != instr.dst) {
 | ||||
| 			emit(REX_ADD_RR); | ||||
| 			emitByte(0xc0 + 8 * instr.dst + instr.src); | ||||
| 		} | ||||
| 		else { | ||||
| 			emit(REX_81); | ||||
| 			emitByte(0xc0 + instr.dst); | ||||
| 			emit32(instr.getImm32()); | ||||
| 		}*/ | ||||
| 		emit(REX_LEA); | ||||
| 		if (instr.dst == RegisterNeedsDisplacement) | ||||
| 			emitByte(0xac); | ||||
|  | @ -505,14 +492,6 @@ namespace randomx { | |||
| 		emitByte((scale << 6) | (index << 3) | base); | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_IADD_RC(Instruction& instr, int i) { | ||||
| 		registerUsage[instr.dst] = i; | ||||
| 		emit(REX_LEA); | ||||
| 		emitByte(0x84 + 8 * instr.dst); | ||||
| 		genSIB(0, instr.src, instr.dst); | ||||
| 		emit32(instr.getImm32()); | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_ISUB_R(Instruction& instr, int i) { | ||||
| 		registerUsage[instr.dst] = i; | ||||
| 		if (instr.src != instr.dst) { | ||||
|  | @ -541,14 +520,6 @@ namespace randomx { | |||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_IMUL_9C(Instruction& instr, int i) { | ||||
| 		registerUsage[instr.dst] = i; | ||||
| 		emit(REX_LEA); | ||||
| 		emitByte(0x84 + 8 * instr.dst); | ||||
| 		genSIB(3, instr.dst, instr.dst); | ||||
| 		emit32(instr.getImm32()); | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_IMUL_R(Instruction& instr, int i) { | ||||
| 		registerUsage[instr.dst] = i; | ||||
| 		if (instr.src != instr.dst) { | ||||
|  | @ -645,10 +616,6 @@ namespace randomx { | |||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_ISDIV_C(Instruction& instr, int i) { | ||||
| 
 | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_INEG_R(Instruction& instr, int i) { | ||||
| 		registerUsage[instr.dst] = i; | ||||
| 		emit(REX_NEG); | ||||
|  | @ -729,17 +696,14 @@ namespace randomx { | |||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_FADD_R(Instruction& instr, int i) { | ||||
| 		instr.dst %= 4; | ||||
| 		instr.src %= 4; | ||||
| 		instr.dst %= RegisterCountFlt; | ||||
| 		instr.src %= RegisterCountFlt; | ||||
| 		emit(REX_ADDPD); | ||||
| 		emitByte(0xc0 + instr.src + 8 * instr.dst); | ||||
| 		//emit(REX_PADD);
 | ||||
| 		//emitByte(PADD_OPCODES[instr.mod % 4]);
 | ||||
| 		//emitByte(0xf8 + instr.dst);
 | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_FADD_M(Instruction& instr, int i) { | ||||
| 		instr.dst %= 4; | ||||
| 		instr.dst %= RegisterCountFlt; | ||||
| 		genAddressReg(instr); | ||||
| 		emit(REX_CVTDQ2PD_XMM12); | ||||
| 		emit(REX_ADDPD); | ||||
|  | @ -747,17 +711,14 @@ namespace randomx { | |||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_FSUB_R(Instruction& instr, int i) { | ||||
| 		instr.dst %= 4; | ||||
| 		instr.src %= 4; | ||||
| 		instr.dst %= RegisterCountFlt; | ||||
| 		instr.src %= RegisterCountFlt; | ||||
| 		emit(REX_SUBPD); | ||||
| 		emitByte(0xc0 + instr.src + 8 * instr.dst); | ||||
| 		//emit(REX_PADD);
 | ||||
| 		//emitByte(PADD_OPCODES[instr.mod % 4]);
 | ||||
| 		//emitByte(0xf8 + instr.dst);
 | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_FSUB_M(Instruction& instr, int i) { | ||||
| 		instr.dst %= 4; | ||||
| 		instr.dst %= RegisterCountFlt; | ||||
| 		genAddressReg(instr); | ||||
| 		emit(REX_CVTDQ2PD_XMM12); | ||||
| 		emit(REX_SUBPD); | ||||
|  | @ -765,40 +726,20 @@ namespace randomx { | |||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_FSCAL_R(Instruction& instr, int i) { | ||||
| 		instr.dst %= 4; | ||||
| 		instr.dst %= RegisterCountFlt; | ||||
| 		emit(REX_XORPS); | ||||
| 		emitByte(0xc7 + 8 * instr.dst); | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_FMUL_R(Instruction& instr, int i) { | ||||
| 		instr.dst %= 4; | ||||
| 		instr.src %= 4; | ||||
| 		instr.dst %= RegisterCountFlt; | ||||
| 		instr.src %= RegisterCountFlt; | ||||
| 		emit(REX_MULPD); | ||||
| 		emitByte(0xe0 + instr.src + 8 * instr.dst); | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_FMUL_M(Instruction& instr, int i) { | ||||
| 		instr.dst %= 4; | ||||
| 		genAddressReg(instr); | ||||
| 		emit(REX_CVTDQ2PD_XMM12); | ||||
| 		emit(REX_ANDPS_XMM12); | ||||
| 		emit(REX_MULPD); | ||||
| 		emitByte(0xe4 + 8 * instr.dst); | ||||
| 		emit(REX_MAXPD); | ||||
| 		emitByte(0xe5 + 8 * instr.dst); | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_FDIV_R(Instruction& instr, int i) { | ||||
| 		instr.dst %= 4; | ||||
| 		instr.src %= 4; | ||||
| 		emit(REX_DIVPD); | ||||
| 		emitByte(0xe0 + instr.src + 8 * instr.dst); | ||||
| 		emit(REX_MAXPD); | ||||
| 		emitByte(0xe5 + 8 * instr.dst); | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_FDIV_M(Instruction& instr, int i) { | ||||
| 		instr.dst %= 4; | ||||
| 		instr.dst %= RegisterCountFlt; | ||||
| 		genAddressReg(instr); | ||||
| 		emit(REX_CVTDQ2PD_XMM12); | ||||
| 		emit(REX_ANDPS_XMM12); | ||||
|  | @ -807,7 +748,7 @@ namespace randomx { | |||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_FSQRT_R(Instruction& instr, int i) { | ||||
| 		instr.dst %= 4; | ||||
| 		instr.dst %= RegisterCountFlt; | ||||
| 		emit(SQRTPD); | ||||
| 		emitByte(0xe4 + 9 * instr.dst); | ||||
| 	} | ||||
|  | @ -883,7 +824,7 @@ namespace randomx { | |||
| 
 | ||||
| 	void JitCompilerX86::handleCondition(Instruction& instr, int i) { | ||||
| 		const int shift = instr.getModShift(); | ||||
| 		const int conditionMask = ((1 << RANDOMX_CONDITION_BITS) - 1) << shift; | ||||
| 		const int conditionMask = ((1 << RANDOMX_JUMP_BITS) - 1) << shift; | ||||
| 		int reg = getConditionRegister(); | ||||
| 		int target = registerUsage[reg] + 1; | ||||
| 		emit(REX_ADD_I); | ||||
|  | @ -900,7 +841,7 @@ namespace randomx { | |||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_COND_R(Instruction& instr, int i) { | ||||
| #ifdef RANDOMX_JUMP | ||||
| #if RANDOMX_JUMP | ||||
| 		handleCondition(instr, i); | ||||
| #endif | ||||
| 		emit(XOR_ECX_ECX); | ||||
|  | @ -914,40 +855,15 @@ namespace randomx { | |||
| 		emitByte(0xc1 + 8 * instr.dst); | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_COND_M(Instruction& instr, int i) { | ||||
| #ifdef RANDOMX_JUMP | ||||
| 		handleCondition(instr, i); | ||||
| #endif | ||||
| 		emit(XOR_ECX_ECX); | ||||
| 		genAddressReg(instr); | ||||
| 		emit(REX_CMP_M32I); | ||||
| 		emit32(instr.getImm32()); | ||||
| 		emitByte(0x0f); | ||||
| 		emitByte(condition(instr)); | ||||
| 		emitByte(0xc1); | ||||
| 		emit(REX_ADD_RM); | ||||
| 		emitByte(0xc1 + 8 * instr.dst); | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_ISTORE(Instruction& instr, int i) { | ||||
| 		genAddressRegDst(instr); | ||||
| 		//if (instr.getModCond())
 | ||||
| 		emit(REX_MOV_MR); | ||||
| 		//else
 | ||||
| 		//	emit(MOVNTI);
 | ||||
| 		emitByte(0x04 + 8 * instr.src); | ||||
| 		emitByte(0x06); | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_FSTORE(Instruction& instr, int i) { | ||||
| 		genAddressRegDst(instr, true); | ||||
| 		emit(MOVAPD); | ||||
| 		emitByte(0x04 + 8 * instr.src); | ||||
| 		emitByte(0x06); | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_NOP(Instruction& instr, int i) { | ||||
| 		emitByte(0x90); | ||||
| 		emit(NOP1); | ||||
| 	} | ||||
| 
 | ||||
| #include "instruction_weights.hpp" | ||||
|  |  | |||
|  | @ -110,43 +110,36 @@ namespace randomx { | |||
| 			codePos += count; | ||||
| 		} | ||||
| 
 | ||||
| 		void  h_IADD_RS(Instruction&, int); | ||||
| 		void  h_IADD_M(Instruction&, int); | ||||
| 		void  h_IADD_RC(Instruction&, int); | ||||
| 		void  h_ISUB_R(Instruction&, int); | ||||
| 		void  h_ISUB_M(Instruction&, int); | ||||
| 		void  h_IMUL_9C(Instruction&, int); | ||||
| 		void  h_IMUL_R(Instruction&, int); | ||||
| 		void  h_IMUL_M(Instruction&, int); | ||||
| 		void  h_IMULH_R(Instruction&, int); | ||||
| 		void  h_IMULH_M(Instruction&, int); | ||||
| 		void  h_ISMULH_R(Instruction&, int); | ||||
| 		void  h_ISMULH_M(Instruction&, int); | ||||
| 		void  h_IMUL_RCP(Instruction&, int); | ||||
| 		void  h_ISDIV_C(Instruction&, int); | ||||
| 		void  h_INEG_R(Instruction&, int); | ||||
| 		void  h_IXOR_R(Instruction&, int); | ||||
| 		void  h_IXOR_M(Instruction&, int); | ||||
| 		void  h_IROR_R(Instruction&, int); | ||||
| 		void  h_IROL_R(Instruction&, int); | ||||
| 		void  h_ISWAP_R(Instruction&, int); | ||||
| 		void  h_FSWAP_R(Instruction&, int); | ||||
| 		void  h_FADD_R(Instruction&, int); | ||||
| 		void  h_FADD_M(Instruction&, int); | ||||
| 		void  h_FSUB_R(Instruction&, int); | ||||
| 		void  h_FSUB_M(Instruction&, int); | ||||
| 		void  h_FSCAL_R(Instruction&, int); | ||||
| 		void  h_FMUL_R(Instruction&, int); | ||||
| 		void  h_FMUL_M(Instruction&, int); | ||||
| 		void  h_FDIV_R(Instruction&, int); | ||||
| 		void  h_FDIV_M(Instruction&, int); | ||||
| 		void  h_FSQRT_R(Instruction&, int); | ||||
| 		void  h_COND_R(Instruction&, int); | ||||
| 		void  h_COND_M(Instruction&, int); | ||||
| 		void  h_CFROUND(Instruction&, int); | ||||
| 		void  h_ISTORE(Instruction&, int); | ||||
| 		void  h_FSTORE(Instruction&, int); | ||||
| 		void  h_NOP(Instruction&, int); | ||||
| 		void h_IADD_RS(Instruction&, int); | ||||
| 		void h_IADD_M(Instruction&, int); | ||||
| 		void h_ISUB_R(Instruction&, int); | ||||
| 		void h_ISUB_M(Instruction&, int); | ||||
| 		void h_IMUL_R(Instruction&, int); | ||||
| 		void h_IMUL_M(Instruction&, int); | ||||
| 		void h_IMULH_R(Instruction&, int); | ||||
| 		void h_IMULH_M(Instruction&, int); | ||||
| 		void h_ISMULH_R(Instruction&, int); | ||||
| 		void h_ISMULH_M(Instruction&, int); | ||||
| 		void h_IMUL_RCP(Instruction&, int); | ||||
| 		void h_INEG_R(Instruction&, int); | ||||
| 		void h_IXOR_R(Instruction&, int); | ||||
| 		void h_IXOR_M(Instruction&, int); | ||||
| 		void h_IROR_R(Instruction&, int); | ||||
| 		void h_IROL_R(Instruction&, int); | ||||
| 		void h_ISWAP_R(Instruction&, int); | ||||
| 		void h_FSWAP_R(Instruction&, int); | ||||
| 		void h_FADD_R(Instruction&, int); | ||||
| 		void h_FADD_M(Instruction&, int); | ||||
| 		void h_FSUB_R(Instruction&, int); | ||||
| 		void h_FSUB_M(Instruction&, int); | ||||
| 		void h_FSCAL_R(Instruction&, int); | ||||
| 		void h_FMUL_R(Instruction&, int); | ||||
| 		void h_FDIV_M(Instruction&, int); | ||||
| 		void h_FSQRT_R(Instruction&, int); | ||||
| 		void h_COND_R(Instruction&, int); | ||||
| 		void h_CFROUND(Instruction&, int); | ||||
| 		void h_ISTORE(Instruction&, int); | ||||
| 		void h_NOP(Instruction&, int); | ||||
| 	}; | ||||
| 
 | ||||
| } | ||||
|  | @ -31,7 +31,6 @@ | |||
| .global DECL(randomx_program_loop_load) | ||||
| .global DECL(randomx_program_start) | ||||
| .global DECL(randomx_program_read_dataset) | ||||
| .global DECL(randomx_program_read_dataset_light) | ||||
| .global DECL(randomx_program_read_dataset_sshash_init) | ||||
| .global DECL(randomx_program_read_dataset_sshash_fin) | ||||
| .global DECL(randomx_program_loop_store) | ||||
|  | @ -66,9 +65,6 @@ DECL(randomx_program_start): | |||
| DECL(randomx_program_read_dataset): | ||||
| 	#include "asm/program_read_dataset.inc" | ||||
| 
 | ||||
| DECL(randomx_program_read_dataset_light): | ||||
| 	#include "asm/program_read_dataset_light.inc" | ||||
| 
 | ||||
| DECL(randomx_program_read_dataset_sshash_init): | ||||
| 	#include "asm/program_read_dataset_sshash_init.inc" | ||||
| 
 | ||||
|  |  | |||
|  | @ -24,7 +24,6 @@ PUBLIC randomx_program_loop_begin | |||
| PUBLIC randomx_program_loop_load | ||||
| PUBLIC randomx_program_start | ||||
| PUBLIC randomx_program_read_dataset | ||||
| PUBLIC randomx_program_read_dataset_light | ||||
| PUBLIC randomx_program_read_dataset_sshash_init | ||||
| PUBLIC randomx_program_read_dataset_sshash_fin | ||||
| PUBLIC randomx_dataset_init | ||||
|  | @ -62,10 +61,6 @@ randomx_program_read_dataset PROC | |||
| 	include asm/program_read_dataset.inc | ||||
| randomx_program_read_dataset ENDP | ||||
| 
 | ||||
| randomx_program_read_dataset_light PROC | ||||
| 	include asm/program_read_dataset_light.inc | ||||
| randomx_program_read_dataset_light ENDP | ||||
| 
 | ||||
| randomx_program_read_dataset_sshash_init PROC | ||||
| 	include asm/program_read_dataset_sshash_init.inc | ||||
| randomx_program_read_dataset_sshash_init ENDP | ||||
|  |  | |||
|  | @ -25,7 +25,6 @@ extern "C" { | |||
| 	void randomx_program_loop_load(); | ||||
| 	void randomx_program_start(); | ||||
| 	void randomx_program_read_dataset(); | ||||
| 	void randomx_program_read_dataset_light(); | ||||
| 	void randomx_program_read_dataset_sshash_init(); | ||||
| 	void randomx_program_read_dataset_sshash_fin(); | ||||
| 	void randomx_program_loop_store(); | ||||
|  |  | |||
|  | @ -76,22 +76,6 @@ void randomx_vm::initialize() { | |||
| 	store64(&config.eMask[1], (program.getEntropy(15) & mask22bit) | maskExp240); | ||||
| } | ||||
| 
 | ||||
| //TODO
 | ||||
| std::ostream& operator<<(std::ostream& os, const randomx::RegisterFile& rf) { | ||||
| 	for (int i = 0; i < randomx::RegistersCount; ++i) | ||||
| 		os << std::hex << "r" << i << " = " << rf.r[i] << std::endl << std::dec; | ||||
| 	for (int i = 0; i < 4; ++i) | ||||
| 		os << std::hex << "f" << i << " = " << *(uint64_t*)&rf.f[i].hi << " (" << rf.f[i].hi << ")" << std::endl | ||||
| 		<< "   = " << *(uint64_t*)&rf.f[i].lo << " (" << rf.f[i].lo << ")" << std::endl << std::dec; | ||||
| 	for (int i = 0; i < 4; ++i) | ||||
| 		os << std::hex << "e" << i << " = " << *(uint64_t*)&rf.e[i].hi << " (" << rf.e[i].hi << ")" << std::endl | ||||
| 		<< "   = " << *(uint64_t*)&rf.e[i].lo << " (" << rf.e[i].lo << ")" << std::endl << std::dec; | ||||
| 	for (int i = 0; i < 4; ++i) | ||||
| 		os << std::hex << "a" << i << " = " << *(uint64_t*)&rf.a[i].hi << " (" << rf.a[i].hi << ")" << std::endl | ||||
| 		<< "   = " << *(uint64_t*)&rf.a[i].lo << " (" << rf.a[i].lo << ")" << std::endl << std::dec; | ||||
| 	return os; | ||||
| } | ||||
| 
 | ||||
| namespace randomx { | ||||
| 
 | ||||
| 	alignas(16) volatile static __m128i aesDummy; | ||||
|  |  | |||
|  | @ -17,10 +17,6 @@ You should have received a copy of the GNU General Public License | |||
| along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 | ||||
| */ | ||||
| 
 | ||||
| //#define TRACE
 | ||||
| //#define FPUCHECK
 | ||||
| #define RANDOMX_JUMP | ||||
| 
 | ||||
| #include <iostream> | ||||
| #include <iomanip> | ||||
| #include <stdexcept> | ||||
|  | @ -33,12 +29,6 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| #include "intrin_portable.h" | ||||
| #include "reciprocal.h" | ||||
| 
 | ||||
| #ifdef FPUCHECK | ||||
| constexpr bool fpuCheck = true; | ||||
| #else | ||||
| constexpr bool fpuCheck = false; | ||||
| #endif | ||||
| 
 | ||||
| namespace randomx { | ||||
| 
 | ||||
| 	static int_reg_t Zero = 0; | ||||
|  | @ -53,49 +43,16 @@ namespace randomx { | |||
| 	void InterpretedVm<Allocator, softAes>::run(void* seed) { | ||||
| 		VmBase<Allocator, softAes>::generateProgram(seed); | ||||
| 		randomx_vm::initialize(); | ||||
| 		for (unsigned i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) { | ||||
| 			program(i).src %= RegistersCount; | ||||
| 			program(i).dst %= RegistersCount; | ||||
| 		} | ||||
| 		execute(); | ||||
| 	} | ||||
| 
 | ||||
| 	template<class Allocator, bool softAes> | ||||
| 	void InterpretedVm<Allocator, softAes>::executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) { | ||||
| 		for (int ic = 0; ic < RANDOMX_PROGRAM_SIZE; ++ic) { | ||||
| 			executeBytecode(ic, r, f, e, a); | ||||
| 	void InterpretedVm<Allocator, softAes>::executeBytecode(int_reg_t(&r)[RegistersCount], __m128d (&f)[RegisterCountFlt], __m128d (&e)[RegisterCountFlt], __m128d (&a)[RegisterCountFlt]) { | ||||
| 		for (int pc = 0; pc < RANDOMX_PROGRAM_SIZE; ++pc) { | ||||
| 			executeBytecode(pc, r, f, e, a); | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	static void print(int_reg_t r) { | ||||
| 		std::cout << std::hex << std::setw(16) << std::setfill('0') << r << std::endl; | ||||
| 	} | ||||
| 
 | ||||
| 	static void print(__m128d f) { | ||||
| 		uint64_t lo = *(((uint64_t*)&f) + 0); | ||||
| 		uint64_t hi = *(((uint64_t*)&f) + 1); | ||||
| 		std::cout << std::hex << std::setw(16) << std::setfill('0') << hi << '-' << std::hex << std::setw(16) << std::setfill('0') << lo << std::endl; | ||||
| 	} | ||||
| 
 | ||||
| 	static void printState(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) { | ||||
| 		for (int i = 0; i < 8; ++i) { | ||||
| 			std::cout << "r" << i << " = "; print(r[i]); | ||||
| 		} | ||||
| 		for (int i = 0; i < 4; ++i) { | ||||
| 			std::cout << "f" << i << " = "; print(f[i]); | ||||
| 		} | ||||
| 		for (int i = 0; i < 4; ++i) { | ||||
| 			std::cout << "e" << i << " = "; print(e[i]); | ||||
| 		} | ||||
| 		for (int i = 0; i < 4; ++i) { | ||||
| 			std::cout << "a" << i << " = "; print(a[i]); | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	static bool isDenormal(double x) { | ||||
| 		return std::fpclassify(x) == FP_SUBNORMAL; | ||||
| 	} | ||||
| 
 | ||||
| 	template<class Allocator, bool softAes> | ||||
| 	FORCE_INLINE void* InterpretedVm<Allocator, softAes>::getScratchpadAddress(InstructionByteCode& ibc) { | ||||
| 		uint32_t addr = (*ibc.isrc + ibc.imm) & ibc.memMask; | ||||
|  | @ -113,9 +70,8 @@ namespace randomx { | |||
| 	} | ||||
| 
 | ||||
| 	template<class Allocator, bool softAes> | ||||
| 	void InterpretedVm<Allocator, softAes>::executeBytecode(int& ic, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) { | ||||
| 		auto& ibc = byteCode[ic]; | ||||
| 		if (trace && ibc.type != InstructionType::NOP) std::cout << std::dec << std::setw(3) << ic << " " << program(ic); | ||||
| 	void InterpretedVm<Allocator, softAes>::executeBytecode(int& pc, int_reg_t(&r)[RegistersCount], __m128d (&f)[RegisterCountFlt], __m128d (&e)[RegisterCountFlt], __m128d (&a)[RegisterCountFlt]) { | ||||
| 		auto& ibc = byteCode[pc]; | ||||
| 		switch (ibc.type) | ||||
| 		{ | ||||
| 			case InstructionType::IADD_RS: { | ||||
|  | @ -225,11 +181,11 @@ namespace randomx { | |||
| 			} break; | ||||
| 
 | ||||
| 			case InstructionType::COND_R: { | ||||
| #ifdef RANDOMX_JUMP | ||||
| #if RANDOMX_JUMP | ||||
| 				*ibc.creg += (1 << ibc.shift); | ||||
| 				const uint64_t conditionMask = ((1ULL << RANDOMX_CONDITION_BITS) - 1) << ibc.shift; | ||||
| 				const uint64_t conditionMask = ((1ULL << RANDOMX_JUMP_BITS) - 1) << ibc.shift; | ||||
| 				if ((*ibc.creg & conditionMask) == 0) { | ||||
| 					ic = ibc.target; | ||||
| 					pc = ibc.target; | ||||
| 					break; | ||||
| 				} | ||||
| #endif | ||||
|  | @ -251,50 +207,23 @@ namespace randomx { | |||
| 			default: | ||||
| 				UNREACHABLE; | ||||
| 		} | ||||
| 		if (trace && ibc.type != InstructionType::NOP) { | ||||
| 			if(ibc.type < 20 || ibc.type == 31 || ibc.type == 32) | ||||
| 				print(*ibc.idst); | ||||
| 			else //if(ibc.type >= 20 && ibc.type <= 30)
 | ||||
| 				print(0); | ||||
| 		} | ||||
| #ifdef FPUCHECK | ||||
| 		if (ibc.type >= 26 && ibc.type <= 30) { | ||||
| 			double lo = *(((double*)ibc.fdst) + 0); | ||||
| 			double hi = *(((double*)ibc.fdst) + 1); | ||||
| 			if (lo <= 0 || hi <= 0) { | ||||
| 				std::stringstream ss; | ||||
| 				ss << "Underflow in operation " << ibc.type; | ||||
| 				printState(r, f, e, a); | ||||
| 				throw std::runtime_error(ss.str()); | ||||
| 			} | ||||
| 		} | ||||
| #endif | ||||
| 	} | ||||
| 
 | ||||
| 	template<class Allocator, bool softAes> | ||||
| 	void InterpretedVm<Allocator, softAes>::execute() { | ||||
| 		int_reg_t r[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; | ||||
| 		__m128d f[4]; | ||||
| 		__m128d e[4]; | ||||
| 		__m128d a[4]; | ||||
| 		int_reg_t r[RegistersCount] = { 0 }; | ||||
| 		__m128d f[RegisterCountFlt]; | ||||
| 		__m128d e[RegisterCountFlt]; | ||||
| 		__m128d a[RegisterCountFlt]; | ||||
| 
 | ||||
| 		a[0] = _mm_load_pd(&reg.a[0].lo); | ||||
| 		a[1] = _mm_load_pd(&reg.a[1].lo); | ||||
| 		a[2] = _mm_load_pd(&reg.a[2].lo); | ||||
| 		a[3] = _mm_load_pd(&reg.a[3].lo); | ||||
| 		for(unsigned i = 0; i < RegisterCountFlt; ++i) | ||||
| 			a[i] = _mm_load_pd(&reg.a[i].lo); | ||||
| 
 | ||||
| 		precompileProgram(r, f, e, a); | ||||
| 
 | ||||
| 		uint32_t spAddr0 = mem.mx; | ||||
| 		uint32_t spAddr1 = mem.ma; | ||||
| 
 | ||||
| 		if (trace) { | ||||
| 			std::cout << "execute (reg: r" << config.readReg0 << ", r" << config.readReg1 << ", r" << config.readReg2 << ", r" << config.readReg3 << ")" << std::endl; | ||||
| 			std::cout << "spAddr " << std::hex << std::setw(8) << std::setfill('0') << spAddr1 << " / " << std::setw(8) << std::setfill('0') << spAddr0 << std::endl; | ||||
| 			std::cout << "ma/mx " << std::hex << std::setw(8) << std::setfill('0') << mem.ma << std::setw(8) << std::setfill('0') << mem.mx << std::endl; | ||||
| 			printState(r, f, e, a); | ||||
| 		} | ||||
| 
 | ||||
| 		for(unsigned ic = 0; ic < RANDOMX_PROGRAM_ITERATIONS; ++ic) { | ||||
| 			uint64_t spMix = r[config.readReg0] ^ r[config.readReg1]; | ||||
| 			spAddr0 ^= spMix; | ||||
|  | @ -302,31 +231,14 @@ namespace randomx { | |||
| 			spAddr1 ^= spMix >> 32; | ||||
| 			spAddr1 &= ScratchpadL3Mask64; | ||||
| 			 | ||||
| 			r[0] ^= load64(scratchpad + spAddr0 + 0); | ||||
| 			r[1] ^= load64(scratchpad + spAddr0 + 8); | ||||
| 			r[2] ^= load64(scratchpad + spAddr0 + 16); | ||||
| 			r[3] ^= load64(scratchpad + spAddr0 + 24); | ||||
| 			r[4] ^= load64(scratchpad + spAddr0 + 32); | ||||
| 			r[5] ^= load64(scratchpad + spAddr0 + 40); | ||||
| 			r[6] ^= load64(scratchpad + spAddr0 + 48); | ||||
| 			r[7] ^= load64(scratchpad + spAddr0 + 56); | ||||
| 			for (unsigned i = 0; i < RegistersCount; ++i) | ||||
| 				r[i] ^= load64(scratchpad + spAddr0 + 8 * i); | ||||
| 
 | ||||
| 			f[0] = load_cvt_i32x2(scratchpad + spAddr1 + 0); | ||||
| 			f[1] = load_cvt_i32x2(scratchpad + spAddr1 + 8); | ||||
| 			f[2] = load_cvt_i32x2(scratchpad + spAddr1 + 16); | ||||
| 			f[3] = load_cvt_i32x2(scratchpad + spAddr1 + 24); | ||||
| 			e[0] = maskRegisterExponentMantissa(load_cvt_i32x2(scratchpad + spAddr1 + 32)); | ||||
| 			e[1] = maskRegisterExponentMantissa(load_cvt_i32x2(scratchpad + spAddr1 + 40)); | ||||
| 			e[2] = maskRegisterExponentMantissa(load_cvt_i32x2(scratchpad + spAddr1 + 48)); | ||||
| 			e[3] = maskRegisterExponentMantissa(load_cvt_i32x2(scratchpad + spAddr1 + 56)); | ||||
| 			for (unsigned i = 0; i < RegisterCountFlt; ++i) | ||||
| 				f[i] = load_cvt_i32x2(scratchpad + spAddr1 + 8 * i); | ||||
| 
 | ||||
| 			if (trace) { | ||||
| 				std::cout << "iteration " << std::dec << ic << std::endl; | ||||
| 				std::cout << "spAddr " << std::hex << std::setw(8) << std::setfill('0') << spAddr1 << " / " << std::setw(8) << std::setfill('0') << spAddr0 << std::endl; | ||||
| 				std::cout << "ma/mx " << std::hex << std::setw(8) << std::setfill('0') << mem.ma << std::setw(8) << std::setfill('0') << mem.mx << std::endl; | ||||
| 				printState(r, f, e, a); | ||||
| 				std::cout << "-----------------------------------" << std::endl; | ||||
| 			} | ||||
| 			for (unsigned i = 0; i < RegisterCountFlt; ++i) | ||||
| 				e[i] = maskRegisterExponentMantissa(load_cvt_i32x2(scratchpad + spAddr1 + 8 * (RegisterCountFlt + i))); | ||||
| 
 | ||||
| 			executeBytecode(r, f, e, a); | ||||
| 
 | ||||
|  | @ -335,72 +247,33 @@ namespace randomx { | |||
| 			datasetRead(datasetOffset + mem.ma, r); | ||||
| 			std::swap(mem.mx, mem.ma); | ||||
| 
 | ||||
| 			if (trace) { | ||||
| 				std::cout << "iteration " << std::dec << ic << std::endl; | ||||
| 				std::cout << "spAddr " << std::hex << std::setw(8) << std::setfill('0') << spAddr1 << " / " << std::setw(8) << std::setfill('0') << spAddr0 << std::endl; | ||||
| 				std::cout << "ma/mx " << std::hex << std::setw(8) << std::setfill('0') << mem.ma << std::setw(8) << std::setfill('0') << mem.mx << std::endl; | ||||
| 				printState(r, f, e, a); | ||||
| 				std::cout << "===================================" << std::endl; | ||||
| 			} | ||||
| 			for (unsigned i = 0; i < RegistersCount; ++i) | ||||
| 				store64(scratchpad + spAddr1 + 8 * i, r[i]); | ||||
| 
 | ||||
| 			store64(scratchpad + spAddr1 + 0, r[0]); | ||||
| 			store64(scratchpad + spAddr1 + 8, r[1]); | ||||
| 			store64(scratchpad + spAddr1 + 16, r[2]); | ||||
| 			store64(scratchpad + spAddr1 + 24, r[3]); | ||||
| 			store64(scratchpad + spAddr1 + 32, r[4]); | ||||
| 			store64(scratchpad + spAddr1 + 40, r[5]); | ||||
| 			store64(scratchpad + spAddr1 + 48, r[6]); | ||||
| 			store64(scratchpad + spAddr1 + 56, r[7]); | ||||
| 			for (unsigned i = 0; i < RegisterCountFlt; ++i) | ||||
| 				f[i] = _mm_xor_pd(f[i], e[i]); | ||||
| 
 | ||||
| 			f[0] = _mm_xor_pd(f[0], e[0]); | ||||
| 			f[1] = _mm_xor_pd(f[1], e[1]); | ||||
| 			f[2] = _mm_xor_pd(f[2], e[2]); | ||||
| 			f[3] = _mm_xor_pd(f[3], e[3]); | ||||
| 
 | ||||
| #ifdef FPUCHECK | ||||
| 			for(int i = 0; i < 4; ++i) { | ||||
| 				double lo = *(((double*)&f[i]) + 0); | ||||
| 				double hi = *(((double*)&f[i]) + 1); | ||||
| 				if (isDenormal(lo) || isDenormal(hi)) { | ||||
| 					std::stringstream ss; | ||||
| 					ss << "Denormal f" << i; | ||||
| 					throw std::runtime_error(ss.str()); | ||||
| 				} | ||||
| 			} | ||||
| #endif | ||||
| 
 | ||||
| 			_mm_store_pd((double*)(scratchpad + spAddr0 + 0), f[0]); | ||||
| 			_mm_store_pd((double*)(scratchpad + spAddr0 + 16), f[1]); | ||||
| 			_mm_store_pd((double*)(scratchpad + spAddr0 + 32), f[2]); | ||||
| 			_mm_store_pd((double*)(scratchpad + spAddr0 + 48), f[3]); | ||||
| 			for (unsigned i = 0; i < RegisterCountFlt; ++i) | ||||
| 				_mm_store_pd((double*)(scratchpad + spAddr0 + 16 * i), f[i]); | ||||
| 
 | ||||
| 			spAddr0 = 0; | ||||
| 			spAddr1 = 0; | ||||
| 		} | ||||
| 
 | ||||
| 		store64(&reg.r[0], r[0]); | ||||
| 		store64(&reg.r[1], r[1]); | ||||
| 		store64(&reg.r[2], r[2]); | ||||
| 		store64(&reg.r[3], r[3]); | ||||
| 		store64(&reg.r[4], r[4]); | ||||
| 		store64(&reg.r[5], r[5]); | ||||
| 		store64(&reg.r[6], r[6]); | ||||
| 		store64(&reg.r[7], r[7]); | ||||
| 		for (unsigned i = 0; i < RegistersCount; ++i) | ||||
| 			store64(&reg.r[i], r[i]); | ||||
| 
 | ||||
| 		_mm_store_pd(&reg.f[0].lo, f[0]); | ||||
| 		_mm_store_pd(&reg.f[1].lo, f[1]); | ||||
| 		_mm_store_pd(&reg.f[2].lo, f[2]); | ||||
| 		_mm_store_pd(&reg.f[3].lo, f[3]); | ||||
| 		_mm_store_pd(&reg.e[0].lo, e[0]); | ||||
| 		_mm_store_pd(&reg.e[1].lo, e[1]); | ||||
| 		_mm_store_pd(&reg.e[2].lo, e[2]); | ||||
| 		_mm_store_pd(&reg.e[3].lo, e[3]); | ||||
| 		for (unsigned i = 0; i < RegisterCountFlt; ++i) | ||||
| 			_mm_store_pd(&reg.f[i].lo, f[i]); | ||||
| 
 | ||||
| 		for (unsigned i = 0; i < RegisterCountFlt; ++i) | ||||
| 			_mm_store_pd(&reg.e[i].lo, e[i]); | ||||
| 	} | ||||
| 
 | ||||
| 	static int getConditionRegister(int(&registerUsage)[8]) { | ||||
| 	static int getConditionRegister(int(&registerUsage)[RegistersCount]) { | ||||
| 		int min = INT_MAX; | ||||
| 		int minIndex; | ||||
| 		for (unsigned i = 0; i < 8; ++i) { | ||||
| 		for (unsigned i = 0; i < RegistersCount; ++i) { | ||||
| 			if (registerUsage[i] < min) { | ||||
| 				min = registerUsage[i]; | ||||
| 				minIndex = i; | ||||
|  | @ -410,7 +283,7 @@ namespace randomx { | |||
| 	} | ||||
| 
 | ||||
| 	template<class Allocator, bool softAes> | ||||
| 	void InterpretedVm<Allocator, softAes>::datasetRead(uint32_t address, int_reg_t(&r)[8]) { | ||||
| 	void InterpretedVm<Allocator, softAes>::datasetRead(uint32_t address, int_reg_t(&r)[RegistersCount]) { | ||||
| 		uint64_t* datasetLine = (uint64_t*)(mem.memory + address); | ||||
| 		for (int i = 0; i < RegistersCount; ++i) | ||||
| 			r[i] ^= datasetLine[i]; | ||||
|  | @ -419,9 +292,9 @@ namespace randomx { | |||
| #include "instruction_weights.hpp" | ||||
| 
 | ||||
| 	template<class Allocator, bool softAes> | ||||
| 	void InterpretedVm<Allocator, softAes>::precompileProgram(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) { | ||||
| 		int registerUsage[8]; | ||||
| 		for (unsigned i = 0; i < 8; ++i) { | ||||
| 	void InterpretedVm<Allocator, softAes>::precompileProgram(int_reg_t(&r)[RegistersCount], __m128d (&f)[RegisterCountFlt], __m128d (&e)[RegisterCountFlt], __m128d (&a)[RegisterCountFlt]) { | ||||
| 		int registerUsage[RegistersCount]; | ||||
| 		for (unsigned i = 0; i < RegistersCount; ++i) { | ||||
| 			registerUsage[i] = -1; | ||||
| 		} | ||||
| 		for (unsigned i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) { | ||||
|  | @ -443,7 +316,7 @@ namespace randomx { | |||
| 						ibc.shift = instr.getModMem(); | ||||
| 						ibc.imm = signExtend2sCompl(instr.getImm32()); | ||||
| 					} | ||||
| 					registerUsage[instr.dst] = i; | ||||
| 					registerUsage[dst] = i; | ||||
| 				} break; | ||||
| 
 | ||||
| 				CASE_REP(IADD_M) { | ||||
|  | @ -452,7 +325,7 @@ namespace randomx { | |||
| 					ibc.type = InstructionType::IADD_M; | ||||
| 					ibc.idst = &r[dst]; | ||||
| 					ibc.imm = signExtend2sCompl(instr.getImm32()); | ||||
| 					if (instr.src != instr.dst) { | ||||
| 					if (src != dst) { | ||||
| 						ibc.isrc = &r[src]; | ||||
| 						ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); | ||||
| 					} | ||||
|  | @ -460,7 +333,7 @@ namespace randomx { | |||
| 						ibc.isrc = &Zero; | ||||
| 						ibc.memMask = ScratchpadL3Mask; | ||||
| 					} | ||||
| 					registerUsage[instr.dst] = i; | ||||
| 					registerUsage[dst] = i; | ||||
| 				} break; | ||||
| 
 | ||||
| 				CASE_REP(ISUB_R) { | ||||
|  | @ -475,7 +348,7 @@ namespace randomx { | |||
| 						ibc.imm = signExtend2sCompl(instr.getImm32()); | ||||
| 						ibc.isrc = &ibc.imm; | ||||
| 					} | ||||
| 					registerUsage[instr.dst] = i; | ||||
| 					registerUsage[dst] = i; | ||||
| 				} break; | ||||
| 
 | ||||
| 				CASE_REP(ISUB_M) { | ||||
|  | @ -484,7 +357,7 @@ namespace randomx { | |||
| 					ibc.type = InstructionType::ISUB_M; | ||||
| 					ibc.idst = &r[dst]; | ||||
| 					ibc.imm = signExtend2sCompl(instr.getImm32()); | ||||
| 					if (instr.src != instr.dst) { | ||||
| 					if (src != dst) { | ||||
| 						ibc.isrc = &r[src]; | ||||
| 						ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); | ||||
| 					} | ||||
|  | @ -492,7 +365,7 @@ namespace randomx { | |||
| 						ibc.isrc = &Zero; | ||||
| 						ibc.memMask = ScratchpadL3Mask; | ||||
| 					} | ||||
| 					registerUsage[instr.dst] = i; | ||||
| 					registerUsage[dst] = i; | ||||
| 				} break; | ||||
| 
 | ||||
| 				CASE_REP(IMUL_R) { | ||||
|  | @ -507,7 +380,7 @@ namespace randomx { | |||
| 						ibc.imm = signExtend2sCompl(instr.getImm32()); | ||||
| 						ibc.isrc = &ibc.imm; | ||||
| 					} | ||||
| 					registerUsage[instr.dst] = i; | ||||
| 					registerUsage[dst] = i; | ||||
| 				} break; | ||||
| 
 | ||||
| 				CASE_REP(IMUL_M) { | ||||
|  | @ -516,7 +389,7 @@ namespace randomx { | |||
| 					ibc.type = InstructionType::IMUL_M; | ||||
| 					ibc.idst = &r[dst]; | ||||
| 					ibc.imm = signExtend2sCompl(instr.getImm32()); | ||||
| 					if (instr.src != instr.dst) { | ||||
| 					if (src != dst) { | ||||
| 						ibc.isrc = &r[src]; | ||||
| 						ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); | ||||
| 					} | ||||
|  | @ -524,7 +397,7 @@ namespace randomx { | |||
| 						ibc.isrc = &Zero; | ||||
| 						ibc.memMask = ScratchpadL3Mask; | ||||
| 					} | ||||
| 					registerUsage[instr.dst] = i; | ||||
| 					registerUsage[dst] = i; | ||||
| 				} break; | ||||
| 
 | ||||
| 				CASE_REP(IMULH_R) { | ||||
|  | @ -533,7 +406,7 @@ namespace randomx { | |||
| 					ibc.type = InstructionType::IMULH_R; | ||||
| 					ibc.idst = &r[dst]; | ||||
| 					ibc.isrc = &r[src]; | ||||
| 					registerUsage[instr.dst] = i; | ||||
| 					registerUsage[dst] = i; | ||||
| 				} break; | ||||
| 
 | ||||
| 				CASE_REP(IMULH_M) { | ||||
|  | @ -542,7 +415,7 @@ namespace randomx { | |||
| 					ibc.type = InstructionType::IMULH_M; | ||||
| 					ibc.idst = &r[dst]; | ||||
| 					ibc.imm = signExtend2sCompl(instr.getImm32()); | ||||
| 					if (instr.src != instr.dst) { | ||||
| 					if (src != dst) { | ||||
| 						ibc.isrc = &r[src]; | ||||
| 						ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); | ||||
| 					} | ||||
|  | @ -550,7 +423,7 @@ namespace randomx { | |||
| 						ibc.isrc = &Zero; | ||||
| 						ibc.memMask = ScratchpadL3Mask; | ||||
| 					} | ||||
| 					registerUsage[instr.dst] = i; | ||||
| 					registerUsage[dst] = i; | ||||
| 				} break; | ||||
| 
 | ||||
| 				CASE_REP(ISMULH_R) { | ||||
|  | @ -559,7 +432,7 @@ namespace randomx { | |||
| 					ibc.type = InstructionType::ISMULH_R; | ||||
| 					ibc.idst = &r[dst]; | ||||
| 					ibc.isrc = &r[src]; | ||||
| 					registerUsage[instr.dst] = i; | ||||
| 					registerUsage[dst] = i; | ||||
| 				} break; | ||||
| 
 | ||||
| 				CASE_REP(ISMULH_M) { | ||||
|  | @ -568,7 +441,7 @@ namespace randomx { | |||
| 					ibc.type = InstructionType::ISMULH_M; | ||||
| 					ibc.idst = &r[dst]; | ||||
| 					ibc.imm = signExtend2sCompl(instr.getImm32()); | ||||
| 					if (instr.src != instr.dst) { | ||||
| 					if (src != dst) { | ||||
| 						ibc.isrc = &r[src]; | ||||
| 						ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); | ||||
| 					} | ||||
|  | @ -576,7 +449,7 @@ namespace randomx { | |||
| 						ibc.isrc = &Zero; | ||||
| 						ibc.memMask = ScratchpadL3Mask; | ||||
| 					} | ||||
| 					registerUsage[instr.dst] = i; | ||||
| 					registerUsage[dst] = i; | ||||
| 				} break; | ||||
| 
 | ||||
| 				CASE_REP(IMUL_RCP) { | ||||
|  | @ -587,7 +460,7 @@ namespace randomx { | |||
| 						ibc.idst = &r[dst]; | ||||
| 						ibc.imm = randomx_reciprocal(divisor); | ||||
| 						ibc.isrc = &ibc.imm; | ||||
| 						registerUsage[instr.dst] = i; | ||||
| 						registerUsage[dst] = i; | ||||
| 					} | ||||
| 					else { | ||||
| 						ibc.type = InstructionType::NOP; | ||||
|  | @ -598,7 +471,7 @@ namespace randomx { | |||
| 					auto dst = instr.dst % RegistersCount; | ||||
| 					ibc.type = InstructionType::INEG_R; | ||||
| 					ibc.idst = &r[dst]; | ||||
| 					registerUsage[instr.dst] = i; | ||||
| 					registerUsage[dst] = i; | ||||
| 				} break; | ||||
| 
 | ||||
| 				CASE_REP(IXOR_R) { | ||||
|  | @ -613,7 +486,7 @@ namespace randomx { | |||
| 						ibc.imm = signExtend2sCompl(instr.getImm32()); | ||||
| 						ibc.isrc = &ibc.imm; | ||||
| 					} | ||||
| 					registerUsage[instr.dst] = i; | ||||
| 					registerUsage[dst] = i; | ||||
| 				} break; | ||||
| 
 | ||||
| 				CASE_REP(IXOR_M) { | ||||
|  | @ -622,7 +495,7 @@ namespace randomx { | |||
| 					ibc.type = InstructionType::IXOR_M; | ||||
| 					ibc.idst = &r[dst]; | ||||
| 					ibc.imm = signExtend2sCompl(instr.getImm32()); | ||||
| 					if (instr.src != instr.dst) { | ||||
| 					if (src != dst) { | ||||
| 						ibc.isrc = &r[src]; | ||||
| 						ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); | ||||
| 					} | ||||
|  | @ -630,7 +503,7 @@ namespace randomx { | |||
| 						ibc.isrc = &Zero; | ||||
| 						ibc.memMask = ScratchpadL3Mask; | ||||
| 					} | ||||
| 					registerUsage[instr.dst] = i; | ||||
| 					registerUsage[dst] = i; | ||||
| 				} break; | ||||
| 
 | ||||
| 				CASE_REP(IROR_R) { | ||||
|  | @ -645,7 +518,7 @@ namespace randomx { | |||
| 						ibc.imm = instr.getImm32(); | ||||
| 						ibc.isrc = &ibc.imm; | ||||
| 					} | ||||
| 					registerUsage[instr.dst] = i; | ||||
| 					registerUsage[dst] = i; | ||||
| 				} break; | ||||
| 
 | ||||
| 				CASE_REP(IROL_R) { | ||||
|  | @ -660,7 +533,7 @@ namespace randomx { | |||
| 						ibc.imm = instr.getImm32(); | ||||
| 						ibc.isrc = &ibc.imm; | ||||
| 					} | ||||
| 					registerUsage[instr.dst] = i; | ||||
| 					registerUsage[dst] = i; | ||||
| 				} break; | ||||
| 
 | ||||
| 				CASE_REP(ISWAP_R) { | ||||
|  | @ -670,8 +543,8 @@ namespace randomx { | |||
| 						ibc.idst = &r[dst]; | ||||
| 						ibc.isrc = &r[src]; | ||||
| 						ibc.type = InstructionType::ISWAP_R; | ||||
| 						registerUsage[instr.dst] = i; | ||||
| 						registerUsage[instr.src] = i; | ||||
| 						registerUsage[dst] = i; | ||||
| 						registerUsage[src] = i; | ||||
| 					} | ||||
| 					else { | ||||
| 						ibc.type = InstructionType::NOP; | ||||
|  | @ -681,23 +554,23 @@ namespace randomx { | |||
| 				CASE_REP(FSWAP_R) { | ||||
| 					auto dst = instr.dst % RegistersCount; | ||||
| 					ibc.type = InstructionType::FSWAP_R; | ||||
| 					if (dst < 4) | ||||
| 					if (dst < RegisterCountFlt) | ||||
| 						ibc.fdst = &f[dst]; | ||||
| 					else | ||||
| 						ibc.fdst = &e[dst - 4]; | ||||
| 						ibc.fdst = &e[dst - RegisterCountFlt]; | ||||
| 				} break; | ||||
| 
 | ||||
| 				CASE_REP(FADD_R) { | ||||
| 					auto dst = instr.dst % 4; | ||||
| 					auto src = instr.src % 4; | ||||
| 					auto dst = instr.dst % RegisterCountFlt; | ||||
| 					auto src = instr.src % RegisterCountFlt; | ||||
| 					ibc.type = InstructionType::FADD_R; | ||||
| 					ibc.fdst = &f[dst]; | ||||
| 					ibc.fsrc = &a[src]; | ||||
| 				} break; | ||||
| 
 | ||||
| 				CASE_REP(FADD_M) { | ||||
| 					auto dst = instr.dst % 4; | ||||
| 					auto src = instr.src % 8; | ||||
| 					auto dst = instr.dst % RegisterCountFlt; | ||||
| 					auto src = instr.src % RegistersCount; | ||||
| 					ibc.type = InstructionType::FADD_M; | ||||
| 					ibc.fdst = &f[dst]; | ||||
| 					ibc.isrc = &r[src]; | ||||
|  | @ -706,16 +579,16 @@ namespace randomx { | |||
| 				} break; | ||||
| 
 | ||||
| 				CASE_REP(FSUB_R) { | ||||
| 					auto dst = instr.dst % 4; | ||||
| 					auto src = instr.src % 4; | ||||
| 					auto dst = instr.dst % RegisterCountFlt; | ||||
| 					auto src = instr.src % RegisterCountFlt; | ||||
| 					ibc.type = InstructionType::FSUB_R; | ||||
| 					ibc.fdst = &f[dst]; | ||||
| 					ibc.fsrc = &a[src]; | ||||
| 				} break; | ||||
| 
 | ||||
| 				CASE_REP(FSUB_M) { | ||||
| 					auto dst = instr.dst % 4; | ||||
| 					auto src = instr.src % 8; | ||||
| 					auto dst = instr.dst % RegisterCountFlt; | ||||
| 					auto src = instr.src % RegistersCount; | ||||
| 					ibc.type = InstructionType::FSUB_M; | ||||
| 					ibc.fdst = &f[dst]; | ||||
| 					ibc.isrc = &r[src]; | ||||
|  | @ -724,22 +597,22 @@ namespace randomx { | |||
| 				} break; | ||||
| 
 | ||||
| 				CASE_REP(FSCAL_R) { | ||||
| 					auto dst = instr.dst % 4; | ||||
| 					auto dst = instr.dst % RegisterCountFlt; | ||||
| 					ibc.fdst = &f[dst]; | ||||
| 					ibc.type = InstructionType::FSCAL_R; | ||||
| 				} break; | ||||
| 
 | ||||
| 				CASE_REP(FMUL_R) { | ||||
| 					auto dst = instr.dst % 4; | ||||
| 					auto src = instr.src % 4; | ||||
| 					auto dst = instr.dst % RegisterCountFlt; | ||||
| 					auto src = instr.src % RegisterCountFlt; | ||||
| 					ibc.type = InstructionType::FMUL_R; | ||||
| 					ibc.fdst = &e[dst]; | ||||
| 					ibc.fsrc = &a[src]; | ||||
| 				} break; | ||||
| 
 | ||||
| 				CASE_REP(FDIV_M) { | ||||
| 					auto dst = instr.dst % 4; | ||||
| 					auto src = instr.src % 8; | ||||
| 					auto dst = instr.dst % RegisterCountFlt; | ||||
| 					auto src = instr.src % RegistersCount; | ||||
| 					ibc.type = InstructionType::FDIV_M; | ||||
| 					ibc.fdst = &e[dst]; | ||||
| 					ibc.isrc = &r[src]; | ||||
|  | @ -748,7 +621,7 @@ namespace randomx { | |||
| 				} break; | ||||
| 
 | ||||
| 				CASE_REP(FSQRT_R) { | ||||
| 					auto dst = instr.dst % 4; | ||||
| 					auto dst = instr.dst % RegisterCountFlt; | ||||
| 					ibc.type = InstructionType::FSQRT_R; | ||||
| 					ibc.fdst = &e[dst]; | ||||
| 				} break; | ||||
|  | @ -766,13 +639,13 @@ namespace randomx { | |||
| 					ibc.target = registerUsage[reg]; | ||||
| 					ibc.shift = instr.getModShift(); | ||||
| 					ibc.creg = &r[reg]; | ||||
| 					for (unsigned j = 0; j < 8; ++j) { //mark all registers as used
 | ||||
| 					for (unsigned j = 0; j < RegistersCount; ++j) { //mark all registers as used
 | ||||
| 						registerUsage[j] = i; | ||||
| 					} | ||||
| 				} break; | ||||
| 
 | ||||
| 				CASE_REP(CFROUND) { | ||||
| 					auto src = instr.src % 8; | ||||
| 					auto src = instr.src % RegistersCount; | ||||
| 					ibc.isrc = &r[src]; | ||||
| 					ibc.type = InstructionType::CFROUND; | ||||
| 					ibc.imm = instr.getImm32() & 63; | ||||
|  |  | |||
|  | @ -71,12 +71,12 @@ namespace randomx { | |||
| 		void run(void* seed) override; | ||||
| 		void setDataset(randomx_dataset* dataset) override; | ||||
| 	protected: | ||||
| 		virtual void datasetRead(uint32_t blockNumber, int_reg_t(&r)[8]); | ||||
| 		virtual void datasetRead(uint32_t blockNumber, int_reg_t(&r)[RegistersCount]); | ||||
| 	private: | ||||
| 		void execute(); | ||||
| 		void precompileProgram(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]); | ||||
| 		void executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]); | ||||
| 		void executeBytecode(int& i, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]); | ||||
| 		void precompileProgram(int_reg_t(&r)[RegistersCount], __m128d (&f)[RegisterCountFlt], __m128d (&e)[RegisterCountFlt], __m128d (&a)[RegisterCountFlt]); | ||||
| 		void executeBytecode(int_reg_t(&r)[RegistersCount], __m128d (&f)[RegisterCountFlt], __m128d (&e)[RegisterCountFlt], __m128d (&a)[RegisterCountFlt]); | ||||
| 		void executeBytecode(int& i, int_reg_t(&r)[RegistersCount], __m128d (&f)[RegisterCountFlt], __m128d (&e)[RegisterCountFlt], __m128d (&a)[RegisterCountFlt]); | ||||
| 		void* getScratchpadAddress(InstructionByteCode& ibc); | ||||
| 		__m128d maskRegisterExponentMantissa(__m128d); | ||||
| 
 | ||||
|  |  | |||
|  | @ -106,7 +106,7 @@ | |||
|       <Optimization>MaxSpeed</Optimization> | ||||
|       <FunctionLevelLinking>true</FunctionLevelLinking> | ||||
|       <IntrinsicFunctions>true</IntrinsicFunctions> | ||||
|       <SDLCheck>true</SDLCheck> | ||||
|       <SDLCheck>false</SDLCheck> | ||||
|       <ConformanceMode>true</ConformanceMode> | ||||
|     </ClCompile> | ||||
|     <Link> | ||||
|  |  | |||
|  | @ -26,20 +26,20 @@ | |||
|   </PropertyGroup> | ||||
|   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> | ||||
|   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"> | ||||
|     <ConfigurationType>Application</ConfigurationType> | ||||
|     <ConfigurationType>StaticLibrary</ConfigurationType> | ||||
|     <UseDebugLibraries>true</UseDebugLibraries> | ||||
|     <PlatformToolset>v141</PlatformToolset> | ||||
|     <CharacterSet>MultiByte</CharacterSet> | ||||
|   </PropertyGroup> | ||||
|   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration"> | ||||
|     <ConfigurationType>Application</ConfigurationType> | ||||
|     <ConfigurationType>StaticLibrary</ConfigurationType> | ||||
|     <UseDebugLibraries>false</UseDebugLibraries> | ||||
|     <PlatformToolset>v141</PlatformToolset> | ||||
|     <WholeProgramOptimization>true</WholeProgramOptimization> | ||||
|     <CharacterSet>MultiByte</CharacterSet> | ||||
|   </PropertyGroup> | ||||
|   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration"> | ||||
|     <ConfigurationType>Application</ConfigurationType> | ||||
|     <ConfigurationType>StaticLibrary</ConfigurationType> | ||||
|     <UseDebugLibraries>true</UseDebugLibraries> | ||||
|     <PlatformToolset>v141</PlatformToolset> | ||||
|     <CharacterSet>MultiByte</CharacterSet> | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue