mirror of
				https://git.wownero.com/wownero/RandomWOW.git
				synced 2024-08-15 00:23:14 +00:00 
			
		
		
		
	More refactoring
This commit is contained in:
		
							parent
							
								
									9404516dd8
								
							
						
					
					
						commit
						8c37d4aac3
					
				
					 28 changed files with 347 additions and 453 deletions
				
			
		|  | @ -23,7 +23,7 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| #include "common.hpp" | ||||
| #include "reciprocal.h" | ||||
| #include "Program.hpp" | ||||
| #include "./LightProgramGenerator.hpp" | ||||
| #include "superscalarGenerator.hpp" | ||||
| 
 | ||||
| namespace RandomX { | ||||
| 
 | ||||
|  | @ -62,7 +62,7 @@ namespace RandomX { | |||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	void AssemblyGeneratorX86::generateAsm(LightProgram& prog) { | ||||
| 	void AssemblyGeneratorX86::generateAsm(SuperscalarProgram& prog) { | ||||
| 		asmCode.str(std::string()); //clear
 | ||||
| 		asmCode << "ALIGN 16" << std::endl; | ||||
| 		for (unsigned i = 0; i < prog.getSize(); ++i) { | ||||
|  | @ -126,7 +126,7 @@ namespace RandomX { | |||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	void AssemblyGeneratorX86::generateC(LightProgram& prog) { | ||||
| 	void AssemblyGeneratorX86::generateC(SuperscalarProgram& prog) { | ||||
| 		asmCode.str(std::string()); //clear
 | ||||
| 		asmCode << "#include <stdint.h>" << std::endl; | ||||
| 		asmCode << "#if defined(__SIZEOF_INT128__)" << std::endl; | ||||
|  |  | |||
|  | @ -27,7 +27,7 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| namespace RandomX { | ||||
| 
 | ||||
| 	class Program; | ||||
| 	class LightProgram; | ||||
| 	class SuperscalarProgram; | ||||
| 	class AssemblyGeneratorX86; | ||||
| 
 | ||||
| 	typedef void(AssemblyGeneratorX86::*InstructionGenerator)(Instruction&, int); | ||||
|  | @ -35,8 +35,8 @@ namespace RandomX { | |||
| 	class AssemblyGeneratorX86 { | ||||
| 	public: | ||||
| 		void generateProgram(Program& prog); | ||||
| 		void generateAsm(LightProgram& prog); | ||||
| 		void generateC(LightProgram& prog); | ||||
| 		void generateAsm(SuperscalarProgram& prog); | ||||
| 		void generateC(SuperscalarProgram& prog); | ||||
| 		void printCode(std::ostream& os) { | ||||
| 			os << asmCode.rdbuf(); | ||||
| 		} | ||||
|  |  | |||
							
								
								
									
										51
									
								
								src/Blake2Generator.cpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										51
									
								
								src/Blake2Generator.cpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,51 @@ | |||
| /*
 | ||||
| Copyright (c) 2019 tevador | ||||
| 
 | ||||
| This file is part of RandomX. | ||||
| 
 | ||||
| RandomX is free software: you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation, either version 3 of the License, or | ||||
| (at your option) any later version. | ||||
| 
 | ||||
| RandomX is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
| GNU General Public License for more details. | ||||
| 
 | ||||
| You should have received a copy of the GNU General Public License | ||||
| along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 | ||||
| */ | ||||
| 
 | ||||
| #include "blake2/blake2.h" | ||||
| #include "blake2/endian.h" | ||||
| #include "Blake2Generator.hpp" | ||||
| #include "common.hpp" | ||||
| 
 | ||||
| namespace RandomX { | ||||
| 
 | ||||
| 	Blake2Generator::Blake2Generator(const void* seed, int nonce) : dataIndex(sizeof(data)) { | ||||
| 		memset(data, 0, sizeof(data)); | ||||
| 		memcpy(data, seed, SeedSize); | ||||
| 		store32(&data[60], nonce); | ||||
| 	} | ||||
| 
 | ||||
| 	uint8_t Blake2Generator::getByte() { | ||||
| 		checkData(1); | ||||
| 		return data[dataIndex++]; | ||||
| 	} | ||||
| 
 | ||||
| 	uint32_t Blake2Generator::getInt32() { | ||||
| 		checkData(4); | ||||
| 		auto ret = load32(&data[dataIndex]); | ||||
| 		dataIndex += 4; | ||||
| 		return ret; | ||||
| 	} | ||||
| 
 | ||||
| 	void Blake2Generator::checkData(const size_t bytesNeeded) { | ||||
| 		if (dataIndex + bytesNeeded > sizeof(data)) { | ||||
| 			blake2b(data, sizeof(data), data, sizeof(data), nullptr, 0); | ||||
| 			dataIndex = 0; | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
							
								
								
									
										36
									
								
								src/Blake2Generator.hpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										36
									
								
								src/Blake2Generator.hpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,36 @@ | |||
| /*
 | ||||
| Copyright (c) 2019 tevador | ||||
| 
 | ||||
| This file is part of RandomX. | ||||
| 
 | ||||
| RandomX is free software: you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation, either version 3 of the License, or | ||||
| (at your option) any later version. | ||||
| 
 | ||||
| RandomX is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
| GNU General Public License for more details. | ||||
| 
 | ||||
| You should have received a copy of the GNU General Public License | ||||
| along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 | ||||
| */ | ||||
| 
 | ||||
| #pragma once | ||||
| #include <cstdint> | ||||
| 
 | ||||
| namespace RandomX { | ||||
| 
 | ||||
| 	class Blake2Generator { | ||||
| 	public: | ||||
| 		Blake2Generator(const void* seed, int nonce); | ||||
| 		uint8_t getByte(); | ||||
| 		uint32_t getInt32(); | ||||
| 	private: | ||||
| 		uint8_t data[64]; | ||||
| 		size_t dataIndex; | ||||
| 
 | ||||
| 		void checkData(const size_t); | ||||
| 	}; | ||||
| } | ||||
|  | @ -24,7 +24,7 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| namespace RandomX { | ||||
| 
 | ||||
| 	template<bool superscalar> | ||||
| 	void CompiledLightVirtualMachine<superscalar>::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) { | ||||
| 	void CompiledLightVirtualMachine<superscalar>::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]) { | ||||
| 		mem.ds = ds; | ||||
| 		datasetRange = (size - RANDOMX_DATASET_SIZE + CacheLineSize) / CacheLineSize; | ||||
| 		if(superscalar) | ||||
|  | @ -32,8 +32,8 @@ namespace RandomX { | |||
| 		//datasetBasePtr = ds.dataset.memory;
 | ||||
| 	} | ||||
| 
 | ||||
| 	template void CompiledLightVirtualMachine<true>::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]); | ||||
| 	template void CompiledLightVirtualMachine<false>::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]); | ||||
| 	template void CompiledLightVirtualMachine<true>::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]); | ||||
| 	template void CompiledLightVirtualMachine<false>::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]); | ||||
| 
 | ||||
| 	template<bool superscalar> | ||||
| 	void CompiledLightVirtualMachine<superscalar>::initialize() { | ||||
|  |  | |||
|  | @ -39,7 +39,7 @@ namespace RandomX { | |||
| 			_mm_free(ptr); | ||||
| 		} | ||||
| 		CompiledLightVirtualMachine() {} | ||||
| 		void setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) override; | ||||
| 		void setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]) override; | ||||
| 		void initialize() override; | ||||
| 	}; | ||||
| } | ||||
|  | @ -29,7 +29,7 @@ namespace RandomX { | |||
| 	CompiledVirtualMachine::CompiledVirtualMachine() { | ||||
| 	} | ||||
| 
 | ||||
| 	void CompiledVirtualMachine::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) { | ||||
| 	void CompiledVirtualMachine::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]) { | ||||
| 		mem.ds = ds; | ||||
| 		datasetRange = (size - RANDOMX_DATASET_SIZE + CacheLineSize) / CacheLineSize; | ||||
| 		datasetBasePtr = ds.dataset.memory; | ||||
|  |  | |||
|  | @ -42,7 +42,7 @@ namespace RandomX { | |||
| 			_mm_free(ptr); | ||||
| 		} | ||||
| 		CompiledVirtualMachine(); | ||||
| 		void setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) override; | ||||
| 		void setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]) override; | ||||
| 		void initialize() override; | ||||
| 		virtual void execute() override; | ||||
| 		void* getProgram() { | ||||
|  |  | |||
|  | @ -22,7 +22,6 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| #include "InterpretedVirtualMachine.hpp" | ||||
| #include "dataset.hpp" | ||||
| #include "Cache.hpp" | ||||
| #include "LightClientAsyncWorker.hpp" | ||||
| #include <iostream> | ||||
| #include <iomanip> | ||||
| #include <stdexcept> | ||||
|  | @ -36,7 +35,7 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| #ifdef STATS | ||||
| #include <algorithm> | ||||
| #endif | ||||
| #include "LightProgramGenerator.hpp" | ||||
| #include "superscalarGenerator.hpp" | ||||
| 
 | ||||
| #ifdef FPUCHECK | ||||
| constexpr bool fpuCheck = true; | ||||
|  | @ -47,7 +46,7 @@ constexpr bool fpuCheck = false; | |||
| namespace RandomX { | ||||
| 
 | ||||
| 	template<bool superscalar> | ||||
| 	void InterpretedVirtualMachine<superscalar>::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) { | ||||
| 	void InterpretedVirtualMachine<superscalar>::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]) { | ||||
| 		mem.ds = ds; | ||||
| 		readDataset = &datasetReadLight; | ||||
| 		datasetRange = (size - RANDOMX_DATASET_SIZE + CacheLineSize) / CacheLineSize; | ||||
|  | @ -55,8 +54,8 @@ namespace RandomX { | |||
| 			precompileSuperscalar(programs); | ||||
| 	} | ||||
| 
 | ||||
| 	template void InterpretedVirtualMachine<true>::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]); | ||||
| 	template void InterpretedVirtualMachine<false>::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]); | ||||
| 	template void InterpretedVirtualMachine<true>::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]); | ||||
| 	template void InterpretedVirtualMachine<false>::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]); | ||||
| 
 | ||||
| 	template<bool superscalar> | ||||
| 	void InterpretedVirtualMachine<superscalar>::initialize() { | ||||
|  | @ -475,7 +474,7 @@ namespace RandomX { | |||
| 	} | ||||
| 
 | ||||
| 	template<bool superscalar> | ||||
| 	void InterpretedVirtualMachine<superscalar>::executeSuperscalar(int_reg_t(&r)[8], LightProgram& prog, std::vector<uint64_t>& reciprocals) { | ||||
| 	void InterpretedVirtualMachine<superscalar>::executeSuperscalar(int_reg_t(&r)[8], SuperscalarProgram& prog, std::vector<uint64_t>& reciprocals) { | ||||
| 		for (unsigned j = 0; j < prog.getSize(); ++j) { | ||||
| 			Instruction& instr = prog(j); | ||||
| 			switch (instr.opcode) | ||||
|  | @ -539,7 +538,7 @@ namespace RandomX { | |||
| 		Cache& cache = mem.ds.cache; | ||||
| 		for (unsigned i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) { | ||||
| 			mixBlock = getMixBlock(registerValue, cache); | ||||
| 			LightProgram& prog = superScalarPrograms[i]; | ||||
| 			SuperscalarProgram& prog = superScalarPrograms[i]; | ||||
| 			 | ||||
| 			executeSuperscalar(rl, prog, reciprocals); | ||||
| 
 | ||||
|  | @ -554,7 +553,7 @@ namespace RandomX { | |||
| 	} | ||||
| 
 | ||||
| 	template<bool superscalar> | ||||
| 	void InterpretedVirtualMachine<superscalar>::precompileSuperscalar(LightProgram* programs) { | ||||
| 	void InterpretedVirtualMachine<superscalar>::precompileSuperscalar(SuperscalarProgram* programs) { | ||||
| 		memcpy(superScalarPrograms, programs, sizeof(superScalarPrograms)); | ||||
| 		reciprocals.clear(); | ||||
| 		for (unsigned i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) { | ||||
|  |  | |||
|  | @ -70,17 +70,17 @@ namespace RandomX { | |||
| 		} | ||||
| 		InterpretedVirtualMachine(bool soft) : softAes(soft) {} | ||||
| 		~InterpretedVirtualMachine() {} | ||||
| 		void setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) override; | ||||
| 		void setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]) override; | ||||
| 		void initialize() override; | ||||
| 		void execute() override; | ||||
| 		static void executeSuperscalar(int_reg_t(&r)[8], LightProgram& prog, std::vector<uint64_t>& reciprocals); | ||||
| 		static void executeSuperscalar(int_reg_t(&r)[8], SuperscalarProgram& prog, std::vector<uint64_t>& reciprocals); | ||||
| 	private: | ||||
| 		static InstructionHandler<superscalar> engine[256]; | ||||
| 		DatasetReadFunc readDataset; | ||||
| 		bool softAes; | ||||
| 		InstructionByteCode byteCode[RANDOMX_PROGRAM_SIZE]; | ||||
| 		std::vector<uint64_t> reciprocals; | ||||
| 		alignas(64) LightProgram superScalarPrograms[RANDOMX_CACHE_ACCESSES]; | ||||
| 		alignas(64) SuperscalarProgram superScalarPrograms[RANDOMX_CACHE_ACCESSES]; | ||||
| #ifdef STATS | ||||
| 		int count_ADD_64 = 0; | ||||
| 		int count_ADD_32 = 0; | ||||
|  | @ -128,7 +128,7 @@ namespace RandomX { | |||
| 		int datasetAccess[256] = { 0 }; | ||||
| #endif | ||||
| 		void precompileProgram(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]); | ||||
| 		void precompileSuperscalar(LightProgram*); | ||||
| 		void precompileSuperscalar(SuperscalarProgram*); | ||||
| 		void executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]); | ||||
| 		void executeBytecode(int& i, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]); | ||||
| 		void executeSuperscalar(uint32_t blockNumber, int_reg_t(&r)[8]); | ||||
|  |  | |||
|  | @ -87,7 +87,7 @@ namespace RandomX { | |||
| 	*/ | ||||
| 
 | ||||
| #include "JitCompilerX86-static.hpp" | ||||
| #include "LightProgramGenerator.hpp" | ||||
| #include "superscalarGenerator.hpp" | ||||
| 
 | ||||
| #define NOP_TEST true | ||||
| 
 | ||||
|  | @ -261,16 +261,16 @@ namespace RandomX { | |||
| 	template void JitCompilerX86::generateProgramLight<false>(Program& prog); | ||||
| 
 | ||||
| 	template<size_t N> | ||||
| 	void JitCompilerX86::generateSuperScalarHash(LightProgram(&programs)[N]) { | ||||
| 	void JitCompilerX86::generateSuperScalarHash(SuperscalarProgram(&programs)[N]) { | ||||
| 		memcpy(code + superScalarHashOffset, codeShhInit, codeSshInitSize); | ||||
| 		codePos = superScalarHashOffset + codeSshInitSize; | ||||
| 		for (unsigned j = 0; j < N; ++j) { | ||||
| 			LightProgram& prog = programs[j]; | ||||
| 			SuperscalarProgram& prog = programs[j]; | ||||
| 			for (unsigned i = 0; i < prog.getSize(); ++i) { | ||||
| 				Instruction& instr = prog(i); | ||||
| 				instr.src %= RegistersCount; | ||||
| 				instr.dst %= RegistersCount; | ||||
| 				generateCode<LightProgram>(instr, i); | ||||
| 				generateCode<SuperscalarProgram>(instr, i); | ||||
| 			} | ||||
| 			emit(codeShhLoad, codeSshLoadSize); | ||||
| 			if (j < N - 1) { | ||||
|  | @ -290,7 +290,7 @@ namespace RandomX { | |||
| 	} | ||||
| 
 | ||||
| 	template | ||||
| 	void JitCompilerX86::generateSuperScalarHash(LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]); | ||||
| 	void JitCompilerX86::generateSuperScalarHash(SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]); | ||||
| 
 | ||||
| 	void JitCompilerX86::generateDatasetInitCode() { | ||||
| 		memcpy(code, codeDatasetInit, datasetInitSize); | ||||
|  | @ -345,7 +345,7 @@ namespace RandomX { | |||
| 	} | ||||
| 
 | ||||
| 	template<> | ||||
| 	void JitCompilerX86::generateCode<LightProgram>(Instruction& instr, int i) { | ||||
| 	void JitCompilerX86::generateCode<SuperscalarProgram>(Instruction& instr, int i) { | ||||
| 		switch (instr.opcode) | ||||
| 		{ | ||||
| 		case RandomX::SuperscalarInstructionType::ISUB_R: | ||||
|  |  | |||
|  | @ -27,7 +27,7 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| namespace RandomX { | ||||
| 
 | ||||
| 	class Program; | ||||
| 	class LightProgram; | ||||
| 	class SuperscalarProgram; | ||||
| 	class JitCompilerX86; | ||||
| 
 | ||||
| 	typedef void(JitCompilerX86::*InstructionGeneratorX86)(Instruction&, int); | ||||
|  | @ -42,7 +42,7 @@ namespace RandomX { | |||
| 		template<bool superscalar> | ||||
| 		void generateProgramLight(Program&); | ||||
| 		template<size_t N> | ||||
| 		void generateSuperScalarHash(LightProgram (&programs)[N]); | ||||
| 		void generateSuperScalarHash(SuperscalarProgram (&programs)[N]); | ||||
| 		ProgramFunc getProgramFunc() { | ||||
| 			return (ProgramFunc)code; | ||||
| 		} | ||||
|  |  | |||
|  | @ -1,113 +0,0 @@ | |||
| /*
 | ||||
| Copyright (c) 2019 tevador | ||||
| 
 | ||||
| This file is part of RandomX. | ||||
| 
 | ||||
| RandomX is free software: you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation, either version 3 of the License, or | ||||
| (at your option) any later version. | ||||
| 
 | ||||
| RandomX is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
| GNU General Public License for more details. | ||||
| 
 | ||||
| You should have received a copy of the GNU General Public License | ||||
| along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 | ||||
| */ | ||||
| 
 | ||||
| #include "LightClientAsyncWorker.hpp" | ||||
| #include "dataset.hpp" | ||||
| #include "Cache.hpp" | ||||
| 
 | ||||
| namespace RandomX { | ||||
| 
 | ||||
| 	LightClientAsyncWorker::LightClientAsyncWorker(const Cache& c) : ILightClientAsyncWorker(c), output(nullptr), hasWork(false),  | ||||
| #ifdef TRACE | ||||
| 		sw(true), | ||||
| #endif | ||||
| 		workerThread(&LightClientAsyncWorker::runWorker, this) { | ||||
| 
 | ||||
| 	} | ||||
| 
 | ||||
| 	void LightClientAsyncWorker::prepareBlock(addr_t addr) { | ||||
| #ifdef TRACE | ||||
| 		std::cout << sw.getElapsed() << ": prepareBlock-enter " << addr / CacheLineSize << std::endl; | ||||
| #endif | ||||
| 		{ | ||||
| 			std::lock_guard<std::mutex> lk(mutex); | ||||
| 			startBlock = addr / CacheLineSize; | ||||
| 			blockCount = 1; | ||||
| 			output = currentLine.data(); | ||||
| 			hasWork = true; | ||||
| 		} | ||||
| #ifdef TRACE | ||||
| 		std::cout << sw.getElapsed() << ": prepareBlock-notify " << startBlock << "/" << blockCount << std::endl; | ||||
| #endif | ||||
| 		notifier.notify_one(); | ||||
| 	} | ||||
| 
 | ||||
| 	const uint64_t* LightClientAsyncWorker::getBlock(addr_t addr) { | ||||
| #ifdef TRACE | ||||
| 		std::cout << sw.getElapsed() << ": getBlock-enter " << addr / CacheLineSize << std::endl; | ||||
| #endif | ||||
| 		uint32_t currentBlock = addr / CacheLineSize; | ||||
| 		if (currentBlock != startBlock || output != currentLine.data()) { | ||||
| 			initBlock(cache, (uint8_t*)currentLine.data(), currentBlock, RANDOMX_CACHE_ACCESSES / 8); | ||||
| 		} | ||||
| 		else { | ||||
| 			sync(); | ||||
| 		} | ||||
| #ifdef TRACE | ||||
| 		std::cout << sw.getElapsed() << ": getBlock-return " << addr / CacheLineSize << std::endl; | ||||
| #endif | ||||
| 		return currentLine.data(); | ||||
| 	} | ||||
| 
 | ||||
| 	void LightClientAsyncWorker::prepareBlocks(void* out, uint32_t startBlock, uint32_t blockCount) { | ||||
| #ifdef TRACE | ||||
| 		std::cout << sw.getElapsed() << ": prepareBlocks-enter " << startBlock << "/" << blockCount << std::endl; | ||||
| #endif | ||||
| 		{ | ||||
| 			std::lock_guard<std::mutex> lk(mutex); | ||||
| 			this->startBlock = startBlock; | ||||
| 			this->blockCount = blockCount; | ||||
| 			output = out; | ||||
| 			hasWork = true; | ||||
| 			notifier.notify_one(); | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	void LightClientAsyncWorker::getBlocks(void* out, uint32_t startBlock, uint32_t blockCount) { | ||||
| 		for (uint32_t i = 0; i < blockCount; ++i) { | ||||
| 			initBlock(cache, (uint8_t*)out + CacheLineSize * i, startBlock + i, RANDOMX_CACHE_ACCESSES / 8); | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	void LightClientAsyncWorker::sync() { | ||||
| 		std::unique_lock<std::mutex> lk(mutex); | ||||
| 		notifier.wait(lk, [this] { return !hasWork; }); | ||||
| 	} | ||||
| 
 | ||||
| 	void LightClientAsyncWorker::runWorker() { | ||||
| #ifdef TRACE | ||||
| 		std::cout << sw.getElapsed() << ": runWorker-enter " << std::endl; | ||||
| #endif | ||||
| 		for (;;) { | ||||
| 			std::unique_lock<std::mutex> lk(mutex); | ||||
| 			notifier.wait(lk, [this] { return hasWork; }); | ||||
| #ifdef TRACE | ||||
| 			std::cout << sw.getElapsed() << ": runWorker-getBlocks " << startBlock << "/" << blockCount << std::endl; | ||||
| #endif | ||||
| 			//getBlocks(output, startBlock, blockCount);
 | ||||
| 			initBlock(cache, (uint8_t*)output, startBlock, RANDOMX_CACHE_ACCESSES / 8); | ||||
| 			hasWork = false; | ||||
| #ifdef TRACE | ||||
| 			std::cout << sw.getElapsed() << ": runWorker-finished " << startBlock << "/" << blockCount << std::endl; | ||||
| #endif | ||||
| 			lk.unlock(); | ||||
| 			notifier.notify_one(); | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|  | @ -1,57 +0,0 @@ | |||
| /*
 | ||||
| Copyright (c) 2019 tevador | ||||
| 
 | ||||
| This file is part of RandomX. | ||||
| 
 | ||||
| RandomX is free software: you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation, either version 3 of the License, or | ||||
| (at your option) any later version. | ||||
| 
 | ||||
| RandomX is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
| GNU General Public License for more details. | ||||
| 
 | ||||
| You should have received a copy of the GNU General Public License | ||||
| along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 | ||||
| */ | ||||
| 
 | ||||
| //#define TRACE
 | ||||
| #include "common.hpp" | ||||
| 
 | ||||
| #include <thread> | ||||
| #include <mutex> | ||||
| #include <condition_variable> | ||||
| #include <array> | ||||
| #ifdef TRACE | ||||
| #include "Stopwatch.hpp" | ||||
| #include <iostream> | ||||
| #endif | ||||
| 
 | ||||
| namespace RandomX { | ||||
| 
 | ||||
| 	using DatasetLine = std::array<uint64_t, CacheLineSize / sizeof(uint64_t)>; | ||||
| 
 | ||||
| 	class LightClientAsyncWorker : public ILightClientAsyncWorker { | ||||
| 	public: | ||||
| 		LightClientAsyncWorker(const Cache&); | ||||
| 		void prepareBlock(addr_t) final; | ||||
| 		void prepareBlocks(void* out, uint32_t startBlock, uint32_t blockCount) final; | ||||
| 		const uint64_t* getBlock(addr_t) final; | ||||
| 		void getBlocks(void* out, uint32_t startBlock, uint32_t blockCount) final; | ||||
| 		void sync() final; | ||||
| 	private: | ||||
| 		void runWorker(); | ||||
| 		std::condition_variable notifier; | ||||
| 		std::mutex mutex; | ||||
| 		alignas(16) DatasetLine currentLine; | ||||
| 		void* output; | ||||
| 		uint32_t startBlock, blockCount; | ||||
| 		bool hasWork; | ||||
| #ifdef TRACE | ||||
| 		Stopwatch sw; | ||||
| #endif | ||||
| 		std::thread workerThread; | ||||
| 	}; | ||||
| } | ||||
|  | @ -1,58 +0,0 @@ | |||
| /*
 | ||||
| Copyright (c) 2019 tevador | ||||
| 
 | ||||
| This file is part of RandomX. | ||||
| 
 | ||||
| RandomX is free software: you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation, either version 3 of the License, or | ||||
| (at your option) any later version. | ||||
| 
 | ||||
| RandomX is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
| GNU General Public License for more details. | ||||
| 
 | ||||
| You should have received a copy of the GNU General Public License | ||||
| along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 | ||||
| */ | ||||
| 
 | ||||
| #include "Program.hpp" | ||||
| 
 | ||||
| namespace RandomX { | ||||
| 
 | ||||
| 	//                             Intel Ivy Bridge reference
 | ||||
| 	namespace SuperscalarInstructionType {        //uOPs (decode)   execution ports         latency       code size
 | ||||
| 		constexpr int ISUB_R = 0;           //1               p015                    1               3
 | ||||
| 		constexpr int IXOR_R = 1;           //1               p015                    1               3
 | ||||
| 		constexpr int IADD_RS = 2;          //1               p01                     1               4
 | ||||
| 		constexpr int IMUL_R = 3;           //1               p1                      3               4
 | ||||
| 		constexpr int IROR_C = 4;           //1               p05                     1               4
 | ||||
| 		constexpr int IADD_C7 = 5;          //1               p015                    1               7
 | ||||
| 		constexpr int IXOR_C7 = 6;          //1               p015                    1               7
 | ||||
| 		constexpr int IADD_C8 = 7;          //1+0             p015                    1               8
 | ||||
| 		constexpr int IXOR_C8 = 8;          //1+0             p015                    1               8
 | ||||
| 		constexpr int IADD_C9 = 9;          //1+0             p015                    1               9
 | ||||
| 		constexpr int IXOR_C9 = 10;         //1+0             p015                    1               9
 | ||||
| 		constexpr int IMULH_R = 11;         //1+2+1           0+(p1,p5)+0             3               3+3+3
 | ||||
| 		constexpr int ISMULH_R = 12;        //1+2+1           0+(p1,p5)+0             3               3+3+3
 | ||||
| 		constexpr int IMUL_RCP = 13;        //1+1             p015+p1                 4              10+4
 | ||||
| 
 | ||||
| 		constexpr int COUNT = 14; | ||||
| 		constexpr int INVALID = -1; | ||||
| 	} | ||||
| 
 | ||||
| 	class Blake2Generator { | ||||
| 	public: | ||||
| 		Blake2Generator(const void* seed, int nonce); | ||||
| 		uint8_t getByte(); | ||||
| 		uint32_t getInt32(); | ||||
| 	private: | ||||
| 		uint8_t data[64]; | ||||
| 		size_t dataIndex; | ||||
| 
 | ||||
| 		void checkData(const size_t); | ||||
| 	}; | ||||
| 
 | ||||
| 	double generateSuperscalar(LightProgram& prog, Blake2Generator& gen); | ||||
| } | ||||
|  | @ -53,12 +53,14 @@ namespace RandomX { | |||
| 		Instruction programBuffer[RANDOMX_PROGRAM_SIZE]; | ||||
| 	}; | ||||
| 
 | ||||
| 	class LightProgram { | ||||
| 	static_assert(sizeof(Program) % 64 == 0, "Invalid size of class Program"); | ||||
| 
 | ||||
| 	class SuperscalarProgram { | ||||
| 	public: | ||||
| 		Instruction& operator()(int pc) { | ||||
| 			return programBuffer[pc]; | ||||
| 		} | ||||
| 		friend std::ostream& operator<<(std::ostream& os, const LightProgram& p) { | ||||
| 		friend std::ostream& operator<<(std::ostream& os, const SuperscalarProgram& p) { | ||||
| 			p.print(os); | ||||
| 			return os; | ||||
| 		} | ||||
|  | @ -74,6 +76,15 @@ namespace RandomX { | |||
| 		void setAddressRegister(uint32_t val) { | ||||
| 			addrReg = val; | ||||
| 		} | ||||
| 		double ipc; | ||||
| 		int codeSize; | ||||
| 		int macroOps; | ||||
| 		int decodeCycles; | ||||
| 		int cpuLatency; | ||||
| 		int asicLatency; | ||||
| 		int mulCount; | ||||
| 		int cpuLatencies[8]; | ||||
| 		int asicLatencies[8]; | ||||
| 	private: | ||||
| 		void print(std::ostream& os) const { | ||||
| 			for (unsigned i = 0; i < size; ++i) { | ||||
|  | @ -85,6 +96,4 @@ namespace RandomX { | |||
| 		uint32_t size; | ||||
| 		int addrReg; | ||||
| 	}; | ||||
| 
 | ||||
| 	static_assert(sizeof(Program) % 64 == 0, "Invalid size of class Program"); | ||||
| } | ||||
|  |  | |||
|  | @ -28,7 +28,7 @@ namespace RandomX { | |||
| 	public: | ||||
| 		VirtualMachine(); | ||||
| 		virtual ~VirtualMachine() {} | ||||
| 		virtual void setDataset(dataset_t ds, uint64_t size, LightProgram (&programs)[RANDOMX_CACHE_ACCESSES]) = 0; | ||||
| 		virtual void setDataset(dataset_t ds, uint64_t size, SuperscalarProgram (&programs)[RANDOMX_CACHE_ACCESSES]) = 0; | ||||
| 		void setScratchpad(void* ptr) { | ||||
| 			scratchpad = (uint8_t*)ptr; | ||||
| 		} | ||||
|  |  | |||
|  | @ -36,7 +36,7 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| #include "dataset.hpp" | ||||
| #include "Cache.hpp" | ||||
| #include "hashAes1Rx4.hpp" | ||||
| #include "LightProgramGenerator.hpp" | ||||
| #include "superscalarGenerator.hpp" | ||||
| #include "JitCompilerX86.hpp" | ||||
| 
 | ||||
| const uint8_t seed[32] = { 191, 182, 222, 175, 249, 89, 134, 104, 241, 68, 191, 62, 162, 166, 61, 64, 123, 191, 227, 193, 118, 60, 188, 53, 223, 133, 175, 24, 123, 230, 55, 74 }; | ||||
|  | @ -226,13 +226,13 @@ int main(int argc, char** argv) { | |||
| 	readOption("--legacy", argc, argv, legacy); | ||||
| 
 | ||||
| 	if (genSuperscalar) { | ||||
| 		RandomX::LightProgram p; | ||||
| 		RandomX::SuperscalarProgram p; | ||||
| 		RandomX::Blake2Generator gen(seed, programCount); | ||||
| 		RandomX::generateSuperscalar(p, gen); | ||||
| 		RandomX::AssemblyGeneratorX86 asmX86; | ||||
| 		asmX86.generateAsm(p); | ||||
| 		//std::ofstream file("lightProg2.asm");
 | ||||
| 		//asmX86.printCode(std::cout);
 | ||||
| 		asmX86.printCode(std::cout); | ||||
| 		return 0; | ||||
| 	} | ||||
| 
 | ||||
|  | @ -268,7 +268,7 @@ int main(int argc, char** argv) { | |||
| 	const uint64_t cacheSize = (RANDOMX_ARGON_MEMORY + RANDOMX_ARGON_GROWTH * epoch) * RandomX::ArgonBlockSize; | ||||
| 	const uint64_t datasetSize = (RANDOMX_DATASET_SIZE + RANDOMX_DS_GROWTH * epoch); | ||||
| 	dataset.cache.size = cacheSize; | ||||
| 	RandomX::LightProgram programs[RANDOMX_CACHE_ACCESSES]; | ||||
| 	RandomX::SuperscalarProgram programs[RANDOMX_CACHE_ACCESSES]; | ||||
| 
 | ||||
| 	std::cout << "RandomX - " << (miningMode ? "mining" : "verification") << " mode" << std::endl; | ||||
| 
 | ||||
|  |  | |||
|  | @ -18,7 +18,6 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| */ | ||||
| 
 | ||||
| #include <stddef.h> | ||||
| #include "blake2/blake2.h" | ||||
| #include "configuration.h" | ||||
| #include "Program.hpp" | ||||
| #include "blake2/endian.h" | ||||
|  | @ -27,7 +26,7 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| #include <algorithm> | ||||
| #include <stdexcept> | ||||
| #include <iomanip> | ||||
| #include "LightProgramGenerator.hpp" | ||||
| #include "superscalarGenerator.hpp" | ||||
| 
 | ||||
| namespace RandomX { | ||||
| 
 | ||||
|  | @ -35,6 +34,7 @@ namespace RandomX { | |||
| 		return type == SuperscalarInstructionType::IMUL_R || type == SuperscalarInstructionType::IMULH_R || type == SuperscalarInstructionType::ISMULH_R || type == SuperscalarInstructionType::IMUL_RCP; | ||||
| 	} | ||||
| 
 | ||||
| 	//uOPs (micro-ops) are represented only by the execution port they can go to
 | ||||
| 	namespace ExecutionPort { | ||||
| 		using type = int; | ||||
| 		constexpr type Null = 0; | ||||
|  | @ -46,40 +46,9 @@ namespace RandomX { | |||
| 		constexpr type P015 = P0 | P1 | P5; | ||||
| 	} | ||||
| 
 | ||||
| 	Blake2Generator::Blake2Generator(const void* seed, int nonce) : dataIndex(sizeof(data)) { | ||||
| 		memset(data, 0, sizeof(data)); | ||||
| 		memcpy(data, seed, SeedSize); | ||||
| 		store32(&data[60], nonce); | ||||
| 	} | ||||
| 
 | ||||
| 	uint8_t Blake2Generator::getByte() { | ||||
| 		checkData(1); | ||||
| 		return data[dataIndex++]; | ||||
| 	} | ||||
| 
 | ||||
| 	uint32_t Blake2Generator::getInt32() { | ||||
| 		checkData(4); | ||||
| 		auto ret = load32(&data[dataIndex]); | ||||
| 		dataIndex += 4; | ||||
| 		return ret; | ||||
| 	} | ||||
| 
 | ||||
| 	void Blake2Generator::checkData(const size_t bytesNeeded) { | ||||
| 		if (dataIndex + bytesNeeded > sizeof(data))	{ | ||||
| 			blake2b(data, sizeof(data), data, sizeof(data), nullptr, 0); | ||||
| 			dataIndex = 0; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	class RegisterInfo { | ||||
| 	public: | ||||
| 		RegisterInfo() : latency(0), lastOpGroup(-1), lastOpPar(-1), value(0) {} | ||||
| 		int latency; | ||||
| 		int lastOpGroup; | ||||
| 		int lastOpPar; | ||||
| 		int value; | ||||
| 	}; | ||||
| 
 | ||||
| 	//Macro-operation as output of the x86 decoder
 | ||||
| 	//Usually one macro-op = one x86 instruction, but 2 instructions are sometimes fused into 1 macro-op
 | ||||
| 	//Macro-op can consist of 1 or 2 uOPs.
 | ||||
| 	class MacroOp { | ||||
| 	public: | ||||
| 		MacroOp(const char* name, int size) | ||||
|  | @ -137,10 +106,7 @@ namespace RandomX { | |||
| 		int latency_; | ||||
| 		ExecutionPort::type uop1_; | ||||
| 		ExecutionPort::type uop2_; | ||||
| 		int cycle_; | ||||
| 		bool dependent_ = false; | ||||
| 		MacroOp* depDst_ = nullptr; | ||||
| 		MacroOp* depSrc_ = nullptr; | ||||
| 	}; | ||||
| 
 | ||||
| 	//Size: 3 bytes
 | ||||
|  | @ -174,7 +140,7 @@ namespace RandomX { | |||
| 	const MacroOp ISMULH_R_ops_array[] = { MacroOp::Mov_rr, MacroOp::Imul_r, MacroOp::Mov_rr }; | ||||
| 	const MacroOp IMUL_RCP_ops_array[] = { MacroOp::Mov_ri64, MacroOp(MacroOp::Imul_rr, true) }; | ||||
| 
 | ||||
| 	class LightInstructionInfo { | ||||
| 	class SuperscalarInstructionInfo { | ||||
| 	public: | ||||
| 		const char* getName() const { | ||||
| 			return name_; | ||||
|  | @ -203,21 +169,21 @@ namespace RandomX { | |||
| 		int getSrcOp() const { | ||||
| 			return srcOp_; | ||||
| 		} | ||||
| 		static const LightInstructionInfo ISUB_R; | ||||
| 		static const LightInstructionInfo IXOR_R; | ||||
| 		static const LightInstructionInfo IADD_RS; | ||||
| 		static const LightInstructionInfo IMUL_R; | ||||
| 		static const LightInstructionInfo IROR_C; | ||||
| 		static const LightInstructionInfo IADD_C7; | ||||
| 		static const LightInstructionInfo IXOR_C7; | ||||
| 		static const LightInstructionInfo IADD_C8; | ||||
| 		static const LightInstructionInfo IXOR_C8; | ||||
| 		static const LightInstructionInfo IADD_C9; | ||||
| 		static const LightInstructionInfo IXOR_C9; | ||||
| 		static const LightInstructionInfo IMULH_R; | ||||
| 		static const LightInstructionInfo ISMULH_R; | ||||
| 		static const LightInstructionInfo IMUL_RCP; | ||||
| 		static const LightInstructionInfo NOP; | ||||
| 		static const SuperscalarInstructionInfo ISUB_R; | ||||
| 		static const SuperscalarInstructionInfo IXOR_R; | ||||
| 		static const SuperscalarInstructionInfo IADD_RS; | ||||
| 		static const SuperscalarInstructionInfo IMUL_R; | ||||
| 		static const SuperscalarInstructionInfo IROR_C; | ||||
| 		static const SuperscalarInstructionInfo IADD_C7; | ||||
| 		static const SuperscalarInstructionInfo IXOR_C7; | ||||
| 		static const SuperscalarInstructionInfo IADD_C8; | ||||
| 		static const SuperscalarInstructionInfo IXOR_C8; | ||||
| 		static const SuperscalarInstructionInfo IADD_C9; | ||||
| 		static const SuperscalarInstructionInfo IXOR_C9; | ||||
| 		static const SuperscalarInstructionInfo IMULH_R; | ||||
| 		static const SuperscalarInstructionInfo ISMULH_R; | ||||
| 		static const SuperscalarInstructionInfo IMUL_RCP; | ||||
| 		static const SuperscalarInstructionInfo NOP; | ||||
| 	private: | ||||
| 		const char* name_; | ||||
| 		int type_; | ||||
|  | @ -227,14 +193,14 @@ namespace RandomX { | |||
| 		int dstOp_ = 0; | ||||
| 		int srcOp_; | ||||
| 
 | ||||
| 		LightInstructionInfo(const char* name) | ||||
| 		SuperscalarInstructionInfo(const char* name) | ||||
| 			: name_(name), type_(-1), latency_(0) {} | ||||
| 		LightInstructionInfo(const char* name, int type, const MacroOp& op, int srcOp) | ||||
| 		SuperscalarInstructionInfo(const char* name, int type, const MacroOp& op, int srcOp) | ||||
| 			: name_(name), type_(type), latency_(op.getLatency()), srcOp_(srcOp) { | ||||
| 			ops_.push_back(MacroOp(op)); | ||||
| 		} | ||||
| 		template <size_t N> | ||||
| 		LightInstructionInfo(const char* name, int type, const MacroOp(&arr)[N], int resultOp, int dstOp, int srcOp) | ||||
| 		SuperscalarInstructionInfo(const char* name, int type, const MacroOp(&arr)[N], int resultOp, int dstOp, int srcOp) | ||||
| 			: name_(name), type_(type), latency_(0), resultOp_(resultOp), dstOp_(dstOp), srcOp_(srcOp) { | ||||
| 			for (unsigned i = 0; i < N; ++i) { | ||||
| 				ops_.push_back(MacroOp(arr[i])); | ||||
|  | @ -244,24 +210,34 @@ namespace RandomX { | |||
| 		} | ||||
| 	}; | ||||
| 
 | ||||
| 	const LightInstructionInfo LightInstructionInfo::ISUB_R = LightInstructionInfo("ISUB_R", SuperscalarInstructionType::ISUB_R, MacroOp::Sub_rr, 0); | ||||
| 	const LightInstructionInfo LightInstructionInfo::IXOR_R = LightInstructionInfo("IXOR_R", SuperscalarInstructionType::IXOR_R, MacroOp::Xor_rr, 0); | ||||
| 	const LightInstructionInfo LightInstructionInfo::IADD_RS = LightInstructionInfo("IADD_RS", SuperscalarInstructionType::IADD_RS, MacroOp::Lea_sib, 0); | ||||
| 	const LightInstructionInfo LightInstructionInfo::IMUL_R = LightInstructionInfo("IMUL_R", SuperscalarInstructionType::IMUL_R, MacroOp::Imul_rr, 0); | ||||
| 	const LightInstructionInfo LightInstructionInfo::IROR_C = LightInstructionInfo("IROR_C", SuperscalarInstructionType::IROR_C, MacroOp::Ror_ri, -1); | ||||
| 	const SuperscalarInstructionInfo SuperscalarInstructionInfo::ISUB_R = SuperscalarInstructionInfo("ISUB_R", SuperscalarInstructionType::ISUB_R, MacroOp::Sub_rr, 0); | ||||
| 	const SuperscalarInstructionInfo SuperscalarInstructionInfo::IXOR_R = SuperscalarInstructionInfo("IXOR_R", SuperscalarInstructionType::IXOR_R, MacroOp::Xor_rr, 0); | ||||
| 	const SuperscalarInstructionInfo SuperscalarInstructionInfo::IADD_RS = SuperscalarInstructionInfo("IADD_RS", SuperscalarInstructionType::IADD_RS, MacroOp::Lea_sib, 0); | ||||
| 	const SuperscalarInstructionInfo SuperscalarInstructionInfo::IMUL_R = SuperscalarInstructionInfo("IMUL_R", SuperscalarInstructionType::IMUL_R, MacroOp::Imul_rr, 0); | ||||
| 	const SuperscalarInstructionInfo SuperscalarInstructionInfo::IROR_C = SuperscalarInstructionInfo("IROR_C", SuperscalarInstructionType::IROR_C, MacroOp::Ror_ri, -1); | ||||
| 
 | ||||
| 	const LightInstructionInfo LightInstructionInfo::IADD_C7 = LightInstructionInfo("IADD_C7", SuperscalarInstructionType::IADD_C7, MacroOp::Add_ri, -1); | ||||
| 	const LightInstructionInfo LightInstructionInfo::IXOR_C7 = LightInstructionInfo("IXOR_C7", SuperscalarInstructionType::IXOR_C7, MacroOp::Xor_ri, -1); | ||||
| 	const LightInstructionInfo LightInstructionInfo::IADD_C8 = LightInstructionInfo("IADD_C8", SuperscalarInstructionType::IADD_C8, MacroOp::Add_ri, -1); | ||||
| 	const LightInstructionInfo LightInstructionInfo::IXOR_C8 = LightInstructionInfo("IXOR_C8", SuperscalarInstructionType::IXOR_C8, MacroOp::Xor_ri, -1); | ||||
| 	const LightInstructionInfo LightInstructionInfo::IADD_C9 = LightInstructionInfo("IADD_C9", SuperscalarInstructionType::IADD_C9, MacroOp::Add_ri, -1); | ||||
| 	const LightInstructionInfo LightInstructionInfo::IXOR_C9 = LightInstructionInfo("IXOR_C9", SuperscalarInstructionType::IXOR_C9, MacroOp::Xor_ri, -1); | ||||
| 	const SuperscalarInstructionInfo SuperscalarInstructionInfo::IADD_C7 = SuperscalarInstructionInfo("IADD_C7", SuperscalarInstructionType::IADD_C7, MacroOp::Add_ri, -1); | ||||
| 	const SuperscalarInstructionInfo SuperscalarInstructionInfo::IXOR_C7 = SuperscalarInstructionInfo("IXOR_C7", SuperscalarInstructionType::IXOR_C7, MacroOp::Xor_ri, -1); | ||||
| 	const SuperscalarInstructionInfo SuperscalarInstructionInfo::IADD_C8 = SuperscalarInstructionInfo("IADD_C8", SuperscalarInstructionType::IADD_C8, MacroOp::Add_ri, -1); | ||||
| 	const SuperscalarInstructionInfo SuperscalarInstructionInfo::IXOR_C8 = SuperscalarInstructionInfo("IXOR_C8", SuperscalarInstructionType::IXOR_C8, MacroOp::Xor_ri, -1); | ||||
| 	const SuperscalarInstructionInfo SuperscalarInstructionInfo::IADD_C9 = SuperscalarInstructionInfo("IADD_C9", SuperscalarInstructionType::IADD_C9, MacroOp::Add_ri, -1); | ||||
| 	const SuperscalarInstructionInfo SuperscalarInstructionInfo::IXOR_C9 = SuperscalarInstructionInfo("IXOR_C9", SuperscalarInstructionType::IXOR_C9, MacroOp::Xor_ri, -1); | ||||
| 
 | ||||
| 	const LightInstructionInfo LightInstructionInfo::IMULH_R = LightInstructionInfo("IMULH_R", SuperscalarInstructionType::IMULH_R, IMULH_R_ops_array, 1, 0, 1); | ||||
| 	const LightInstructionInfo LightInstructionInfo::ISMULH_R = LightInstructionInfo("ISMULH_R", SuperscalarInstructionType::ISMULH_R, ISMULH_R_ops_array, 1, 0, 1); | ||||
| 	const LightInstructionInfo LightInstructionInfo::IMUL_RCP = LightInstructionInfo("IMUL_RCP", SuperscalarInstructionType::IMUL_RCP, IMUL_RCP_ops_array, 1, 1, -1); | ||||
| 	const SuperscalarInstructionInfo SuperscalarInstructionInfo::IMULH_R = SuperscalarInstructionInfo("IMULH_R", SuperscalarInstructionType::IMULH_R, IMULH_R_ops_array, 1, 0, 1); | ||||
| 	const SuperscalarInstructionInfo SuperscalarInstructionInfo::ISMULH_R = SuperscalarInstructionInfo("ISMULH_R", SuperscalarInstructionType::ISMULH_R, ISMULH_R_ops_array, 1, 0, 1); | ||||
| 	const SuperscalarInstructionInfo SuperscalarInstructionInfo::IMUL_RCP = SuperscalarInstructionInfo("IMUL_RCP", SuperscalarInstructionType::IMUL_RCP, IMUL_RCP_ops_array, 1, 1, -1); | ||||
| 	 | ||||
| 	const LightInstructionInfo LightInstructionInfo::NOP = LightInstructionInfo("NOP"); | ||||
| 	const SuperscalarInstructionInfo SuperscalarInstructionInfo::NOP = SuperscalarInstructionInfo("NOP"); | ||||
| 
 | ||||
| 	//these are some of the options how to split a 16-byte window into 3 or 4 x86 instructions.
 | ||||
| 	//RandomX uses instructions with a native size of 3 (sub, xor, mul, mov), 4 (lea, mul), 7 (xor, add immediate) or 10 bytes (mov 64-bit immediate).
 | ||||
| 	//Slots with sizes of 8 or 9 bytes need to be padded with a nop instruction.
 | ||||
| 	const int buffer0[] = { 4, 8, 4 }; | ||||
| 	const int buffer1[] = { 7, 3, 3, 3 }; | ||||
| 	const int buffer2[] = { 3, 7, 3, 3 }; | ||||
| 	const int buffer3[] = { 4, 9, 3 }; | ||||
| 	const int buffer4[] = { 4, 4, 4, 4 }; | ||||
| 	const int buffer5[] = { 3, 3, 10 }; | ||||
| 
 | ||||
| 	class DecoderBuffer { | ||||
| 	public: | ||||
|  | @ -318,16 +294,6 @@ namespace RandomX { | |||
| 		} | ||||
| 	}; | ||||
| 
 | ||||
| 	//these are some of the options how to split a 16-byte window into 3 or 4 x86 instructions.
 | ||||
| 	//RandomX uses instructions with a native size of 3 (sub, xor, mul, mov), 4 (lea, mul), 7 (xor, add immediate) or 10 bytes (mov 64-bit immediate).
 | ||||
| 	//Slots with sizes of 8 or 9 bytes need to be padded with a nop instruction.
 | ||||
| 	const int buffer0[] = { 4, 8, 4 }; | ||||
| 	const int buffer1[] = { 7, 3, 3, 3 }; | ||||
| 	const int buffer2[] = { 3, 7, 3, 3 }; | ||||
| 	const int buffer3[] = { 4, 9, 3 }; | ||||
| 	const int buffer4[] = { 4, 4, 4, 4 }; | ||||
| 	const int buffer5[] = { 3, 3, 10 }; | ||||
| 
 | ||||
| 	const DecoderBuffer DecoderBuffer::decodeBuffer484 = DecoderBuffer("4,8,4", 0, buffer0); | ||||
| 	const DecoderBuffer DecoderBuffer::decodeBuffer7333 = DecoderBuffer("7,3,3,3", 1, buffer1); | ||||
| 	const DecoderBuffer DecoderBuffer::decodeBuffer3733 = DecoderBuffer("3,7,3,3", 2, buffer2); | ||||
|  | @ -344,13 +310,13 @@ namespace RandomX { | |||
| 
 | ||||
| 	const DecoderBuffer DecoderBuffer::Default = DecoderBuffer(); | ||||
| 
 | ||||
| 	const LightInstructionInfo* slot_3[]  = { &LightInstructionInfo::ISUB_R, &LightInstructionInfo::IXOR_R }; | ||||
| 	const LightInstructionInfo* slot_3L[] = { &LightInstructionInfo::ISUB_R, &LightInstructionInfo::IXOR_R, &LightInstructionInfo::IMULH_R, &LightInstructionInfo::ISMULH_R }; | ||||
| 	const LightInstructionInfo* slot_4[]  = { &LightInstructionInfo::IROR_C, &LightInstructionInfo::IADD_RS }; | ||||
| 	const LightInstructionInfo* slot_7[]  = { &LightInstructionInfo::IXOR_C7, &LightInstructionInfo::IADD_C7 }; | ||||
| 	const LightInstructionInfo* slot_8[] = { &LightInstructionInfo::IXOR_C8, &LightInstructionInfo::IADD_C8 }; | ||||
| 	const LightInstructionInfo* slot_9[] = { &LightInstructionInfo::IXOR_C9, &LightInstructionInfo::IADD_C9 }; | ||||
| 	const LightInstructionInfo* slot_10   = &LightInstructionInfo::IMUL_RCP; | ||||
| 	const SuperscalarInstructionInfo* slot_3[]  = { &SuperscalarInstructionInfo::ISUB_R, &SuperscalarInstructionInfo::IXOR_R }; | ||||
| 	const SuperscalarInstructionInfo* slot_3L[] = { &SuperscalarInstructionInfo::ISUB_R, &SuperscalarInstructionInfo::IXOR_R, &SuperscalarInstructionInfo::IMULH_R, &SuperscalarInstructionInfo::ISMULH_R }; | ||||
| 	const SuperscalarInstructionInfo* slot_4[]  = { &SuperscalarInstructionInfo::IROR_C, &SuperscalarInstructionInfo::IADD_RS }; | ||||
| 	const SuperscalarInstructionInfo* slot_7[]  = { &SuperscalarInstructionInfo::IXOR_C7, &SuperscalarInstructionInfo::IADD_C7 }; | ||||
| 	const SuperscalarInstructionInfo* slot_8[] = { &SuperscalarInstructionInfo::IXOR_C8, &SuperscalarInstructionInfo::IADD_C8 }; | ||||
| 	const SuperscalarInstructionInfo* slot_9[] = { &SuperscalarInstructionInfo::IXOR_C9, &SuperscalarInstructionInfo::IADD_C9 }; | ||||
| 	const SuperscalarInstructionInfo* slot_10   = &SuperscalarInstructionInfo::IMUL_RCP; | ||||
| 
 | ||||
| 	static bool selectRegister(std::vector<int>& availableRegisters, Blake2Generator& gen, int& reg) { | ||||
| 		int index; | ||||
|  | @ -367,9 +333,19 @@ namespace RandomX { | |||
| 		return true; | ||||
| 	} | ||||
| 
 | ||||
| 	class LightInstruction { | ||||
| 	class RegisterInfo { | ||||
| 	public: | ||||
| 		void toInstr(Instruction& instr) { | ||||
| 		RegisterInfo() : latency(0), lastOpGroup(-1), lastOpPar(-1), value(0) {} | ||||
| 		int latency; | ||||
| 		int lastOpGroup; | ||||
| 		int lastOpPar; | ||||
| 		int value; | ||||
| 	}; | ||||
| 
 | ||||
| 	//"SuperscalarInstruction" consists of one or more macro-ops
 | ||||
| 	class SuperscalarInstruction { | ||||
| 	public: | ||||
| 		void toInstr(Instruction& instr) { //translate to a RandomX instruction format
 | ||||
| 			instr.opcode = getType(); | ||||
| 			instr.dst = dst_; | ||||
| 			instr.src = src_ >= 0 ? src_ : dst_; | ||||
|  | @ -392,7 +368,7 @@ namespace RandomX { | |||
| 			case 4: | ||||
| 				//if this is the 4-4-4-4 buffer, issue multiplications as the first 3 instructions
 | ||||
| 				if (fetchType == 4 && !isLast) { | ||||
| 					create(&LightInstructionInfo::IMUL_R, gen); | ||||
| 					create(&SuperscalarInstructionInfo::IMUL_R, gen); | ||||
| 				} | ||||
| 				else { | ||||
| 					create(slot_4[gen.getByte() & 1], gen); | ||||
|  | @ -415,7 +391,7 @@ namespace RandomX { | |||
| 			} | ||||
| 		} | ||||
| 
 | ||||
| 		void create(const LightInstructionInfo* info, Blake2Generator& gen) { | ||||
| 		void create(const SuperscalarInstructionInfo* info, Blake2Generator& gen) { | ||||
| 			info_ = info; | ||||
| 			reset(); | ||||
| 			switch (info->getType()) | ||||
|  | @ -445,7 +421,7 @@ namespace RandomX { | |||
| 				mod_ = 0; | ||||
| 				imm32_ = 0; | ||||
| 				opGroup_ = SuperscalarInstructionType::IMUL_R; | ||||
| 				opGroupPar_ = -1; | ||||
| 				groupParIsSource_ = true; | ||||
| 			} break; | ||||
| 
 | ||||
| 			case SuperscalarInstructionType::IROR_C: { | ||||
|  | @ -505,18 +481,22 @@ namespace RandomX { | |||
| 			} | ||||
| 		} | ||||
| 
 | ||||
| 		bool selectDestination(int cycle, RegisterInfo (&registers)[8], Blake2Generator& gen) { | ||||
| 		bool selectDestination(int cycle, bool allowChainedMul, RegisterInfo (&registers)[8], Blake2Generator& gen) { | ||||
| 			/*if (allowChainedMultiplication && opGroup_ == SuperscalarInstructionType::IMUL_R)
 | ||||
| 				std::cout << "Selecting destination with chained MUL enabled" << std::endl;*/ | ||||
| 			std::vector<int> availableRegisters; | ||||
| 			//Conditions for the destination register:
 | ||||
| 			// * value must be ready at the required cycle
 | ||||
| 			// * cannot be the same as the source register unless the instruction allows it
 | ||||
| 			//   - this avoids optimizable instructions such as "xor r, r" or "sub r, r"
 | ||||
| 			// * register cannot be multiplied twice in a row unless allowChainedMul is true 
 | ||||
| 			//   - this avoids accumulation of trailing zeroes in registers due to excessive multiplication
 | ||||
| 			//   - allowChainedMul is set to true if an attempt to find source/destination registers failed (this is quite rare, but prevents a catastrophic failure of the generator)
 | ||||
| 			// * either the last instruction applied to the register or its source must be different than this instruction
 | ||||
| 			//   - this avoids optimizable instruction sequences such as "xor r1, r2; xor r1, r2" or "ror r, C1; ror r, C2" or "add r, C1; add r, C2"
 | ||||
| 			//   - it also avoids accumulation of trailing zeroes in registers due to excessive multiplication
 | ||||
| 			// * register r5 cannot be the destination of the IADD_RS instruction (limitation of the x86 lea instruction)
 | ||||
| 			for (unsigned i = 0; i < 8; ++i) { | ||||
| 				if (registers[i].latency <= cycle && (canReuse_ || i != src_) && (registers[i].lastOpGroup != opGroup_ || registers[i].lastOpPar != opGroupPar_) && (info_->getType() != SuperscalarInstructionType::IADD_RS || i != LimitedAddressRegister)) | ||||
| 				if (registers[i].latency <= cycle && (canReuse_ || i != src_) && (allowChainedMul || opGroup_ != SuperscalarInstructionType::IMUL_R || registers[i].lastOpGroup != SuperscalarInstructionType::IMUL_R) && (registers[i].lastOpGroup != opGroup_ || registers[i].lastOpPar != opGroupPar_) && (info_->getType() != SuperscalarInstructionType::IADD_RS || i != LimitedAddressRegister)) | ||||
| 					availableRegisters.push_back(i); | ||||
| 			} | ||||
| 			return selectRegister(availableRegisters, gen, dst_); | ||||
|  | @ -560,14 +540,14 @@ namespace RandomX { | |||
| 			return opGroupPar_; | ||||
| 		} | ||||
| 
 | ||||
| 		const LightInstructionInfo& getInfo() const { | ||||
| 		const SuperscalarInstructionInfo& getInfo() const { | ||||
| 			return *info_; | ||||
| 		} | ||||
| 
 | ||||
| 		static const LightInstruction Null; | ||||
| 		static const SuperscalarInstruction Null; | ||||
| 
 | ||||
| 	private: | ||||
| 		const LightInstructionInfo* info_; | ||||
| 		const SuperscalarInstructionInfo* info_; | ||||
| 		int src_ = -1; | ||||
| 		int dst_ = -1; | ||||
| 		int mod_; | ||||
|  | @ -582,15 +562,16 @@ namespace RandomX { | |||
| 			canReuse_ = groupParIsSource_ = false; | ||||
| 		} | ||||
| 
 | ||||
| 		LightInstruction(const LightInstructionInfo* info) : info_(info) { | ||||
| 		SuperscalarInstruction(const SuperscalarInstructionInfo* info) : info_(info) { | ||||
| 		} | ||||
| 	}; | ||||
| 
 | ||||
| 	const LightInstruction LightInstruction::Null = LightInstruction(&LightInstructionInfo::NOP); | ||||
| 	const SuperscalarInstruction SuperscalarInstruction::Null = SuperscalarInstruction(&SuperscalarInstructionInfo::NOP); | ||||
| 
 | ||||
| 	constexpr int CYCLE_MAP_SIZE = RANDOMX_SUPERSCALAR_LATENCY + 3; | ||||
| 	constexpr int CYCLE_MAP_SIZE = RANDOMX_SUPERSCALAR_LATENCY + 4; | ||||
| 	constexpr int LOOK_FORWARD_CYCLES = 4; | ||||
| 	constexpr int MAX_THROWAWAY_COUNT = 256; | ||||
| 
 | ||||
| #ifndef _DEBUG | ||||
| 	constexpr bool TRACE = false; | ||||
| 	constexpr bool INFO = false; | ||||
|  | @ -602,7 +583,7 @@ namespace RandomX { | |||
| 	template<bool commit> | ||||
| 	static int scheduleUop(ExecutionPort::type uop, ExecutionPort::type(&portBusy)[CYCLE_MAP_SIZE][3], int cycle) { | ||||
| 		//The scheduling here is done optimistically by checking port availability in order P5 -> P0 -> P1 to not overload
 | ||||
| 		//P1 (multiplication port) by instructions that can go to any port.
 | ||||
| 		//port P1 (multiplication) by instructions that can go to any port.
 | ||||
| 		for (; cycle < CYCLE_MAP_SIZE; ++cycle) { | ||||
| 			if ((uop & ExecutionPort::P5) != 0 && !portBusy[cycle][2]) { | ||||
| 				if (commit) { | ||||
|  | @ -666,14 +647,14 @@ namespace RandomX { | |||
| 		return -1; | ||||
| 	} | ||||
| 
 | ||||
| 	double generateSuperscalar(LightProgram& prog, Blake2Generator& gen) { | ||||
| 	void generateSuperscalar(SuperscalarProgram& prog, Blake2Generator& gen) { | ||||
| 
 | ||||
| 		ExecutionPort::type portBusy[CYCLE_MAP_SIZE][3]; | ||||
| 		memset(portBusy, 0, sizeof(portBusy)); | ||||
| 		RegisterInfo registers[8]; | ||||
| 
 | ||||
| 		const DecoderBuffer* decodeBuffer = &DecoderBuffer::Default; | ||||
| 		LightInstruction currentInstruction = LightInstruction::Null; | ||||
| 		SuperscalarInstruction currentInstruction = SuperscalarInstruction::Null; | ||||
| 		int macroOpIndex = 0; | ||||
| 		int codeSize = 0; | ||||
| 		int macroOpCount = 0; | ||||
|  | @ -719,7 +700,9 @@ namespace RandomX { | |||
| 				int scheduleCycle = scheduleMop<false>(mop, portBusy, cycle, depCycle); | ||||
| 				if (scheduleCycle < 0) { | ||||
| 					/*if (TRACE)*/ std::cout << "Unable to map operation '" << mop.getName() << "' to execution port (cycle " << cycle << ")" << std::endl; | ||||
| 					return 0; | ||||
| 					//__debugbreak();
 | ||||
| 					portsSaturated = true; | ||||
| 					break; | ||||
| 				} | ||||
| 
 | ||||
| 				//find a source register (if applicable) that will be ready when this instruction executes
 | ||||
|  | @ -737,20 +720,20 @@ namespace RandomX { | |||
| 							throwAwayCount++; | ||||
| 							macroOpIndex = currentInstruction.getInfo().getSize(); | ||||
| 							if (TRACE) std::cout << "; THROW away " << currentInstruction.getInfo().getName() << std::endl; | ||||
| 							//cycle = topCycle;
 | ||||
| 							continue; | ||||
| 						} | ||||
| 						//abort this decode buffer
 | ||||
| 						/*if (TRACE)*/ std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - source registers not available" << std::endl; | ||||
| 						currentInstruction = LightInstruction::Null; | ||||
| 						/*if (TRACE)*/ std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - source registers not available for operation " << currentInstruction.getInfo().getName() << std::endl; | ||||
| 						currentInstruction = SuperscalarInstruction::Null; | ||||
| 						break; | ||||
| 					} | ||||
| 					if (TRACE) std::cout << "; src = r" << currentInstruction.getSource() << std::endl; | ||||
| 				} | ||||
| 				throwAwayCount = 0; | ||||
| 				//find a destination register that will be ready when this instruction executes
 | ||||
| 				if (macroOpIndex == currentInstruction.getInfo().getDstOp()) { | ||||
| 					int forward; | ||||
| 					for (forward = 0; forward < LOOK_FORWARD_CYCLES && !currentInstruction.selectDestination(scheduleCycle, registers, gen); ++forward) { | ||||
| 					for (forward = 0; forward < LOOK_FORWARD_CYCLES && !currentInstruction.selectDestination(scheduleCycle, throwAwayCount > 0, registers, gen); ++forward) { | ||||
| 						if (TRACE) std::cout << "; dst STALL at cycle " << cycle << std::endl; | ||||
| 						++scheduleCycle; | ||||
| 						++cycle; | ||||
|  | @ -760,16 +743,18 @@ namespace RandomX { | |||
| 							throwAwayCount++; | ||||
| 							macroOpIndex = currentInstruction.getInfo().getSize(); | ||||
| 							if (TRACE) std::cout << "; THROW away " << currentInstruction.getInfo().getName() << std::endl; | ||||
| 							//cycle = topCycle;
 | ||||
| 							continue; | ||||
| 						} | ||||
| 						//abort this decode buffer
 | ||||
| 						/*if (TRACE)*/ std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - destination registers not available" << std::endl; | ||||
| 						currentInstruction = LightInstruction::Null; | ||||
| 						currentInstruction = SuperscalarInstruction::Null; | ||||
| 						break; | ||||
| 					} | ||||
| 					if (TRACE) std::cout << "; dst = r" << currentInstruction.getDestination() << std::endl; | ||||
| 				} | ||||
| 				throwAwayCount = 0; | ||||
| 
 | ||||
| 				//recalculate when the instruction can be scheduled for execution based on operand availability
 | ||||
| 				scheduleCycle = scheduleMop<true>(mop, portBusy, scheduleCycle, scheduleCycle); | ||||
| 
 | ||||
|  | @ -809,67 +794,53 @@ namespace RandomX { | |||
| 			++cycle; | ||||
| 		} | ||||
| 
 | ||||
| 		if(INFO) std::cout << "; ALU port utilization:" << std::endl; | ||||
| 		if (INFO) std::cout << "; (* = in use, _ = idle)" << std::endl; | ||||
| 
 | ||||
| 		int portCycles = 0; | ||||
| 		for (int i = 0; i < CYCLE_MAP_SIZE; ++i) { | ||||
| 			//std::cout << "; " << std::setw(3) << i << " ";
 | ||||
| 			for (int j = 0; j < 3; ++j) { | ||||
| 				//std::cout << (portBusy[i][j] ? '*' : '_');
 | ||||
| 				portCycles += !!portBusy[i][j]; | ||||
| 			} | ||||
| 			//std::cout << std::endl;
 | ||||
| 		} | ||||
| 
 | ||||
| 		double ipc = (macroOpCount / (double)retireCycle); | ||||
| 
 | ||||
| 		if (INFO) std::cout << "; code size " << codeSize << " bytes" << std::endl; | ||||
| 		if (INFO) std::cout << "; x86 macro-ops: " << macroOpCount << std::endl; | ||||
| 		if (INFO) std::cout << "; fetch cycles: " << decodeCycle << std::endl; | ||||
| 		if (INFO) std::cout << "; RandomX instructions: " << programSize << std::endl; | ||||
| 		if (INFO) std::cout << "; Execution time: " << retireCycle << " cycles" << std::endl; | ||||
| 		if (INFO) std::cout << "; IPC = " << ipc << std::endl; | ||||
| 		if (INFO) std::cout << "; Port-cycles: " << portCycles << std::endl; | ||||
| 		if (INFO) std::cout << "; Multiplications: " << mulCount << std::endl; | ||||
| 
 | ||||
| 		int asicLatency[8]; | ||||
| 		memset(asicLatency, 0, sizeof(asicLatency)); | ||||
| 		memset(prog.asicLatencies, 0, sizeof(prog.asicLatencies)); | ||||
| 
 | ||||
| 		//Calculate ASIC latency:
 | ||||
| 		//Assumes 1 cycle latency for all operations and unlimited parallelization.
 | ||||
| 		for (int i = 0; i < programSize; ++i) { | ||||
| 			Instruction& instr = prog(i); | ||||
| 			int latDst = asicLatency[instr.dst] + 1; | ||||
| 			int latSrc = instr.dst != instr.src ? asicLatency[instr.src] + 1 : 0; | ||||
| 			asicLatency[instr.dst] = std::max(latDst, latSrc); | ||||
| 			int latDst = prog.asicLatencies[instr.dst] + 1; | ||||
| 			int latSrc = instr.dst != instr.src ? prog.asicLatencies[instr.src] + 1 : 0; | ||||
| 			prog.asicLatencies[instr.dst] = std::max(latDst, latSrc); | ||||
| 		} | ||||
| 
 | ||||
| 		//address register is the register with the highest ASIC latency
 | ||||
| 		int asicLatencyMax = 0; | ||||
| 		int addressReg = 0; | ||||
| 		for (int i = 0; i < 8; ++i) { | ||||
| 			if (asicLatency[i] > asicLatencyMax) { | ||||
| 				asicLatencyMax = asicLatency[i]; | ||||
| 			if (prog.asicLatencies[i] > asicLatencyMax) { | ||||
| 				asicLatencyMax = prog.asicLatencies[i]; | ||||
| 				addressReg = i; | ||||
| 			} | ||||
| 		} | ||||
| 
 | ||||
| 		if (INFO) std::cout << "; ASIC latency: " << asicLatencyMax << std::endl; | ||||
| 
 | ||||
| 		if (INFO) { | ||||
| 			std::cout << "; ASIC latency:" << std::endl; | ||||
| 			for (int i = 0; i < 8; ++i) { | ||||
| 				std::cout << ";  r" << i << " = " << asicLatency[i] << std::endl; | ||||
| 			} | ||||
| 			if (INFO) std::cout << "; CPU latency:" << std::endl; | ||||
| 			for (int i = 0; i < 8; ++i) { | ||||
| 				std::cout << ";  r" << i << " = " << registers[i].latency << std::endl; | ||||
| 			} | ||||
| 			prog.cpuLatencies[i] = registers[i].latency; | ||||
| 		} | ||||
| 
 | ||||
| 		prog.setSize(programSize); | ||||
| 		prog.setAddressRegister(addressReg); | ||||
| 		return ipc; | ||||
| 
 | ||||
| 		prog.cpuLatency = retireCycle; | ||||
| 		prog.asicLatency = asicLatencyMax; | ||||
| 		prog.codeSize = codeSize; | ||||
| 		prog.macroOps = macroOpCount; | ||||
| 		prog.decodeCycles = decodeCycle; | ||||
| 		prog.ipc = ipc; | ||||
| 		prog.mulCount = mulCount; | ||||
| 		 | ||||
| 
 | ||||
| 		/*if(INFO) std::cout << "; ALU port utilization:" << std::endl;
 | ||||
| 		if (INFO) std::cout << "; (* = in use, _ = idle)" << std::endl; | ||||
| 
 | ||||
| 		int portCycles = 0; | ||||
| 		for (int i = 0; i < CYCLE_MAP_SIZE; ++i) { | ||||
| 			std::cout << "; " << std::setw(3) << i << " "; | ||||
| 			for (int j = 0; j < 3; ++j) { | ||||
| 				std::cout << (portBusy[i][j] ? '*' : '_'); | ||||
| 				portCycles += !!portBusy[i][j]; | ||||
| 			} | ||||
| 			std::cout << std::endl; | ||||
| 		}*/ | ||||
| 	} | ||||
| } | ||||
							
								
								
									
										47
									
								
								src/superscalarGenerator.hpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										47
									
								
								src/superscalarGenerator.hpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,47 @@ | |||
| /*
 | ||||
| Copyright (c) 2019 tevador | ||||
| 
 | ||||
| This file is part of RandomX. | ||||
| 
 | ||||
| RandomX is free software: you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation, either version 3 of the License, or | ||||
| (at your option) any later version. | ||||
| 
 | ||||
| RandomX is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
| GNU General Public License for more details. | ||||
| 
 | ||||
| You should have received a copy of the GNU General Public License | ||||
| along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 | ||||
| */ | ||||
| 
 | ||||
| #pragma once | ||||
| #include "Program.hpp" | ||||
| #include "Blake2Generator.hpp" | ||||
| 
 | ||||
| namespace RandomX { | ||||
| 	                                              //                  Intel Ivy Bridge reference
 | ||||
| 	namespace SuperscalarInstructionType {        //uOPs (decode)   execution ports         latency       code size
 | ||||
| 		constexpr int ISUB_R = 0;                 //1               p015                    1               3 (sub)
 | ||||
| 		constexpr int IXOR_R = 1;                 //1               p015                    1               3 (xor)
 | ||||
| 		constexpr int IADD_RS = 2;                //1               p01                     1               4 (lea)
 | ||||
| 		constexpr int IMUL_R = 3;                 //1               p1                      3               4 (imul)
 | ||||
| 		constexpr int IROR_C = 4;                 //1               p05                     1               4 (ror)
 | ||||
| 		constexpr int IADD_C7 = 5;                //1               p015                    1               7 (add)
 | ||||
| 		constexpr int IXOR_C7 = 6;                //1               p015                    1               7 (xor)
 | ||||
| 		constexpr int IADD_C8 = 7;                //1+0             p015                    1               7+1 (add+nop)
 | ||||
| 		constexpr int IXOR_C8 = 8;                //1+0             p015                    1               7+1 (xor+nop)
 | ||||
| 		constexpr int IADD_C9 = 9;                //1+0             p015                    1               7+2 (add+nop)
 | ||||
| 		constexpr int IXOR_C9 = 10;               //1+0             p015                    1               7+2 (xor+nop)
 | ||||
| 		constexpr int IMULH_R = 11;               //1+2+1           0+(p1,p5)+0             3               3+3+3 (mov+mul+mov)
 | ||||
| 		constexpr int ISMULH_R = 12;              //1+2+1           0+(p1,p5)+0             3               3+3+3 (mov+imul+mov)
 | ||||
| 		constexpr int IMUL_RCP = 13;              //1+1             p015+p1                 4              10+4   (mov+imul)
 | ||||
| 
 | ||||
| 		constexpr int COUNT = 14; | ||||
| 		constexpr int INVALID = -1; | ||||
| 	} | ||||
| 
 | ||||
| 	void generateSuperscalar(SuperscalarProgram& prog, Blake2Generator& gen); | ||||
| } | ||||
|  | @ -20,9 +20,10 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| #include <iostream> | ||||
| #include <cstdint> | ||||
| #include <vector> | ||||
| #include "../LightProgramGenerator.hpp" | ||||
| #include "../superscalarGenerator.hpp" | ||||
| #include "../InterpretedVirtualMachine.hpp" | ||||
| #include "../intrinPortable.h" | ||||
| #include "../Blake2Generator.hpp" | ||||
| 
 | ||||
| const uint8_t seed[32] = { 191, 182, 222, 175, 249, 89, 134, 104, 241, 68, 191, 62, 162, 166, 61, 64, 123, 191, 227, 193, 118, 60, 188, 53, 223, 133, 175, 24, 123, 230, 55, 74 }; | ||||
| 
 | ||||
|  | @ -45,9 +46,9 @@ int main() { | |||
| 			uint64_t rb[8]; | ||||
| 			memcpy(rb, ra, sizeof rb); | ||||
| 			rb[0] ^= (1ULL << bit); | ||||
| 			RandomX::LightProgram p; | ||||
| 			RandomX::SuperscalarProgram p; | ||||
| 			RandomX::Blake2Generator gen(seed, i); | ||||
| 			RandomX::generateLightProg2(p, gen); | ||||
| 			RandomX::generateSuperscalar(p, gen); | ||||
| 			RandomX::InterpretedVirtualMachine<false>::executeSuperscalar(ra, p, dummy); | ||||
| 			RandomX::InterpretedVirtualMachine<false>::executeSuperscalar(rb, p, dummy); | ||||
| 			uint64_t diff = 0; | ||||
|  |  | |||
|  | @ -21,7 +21,7 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| #include <cstdint> | ||||
| #include <vector> | ||||
| #include <unordered_set> | ||||
| #include "../LightProgramGenerator.hpp" | ||||
| #include "../superscalarGenerator.hpp" | ||||
| #include "../InterpretedVirtualMachine.hpp" | ||||
| #include "../intrinPortable.h" | ||||
| #include "../configuration.h" | ||||
|  |  | |||
|  | @ -127,6 +127,7 @@ | |||
|     <ClCompile Include="..\src\argon2_core.c" /> | ||||
|     <ClCompile Include="..\src\argon2_ref.c" /> | ||||
|     <ClCompile Include="..\src\AssemblyGeneratorX86.cpp" /> | ||||
|     <ClCompile Include="..\src\Blake2Generator.cpp" /> | ||||
|     <ClCompile Include="..\src\blake2\blake2b.c" /> | ||||
|     <ClCompile Include="..\src\Cache.cpp" /> | ||||
|     <ClCompile Include="..\src\CompiledLightVirtualMachine.cpp" /> | ||||
|  | @ -137,8 +138,7 @@ | |||
|     <ClCompile Include="..\src\instructionsPortable.cpp" /> | ||||
|     <ClCompile Include="..\src\InterpretedVirtualMachine.cpp" /> | ||||
|     <ClCompile Include="..\src\JitCompilerX86.cpp" /> | ||||
|     <ClCompile Include="..\src\LightClientAsyncWorker.cpp" /> | ||||
|     <ClCompile Include="..\src\LightProgramGenerator.cpp" /> | ||||
|     <ClCompile Include="..\src\superscalarGenerator.cpp" /> | ||||
|     <ClCompile Include="..\src\main.cpp" /> | ||||
|     <ClCompile Include="..\src\reciprocal.c" /> | ||||
|     <ClCompile Include="..\src\softAes.cpp" /> | ||||
|  | @ -153,6 +153,7 @@ | |||
|     <ClInclude Include="..\src\argon2.h" /> | ||||
|     <ClInclude Include="..\src\argon2_core.h" /> | ||||
|     <ClInclude Include="..\src\AssemblyGeneratorX86.hpp" /> | ||||
|     <ClInclude Include="..\src\Blake2Generator.hpp" /> | ||||
|     <ClInclude Include="..\src\Cache.hpp" /> | ||||
|     <ClInclude Include="..\src\catch.hpp" /> | ||||
|     <ClInclude Include="..\src\common.hpp" /> | ||||
|  | @ -167,8 +168,7 @@ | |||
|     <ClInclude Include="..\src\intrinPortable.h" /> | ||||
|     <ClInclude Include="..\src\JitCompilerX86-static.hpp" /> | ||||
|     <ClInclude Include="..\src\JitCompilerX86.hpp" /> | ||||
|     <ClInclude Include="..\src\LightClientAsyncWorker.hpp" /> | ||||
|     <ClInclude Include="..\src\LightProgramGenerator.hpp" /> | ||||
|     <ClInclude Include="..\src\superscalarGenerator.hpp" /> | ||||
|     <ClInclude Include="..\src\Program.hpp" /> | ||||
|     <ClInclude Include="..\src\reciprocal.h" /> | ||||
|     <ClInclude Include="..\src\softAes.h" /> | ||||
|  |  | |||
|  | @ -54,12 +54,6 @@ | |||
|     <ClCompile Include="..\src\JitCompilerX86.cpp"> | ||||
|       <Filter>Source Files</Filter> | ||||
|     </ClCompile> | ||||
|     <ClCompile Include="..\src\LightClientAsyncWorker.cpp"> | ||||
|       <Filter>Source Files</Filter> | ||||
|     </ClCompile> | ||||
|     <ClCompile Include="..\src\LightProgramGenerator.cpp"> | ||||
|       <Filter>Source Files</Filter> | ||||
|     </ClCompile> | ||||
|     <ClCompile Include="..\src\main.cpp"> | ||||
|       <Filter>Source Files</Filter> | ||||
|     </ClCompile> | ||||
|  | @ -75,6 +69,12 @@ | |||
|     <ClCompile Include="..\src\blake2\blake2b.c"> | ||||
|       <Filter>Source Files</Filter> | ||||
|     </ClCompile> | ||||
|     <ClCompile Include="..\src\Blake2Generator.cpp"> | ||||
|       <Filter>Source Files</Filter> | ||||
|     </ClCompile> | ||||
|     <ClCompile Include="..\src\superscalarGenerator.cpp"> | ||||
|       <Filter>Source Files</Filter> | ||||
|     </ClCompile> | ||||
|   </ItemGroup> | ||||
|   <ItemGroup> | ||||
|     <MASM Include="..\src\JitCompilerX86-static.asm"> | ||||
|  | @ -136,12 +136,6 @@ | |||
|     <ClInclude Include="..\src\JitCompilerX86-static.hpp"> | ||||
|       <Filter>Header Files</Filter> | ||||
|     </ClInclude> | ||||
|     <ClInclude Include="..\src\LightClientAsyncWorker.hpp"> | ||||
|       <Filter>Header Files</Filter> | ||||
|     </ClInclude> | ||||
|     <ClInclude Include="..\src\LightProgramGenerator.hpp"> | ||||
|       <Filter>Header Files</Filter> | ||||
|     </ClInclude> | ||||
|     <ClInclude Include="..\src\Program.hpp"> | ||||
|       <Filter>Header Files</Filter> | ||||
|     </ClInclude> | ||||
|  | @ -166,5 +160,11 @@ | |||
|     <ClInclude Include="..\src\virtualMemory.hpp"> | ||||
|       <Filter>Header Files</Filter> | ||||
|     </ClInclude> | ||||
|     <ClInclude Include="..\src\Blake2Generator.hpp"> | ||||
|       <Filter>Header Files</Filter> | ||||
|     </ClInclude> | ||||
|     <ClInclude Include="..\src\superscalarGenerator.hpp"> | ||||
|       <Filter>Header Files</Filter> | ||||
|     </ClInclude> | ||||
|   </ItemGroup> | ||||
| </Project> | ||||
|  | @ -118,6 +118,7 @@ | |||
|   <ItemGroup> | ||||
|     <ClCompile Include="..\src\argon2_core.c" /> | ||||
|     <ClCompile Include="..\src\argon2_ref.c" /> | ||||
|     <ClCompile Include="..\src\Blake2Generator.cpp" /> | ||||
|     <ClCompile Include="..\src\blake2\blake2b.c" /> | ||||
|     <ClCompile Include="..\src\Cache.cpp" /> | ||||
|     <ClCompile Include="..\src\dataset.cpp" /> | ||||
|  | @ -125,9 +126,9 @@ | |||
|     <ClCompile Include="..\src\Instruction.cpp" /> | ||||
|     <ClCompile Include="..\src\instructionsPortable.cpp" /> | ||||
|     <ClCompile Include="..\src\InterpretedVirtualMachine.cpp" /> | ||||
|     <ClCompile Include="..\src\LightProgramGenerator.cpp" /> | ||||
|     <ClCompile Include="..\src\reciprocal.c" /> | ||||
|     <ClCompile Include="..\src\softAes.cpp" /> | ||||
|     <ClCompile Include="..\src\superscalarGenerator.cpp" /> | ||||
|     <ClCompile Include="..\src\tests\superscalar-avalanche.cpp" /> | ||||
|     <ClCompile Include="..\src\VirtualMachine.cpp" /> | ||||
|     <ClCompile Include="..\src\virtualMemory.cpp" /> | ||||
|  |  | |||
|  | @ -45,9 +45,6 @@ | |||
|     <ClCompile Include="..\src\blake2\blake2b.c"> | ||||
|       <Filter>Source Files</Filter> | ||||
|     </ClCompile> | ||||
|     <ClCompile Include="..\src\LightProgramGenerator.cpp"> | ||||
|       <Filter>Source Files</Filter> | ||||
|     </ClCompile> | ||||
|     <ClCompile Include="..\src\hashAes1Rx4.cpp"> | ||||
|       <Filter>Source Files</Filter> | ||||
|     </ClCompile> | ||||
|  | @ -60,6 +57,12 @@ | |||
|     <ClCompile Include="..\src\virtualMemory.cpp"> | ||||
|       <Filter>Source Files</Filter> | ||||
|     </ClCompile> | ||||
|     <ClCompile Include="..\src\superscalarGenerator.cpp"> | ||||
|       <Filter>Source Files</Filter> | ||||
|     </ClCompile> | ||||
|     <ClCompile Include="..\src\Blake2Generator.cpp"> | ||||
|       <Filter>Source Files</Filter> | ||||
|     </ClCompile> | ||||
|   </ItemGroup> | ||||
|   <ItemGroup> | ||||
|     <MASM Include="..\src\squareHash.asm"> | ||||
|  |  | |||
|  | @ -118,6 +118,7 @@ | |||
|   <ItemGroup> | ||||
|     <ClCompile Include="..\src\argon2_core.c" /> | ||||
|     <ClCompile Include="..\src\argon2_ref.c" /> | ||||
|     <ClCompile Include="..\src\Blake2Generator.cpp" /> | ||||
|     <ClCompile Include="..\src\blake2\blake2b.c" /> | ||||
|     <ClCompile Include="..\src\Cache.cpp" /> | ||||
|     <ClCompile Include="..\src\dataset.cpp" /> | ||||
|  | @ -125,9 +126,9 @@ | |||
|     <ClCompile Include="..\src\Instruction.cpp" /> | ||||
|     <ClCompile Include="..\src\instructionsPortable.cpp" /> | ||||
|     <ClCompile Include="..\src\InterpretedVirtualMachine.cpp" /> | ||||
|     <ClCompile Include="..\src\LightProgramGenerator.cpp" /> | ||||
|     <ClCompile Include="..\src\reciprocal.c" /> | ||||
|     <ClCompile Include="..\src\softAes.cpp" /> | ||||
|     <ClCompile Include="..\src\superscalarGenerator.cpp" /> | ||||
|     <ClCompile Include="..\src\tests\superscalar-init.cpp" /> | ||||
|     <ClCompile Include="..\src\VirtualMachine.cpp" /> | ||||
|     <ClCompile Include="..\src\virtualMemory.cpp" /> | ||||
|  |  | |||
|  | @ -42,9 +42,6 @@ | |||
|     <ClCompile Include="..\src\InterpretedVirtualMachine.cpp"> | ||||
|       <Filter>Source Files</Filter> | ||||
|     </ClCompile> | ||||
|     <ClCompile Include="..\src\LightProgramGenerator.cpp"> | ||||
|       <Filter>Source Files</Filter> | ||||
|     </ClCompile> | ||||
|     <ClCompile Include="..\src\reciprocal.c"> | ||||
|       <Filter>Source Files</Filter> | ||||
|     </ClCompile> | ||||
|  | @ -60,6 +57,12 @@ | |||
|     <ClCompile Include="..\src\virtualMemory.cpp"> | ||||
|       <Filter>Source Files</Filter> | ||||
|     </ClCompile> | ||||
|     <ClCompile Include="..\src\superscalarGenerator.cpp"> | ||||
|       <Filter>Source Files</Filter> | ||||
|     </ClCompile> | ||||
|     <ClCompile Include="..\src\Blake2Generator.cpp"> | ||||
|       <Filter>Source Files</Filter> | ||||
|     </ClCompile> | ||||
|   </ItemGroup> | ||||
|   <ItemGroup> | ||||
|     <MASM Include="..\src\squareHash.asm"> | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue