mirror of
				https://git.wownero.com/wownero/RandomWOW.git
				synced 2024-08-15 00:23:14 +00:00 
			
		
		
		
	Support for multiple threads
This commit is contained in:
		
							parent
							
								
									cb12feaf91
								
							
						
					
					
						commit
						b9d2d853aa
					
				
					 13 changed files with 436 additions and 304 deletions
				
			
		
							
								
								
									
										10
									
								
								makefile
									
										
									
									
									
								
							
							
						
						
									
										10
									
								
								makefile
									
										
									
									
									
								
							|  | @ -9,10 +9,9 @@ endif | |||
| BINDIR=bin | ||||
| SRCDIR=src | ||||
| OBJDIR=obj | ||||
| LDFLAGS= | ||||
| LDFLAGS=-lpthread | ||||
| TOBJS=$(addprefix $(OBJDIR)/,instructionsPortable.o TestAluFpu.o) | ||||
| ROBJS=$(addprefix $(OBJDIR)/,argon2_core.o argon2_ref.o AssemblyGeneratorX86.o blake2b.o CompiledVirtualMachine.o dataset.o JitCompilerX86.o instructionsPortable.o Instruction.o InterpretedVirtualMachine.o main.o Program.o softAes.o VirtualMachine.o t1ha2.o) | ||||
| SRC1=$(addprefix $(SRCDIR)/,TestAluFpu.cpp instructions.hpp Pcg32.hpp) | ||||
| ROBJS=$(addprefix $(OBJDIR)/,argon2_core.o argon2_ref.o AssemblyGeneratorX86.o blake2b.o CompiledVirtualMachine.o dataset.o JitCompilerX86.o instructionsPortable.o Instruction.o InterpretedVirtualMachine.o main.o Program.o softAes.o VirtualMachine.o t1ha2.o Cache.o) | ||||
| 
 | ||||
| all: release test | ||||
| 
 | ||||
|  | @ -52,7 +51,7 @@ $(OBJDIR)/blake2b.o: $(addprefix $(SRCDIR)/blake2/,blake2b.c blake2.h blake2-imp | |||
| $(OBJDIR)/CompiledVirtualMachine.o: $(addprefix $(SRCDIR)/,CompiledVirtualMachine.cpp CompiledVirtualMachine.hpp Pcg32.hpp common.hpp instructions.hpp) | $(OBJDIR) | ||||
| 	$(CXX) $(CXXFLAGS) -c $(SRCDIR)/CompiledVirtualMachine.cpp -o $@ | ||||
|    | ||||
| $(OBJDIR)/dataset.o: $(addprefix $(SRCDIR)/,dataset.cpp common.hpp Pcg32.hpp argon2_core.h) | $(OBJDIR) | ||||
| $(OBJDIR)/dataset.o: $(addprefix $(SRCDIR)/,dataset.cpp common.hpp Pcg32.hpp) | $(OBJDIR) | ||||
| 	$(CXX) $(CXXFLAGS) -c $(SRCDIR)/dataset.cpp -o $@ | ||||
| 
 | ||||
| $(OBJDIR)/JitCompilerX86.o: $(addprefix $(SRCDIR)/,JitCompilerX86.cpp JitCompilerX86.hpp Instruction.hpp) | $(OBJDIR) | ||||
|  | @ -72,6 +71,9 @@ $(OBJDIR)/main.o: $(addprefix $(SRCDIR)/,main.cpp InterpretedVirtualMachine.hpp | |||
|    | ||||
| $(OBJDIR)/Program.o: $(addprefix $(SRCDIR)/,Program.cpp Program.hpp Pcg32.hpp) | $(OBJDIR) | ||||
| 	$(CXX) $(CXXFLAGS) -c $(SRCDIR)/Program.cpp -o $@ | ||||
| 
 | ||||
| $(OBJDIR)/Cache.o: $(addprefix $(SRCDIR)/,Cache.cpp Cache.hpp Pcg32.hpp argon2_core.h) | $(OBJDIR) | ||||
| 	$(CXX) $(CXXFLAGS) -c $(SRCDIR)/Cache.cpp -o $@ | ||||
|    | ||||
| $(OBJDIR)/softAes.o: $(addprefix $(SRCDIR)/,softAes.cpp softAes.h) | $(OBJDIR) | ||||
| 	$(CXX) $(CXXFLAGS) -c $(SRCDIR)/softAes.cpp -o $@ | ||||
|  |  | |||
							
								
								
									
										147
									
								
								src/Cache.cpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										147
									
								
								src/Cache.cpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,147 @@ | |||
| /*
 | ||||
| Copyright (c) 2018 tevador | ||||
| 
 | ||||
| This file is part of RandomX. | ||||
| 
 | ||||
| RandomX is free software: you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation, either version 3 of the License, or | ||||
| (at your option) any later version. | ||||
| 
 | ||||
| RandomX is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
| GNU General Public License for more details. | ||||
| 
 | ||||
| You should have received a copy of the GNU General Public License | ||||
| along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 | ||||
| */ | ||||
| 
 | ||||
| #include <cstring> | ||||
| #include "Cache.hpp" | ||||
| #include "softAes.h" | ||||
| #include "argon2.h" | ||||
| #include "Pcg32.hpp" | ||||
| #include "argon2_core.h" | ||||
| 
 | ||||
| namespace RandomX { | ||||
| 
 | ||||
| 	static_assert(ArgonMemorySize % (ArgonLanes * ARGON2_SYNC_POINTS) == 0, "ArgonMemorySize - invalid value"); | ||||
| 
 | ||||
| 	// Running XOR over the four 32-bit lanes of tmp1: each lane is XORed with every lane below it, so that
 | ||||
| 	// sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1)
 | ||||
| 	static inline __m128i sl_xor(__m128i tmp1) { | ||||
| 		__m128i tmp4; | ||||
| 		tmp4 = _mm_slli_si128(tmp1, 0x04); /* byte shift by 4 moves every value up one 32-bit lane */ | ||||
| 		tmp1 = _mm_xor_si128(tmp1, tmp4); | ||||
| 		tmp4 = _mm_slli_si128(tmp4, 0x04); /* copy shifted by two lanes in total */ | ||||
| 		tmp1 = _mm_xor_si128(tmp1, tmp4); | ||||
| 		tmp4 = _mm_slli_si128(tmp4, 0x04); /* copy shifted by three lanes in total */ | ||||
| 		tmp1 = _mm_xor_si128(tmp1, tmp4); | ||||
| 		return tmp1; | ||||
| 	} | ||||
| 
 | ||||
| 	/* Advances the key pair (*xout0, *xout2) to the next two round keys, in place. | ||||
| 	 * rcon is the round constant given to the first key-generation assist; soft selects | ||||
| 	 * soft_aeskeygenassist instead of the _mm_aeskeygenassist_si128 intrinsic. | ||||
| 	 * NOTE(review): this looks like one step of the AES-256 key schedule - confirm. | ||||
| 	 */ template<uint8_t rcon, bool soft> | ||||
| 	static inline void aesGenKeys(__m128i* xout0, __m128i* xout2) { | ||||
| 		__m128i xout1 = soft ? soft_aeskeygenassist(*xout2, rcon) : _mm_aeskeygenassist_si128(*xout2, rcon); | ||||
| 		xout1 = _mm_shuffle_epi32(xout1, 0xFF); /* broadcast 32-bit lane 3 to all lanes */ | ||||
| 		*xout0 = sl_xor(*xout0); | ||||
| 		*xout0 = _mm_xor_si128(*xout0, xout1); | ||||
| 		xout1 = soft ? soft_aeskeygenassist(*xout0, 0x00) : _mm_aeskeygenassist_si128(*xout0, 0x00); /* uses the freshly updated *xout0 */ | ||||
| 		xout1 = _mm_shuffle_epi32(xout1, 0xAA); /* broadcast 32-bit lane 2 to all lanes */ | ||||
| 		*xout2 = sl_xor(*xout2); | ||||
| 		*xout2 = _mm_xor_si128(*xout2, xout1); | ||||
| 	} | ||||
| 
 | ||||
| 	/* Expands a 32-byte seed into ten 128-bit round keys stored in keys[0..9]. | ||||
| 	 * keys[0] and keys[1] are the two 16-byte halves of the seed; each of the four aesGenKeys | ||||
| 	 * calls (round constants 0x01, 0x02, 0x04, 0x08) produces the next two keys. | ||||
| 	 * seed is read with _mm_load_si128, so it has to be 16-byte aligned and at least 32 bytes long. | ||||
| 	 * soft is forwarded to aesGenKeys to choose the software or the intrinsic key-generation assist. | ||||
| 	 */ template<bool soft> | ||||
| 	static inline void expandAesKeys(const __m128i* seed, __m128i* keys) { | ||||
| 		__m128i xout0, xout2; | ||||
| 		xout0 = _mm_load_si128(seed); /* aligned load of seed bytes 0..15 */ | ||||
| 		xout2 = _mm_load_si128(seed + 1); /* aligned load of seed bytes 16..31 */ | ||||
| 		*keys++ = xout0; | ||||
| 		*keys++ = xout2; | ||||
| 		aesGenKeys<0x01, soft>(&xout0, &xout2); | ||||
| 		*keys++ = xout0; | ||||
| 		*keys++ = xout2; | ||||
| 		aesGenKeys<0x02, soft>(&xout0, &xout2); | ||||
| 		*keys++ = xout0; | ||||
| 		*keys++ = xout2; | ||||
| 		aesGenKeys<0x04, soft>(&xout0, &xout2); | ||||
| 		*keys++ = xout0; | ||||
| 		*keys++ = xout2; | ||||
| 		aesGenKeys<0x08, soft>(&xout0, &xout2); | ||||
| 		*keys++ = xout0; | ||||
| 		*keys++ = xout2; | ||||
| 	} | ||||
| 
 | ||||
| 	/* Fills the member buffer 'memory' by running Argon2d over the seed. | ||||
| 	 *   seed     - bytes used as the Argon2 password. | ||||
| 	 *   seedSize - length of seed in bytes; narrowed to uint32_t for the Argon2 context. | ||||
| 	 * The salt, pass count, memory size and lane count come from ArgonSalt, ArgonIterations, | ||||
| 	 * ArgonMemorySize and ArgonLanes. No output tag is requested (out is null, outlen is 0). | ||||
| 	 */ void Cache::argonFill(const void* seed, size_t seedSize) { | ||||
| 		uint32_t memory_blocks, segment_length; | ||||
| 		argon2_instance_t instance; | ||||
| 		argon2_context context; | ||||
| 
 | ||||
| 		context.out = nullptr; | ||||
| 		context.outlen = 0; | ||||
| 		context.pwd = CONST_CAST(uint8_t *)seed; | ||||
| 		context.pwdlen = (uint32_t)seedSize; | ||||
| 		context.salt = CONST_CAST(uint8_t *)ArgonSalt; | ||||
| 		context.saltlen = (uint32_t)ArgonSaltSize; | ||||
| 		context.secret = NULL; | ||||
| 		context.secretlen = 0; | ||||
| 		context.ad = NULL; | ||||
| 		context.adlen = 0; | ||||
| 		context.t_cost = ArgonIterations; | ||||
| 		context.m_cost = ArgonMemorySize; | ||||
| 		context.lanes = ArgonLanes; | ||||
| 		context.threads = 1; | ||||
| 		context.allocate_cbk = NULL; | ||||
| 		context.free_cbk = NULL; | ||||
| 		context.flags = ARGON2_DEFAULT_FLAGS; | ||||
| 		context.version = ARGON2_VERSION_NUMBER; | ||||
| 
 | ||||
| 		/* 2. Memory size: m_cost is used as the block count as-is, no rounding is done here. */ | ||||
| 		/* The static_assert in this file guarantees ArgonMemorySize is a multiple of ArgonLanes * ARGON2_SYNC_POINTS, so the division below is exact. */ | ||||
| 		memory_blocks = context.m_cost; | ||||
| 
 | ||||
| 		segment_length = memory_blocks / (context.lanes * ARGON2_SYNC_POINTS); | ||||
| 
 | ||||
| 		instance.version = context.version; | ||||
| 		instance.memory = NULL; /* placeholder, overwritten a few lines below */ | ||||
| 		instance.passes = context.t_cost; | ||||
| 		instance.memory_blocks = memory_blocks; | ||||
| 		instance.segment_length = segment_length; | ||||
| 		instance.lane_length = segment_length * ARGON2_SYNC_POINTS; | ||||
| 		instance.lanes = context.lanes; | ||||
| 		instance.threads = context.threads; | ||||
| 		instance.type = Argon2_d; | ||||
| 		instance.memory = (block*)memory; /* Argon2 works directly in this object's buffer */ | ||||
| 
 | ||||
| 		if (instance.threads > instance.lanes) { | ||||
| 			instance.threads = instance.lanes; | ||||
| 		} | ||||
| 
 | ||||
| 		/* 3. Initialization: hashing the inputs and filling the first blocks | ||||
| 		 * (wording taken from the upstream Argon2 comment). | ||||
| 		 * The return code of argon_initialize is not checked here. | ||||
| 		 */ | ||||
| 		argon_initialize(&instance, &context); | ||||
| 
 | ||||
| 		fill_memory_blocks(&instance); | ||||
| 	} | ||||
| 
 | ||||
| 	/* Builds the cache contents and the AES round keys from seed. | ||||
| 	 *   seed     - input bytes; passed to argonFill and also read as 32 bytes of key material. | ||||
| 	 *              The key expansion uses aligned 16-byte loads, so seed must be 16-byte aligned. | ||||
| 	 *   seedSize - number of seed bytes given to argonFill; it is not checked against the | ||||
| 	 *              32 bytes that the key expansion reads. | ||||
| 	 *   softAes  - selects the software AES key-generation assist instead of the intrinsic. | ||||
| 	 */ template<bool softAes> | ||||
| 	void Cache::initialize(const void* seed, size_t seedSize) { | ||||
| 		//Argon2d memory fill of the member buffer 'memory'
 | ||||
| 		argonFill(seed, seedSize); | ||||
| 
 | ||||
| 		//Circular shift of the cache buffer by CacheShift bytes (512 according to the original note),
 | ||||
| 		//realized by copying the first CacheShift bytes behind the CacheSize-byte region;
 | ||||
| 		//getCache() applies the shifted start by returning memory + CacheShift
 | ||||
| 		memcpy(memory + CacheSize, memory, CacheShift); | ||||
| 
 | ||||
| 		//AES keys: ten round keys expanded from the first 32 bytes of seed
 | ||||
| 		expandAesKeys<softAes>((__m128i*)seed, keys.data()); | ||||
| 	} | ||||
| 
 | ||||
| 	template void Cache::initialize<true>(const void*, size_t); | ||||
| 
 | ||||
| 	template void Cache::initialize<false>(const void*, size_t); | ||||
| } | ||||
							
								
								
									
										57
									
								
								src/Cache.hpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										57
									
								
								src/Cache.hpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,57 @@ | |||
| /*
 | ||||
| Copyright (c) 2018 tevador | ||||
| 
 | ||||
| This file is part of RandomX. | ||||
| 
 | ||||
| RandomX is free software: you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation, either version 3 of the License, or | ||||
| (at your option) any later version. | ||||
| 
 | ||||
| RandomX is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
| GNU General Public License for more details. | ||||
| 
 | ||||
| You should have received a copy of the GNU General Public License | ||||
| along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 | ||||
| */ | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include <cstdint> | ||||
| #include <new> | ||||
| #include "common.hpp" | ||||
| #include "dataset.hpp" | ||||
| 
 | ||||
| namespace RandomX { | ||||
| 
 | ||||
| 	/* Holds the cache buffer together with the AES round keys derived from the same seed. | ||||
| 	 * The buffer is a member array of CacheSize + CacheShift bytes, so the object itself is | ||||
| 	 * as large as the cache. The class-specific operator new/delete below allocate it with | ||||
| 	 * _mm_malloc/_mm_free at sizeof(__m128i) alignment. | ||||
| 	 */ class Cache { | ||||
| 	public: | ||||
| 		/* Allocates storage aligned to sizeof(__m128i); throws std::bad_alloc when _mm_malloc fails. */ void* operator new(size_t size) { | ||||
| 			void* ptr = _mm_malloc(size, sizeof(__m128i)); | ||||
| 			if (ptr == nullptr) | ||||
| 				throw std::bad_alloc(); | ||||
| 			return ptr; | ||||
| 		} | ||||
| 
 | ||||
| 		/* Releases storage obtained from the operator new above. */ void operator delete(void* ptr) { | ||||
| 			_mm_free(ptr); | ||||
| 		} | ||||
| 
 | ||||
| 		/* Fills the buffer and the keys from seed; softAes selects software AES key generation. Defined in Cache.cpp. */ template<bool softAes> | ||||
| 		void initialize(const void* seed, size_t seedSize); | ||||
| 
 | ||||
| 		/* Read-only access to the round-key container. */ const KeysContainer& getKeys() const { | ||||
| 			return keys; | ||||
| 		} | ||||
| 
 | ||||
| 		/* Start of the cache data, i.e. the buffer offset by CacheShift bytes. Not a const member function. */ const uint8_t* getCache() { | ||||
| 			return memory + CacheShift; | ||||
| 		} | ||||
| 	private: | ||||
| 		alignas(16) KeysContainer keys; /* 16-byte aligned round keys */ | ||||
| 		uint8_t memory[CacheSize + CacheShift]; /* cache data plus CacheShift extra bytes at the end */ | ||||
| 		void argonFill(const void* seed, size_t seedSize); /* Argon2d fill of 'memory'; defined in Cache.cpp */ | ||||
| 	}; | ||||
| } | ||||
|  | @ -31,11 +31,11 @@ namespace RandomX { | |||
| #endif | ||||
| 	} | ||||
| 
 | ||||
| 	void CompiledVirtualMachine::initializeDataset(const void* seed, bool lightClient) { | ||||
| 	void CompiledVirtualMachine::setDataset(dataset_t ds, bool lightClient) { | ||||
| 		if (lightClient) { | ||||
| 			throw std::runtime_error("Compiled VM does not support light-client mode"); | ||||
| 		} | ||||
| 		VirtualMachine::initializeDataset(seed, lightClient); | ||||
| 		VirtualMachine::setDataset(ds, lightClient); | ||||
| 	} | ||||
| 
 | ||||
| 	void CompiledVirtualMachine::initializeProgram(const void* seed) { | ||||
|  |  | |||
|  | @ -27,7 +27,7 @@ namespace RandomX { | |||
| 	class CompiledVirtualMachine : public VirtualMachine { | ||||
| 	public: | ||||
| 		CompiledVirtualMachine(bool softAes); | ||||
| 		void initializeDataset(const void* seed, bool light = false) override; | ||||
| 		void setDataset(dataset_t ds, bool light = false) override; | ||||
| 		void initializeProgram(const void* seed) override; | ||||
| 		virtual void execute() override; | ||||
| 		void* getProgram() { | ||||
|  |  | |||
|  | @ -20,58 +20,65 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| #include "VirtualMachine.hpp" | ||||
| #include "common.hpp" | ||||
| #include "dataset.hpp" | ||||
| #include "Cache.hpp" | ||||
| #include "t1ha/t1ha.h" | ||||
| #include "blake2/blake2.h" | ||||
| #include <cstring> | ||||
| 
 | ||||
| namespace RandomX { | ||||
| 	VirtualMachine::VirtualMachine(bool softAes) : softAes(softAes), lightClient(false) { | ||||
| 		mem.dataset = nullptr; | ||||
| 		mem.ds.dataset = nullptr; | ||||
| 	} | ||||
| 
 | ||||
| 	void VirtualMachine::initializeDataset(const void* seed, bool light) { | ||||
| 	VirtualMachine::~VirtualMachine() { | ||||
| 		if (lightClient) { | ||||
| 			_mm_free(mem.lcm->cache); | ||||
| 			_mm_free(mem.lcm->block); | ||||
| 			delete mem.ds.lightDataset->block; | ||||
| 			delete mem.ds.lightDataset; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	void VirtualMachine::setDataset(dataset_t ds, bool light) { | ||||
| 		if (mem.ds.dataset != nullptr) { | ||||
| 			throw std::runtime_error("Dataset is already initialized"); | ||||
| 		} | ||||
| 		_mm_free(mem.dataset); | ||||
| 		lightClient = light; | ||||
| 		if (light) { | ||||
| 			auto lds = mem.ds.lightDataset = new LightClientDataset(); | ||||
| 			lds->cache = ds.cache; | ||||
| 			lds->block = (uint8_t*)_mm_malloc(DatasetBlockSize, sizeof(__m128i)); | ||||
| 			lds->blockNumber = -1; | ||||
| 			if (lds->block == nullptr) { | ||||
| 				throw std::bad_alloc(); | ||||
| 			} | ||||
| 			if (softAes) { | ||||
| 				datasetInitLight<true>(seed, mem.lcm); | ||||
| 				readDataset = &datasetReadLight<true>; | ||||
| 			} | ||||
| 			else { | ||||
| 				datasetInitLight<false>(seed, mem.lcm); | ||||
| 				readDataset = &datasetReadLight<false>; | ||||
| 			} | ||||
| 		} | ||||
| 		else { | ||||
| 			mem.ds = ds; | ||||
| 			readDataset = &datasetRead; | ||||
| 			if (softAes) { | ||||
| 				datasetInit<true>(seed, mem.dataset); | ||||
| 			} | ||||
| 			else { | ||||
| 				datasetInit<false>(seed, mem.dataset); | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	void VirtualMachine::initializeScratchpad(uint32_t index) { | ||||
| 		if (lightClient) { | ||||
| 			auto cache = mem.ds.lightDataset->cache; | ||||
| 			if (softAes) { | ||||
| 				for (int i = 0; i < ScratchpadSize / DatasetBlockSize; ++i) { | ||||
| 					initBlock<true>(mem.lcm->cache + CacheShift, ((uint8_t*)scratchpad) + DatasetBlockSize * i, (ScratchpadSize / DatasetBlockSize) * index + i, mem.lcm->keys); | ||||
| 					initBlock<true>(cache->getCache(), ((uint8_t*)scratchpad) + DatasetBlockSize * i, (ScratchpadSize / DatasetBlockSize) * index + i, cache->getKeys()); | ||||
| 				} | ||||
| 			} | ||||
| 			else { | ||||
| 				for (int i = 0; i < ScratchpadSize / DatasetBlockSize; ++i) { | ||||
| 					initBlock<false>(mem.lcm->cache + CacheShift, ((uint8_t*)scratchpad) + DatasetBlockSize * i, (ScratchpadSize / DatasetBlockSize) * index + i, mem.lcm->keys); | ||||
| 					initBlock<false>(cache->getCache(), ((uint8_t*)scratchpad) + DatasetBlockSize * i, (ScratchpadSize / DatasetBlockSize) * index + i, cache->getKeys()); | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
| 		else { | ||||
| 			memcpy(scratchpad, mem.dataset + ScratchpadSize * index, ScratchpadSize); | ||||
| 			memcpy(scratchpad, mem.ds.dataset + ScratchpadSize * index, ScratchpadSize); | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
|  |  | |||
|  | @ -26,30 +26,12 @@ namespace RandomX { | |||
| 	class VirtualMachine { | ||||
| 	public: | ||||
| 		VirtualMachine(bool softAes); | ||||
| 		virtual ~VirtualMachine() {} | ||||
| 		virtual void initializeDataset(const void* seed, bool light = false); | ||||
| 		virtual ~VirtualMachine(); | ||||
| 		virtual void setDataset(dataset_t ds, bool light = false); | ||||
| 		void initializeScratchpad(uint32_t index); | ||||
| 		virtual void initializeProgram(const void* seed) = 0; | ||||
| 		virtual void execute() = 0; | ||||
| 		void getResult(void*); | ||||
| 		const RegisterFile& getRegisterFile() const { | ||||
| 			return reg; | ||||
| 		} | ||||
| 		const convertible_t* getScratchpad() const { | ||||
| 			return scratchpad; | ||||
| 		} | ||||
| 		const void* getCache() { | ||||
| 			if (lightClient) { | ||||
| 				return mem.lcm->cache; | ||||
| 			} | ||||
| 			return nullptr; | ||||
| 		} | ||||
| 		const __m128i* getKeys() { | ||||
| 			if (lightClient) { | ||||
| 				return mem.lcm->keys; | ||||
| 			} | ||||
| 			return nullptr; | ||||
| 		} | ||||
| 	protected: | ||||
| 		bool softAes, lightClient; | ||||
| 		RegisterFile reg; | ||||
|  |  | |||
|  | @ -473,7 +473,7 @@ void initial_hash(uint8_t *blockhash, argon2_context *context, argon2_type type) | |||
| 	blake2b_final(&BlakeHash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH); | ||||
| } | ||||
| 
 | ||||
| int initialize(argon2_instance_t *instance, argon2_context *context) { | ||||
| int argon_initialize(argon2_instance_t *instance, argon2_context *context) { | ||||
| 	uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH]; | ||||
| 	int result = ARGON2_OK; | ||||
| 
 | ||||
|  |  | |||
|  | @ -204,7 +204,7 @@ void fill_first_blocks(uint8_t *blockhash, const argon2_instance_t *instance); | |||
|  * @return Zero if successful, -1 if memory failed to allocate. @context->state | ||||
|  * will be modified if successful. | ||||
|  */ | ||||
| int initialize(argon2_instance_t *instance, argon2_context *context); | ||||
| int argon_initialize(argon2_instance_t *instance, argon2_context *context); | ||||
| 
 | ||||
| /*
 | ||||
|  * XORing the last block of each lane, hashing it, making the tag. Deallocates | ||||
|  |  | |||
|  | @ -20,8 +20,6 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| #pragma once | ||||
| 
 | ||||
| #include <cstdint> | ||||
| #include <new> | ||||
| #include "intrinPortable.h" | ||||
| 
 | ||||
| namespace RandomX { | ||||
| 
 | ||||
|  | @ -55,13 +53,13 @@ namespace RandomX { | |||
| 	constexpr bool trace = false; | ||||
| #endif | ||||
| 
 | ||||
| 	typedef union { | ||||
| 	union convertible_t { | ||||
| 		double f64; | ||||
| 		int64_t i64; | ||||
| 		uint64_t u64; | ||||
| 		int32_t i32; | ||||
| 		uint32_t u32; | ||||
| 	} convertible_t; | ||||
| 	}; | ||||
| 
 | ||||
| 	constexpr int ProgramLength = 512; | ||||
| 	constexpr int InstructionCount = 1024 * 1024; | ||||
|  | @ -71,34 +69,27 @@ namespace RandomX { | |||
| 	constexpr uint32_t ScratchpadL2 = ScratchpadSize / sizeof(convertible_t); | ||||
| 	constexpr int RegistersCount = 8; | ||||
| 
 | ||||
| 	class Cache; | ||||
| 
 | ||||
| 	inline int wrapInstr(int i) { | ||||
| 		return i % RandomX::ProgramLength; | ||||
| 	} | ||||
| 
 | ||||
| 	struct LightClientMemory { | ||||
| 		uint8_t* cache; | ||||
| 	struct LightClientDataset { | ||||
| 		Cache* cache; | ||||
| 		uint8_t* block; | ||||
| 		uint32_t blockNumber; | ||||
| 		alignas(16) __m128i keys[10]; | ||||
| 	}; | ||||
| 
 | ||||
| 		void* operator new(size_t size) { | ||||
| 			void* ptr = _mm_malloc(size, sizeof(__m128i)); | ||||
| 			if (ptr == nullptr) | ||||
| 				throw std::bad_alloc(); | ||||
| 			return ptr; | ||||
| 		} | ||||
| 
 | ||||
| 		void operator delete(void* ptr) { | ||||
| 			_mm_free(ptr); | ||||
| 		} | ||||
| 	union dataset_t { | ||||
| 		uint8_t* dataset; | ||||
| 		Cache* cache; | ||||
| 		LightClientDataset* lightDataset; | ||||
| 	}; | ||||
| 
 | ||||
| 	struct MemoryRegisters { | ||||
| 		addr_t ma, mx; | ||||
| 		union { | ||||
| 			uint8_t* dataset; | ||||
| 			LightClientMemory* lcm; | ||||
| 		}; | ||||
| 		dataset_t ds; | ||||
| 	}; | ||||
| 
 | ||||
| 	static_assert(sizeof(MemoryRegisters) == 2 * sizeof(addr_t) + sizeof(uintptr_t), "Invalid alignment of struct RandomX::MemoryRegisters"); | ||||
|  |  | |||
							
								
								
									
										216
									
								
								src/dataset.cpp
									
										
									
									
									
								
							
							
						
						
									
										216
									
								
								src/dataset.cpp
									
										
									
									
									
								
							|  | @ -19,135 +19,25 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| 
 | ||||
| // Parts of this file are originally copyright (c) xmr-stak
 | ||||
| 
 | ||||
| #include "common.hpp" | ||||
| #include "dataset.hpp" | ||||
| #include "Pcg32.hpp" | ||||
| #include "argon2_core.h" | ||||
| #include <new> | ||||
| #include <algorithm> | ||||
| #include <stdexcept> | ||||
| #include <cstring> | ||||
| 
 | ||||
| #if defined(_MSC_VER) | ||||
| #if defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP == 2) | ||||
| #define __SSE2__ 1 | ||||
| #endif | ||||
| #endif | ||||
| #include "common.hpp" | ||||
| #include "dataset.hpp" | ||||
| #include "Pcg32.hpp" | ||||
| #include "Cache.hpp" | ||||
| 
 | ||||
| #if defined(__SSE2__) | ||||
| #include <wmmintrin.h> | ||||
| #define PREFETCH(memory) _mm_prefetch((const char *)((memory).dataset + (memory).ma), _MM_HINT_T0) | ||||
| #define PREFETCH(memory) _mm_prefetch((const char *)((memory).ds.dataset + (memory).ma), _MM_HINT_T0) | ||||
| #else | ||||
| #define PREFETCH(memory) | ||||
| #endif | ||||
| 
 | ||||
| namespace RandomX { | ||||
| 
 | ||||
| 	void initializeCache(const void* input, size_t inputLength, void* memory) { | ||||
| 		uint32_t memory_blocks, segment_length; | ||||
| 		argon2_instance_t instance; | ||||
| 		argon2_context context; | ||||
| 
 | ||||
| 		context.out = nullptr; | ||||
| 		context.outlen = 0; | ||||
| 		context.pwd = CONST_CAST(uint8_t *)input; | ||||
| 		context.pwdlen = (uint32_t)inputLength; | ||||
| 		context.salt = CONST_CAST(uint8_t *)ArgonSalt; | ||||
| 		context.saltlen = (uint32_t)ArgonSaltSize; | ||||
| 		context.secret = NULL; | ||||
| 		context.secretlen = 0; | ||||
| 		context.ad = NULL; | ||||
| 		context.adlen = 0; | ||||
| 		context.t_cost = ArgonIterations; | ||||
| 		context.m_cost = ArgonMemorySize; | ||||
| 		context.lanes = ArgonLanes; | ||||
| 		context.threads = 1; | ||||
| 		context.allocate_cbk = NULL; | ||||
| 		context.free_cbk = NULL; | ||||
| 		context.flags = ARGON2_DEFAULT_FLAGS; | ||||
| 		context.version = ARGON2_VERSION_NUMBER; | ||||
| 
 | ||||
| 		/* 2. Align memory size */ | ||||
| 		/* Minimum memory_blocks = 8L blocks, where L is the number of lanes */ | ||||
| 		memory_blocks = context.m_cost; | ||||
| 
 | ||||
| 		segment_length = memory_blocks / (context.lanes * ARGON2_SYNC_POINTS); | ||||
| 
 | ||||
| 		instance.version = context.version; | ||||
| 		instance.memory = NULL; | ||||
| 		instance.passes = context.t_cost; | ||||
| 		instance.memory_blocks = memory_blocks; | ||||
| 		instance.segment_length = segment_length; | ||||
| 		instance.lane_length = segment_length * ARGON2_SYNC_POINTS; | ||||
| 		instance.lanes = context.lanes; | ||||
| 		instance.threads = context.threads; | ||||
| 		instance.type = Argon2_d; | ||||
| 		instance.memory = (block*)memory; | ||||
| 
 | ||||
| 		if (instance.threads > instance.lanes) { | ||||
| 			instance.threads = instance.lanes; | ||||
| 		} | ||||
| 
 | ||||
| 		/* 3. Initialization: Hashing inputs, allocating memory, filling first
 | ||||
| 		 * blocks | ||||
| 		 */ | ||||
| 		initialize(&instance, &context); | ||||
| 
 | ||||
| 		fill_memory_blocks(&instance); | ||||
| 	} | ||||
| 
 | ||||
| 	// This will shift and xor tmp1 into itself as 4 32-bit vals such as
 | ||||
| 	// sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1)
 | ||||
| 	static inline __m128i sl_xor(__m128i tmp1) { | ||||
| 		__m128i tmp4; | ||||
| 		tmp4 = _mm_slli_si128(tmp1, 0x04); | ||||
| 		tmp1 = _mm_xor_si128(tmp1, tmp4); | ||||
| 		tmp4 = _mm_slli_si128(tmp4, 0x04); | ||||
| 		tmp1 = _mm_xor_si128(tmp1, tmp4); | ||||
| 		tmp4 = _mm_slli_si128(tmp4, 0x04); | ||||
| 		tmp1 = _mm_xor_si128(tmp1, tmp4); | ||||
| 		return tmp1; | ||||
| 	} | ||||
| 
 | ||||
| 	template<uint8_t rcon, bool soft> | ||||
| 	static inline void aes_genkey_sub(__m128i* xout0, __m128i* xout2) { | ||||
| 		__m128i xout1 = soft ? soft_aeskeygenassist(*xout2, rcon) : _mm_aeskeygenassist_si128(*xout2, rcon); | ||||
| 		xout1 = _mm_shuffle_epi32(xout1, 0xFF); | ||||
| 		*xout0 = sl_xor(*xout0); | ||||
| 		*xout0 = _mm_xor_si128(*xout0, xout1); | ||||
| 		xout1 = soft ? soft_aeskeygenassist(*xout0, 0x00) : _mm_aeskeygenassist_si128(*xout0, 0x00); | ||||
| 		xout1 = _mm_shuffle_epi32(xout1, 0xAA); | ||||
| 		*xout2 = sl_xor(*xout2); | ||||
| 		*xout2 = _mm_xor_si128(*xout2, xout1); | ||||
| 	} | ||||
| 
 | ||||
| 	template<bool soft> | ||||
| 	void expandAesKeys(const __m128i* seed, __m128i* keys) { | ||||
| 		__m128i xout0, xout2; | ||||
| 		xout0 = _mm_load_si128(seed); | ||||
| 		xout2 = _mm_load_si128(seed + 1); | ||||
| 		*keys++ = xout0; | ||||
| 		*keys++ = xout2; | ||||
| 		aes_genkey_sub<0x01, soft>(&xout0, &xout2); | ||||
| 		*keys++ = xout0; | ||||
| 		*keys++ = xout2; | ||||
| 		aes_genkey_sub<0x02, soft>(&xout0, &xout2); | ||||
| 		*keys++ = xout0; | ||||
| 		*keys++ = xout2; | ||||
| 		aes_genkey_sub<0x04, soft>(&xout0, &xout2); | ||||
| 		*keys++ = xout0; | ||||
| 		*keys++ = xout2; | ||||
| 		aes_genkey_sub<0x08, soft>(&xout0, &xout2); | ||||
| 		*keys++ = xout0; | ||||
| 		*keys++ = xout2; | ||||
| 	} | ||||
| 
 | ||||
| 	template | ||||
| 		void expandAesKeys<true>(const __m128i* seed, __m128i* keys); | ||||
| 
 | ||||
| 	template | ||||
| 		void expandAesKeys<false>(const __m128i* seed, __m128i* keys); | ||||
| 
 | ||||
| 	template<typename T> | ||||
| 	static inline void shuffle(T* buffer, size_t bytes, Pcg32& gen) { | ||||
| 		auto count = bytes / sizeof(T); | ||||
|  | @ -157,8 +47,18 @@ namespace RandomX { | |||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	/* AES encryption round of 'in' with round key 'key'. | ||||
| 	 * soft == true uses soft_aesenc, otherwise the _mm_aesenc_si128 intrinsic. | ||||
| 	 */ template<bool soft> | ||||
| 	static inline __m128i aesenc(__m128i in, __m128i key) { | ||||
| 		return soft ? soft_aesenc(in, key) : _mm_aesenc_si128(in, key); | ||||
| 	} | ||||
| 
 | ||||
| 	/* AES decryption round of 'in' with round key 'key'. | ||||
| 	 * soft == true uses soft_aesdec, otherwise the _mm_aesdec_si128 intrinsic. | ||||
| 	 */ template<bool soft> | ||||
| 	static inline __m128i aesdec(__m128i in, __m128i key) { | ||||
| 		return soft ? soft_aesdec(in, key) : _mm_aesdec_si128(in, key); | ||||
| 	} | ||||
| 
 | ||||
| 	template<bool soft, bool enc> | ||||
| 	void initBlock(uint8_t* in, uint8_t* out, uint32_t blockNumber, const __m128i keys[10]) { | ||||
| 	void initBlock(const uint8_t* in, uint8_t* out, uint32_t blockNumber, const KeysContainer& keys) { | ||||
| 		__m128i xin, xout; | ||||
| 		//Initialization vector = block number extended to 128 bits
 | ||||
| 		xout = _mm_cvtsi32_si128(blockNumber); | ||||
|  | @ -200,20 +100,20 @@ namespace RandomX { | |||
| 	} | ||||
| 
 | ||||
| 	template | ||||
| 		void initBlock<true, true>(uint8_t* in, uint8_t* out, uint32_t blockNumber, const __m128i keys[10]); | ||||
| 		void initBlock<true, true>(const uint8_t*, uint8_t*, uint32_t, const KeysContainer&); | ||||
| 
 | ||||
| 	template | ||||
| 		void initBlock<true, false>(uint8_t* in, uint8_t* out, uint32_t blockNumber, const __m128i keys[10]); | ||||
| 		void initBlock<true, false>(const uint8_t*, uint8_t*, uint32_t, const KeysContainer&); | ||||
| 
 | ||||
| 	template | ||||
| 		void initBlock<false, true>(uint8_t* in, uint8_t* out, uint32_t blockNumber, const __m128i keys[10]); | ||||
| 		void initBlock<false, true>(const uint8_t*, uint8_t*, uint32_t, const KeysContainer&); | ||||
| 
 | ||||
| 	template | ||||
| 		void initBlock<false, false>(uint8_t* in, uint8_t* out, uint32_t blockNumber, const __m128i keys[10]); | ||||
| 		void initBlock<false, false>(const uint8_t*, uint8_t*, uint32_t, const KeysContainer&); | ||||
| 
 | ||||
| 	convertible_t datasetRead(addr_t addr, MemoryRegisters& memory) { | ||||
| 		convertible_t data; | ||||
| 		data.u64 = *(uint64_t*)(memory.dataset + memory.ma); | ||||
| 		data.u64 = *(uint64_t*)(memory.ds.dataset + memory.ma); | ||||
| 		memory.ma += 8; | ||||
| 		memory.mx ^= addr; | ||||
| 		if ((memory.mx & 0xFFF8) == 0) { | ||||
|  | @ -224,24 +124,25 @@ namespace RandomX { | |||
| 	} | ||||
| 
 | ||||
| 	template<bool softAes> | ||||
| 	void initBlock(uint8_t* cache, uint8_t* block, uint32_t blockNumber, const __m128i k[10]) { | ||||
| 	void initBlock(const uint8_t* cache, uint8_t* block, uint32_t blockNumber, const KeysContainer& keys) { | ||||
| 		if (blockNumber % 2 == 1) { | ||||
| 			initBlock<softAes, true>(cache + blockNumber * CacheBlockSize, block, blockNumber, k); | ||||
| 			initBlock<softAes, true>(cache + blockNumber * CacheBlockSize, block, blockNumber, keys); | ||||
| 		} | ||||
| 		else { | ||||
| 			initBlock<softAes, false>(cache + blockNumber * CacheBlockSize, block, blockNumber, k); | ||||
| 			initBlock<softAes, false>(cache + blockNumber * CacheBlockSize, block, blockNumber, keys); | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	template<bool softAes> | ||||
| 	convertible_t datasetReadLight(addr_t addr, MemoryRegisters& memory) { | ||||
| 		convertible_t data; | ||||
| 		LightClientDataset* lds = memory.ds.lightDataset; | ||||
| 		auto blockNumber = memory.ma / DatasetBlockSize; | ||||
| 		if (memory.lcm->blockNumber != blockNumber) { | ||||
| 			initBlock<softAes>(memory.lcm->cache + CacheShift, (uint8_t*)memory.lcm->block, blockNumber, memory.lcm->keys); | ||||
| 			memory.lcm->blockNumber = blockNumber; | ||||
| 		if (lds->blockNumber != blockNumber) { | ||||
| 			initBlock<softAes>(lds->cache->getCache(), (uint8_t*)lds->block, blockNumber, lds->cache->getKeys()); | ||||
| 			lds->blockNumber = blockNumber; | ||||
| 		} | ||||
| 		data.u64 = *(uint64_t*)(memory.lcm->block + (memory.ma % DatasetBlockSize)); | ||||
| 		data.u64 = *(uint64_t*)(lds->block + (memory.ma % DatasetBlockSize)); | ||||
| 		memory.ma += 8; | ||||
| 		memory.mx ^= addr; | ||||
| 		if ((memory.mx & 0xFFF8) == 0) { | ||||
|  | @ -256,54 +157,37 @@ namespace RandomX { | |||
| 	template | ||||
| 		convertible_t datasetReadLight<true>(addr_t addr, MemoryRegisters& memory); | ||||
| 
 | ||||
| 	template<bool softAes> | ||||
| 	void datasetInit(const void* seed, uint8_t*& dataset) { | ||||
| 	void datasetAlloc(dataset_t& ds) { | ||||
| 		if (sizeof(size_t) <= 4) | ||||
| 			throw std::runtime_error("Platform doesn't support enough memory for the dataset"); | ||||
| 		dataset = (uint8_t*)_mm_malloc(DatasetSize, sizeof(__m128i)); | ||||
| 		if (dataset == nullptr) { | ||||
| 			throw std::runtime_error("Dataset memory allocation failed. >4 GiB of virtual memory is needed."); | ||||
| 		ds.dataset = (uint8_t*)_mm_malloc(DatasetSize, /*sizeof(__m128i)*/ 64); | ||||
| 		if (ds.dataset == nullptr) { | ||||
| 			throw std::runtime_error("Dataset memory allocation failed. >4 GiB of free virtual memory is needed."); | ||||
| 		} | ||||
| 		uint8_t* cache = (uint8_t*)_mm_malloc(CacheSize + CacheShift, sizeof(__m128i)); | ||||
| 		if (cache == nullptr) { | ||||
| 			throw std::bad_alloc(); | ||||
| 		} | ||||
| 		initializeCache(seed, SeedSize, cache); | ||||
| 		memcpy(cache + CacheSize, cache, CacheShift); | ||||
| 		alignas(16) __m128i keys[10]; | ||||
| 		expandAesKeys<softAes>((const __m128i*)seed, keys); | ||||
| 		for (uint32_t i = 0; i < DatasetBlockCount; ++i) { | ||||
| 			initBlock<softAes>(cache + CacheShift, dataset + i * DatasetBlockSize, i, keys); | ||||
| 		} | ||||
| 		_mm_free(cache); | ||||
| 	} | ||||
| 
 | ||||
| 	template | ||||
| 		void datasetInit<false>(const void*, uint8_t*&); | ||||
| 
 | ||||
| 	template | ||||
| 		void datasetInit<true>(const void*, uint8_t*&); | ||||
| 
 | ||||
| 	template<bool softAes> | ||||
| 	void datasetInitLight(const void* seed, LightClientMemory*& lcm) { | ||||
| 		lcm = new LightClientMemory(); | ||||
| 		lcm->cache = (uint8_t*)_mm_malloc(CacheSize + CacheShift, sizeof(__m128i)); | ||||
| 		if (lcm->cache == nullptr) { | ||||
| 			throw std::bad_alloc(); | ||||
| 	void datasetInit(Cache* cache, dataset_t ds, uint32_t startBlock, uint32_t blockCount) { | ||||
| 		for (uint32_t i = startBlock; i < startBlock + blockCount; ++i) { | ||||
| 			initBlock<softAes>(cache->getCache(), ds.dataset + i * DatasetBlockSize, i, cache->getKeys()); | ||||
| 		} | ||||
| 		initializeCache(seed, SeedSize, lcm->cache); | ||||
| 		memcpy(lcm->cache + CacheSize, lcm->cache, CacheShift); | ||||
| 		expandAesKeys<softAes>((__m128i*)seed, lcm->keys); | ||||
| 		lcm->block = (uint8_t*)_mm_malloc(DatasetBlockSize, sizeof(__m128i)); | ||||
| 		if (lcm->block == nullptr) { | ||||
| 			throw std::bad_alloc(); | ||||
| 		} | ||||
| 		lcm->blockNumber = -1; | ||||
| 	} | ||||
| 
 | ||||
| 	template | ||||
| 		void datasetInitLight<false>(const void*, LightClientMemory*&); | ||||
| 		void datasetInit<false>(Cache*, dataset_t, uint32_t, uint32_t); | ||||
| 
 | ||||
| 	template | ||||
| 		void datasetInitLight<true>(const void*, LightClientMemory*&); | ||||
| 		void datasetInit<true>(Cache*, dataset_t, uint32_t, uint32_t); | ||||
| 
 | ||||
| 	template<bool softAes> | ||||
| 	void datasetInitCache(const void* seed, dataset_t& ds) { | ||||
| 		ds.cache = new Cache(); | ||||
| 		ds.cache->initialize<softAes>(seed, SeedSize); | ||||
| 	} | ||||
| 
 | ||||
| 	template | ||||
| 		void datasetInitCache<false>(const void*, dataset_t&); | ||||
| 
 | ||||
| 	template | ||||
| 		void datasetInitCache<true>(const void*, dataset_t&); | ||||
| } | ||||
|  |  | |||
|  | @ -20,43 +20,30 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| #pragma once | ||||
| 
 | ||||
| #include <cstdint> | ||||
| #include <array> | ||||
| #include "intrinPortable.h" | ||||
| #include "argon2.h" | ||||
| #include "common.hpp" | ||||
| #include "softAes.h" | ||||
| 
 | ||||
| namespace RandomX { | ||||
| 
 | ||||
| 	static_assert(ArgonMemorySize % (ArgonLanes * ARGON2_SYNC_POINTS) == 0, "ArgonMemorySize - invalid value"); | ||||
| 
 | ||||
| 	void initializeCache(const void* input, size_t inputLength, void* memory); | ||||
| 
 | ||||
| 	template<bool soft> | ||||
| 	void expandAesKeys(const __m128i* seed, __m128i* keys); | ||||
| 
 | ||||
| 	template<bool soft> | ||||
| 	inline __m128i aesenc(__m128i in, __m128i key) { | ||||
| 		return soft ? soft_aesenc(in, key) : _mm_aesenc_si128(in, key); | ||||
| 	} | ||||
| 
 | ||||
| 	template<bool soft> | ||||
| 	inline __m128i aesdec(__m128i in, __m128i key) { | ||||
| 		return soft ? soft_aesdec(in, key) : _mm_aesdec_si128(in, key); | ||||
| 	} | ||||
| 	using KeysContainer = std::array<__m128i, 10>; | ||||
| 
 | ||||
| 	template<bool soft, bool enc> | ||||
| 	void initBlock(uint8_t* in, uint8_t* out, uint32_t blockNumber, const __m128i keys[10]); | ||||
| 	void initBlock(const uint8_t* in, uint8_t* out, uint32_t blockNumber, const KeysContainer& keys); | ||||
| 
 | ||||
| 	template<bool softAes> | ||||
| 	void initBlock(uint8_t* cache, uint8_t* block, uint32_t blockNumber, const __m128i keys[10]); | ||||
| 	void initBlock(const uint8_t* cache, uint8_t* block, uint32_t blockNumber, const KeysContainer& keys); | ||||
| 
 | ||||
| 	void datasetAlloc(dataset_t& ds); | ||||
| 
 | ||||
| 	template<bool softAes> | ||||
| 	void datasetInit(const void* seed, uint8_t*& dataset); | ||||
| 	void datasetInit(Cache* cache, dataset_t ds, uint32_t startBlock, uint32_t blockCount); | ||||
| 
 | ||||
| 	convertible_t datasetRead(addr_t addr, MemoryRegisters& memory); | ||||
| 
 | ||||
| 	template<bool softAes> | ||||
| 	void datasetInitLight(const void* seed, LightClientMemory*& lcm); | ||||
| 	void datasetInitCache(const void* seed, dataset_t& dataset); | ||||
| 
 | ||||
| 	template<bool softAes> | ||||
| 	convertible_t datasetReadLight(addr_t addr, MemoryRegisters& memory); | ||||
|  |  | |||
							
								
								
									
										177
									
								
								src/main.cpp
									
										
									
									
									
								
							
							
						
						
									
										177
									
								
								src/main.cpp
									
										
									
									
									
								
							|  | @ -30,6 +30,10 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| #include "Program.hpp" | ||||
| #include <string> | ||||
| #include "instructions.hpp" | ||||
| #include <thread> | ||||
| #include <atomic> | ||||
| #include "dataset.hpp" | ||||
| #include "Cache.hpp" | ||||
| 
 | ||||
| const uint8_t seed[32] = { 191, 182, 222, 175, 249, 89, 134, 104, 241, 68, 191, 62, 162, 166, 61, 64, 123, 191, 227, 193, 118, 60, 188, 53, 223, 133, 175, 24, 123, 230, 55, 74 }; | ||||
| 
 | ||||
|  | @ -45,7 +49,6 @@ void outputHex(std::ostream& os, const char* data, int length) { | |||
| 		os << hexmap[(data[i] & 0xF0) >> 4]; | ||||
| 		os << hexmap[data[i] & 0x0F]; | ||||
| 	} | ||||
| 	os << std::endl; | ||||
| } | ||||
| 
 | ||||
| void readOption(const char* option, int argc, char** argv, bool& out) { | ||||
|  | @ -58,6 +61,15 @@ void readOption(const char* option, int argc, char** argv, bool& out) { | |||
| 	out = false; | ||||
| } | ||||
| 
 | ||||
/// Reads the integer value that follows `option` on the command line.
///
/// Scans argv for an element equal to `option` and parses the element right
/// after it as a base-10 number. The first occurrence whose value is strictly
/// positive and fits in an `int` wins. If there is no such occurrence (option
/// absent, option is the last argument, value is zero/negative/non-numeric or
/// too large), `out` is set to `defaultValue`.
///
/// @param option        option name to look for, e.g. "--threads".
/// @param argc          number of entries in argv.
/// @param argv          argument vector.
/// @param out           receives the parsed value or `defaultValue`.
/// @param defaultValue  fallback when no usable value is found.
void readIntOption(const char* option, int argc, char** argv, int& out, int defaultValue) {
	// Stop one short of argc: an option in the last position has no value after it.
	for (int i = 0; i < argc - 1; ++i) {
		if (strcmp(argv[i], option) != 0)
			continue;
		// strtoll rather than atoi: atoi is undefined when the number does not
		// fit in int, while strtoll saturates and lets us range-check the result.
		const long long parsed = strtoll(argv[i + 1], nullptr, 10);
		const int narrowed = static_cast<int>(parsed);
		// The round-trip comparison rejects anything that does not fit in int.
		if (parsed > 0 && narrowed == parsed) {
			out = narrowed;
			return;
		}
	}
	out = defaultValue;
}
| 
 | ||||
| void readInt(int argc, char** argv, int& out, int defaultValue) { | ||||
| 	for (int i = 0; i < argc; ++i) { | ||||
| 		if (*argv[i] != '-' && (out = atoi(argv[i])) > 0) { | ||||
|  | @ -75,81 +87,144 @@ std::ostream& operator<<(std::ostream& os, const RandomX::RegisterFile& rf) { | |||
| 	return os; | ||||
| } | ||||
| 
 | ||||
| int main(int argc, char** argv) { | ||||
| 	bool softAes, lightClient, genAsm, compiled; | ||||
| 	int programCount; | ||||
| 	readOption("--softAes", argc, argv, softAes); | ||||
| 	readOption("--lightClient", argc, argv, lightClient); | ||||
| 	readOption("--genAsm", argc, argv, genAsm); | ||||
| 	readOption("--compiled", argc, argv, compiled); | ||||
| 	readInt(argc, argv, programCount, 1000); | ||||
| class AtomicHash { | ||||
| public: | ||||
| 	AtomicHash() { | ||||
| 		for (int i = 0; i < 4; ++i) | ||||
| 			hash[i].store(0); | ||||
| 	} | ||||
| 	void xorWith(uint64_t update[4]) { | ||||
| 		for (int i = 0; i < 4; ++i) | ||||
| 			hash[i].fetch_xor(update[i]); | ||||
| 	} | ||||
| 	void print(std::ostream& os) { | ||||
| 		for (int i = 0; i < 4; ++i) | ||||
| 			print(hash[i], os); | ||||
| 		os << std::endl; | ||||
| 	} | ||||
| private: | ||||
| 	void print(std::atomic<uint64_t>& hash, std::ostream& os) { | ||||
| 		auto h = hash.load(); | ||||
| 		outputHex(std::cout, (char*)&h, sizeof(h)); | ||||
| 	} | ||||
| 	std::atomic<uint64_t> hash[4]; | ||||
| }; | ||||
| 
 | ||||
| void mine(RandomX::VirtualMachine* vm, std::atomic<int>& atomicNonce, AtomicHash& result, int noncesCount, int thread) { | ||||
| 	uint64_t hash[4]; | ||||
| 	unsigned char blockTemplate[] = { | ||||
| 		0x07, 0x07, 0xf7, 0xa4, 0xf0, 0xd6, 0x05, 0xb3, 0x03, 0x26, 0x08, 0x16, 0xba, 0x3f, 0x10, 0x90, 0x2e, 0x1a, 0x14, | ||||
| 		0x5a, 0xc5, 0xfa, 0xd3, 0xaa, 0x3a, 0xf6, 0xea, 0x44, 0xc1, 0x18, 0x69, 0xdc, 0x4f, 0x85, 0x3f, 0x00, 0x2b, 0x2e, | ||||
| 		0xea, 0x00, 0x00, 0x00, 0x00, 0x77, 0xb2, 0x06, 0xa0, 0x2c, 0xa5, 0xb1, 0xd4, 0xce, 0x6b, 0xbf, 0xdf, 0x0a, 0xca, | ||||
| 		0xc3, 0x8b, 0xde, 0xd3, 0x4d, 0x2d, 0xcd, 0xee, 0xf9, 0x5c, 0xd2, 0x0c, 0xef, 0xc1, 0x2f, 0x61, 0xd5, 0x61, 0x09 | ||||
| 	}; | ||||
| 	int* nonce = (int*)(blockTemplate + 39); | ||||
| 	uint8_t hash[RandomX::ResultSize]; | ||||
| 	int* noncePtr = (int*)(blockTemplate + 39); | ||||
| 	int nonce = atomicNonce.fetch_add(1); | ||||
| 
 | ||||
| 	if (genAsm) { | ||||
| 		*nonce = programCount; | ||||
| 	while (nonce < noncesCount) { | ||||
| 		//std::cout << "Thread " << thread << " nonce " << nonce << std::endl;
 | ||||
| 		*noncePtr = nonce; | ||||
| 		blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0); | ||||
| 		RandomX::AssemblyGeneratorX86 asmX86; | ||||
| 		asmX86.generateProgram(hash); | ||||
| 		asmX86.printCode(std::cout); | ||||
| 		return 0; | ||||
| 		int spIndex = ((uint8_t*)hash)[24] | ((((uint8_t*)hash)[25] & 63) << 8); | ||||
| 		vm->initializeScratchpad(spIndex); | ||||
| 		vm->initializeProgram(hash); | ||||
| 		vm->execute(); | ||||
| 		vm->getResult(hash); | ||||
| 		result.xorWith(hash); | ||||
| 		if (RandomX::trace) { | ||||
| 			std::cout << "Nonce: " << nonce << " "; | ||||
| 			outputHex(std::cout, (char*)hash, sizeof(hash)); | ||||
| 			std::cout << std::endl; | ||||
| 		} | ||||
| 		nonce = atomicNonce.fetch_add(1); | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| int main(int argc, char** argv) { | ||||
| 	bool softAes, lightClient, genAsm, compiled; | ||||
| 	int programCount, threadCount; | ||||
| 	readOption("--softAes", argc, argv, softAes); | ||||
| 	readOption("--lightClient", argc, argv, lightClient); | ||||
| 	readOption("--genAsm", argc, argv, genAsm); | ||||
| 	readOption("--compiled", argc, argv, compiled); | ||||
| 	readIntOption("--threads", argc, argv, threadCount, 1); | ||||
| 	readIntOption("--nonces", argc, argv, programCount, 1000); | ||||
| 
 | ||||
| 	std::atomic<int> atomicNonce(0); | ||||
| 	AtomicHash result; | ||||
| 	std::vector<RandomX::VirtualMachine*> vms; | ||||
| 	std::vector<std::thread> threads; | ||||
| 	RandomX::dataset_t dataset; | ||||
| 
 | ||||
| 	if (softAes) | ||||
| 		std::cout << "Using software AES." << std::endl; | ||||
| 
 | ||||
| 	char cumulative[RandomX::ResultSize] = { 0 }; | ||||
| 
 | ||||
| 	RandomX::VirtualMachine* vm; | ||||
| 	std::cout << "Initializing..." << std::endl; | ||||
| 
 | ||||
| 	try { | ||||
| 		if (compiled) { | ||||
| 			vm = new RandomX::CompiledVirtualMachine(softAes); | ||||
| 		Stopwatch sw(true); | ||||
| 		if (softAes) { | ||||
| 			RandomX::datasetInitCache<true>(seed, dataset); | ||||
| 		} | ||||
| 		else { | ||||
| 			vm = new RandomX::InterpretedVirtualMachine(softAes); | ||||
| 			RandomX::datasetInitCache<false>(seed, dataset); | ||||
| 		} | ||||
| 		std::cout << "Initializing..." << std::endl; | ||||
| 		Stopwatch sw(true); | ||||
| 		vm->initializeDataset(seed, lightClient); | ||||
| 		if(lightClient) | ||||
| 		if (RandomX::trace) { | ||||
| 			std::cout << "Keys: " << std::endl; | ||||
| 			for (int i = 0; i < dataset.cache->getKeys().size(); ++i) { | ||||
| 				outputHex(std::cout, (char*)&dataset.cache->getKeys()[i], sizeof(__m128i)); | ||||
| 			} | ||||
| 			std::cout << std::endl; | ||||
| 			std::cout << "Cache: " << std::endl; | ||||
| 			outputHex(std::cout, (char*)dataset.cache->getCache(), sizeof(__m128i)); | ||||
| 			std::cout << std::endl; | ||||
| 		} | ||||
| 		if (lightClient) { | ||||
| 			std::cout << "Cache (64 MiB) initialized in " << sw.getElapsed() << " s" << std::endl; | ||||
| 		else | ||||
| 		} | ||||
| 		else { | ||||
| 			RandomX::Cache* cache = dataset.cache; | ||||
| 			RandomX::datasetAlloc(dataset); | ||||
| 			auto perThread = RandomX::DatasetBlockCount / threadCount; | ||||
| 			auto remainder = RandomX::DatasetBlockCount % threadCount; | ||||
| 			for (int i = 0; i < threadCount; ++i) { | ||||
| 				auto count = perThread + (i == threadCount - 1 ? remainder : 0); | ||||
| 				if (softAes) { | ||||
| 					threads.push_back(std::thread(&RandomX::datasetInit<true>, cache, dataset, i * perThread, count)); | ||||
| 				} | ||||
| 				else { | ||||
| 					threads.push_back(std::thread(&RandomX::datasetInit<false>, cache, dataset, i * perThread, count)); | ||||
| 				} | ||||
| 			} | ||||
| 			for (int i = 0; i < threads.size(); ++i) { | ||||
| 				threads[i].join(); | ||||
| 			} | ||||
| 			delete cache; | ||||
| 			threads.clear(); | ||||
| 			std::cout << "Dataset (4 GiB) initialized in " << sw.getElapsed() << " s" << std::endl; | ||||
| 		} | ||||
| 		std::cout << "Initializing " << threadCount << " virtual machine(s)..." << std::endl; | ||||
| 		for (int i = 0; i < threadCount; ++i) { | ||||
| 			RandomX::VirtualMachine* vm; | ||||
| 			if (compiled) { | ||||
| 				vm = new RandomX::CompiledVirtualMachine(softAes); | ||||
| 			} | ||||
| 			else { | ||||
| 				vm = new RandomX::InterpretedVirtualMachine(softAes); | ||||
| 			} | ||||
| 			vm->setDataset(dataset, lightClient); | ||||
| 			vms.push_back(vm); | ||||
| 		} | ||||
| 		std::cout << "Running benchmark (" << programCount << " programs) ..." << std::endl; | ||||
| 		sw.restart(); | ||||
| 		for (int i = 0; i < programCount; ++i) { | ||||
| 			*nonce = i; | ||||
| 			if (RandomX::trace) std::cout << "Nonce: " << i << " "; | ||||
| 			blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0); | ||||
| 			int spIndex = hash[24] | ((hash[25] & 63) << 8); | ||||
| 			vm->initializeScratchpad(spIndex); | ||||
| 			//dump((const char *)vm.getScratchpad(), RandomX::ScratchpadSize, "scratchpad-before.txt");
 | ||||
| 			//return 0;
 | ||||
| 			vm->initializeProgram(hash); | ||||
| 			vm->execute(); | ||||
| 			/*std::string fileName("scratchpad-after-");
 | ||||
| 			fileName = fileName + std::to_string(i) + ".txt"; | ||||
| 			dump((const char *)vm.getScratchpad(), RandomX::ScratchpadSize, fileName.c_str());*/ | ||||
| 			vm->getResult(hash); | ||||
| 			if (RandomX::trace) { | ||||
| 				outputHex(std::cout, (char*)hash, sizeof(hash)); | ||||
| 			} | ||||
| 			((uint64_t*)cumulative)[0] ^= ((uint64_t*)hash)[0]; | ||||
| 			((uint64_t*)cumulative)[1] ^= ((uint64_t*)hash)[1]; | ||||
| 			((uint64_t*)cumulative)[2] ^= ((uint64_t*)hash)[2]; | ||||
| 			((uint64_t*)cumulative)[3] ^= ((uint64_t*)hash)[3]; | ||||
| 		for (int i = 0; i < vms.size(); ++i) { | ||||
| 			threads.push_back(std::thread(&mine, vms[i], std::ref(atomicNonce), std::ref(result), programCount, i)); | ||||
| 		} | ||||
| 		for (int i = 0; i < threads.size(); ++i) { | ||||
| 			threads[i].join(); | ||||
| 		} | ||||
| 		double elapsed = sw.getElapsed(); | ||||
| 		std::cout << "Calculated result: "; | ||||
| 		outputHex(std::cout, cumulative, sizeof(cumulative)); | ||||
| 		result.print(std::cout); | ||||
| 		if(programCount == 1000) | ||||
| 		std::cout << "Reference result:  d62ed85c39030cd2c5704fca3a23019f1244f2b03447c9a6b39dea5390ed1d10" << std::endl; | ||||
| 		std::cout << "Performance: " << programCount / elapsed << " programs per second" << std::endl; | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue