mirror of
				https://git.wownero.com/wownero/RandomWOW.git
				synced 2024-08-15 00:23:14 +00:00 
			
		
		
		
	Dataset size increased to 2080 MiB
Implemented dataset base offset
Tweaked SuperscalarHash constants to prevent register collisions
This commit is contained in:
		
							parent
							
								
									f66da3911e
								
							
						
					
					
						commit
						270a4f97fe
					
				
					 19 changed files with 56 additions and 44 deletions
				
			
		|  | @ -13,4 +13,5 @@ | |||
| 	mov ebx, ebp                       ;# ebx = ma
 | ||||
| 	and ebx, 2147483584                ;# align "ma" to the start of a cache line
 | ||||
| 	shr ebx, 6                         ;# ebx = Dataset block number
 | ||||
| 	;# add ebx, datasetOffset / 64
 | ||||
| 	;# call 32768
 | ||||
|  | @ -2,14 +2,14 @@ r0_mul: | |||
| 	;#/ 6364136223846793005
 | ||||
| 	db 45, 127, 149, 76, 45, 244, 81, 88 | ||||
| r1_add: | ||||
| 	;#/ 9298410992540426748
 | ||||
| 	db 252, 161, 245, 89, 136, 151, 10, 129 | ||||
| 	;#/ 9298411001130361340
 | ||||
| 	db 252, 161, 245, 89, 138, 151, 10, 129 | ||||
| r2_add: | ||||
| 	;#/ 12065312585734608966
 | ||||
| 	db 70, 216, 194, 56, 223, 153, 112, 167 | ||||
| r3_add: | ||||
| 	;#/ 9306329213124610396
 | ||||
| 	db 92, 9, 34, 191, 28, 185, 38, 129 | ||||
| 	;#/ 9306329213124626780
 | ||||
| 	db 92, 73, 34, 191, 28, 185, 38, 129 | ||||
| r4_add: | ||||
| 	;#/ 5281919268842080866
 | ||||
| 	db 98, 138, 159, 23, 151, 37, 77, 73 | ||||
|  |  | |||
|  | @ -28,8 +28,9 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| namespace randomx { | ||||
| 
 | ||||
| 	static_assert((RANDOMX_ARGON_MEMORY & (RANDOMX_ARGON_MEMORY - 1)) == 0, "RANDOMX_ARGON_MEMORY must be a power of 2."); | ||||
| 	static_assert((RANDOMX_DATASET_SIZE & (RANDOMX_DATASET_SIZE - 1)) == 0, "RANDOMX_DATASET_SIZE must be a power of 2."); | ||||
| 	static_assert(RANDOMX_DATASET_SIZE <= 4294967296ULL, "RANDOMX_DATASET_SIZE must not exceed 4294967296."); | ||||
| 	static_assert((RANDOMX_DATASET_BASE_SIZE & (RANDOMX_DATASET_BASE_SIZE - 1)) == 0, "RANDOMX_DATASET_BASE_SIZE must be a power of 2."); | ||||
| 	static_assert(RANDOMX_DATASET_BASE_SIZE <= 4294967296ULL, "RANDOMX_DATASET_BASE_SIZE must not exceed 4294967296."); | ||||
| 	static_assert(RANDOMX_DATASET_EXTRA_SIZE % 64 == 0, "RANDOMX_DATASET_EXTRA_SIZE must be divisible by 64."); | ||||
| 	static_assert(RANDOMX_PROGRAM_SIZE > 0, "RANDOMX_PROGRAM_SIZE must be greater than 0"); | ||||
| 	static_assert(RANDOMX_PROGRAM_ITERATIONS > 0, "RANDOMX_PROGRAM_ITERATIONS must be greater than 0"); | ||||
| 	static_assert(RANDOMX_PROGRAM_COUNT > 0, "RANDOMX_PROGRAM_COUNT must be greater than 0"); | ||||
|  | @ -56,8 +57,10 @@ namespace randomx { | |||
| 	constexpr int ArgonSaltSize = sizeof(RANDOMX_ARGON_SALT) - 1; | ||||
| 	constexpr int CacheLineSize = RANDOMX_DATASET_ITEM_SIZE; | ||||
| 	constexpr int ScratchpadSize = RANDOMX_SCRATCHPAD_L3; | ||||
| 	constexpr uint32_t CacheLineAlignMask = (RANDOMX_DATASET_SIZE - 1) & ~(CacheLineSize - 1); | ||||
| 	constexpr uint32_t CacheSize = RANDOMX_ARGON_MEMORY * 1024; | ||||
| 	constexpr uint32_t CacheLineAlignMask = (RANDOMX_DATASET_BASE_SIZE - 1) & ~(CacheLineSize - 1); | ||||
| 	constexpr uint32_t CacheSize = RANDOMX_ARGON_MEMORY * ArgonBlockSize; | ||||
| 	constexpr uint64_t DatasetSize = RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE; | ||||
| 	constexpr uint32_t DatasetExtraItems = RANDOMX_DATASET_EXTRA_SIZE / RANDOMX_DATASET_ITEM_SIZE; | ||||
| 
 | ||||
| #ifdef TRACE | ||||
| 	constexpr bool trace = true; | ||||
|  |  | |||
|  | @ -37,8 +37,11 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| #define RANDOMX_SUPERSCALAR_LATENCY   170 | ||||
| #define RANDOMX_SUPERSCALAR_MAX_SIZE  512 | ||||
| 
 | ||||
| //Dataset size in bytes. Must be a power of 2.
 | ||||
| #define RANDOMX_DATASET_SIZE       (2ULL * 1024 * 1024 * 1024) | ||||
| //Dataset base size in bytes. Must be a power of 2.
 | ||||
| #define RANDOMX_DATASET_BASE_SIZE  (2ULL * 1024 * 1024 * 1024) | ||||
| 
 | ||||
| //Dataset extra size in bytes. Must be divisible by 64.
 | ||||
| #define RANDOMX_DATASET_EXTRA_SIZE 33554368 | ||||
| 
 | ||||
| //Number of instructions in a RandomX program
 | ||||
| #define RANDOMX_PROGRAM_SIZE       256 | ||||
|  |  | |||
|  | @ -45,6 +45,7 @@ randomx_dataset::~randomx_dataset() { | |||
| } | ||||
| 
 | ||||
| static_assert(RANDOMX_ARGON_MEMORY % (RANDOMX_ARGON_LANES * ARGON2_SYNC_POINTS) == 0, "RANDOMX_ARGON_MEMORY - invalid value"); | ||||
| static_assert(ARGON2_BLOCK_SIZE == randomx::ArgonBlockSize, "Unpexpected value of ARGON2_BLOCK_SIZE"); | ||||
| 
 | ||||
| void randomx_cache::initialize(const void *seed, size_t seedSize) { | ||||
| 	uint32_t memory_blocks, segment_length; | ||||
|  | @ -117,22 +118,22 @@ namespace randomx { | |||
| 
 | ||||
| 	template<class Allocator> | ||||
| 	void Dataset<Allocator>::allocate() { | ||||
| 		memory = (uint8_t*)Allocator::allocMemory(RANDOMX_DATASET_SIZE); | ||||
| 		memory = (uint8_t*)Allocator::allocMemory(DatasetSize); | ||||
| 	} | ||||
| 
 | ||||
| 	template<class Allocator> | ||||
| 	Dataset<Allocator>::~Dataset() { | ||||
| 		Allocator::freeMemory(memory, RANDOMX_DATASET_SIZE); | ||||
| 		Allocator::freeMemory(memory, DatasetSize); | ||||
| 	} | ||||
| 
 | ||||
| 	template<class Allocator> | ||||
| 	void Cache<Allocator>::allocate() { | ||||
| 		memory = (uint8_t*)Allocator::allocMemory(RANDOMX_ARGON_MEMORY * ARGON2_BLOCK_SIZE); | ||||
| 		memory = (uint8_t*)Allocator::allocMemory(CacheSize); | ||||
| 	} | ||||
| 
 | ||||
| 	template<class Allocator> | ||||
| 	Cache<Allocator>::~Cache() { | ||||
| 		Allocator::freeMemory(memory, RANDOMX_ARGON_MEMORY * ARGON2_BLOCK_SIZE); | ||||
| 		Allocator::freeMemory(memory, CacheSize); | ||||
| 	} | ||||
| 
 | ||||
| 	template<class Allocator> | ||||
|  | @ -160,16 +161,16 @@ namespace randomx { | |||
| 	template class CacheWithJit<LargePageAllocator>; | ||||
| 
 | ||||
| 	constexpr uint64_t superscalarMul0 = 6364136223846793005ULL; | ||||
| 	constexpr uint64_t superscalarAdd1 = 9298410992540426748ULL; | ||||
| 	constexpr uint64_t superscalarAdd1 = 9298411001130361340ULL; | ||||
| 	constexpr uint64_t superscalarAdd2 = 12065312585734608966ULL; | ||||
| 	constexpr uint64_t superscalarAdd3 = 9306329213124610396ULL; | ||||
| 	constexpr uint64_t superscalarAdd3 = 9306329213124626780ULL; | ||||
| 	constexpr uint64_t superscalarAdd4 = 5281919268842080866ULL; | ||||
| 	constexpr uint64_t superscalarAdd5 = 10536153434571861004ULL; | ||||
| 	constexpr uint64_t superscalarAdd6 = 3398623926847679864ULL; | ||||
| 	constexpr uint64_t superscalarAdd7 = 9549104520008361294ULL; | ||||
| 
 | ||||
| 	static inline uint8_t* getMixBlock(uint64_t registerValue, uint8_t *memory) { | ||||
| 		constexpr uint32_t mask = (RANDOMX_ARGON_MEMORY * ArgonBlockSize / CacheLineSize - 1); | ||||
| 		constexpr uint32_t mask = CacheSize / CacheLineSize - 1; | ||||
| 		return memory + (registerValue & mask) * CacheLineSize; | ||||
| 	} | ||||
| 
 | ||||
|  |  | |||
|  | @ -217,6 +217,7 @@ namespace randomx { | |||
| 	static const uint8_t RET = 0xc3; | ||||
| 	static const uint8_t LEA_32[] = { 0x67, 0x41, 0x8d }; | ||||
| 	static const uint8_t MOVNTI[] = { 0x4c, 0x0f, 0xc3 }; | ||||
| 	static const uint8_t ADD_EBX_I[] = { 0x81, 0xc3 }; | ||||
| 
 | ||||
| 	static const uint8_t NOP1[] = { 0x90 }; | ||||
| 	static const uint8_t NOP2[] = { 0x66, 0x90 }; | ||||
|  | @ -250,9 +251,11 @@ namespace randomx { | |||
| 		generateProgramEpilogue(prog); | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::generateProgramLight(Program& prog, ProgramConfiguration& pcfg) { | ||||
| 	void JitCompilerX86::generateProgramLight(Program& prog, ProgramConfiguration& pcfg, uint32_t datasetOffset) { | ||||
| 		generateProgramPrologue(prog, pcfg); | ||||
| 		emit(codeReadDatasetLightSshInit, readDatasetLightInitSize); | ||||
| 		emit(ADD_EBX_I); | ||||
| 		emit32(datasetOffset / CacheLineSize); | ||||
| 		emitByte(CALL); | ||||
| 		emit32(superScalarHashOffset - (codePos + 4)); | ||||
| 		emit(codeReadDatasetLightSshFin, readDatasetLightFinSize); | ||||
|  |  | |||
|  | @ -41,7 +41,7 @@ namespace randomx { | |||
| 		JitCompilerX86(); | ||||
| 		~JitCompilerX86(); | ||||
| 		void generateProgram(Program&, ProgramConfiguration&); | ||||
| 		void generateProgramLight(Program&, ProgramConfiguration&); | ||||
| 		void generateProgramLight(Program&, ProgramConfiguration&, uint32_t); | ||||
| 		template<size_t N> | ||||
| 		void generateSuperscalarHash(SuperscalarProgram (&programs)[N], std::vector<uint64_t> &); | ||||
| 		void generateDatasetInitCode(); | ||||
|  |  | |||
|  | @ -91,7 +91,7 @@ extern "C" { | |||
| 	} | ||||
| 
 | ||||
| 	unsigned long randomx_dataset_item_count() { | ||||
| 		return RANDOMX_DATASET_SIZE / RANDOMX_DATASET_ITEM_SIZE; | ||||
| 		return randomx::DatasetSize / RANDOMX_DATASET_ITEM_SIZE; | ||||
| 	} | ||||
| 
 | ||||
| 	void randomx_init_dataset(randomx_dataset *dataset, randomx_cache *cache, unsigned long startItem, unsigned long itemCount) { | ||||
|  |  | |||
|  | @ -225,7 +225,7 @@ int main(int argc, char** argv) { | |||
| 		std::cout << "Calculated result: "; | ||||
| 		result.print(std::cout); | ||||
| 		if (noncesCount == 1000 && seedValue == 0) | ||||
| 			std::cout << "Reference result:  b69741719152625854031c2337ceae68c3030f2b9581a73acebaa69fc9b555fc" << std::endl; | ||||
| 			std::cout << "Reference result:  918a8bc3ce0e537eec9d3c5e1a8bb3204ae3954f14c50c14810b38e49588a9e0" << std::endl; | ||||
| 		if (!miningMode) { | ||||
| 			std::cout << "Performance: " << 1000 * elapsed / noncesCount << " ms per hash" << std::endl; | ||||
| 		} | ||||
|  |  | |||
|  | @ -24,27 +24,25 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| #include "../superscalar.hpp" | ||||
| #include "../common.hpp" | ||||
| 
 | ||||
| const uint8_t seed[32] = { 191, 182, 222, 175, 249, 89, 134, 104, 241, 68, 191, 62, 162, 166, 61, 64, 123, 191, 227, 193, 118, 60, 188, 53, 223, 133, 175, 24, 123, 230, 55, 74 }; | ||||
| 
 | ||||
| int main() { | ||||
| 	std::cout << "THIS PROGRAM REQUIRES MORE THAN 10 GB OF RAM TO COMPLETE" << std::endl; | ||||
| 	std::cout << "THIS PROGRAM REQUIRES MORE THAN 16 GB OF RAM TO COMPLETE" << std::endl; | ||||
| 	std::vector<uint64_t> dummy; | ||||
| 	constexpr uint64_t superscalarMul0 = 6364136223846793005ULL; | ||||
| 	constexpr uint64_t superscalarAdd1 = 9298410992540426748ULL; //9298410992540426048ULL
 | ||||
| 	constexpr uint64_t superscalarAdd1 = 0x810A978A59F5A1FC; //9298410992540426748ULL; //9298410992540426048ULL
 | ||||
| 	constexpr uint64_t superscalarAdd2 = 12065312585734608966ULL; | ||||
| 	constexpr uint64_t superscalarAdd3 = 9306329213124610396ULL; | ||||
| 	constexpr uint64_t superscalarAdd3 = 0x8126B91CBF22495C; //9306329213124610396ULL;
 | ||||
| 	constexpr uint64_t superscalarAdd4 = 5281919268842080866ULL; | ||||
| 	constexpr uint64_t superscalarAdd5 = 10536153434571861004ULL; | ||||
| 	constexpr uint64_t superscalarAdd6 = 3398623926847679864ULL; | ||||
| 	constexpr uint64_t superscalarAdd7 = 9549104520008361294ULL; | ||||
| 	constexpr uint32_t totalBlocks = RANDOMX_DATASET_SIZE / randomx::CacheLineSize; | ||||
| 	constexpr uint32_t totalItems = randomx::DatasetSize / randomx::CacheLineSize; | ||||
| 	std::unordered_set<uint64_t> registerValues; | ||||
| 	registerValues.reserve(totalBlocks); | ||||
| 	registerValues.rehash(totalBlocks); | ||||
| 	registerValues.reserve(totalItems); | ||||
| 	registerValues.rehash(totalItems); | ||||
| 	int collisionCount[9] = { 0 }; | ||||
| 	for (uint32_t blockNumber = 0; blockNumber < totalBlocks; ++blockNumber) { | ||||
| 	for (uint32_t itemNumber = 0; itemNumber < totalItems; ++itemNumber) { | ||||
| 		uint64_t rl[8]; | ||||
| 		rl[0] = (blockNumber + 1) * superscalarMul0; | ||||
| 		rl[0] = (itemNumber + 1) * superscalarMul0; | ||||
| 		rl[1] = rl[0] ^ superscalarAdd1; | ||||
| 		rl[2] = rl[0] ^ superscalarAdd2; | ||||
| 		rl[3] = rl[0] ^ superscalarAdd3; | ||||
|  | @ -57,19 +55,19 @@ int main() { | |||
| 			uint64_t reducedValue = rl[i] & 0x3FFFFFFFFFFFF8; //bits 3-53 only
 | ||||
| 			if (registerValues.find(reducedValue) != registerValues.end()) { | ||||
| 				blockCollisions++; | ||||
| 				std::cout << "Block " << blockNumber << ": collision of register r" << i << std::endl; | ||||
| 				std::cout << "Item " << itemNumber << ": collision of register r" << i << std::endl; | ||||
| 			} | ||||
| 			else { | ||||
| 				registerValues.insert(reducedValue); | ||||
| 			} | ||||
| 		} | ||||
| 		collisionCount[blockCollisions]++; | ||||
| 		if ((blockNumber % (320 * 1024)) == 0) | ||||
| 			std::cout << "Block " << blockNumber << " processed" << std::endl; | ||||
| 		if ((itemNumber % (320 * 1024)) == 0) | ||||
| 			std::cout << "Item " << itemNumber << " processed" << std::endl; | ||||
| 	} | ||||
| 
 | ||||
| 	for (int i = 0; i < 9; ++i) { | ||||
| 		std::cout << i << " register(s) collide in " << collisionCount[i] << " blocks" << std::endl; | ||||
| 		std::cout << i << " register(s) collide in " << collisionCount[i] << " items" << std::endl; | ||||
| 	} | ||||
| 
 | ||||
| 	return 0; | ||||
|  |  | |||
|  | @ -69,7 +69,7 @@ void randomx_vm::initialize() { | |||
| 	config.readReg2 = 4 + (addressRegisters & 1); | ||||
| 	addressRegisters >>= 1; | ||||
| 	config.readReg3 = 6 + (addressRegisters & 1); | ||||
| 	//datasetBase = program.getEntropy(13) % datasetRange;
 | ||||
| 	datasetOffset = (program.getEntropy(13) & randomx::DatasetExtraItems) * randomx::CacheLineSize; | ||||
| 	constexpr uint64_t mask22bit = (1ULL << 22) - 1; | ||||
| 	constexpr uint64_t maskExp240 = ieee_get_exponent_mask<-240>(); | ||||
| 	store64(&config.eMask[0], (program.getEntropy(14) & mask22bit) | maskExp240); | ||||
|  |  | |||
|  | @ -44,6 +44,8 @@ protected: | |||
| 	alignas(16) randomx::ProgramConfiguration config; | ||||
| 	randomx::MemoryRegisters mem; | ||||
| 	uint8_t* scratchpad; | ||||
| 	uint8_t* datasetBasePtr; | ||||
| 	uint32_t datasetOffset; | ||||
| }; | ||||
| 
 | ||||
| namespace randomx { | ||||
|  |  | |||
|  | @ -28,7 +28,7 @@ namespace randomx { | |||
| 	template<class Allocator, bool softAes> | ||||
| 	void CompiledVm<Allocator, softAes>::setDataset(randomx_dataset* dataset) { | ||||
| 		mem.memory = dataset->memory; | ||||
| 		//datasetBasePtr = dataset.memory;
 | ||||
| 		datasetBasePtr = dataset->memory; | ||||
| 	} | ||||
| 
 | ||||
| 	template<class Allocator, bool softAes> | ||||
|  | @ -36,7 +36,7 @@ namespace randomx { | |||
| 		VmBase<Allocator, softAes>::generateProgram(seed); | ||||
| 		randomx_vm::initialize(); | ||||
| 		compiler.generateProgram(program, config); | ||||
| 		//mem.memory = datasetBasePtr + (datasetBase * CacheLineSize);
 | ||||
| 		mem.memory = datasetBasePtr + datasetOffset; | ||||
| 		execute(); | ||||
| 	} | ||||
| 
 | ||||
|  |  | |||
|  | @ -48,11 +48,12 @@ namespace randomx { | |||
| 		using VmBase<Allocator, softAes>::config; | ||||
| 		using VmBase<Allocator, softAes>::reg; | ||||
| 		using VmBase<Allocator, softAes>::scratchpad; | ||||
| 		using VmBase<Allocator, softAes>::datasetBasePtr; | ||||
| 		using VmBase<Allocator, softAes>::datasetOffset; | ||||
| 	protected: | ||||
| 		void execute(); | ||||
| 
 | ||||
| 		JitCompilerX86 compiler; | ||||
| 		uint8_t* datasetBasePtr; | ||||
| 	}; | ||||
| 
 | ||||
| 	using CompiledVmDefault = CompiledVm<AlignedAllocator<CacheLineSize>, true>; | ||||
|  |  | |||
|  | @ -27,15 +27,13 @@ namespace randomx { | |||
| 	void CompiledLightVm<Allocator, softAes>::setCache(randomx_cache* cache) { | ||||
| 		mem.memory = cache->memory; | ||||
| 		compiler.generateSuperscalarHash(cache->programs, cache->reciprocalCache); | ||||
| 		//datasetBasePtr = ds.dataset.memory;
 | ||||
| 	} | ||||
| 
 | ||||
| 	template<class Allocator, bool softAes> | ||||
| 	void CompiledLightVm<Allocator, softAes>::run(void* seed) { | ||||
| 		VmBase<Allocator, softAes>::generateProgram(seed); | ||||
| 		randomx_vm::initialize(); | ||||
| 		compiler.generateProgramLight(program, config); | ||||
| 		//mem.memory = datasetBasePtr + (datasetBase * CacheLineSize);
 | ||||
| 		compiler.generateProgramLight(program, config, datasetOffset); | ||||
| 		CompiledVm<Allocator, softAes>::execute(); | ||||
| 	} | ||||
| 
 | ||||
|  |  | |||
|  | @ -44,6 +44,7 @@ namespace randomx { | |||
| 		using CompiledVm<Allocator, softAes>::compiler; | ||||
| 		using CompiledVm<Allocator, softAes>::program; | ||||
| 		using CompiledVm<Allocator, softAes>::config; | ||||
| 		using CompiledVm<Allocator, softAes>::datasetOffset; | ||||
| 	}; | ||||
| 
 | ||||
| 	using CompiledLightVmDefault = CompiledLightVm<AlignedAllocator<CacheLineSize>, true>; | ||||
|  |  | |||
|  | @ -331,7 +331,7 @@ namespace randomx { | |||
| 
 | ||||
| 			mem.mx ^= r[config.readReg2] ^ r[config.readReg3]; | ||||
| 			mem.mx &= CacheLineAlignMask; | ||||
| 			datasetRead(mem.ma, r); | ||||
| 			datasetRead(datasetOffset + mem.ma, r); | ||||
| 			std::swap(mem.mx, mem.ma); | ||||
| 
 | ||||
| 			if (trace) { | ||||
|  |  | |||
|  | @ -57,6 +57,8 @@ namespace randomx { | |||
| 		using VmBase<Allocator, softAes>::program; | ||||
| 		using VmBase<Allocator, softAes>::config; | ||||
| 		using VmBase<Allocator, softAes>::reg; | ||||
| 		using VmBase<Allocator, softAes>::datasetBasePtr; | ||||
| 		using VmBase<Allocator, softAes>::datasetOffset; | ||||
| 		void* operator new(size_t size) { | ||||
| 			void* ptr = AlignedAllocator<CacheLineSize>::allocMemory(size); | ||||
| 			if (ptr == nullptr) | ||||
|  |  | |||
|  | @ -25,7 +25,6 @@ namespace randomx { | |||
| 	template<class Allocator, bool softAes> | ||||
| 	void InterpretedLightVm<Allocator, softAes>::setCache(randomx_cache* cache) { | ||||
| 		mem.memory = cache->memory; | ||||
| 		//datasetRange = (size - RANDOMX_DATASET_SIZE + CacheLineSize) / CacheLineSize;
 | ||||
| 		cachePtr = cache; | ||||
| 	} | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue