diff --git a/src/Cache.cpp b/src/Cache.cpp index dece1e4..2de6023 100644 --- a/src/Cache.cpp +++ b/src/Cache.cpp @@ -25,8 +25,9 @@ along with RandomX. If not, see. namespace RandomX { static_assert(RANDOMX_ARGON_MEMORY % (RANDOMX_ARGON_LANES * ARGON2_SYNC_POINTS) == 0, "RANDOMX_ARGON_MEMORY - invalid value"); + static_assert(RANDOMX_ARGON_GROWTH % (RANDOMX_ARGON_LANES * ARGON2_SYNC_POINTS) == 0, "RANDOMX_ARGON_GROWTH - invalid value"); - void Cache::argonFill(const void* seed, size_t seedSize) { + void argonFill(Cache& cache, const void* seed, size_t seedSize) { uint32_t memory_blocks, segment_length; argon2_instance_t instance; argon2_context context; @@ -42,7 +43,7 @@ namespace RandomX { context.ad = NULL; context.adlen = 0; context.t_cost = RANDOMX_ARGON_ITERATIONS; - context.m_cost = RANDOMX_ARGON_MEMORY; + context.m_cost = cache.size / ArgonBlockSize; context.lanes = RANDOMX_ARGON_LANES; context.threads = 1; context.allocate_cbk = NULL; @@ -65,7 +66,7 @@ namespace RandomX { instance.lanes = context.lanes; instance.threads = context.threads; instance.type = Argon2_d; - instance.memory = (block*)memory; + instance.memory = (block*)cache.memory; if (instance.threads > instance.lanes) { instance.threads = instance.lanes; @@ -78,9 +79,4 @@ namespace RandomX { fill_memory_blocks(&instance); } - - void Cache::initialize(const void* seed, size_t seedSize) { - //Argon2d memory fill - argonFill(seed, seedSize); - } } \ No newline at end of file diff --git a/src/Cache.hpp b/src/Cache.hpp index 927c5e4..5656baf 100644 --- a/src/Cache.hpp +++ b/src/Cache.hpp @@ -22,50 +22,31 @@ along with RandomX. If not, see. #include #include #include "common.hpp" -#include "dataset.hpp" +#include "intrinPortable.h" #include "virtualMemory.hpp" namespace RandomX { - class Cache { - public: - static void* alloc(bool largePages) { - if (largePages) { - return allocLargePagesMemory(sizeof(Cache)); - } - else { - void* ptr = _mm_malloc(sizeof(Cache), sizeof(__m128i)); - if (ptr == nullptr) - throw std::bad_alloc(); - return ptr; - } + void argonFill(Cache& cache, const void* seed, size_t seedSize); + + inline uint8_t* allocCache(size_t size, bool largePages) { + if (largePages) { + return (uint8_t*)allocLargePagesMemory(size); } - static void dealloc(Cache* cache, bool largePages) { - if (largePages) { - freePagedMemory(cache, sizeof(Cache)); - } - else { - _mm_free(cache); - } - } - /*void* operator new(size_t size) { + else { void* ptr = _mm_malloc(size, sizeof(__m128i)); if (ptr == nullptr) throw std::bad_alloc(); - return ptr; + return (uint8_t*)ptr; } + } - void operator delete(void* ptr) { - _mm_free(ptr); - }*/ - - void initialize(const void* seed, size_t seedSize); - - const uint8_t* getCache() const { - return memory; + inline void deallocCache(Cache cache, bool largePages) { + if (largePages) { + freePagedMemory(cache.memory, cache.size); } - private: - uint8_t memory[CacheSize]; - void argonFill(const void* seed, size_t seedSize); - }; + else { + _mm_free(cache.memory); + } + } } \ No newline at end of file diff --git a/src/CompiledVirtualMachine.cpp b/src/CompiledVirtualMachine.cpp index e6b9316..c313209 100644 --- a/src/CompiledVirtualMachine.cpp +++ b/src/CompiledVirtualMachine.cpp @@ -23,25 +23,26 @@ along with RandomX. If not, see. namespace RandomX { - static_assert(sizeof(MemoryRegisters) == 2 * sizeof(addr_t) + sizeof(uintptr_t), "Invalid alignment of struct RandomX::MemoryRegisters"); + //static_assert(sizeof(MemoryRegisters) == 2 * sizeof(addr_t) + sizeof(uintptr_t), "Invalid alignment of struct RandomX::MemoryRegisters"); static_assert(sizeof(RegisterFile) == 256, "Invalid alignment of struct RandomX::RegisterFile"); CompiledVirtualMachine::CompiledVirtualMachine() { - totalSize = 0; } - void CompiledVirtualMachine::setDataset(dataset_t ds) { + void CompiledVirtualMachine::setDataset(dataset_t ds, uint64_t size) { mem.ds = ds; + datasetRange = (size - RANDOMX_DATASET_SIZE + CacheLineSize) / CacheLineSize; + datasetBasePtr = ds.dataset.memory; } void CompiledVirtualMachine::initialize() { VirtualMachine::initialize(); compiler.generateProgram(program); + mem.ds.dataset.memory = datasetBasePtr + (datasetBase * CacheLineSize); } void CompiledVirtualMachine::execute() { //executeProgram(reg, mem, scratchpad, InstructionCount); - //totalSize += compiler.getCodeSize(); compiler.getProgramFunc()(reg, mem, scratchpad, RANDOMX_PROGRAM_ITERATIONS); #ifdef TRACEVM for (int32_t i = InstructionCount - 1; i >= 0; --i) { diff --git a/src/CompiledVirtualMachine.hpp b/src/CompiledVirtualMachine.hpp index 3837589..17d79df 100644 --- a/src/CompiledVirtualMachine.hpp +++ b/src/CompiledVirtualMachine.hpp @@ -42,20 +42,17 @@ namespace RandomX { _mm_free(ptr); } CompiledVirtualMachine(); - void setDataset(dataset_t ds) override; + void setDataset(dataset_t ds, uint64_t size) override; void initialize() override; virtual void execute() override; void* getProgram() { return compiler.getCode(); } - uint64_t getTotalSize() { - return totalSize; - } private: #ifdef TRACEVM convertible_t tracepad[InstructionCount]; #endif JitCompilerX86 compiler; - uint64_t totalSize; + uint8_t* datasetBasePtr; }; } \ No newline at end of file diff --git a/src/InterpretedVirtualMachine.cpp b/src/InterpretedVirtualMachine.cpp index 2dd7c8e..ad9bec8 100644 --- a/src/InterpretedVirtualMachine.cpp +++ b/src/InterpretedVirtualMachine.cpp @@ -50,13 +50,13 @@ namespace RandomX { } } - void InterpretedVirtualMachine::setDataset(dataset_t ds) { + void InterpretedVirtualMachine::setDataset(dataset_t ds, uint64_t size) { if (asyncWorker) { if (softAes) { - mem.ds.asyncWorker = new LightClientAsyncWorker(ds.cache); + mem.ds.asyncWorker = new LightClientAsyncWorker(ds.cache); } else { - mem.ds.asyncWorker = new LightClientAsyncWorker(ds.cache); + mem.ds.asyncWorker = new LightClientAsyncWorker(ds.cache); } readDataset = &datasetReadLightAsync; } @@ -64,6 +64,7 @@ namespace RandomX { mem.ds = ds; readDataset = &datasetReadLight; } + datasetRange = (size - RANDOMX_DATASET_SIZE + CacheLineSize) / CacheLineSize; } void InterpretedVirtualMachine::initialize() { @@ -337,20 +338,20 @@ namespace RandomX { if (asyncWorker) { ILightClientAsyncWorker* aw = mem.ds.asyncWorker; - const uint64_t* datasetLine = aw->getBlock(mem.ma); + const uint64_t* datasetLine = aw->getBlock(datasetBase + mem.ma); for (int i = 0; i < RegistersCount; ++i) r[i] ^= datasetLine[i]; mem.mx ^= r[readReg2] ^ r[readReg3]; mem.mx &= CacheLineAlignMask; //align to cache line std::swap(mem.mx, mem.ma); - aw->prepareBlock(mem.ma); + aw->prepareBlock(datasetBase + mem.ma); } else { mem.mx ^= r[readReg2] ^ r[readReg3]; - mem.mx &= CacheLineAlignMask; - Cache* cache = mem.ds.cache; + //mem.mx &= CacheLineAlignMask; + Cache& cache = mem.ds.cache; uint64_t datasetLine[CacheLineSize / sizeof(uint64_t)]; - initBlock(cache->getCache(), (uint8_t*)datasetLine, mem.ma / CacheLineSize); + initBlock(cache, (uint8_t*)datasetLine, datasetBase + mem.ma / CacheLineSize); for (int i = 0; i < RegistersCount; ++i) r[i] ^= datasetLine[i]; std::swap(mem.mx, mem.ma); diff --git a/src/InterpretedVirtualMachine.hpp b/src/InterpretedVirtualMachine.hpp index 7ade73f..b3c7f80 100644 --- a/src/InterpretedVirtualMachine.hpp +++ b/src/InterpretedVirtualMachine.hpp @@ -72,7 +72,7 @@ namespace RandomX { } InterpretedVirtualMachine(bool soft, bool async) : softAes(soft), asyncWorker(async) {} ~InterpretedVirtualMachine(); - void setDataset(dataset_t ds) override; + void setDataset(dataset_t ds, uint64_t size) override; void initialize() override; void execute() override; private: diff --git a/src/LightClientAsyncWorker.cpp b/src/LightClientAsyncWorker.cpp index d9f62a2..3d895b8 100644 --- a/src/LightClientAsyncWorker.cpp +++ b/src/LightClientAsyncWorker.cpp @@ -23,8 +23,7 @@ along with RandomX. If not, see. namespace RandomX { - template - LightClientAsyncWorker::LightClientAsyncWorker(const Cache* c) : ILightClientAsyncWorker(c), output(nullptr), hasWork(false), + LightClientAsyncWorker::LightClientAsyncWorker(const Cache& c) : ILightClientAsyncWorker(c), output(nullptr), hasWork(false), #ifdef TRACE sw(true), #endif @@ -32,8 +31,7 @@ namespace RandomX { } - template - void LightClientAsyncWorker::prepareBlock(addr_t addr) { + void LightClientAsyncWorker::prepareBlock(addr_t addr) { #ifdef TRACE std::cout << sw.getElapsed() << ": prepareBlock-enter " << addr / CacheLineSize << std::endl; #endif @@ -50,14 +48,13 @@ namespace RandomX { notifier.notify_one(); } - template - const uint64_t* LightClientAsyncWorker::getBlock(addr_t addr) { + const uint64_t* LightClientAsyncWorker::getBlock(addr_t addr) { #ifdef TRACE std::cout << sw.getElapsed() << ": getBlock-enter " << addr / CacheLineSize << std::endl; #endif uint32_t currentBlock = addr / CacheLineSize; if (currentBlock != startBlock || output != currentLine.data()) { - initBlock(cache->getCache(), (uint8_t*)currentLine.data(), currentBlock); + initBlock(cache, (uint8_t*)currentLine.data(), currentBlock); } else { sync(); @@ -68,8 +65,7 @@ namespace RandomX { return currentLine.data(); } - template - void LightClientAsyncWorker::prepareBlocks(void* out, uint32_t startBlock, uint32_t blockCount) { + void LightClientAsyncWorker::prepareBlocks(void* out, uint32_t startBlock, uint32_t blockCount) { #ifdef TRACE std::cout << sw.getElapsed() << ": prepareBlocks-enter " << startBlock << "/" << blockCount << std::endl; #endif @@ -83,21 +79,18 @@ namespace RandomX { } } - template - void LightClientAsyncWorker::getBlocks(void* out, uint32_t startBlock, uint32_t blockCount) { + void LightClientAsyncWorker::getBlocks(void* out, uint32_t startBlock, uint32_t blockCount) { for (uint32_t i = 0; i < blockCount; ++i) { - initBlock(cache->getCache(), (uint8_t*)out + CacheLineSize * i, startBlock + i); + initBlock(cache, (uint8_t*)out + CacheLineSize * i, startBlock + i); } } - template - void LightClientAsyncWorker::sync() { + void LightClientAsyncWorker::sync() { std::unique_lock lk(mutex); notifier.wait(lk, [this] { return !hasWork; }); } - template - void LightClientAsyncWorker::runWorker() { + void LightClientAsyncWorker::runWorker() { #ifdef TRACE std::cout << sw.getElapsed() << ": runWorker-enter " << std::endl; #endif @@ -108,7 +101,7 @@ namespace RandomX { std::cout << sw.getElapsed() << ": runWorker-getBlocks " << startBlock << "/" << blockCount << std::endl; #endif //getBlocks(output, startBlock, blockCount); - initBlock(cache->getCache(), (uint8_t*)output, startBlock); + initBlock(cache, (uint8_t*)output, startBlock); hasWork = false; #ifdef TRACE std::cout << sw.getElapsed() << ": runWorker-finished " << startBlock << "/" << blockCount << std::endl; @@ -117,7 +110,4 @@ namespace RandomX { notifier.notify_one(); } } - - template class LightClientAsyncWorker; - template class LightClientAsyncWorker; } \ No newline at end of file diff --git a/src/LightClientAsyncWorker.hpp b/src/LightClientAsyncWorker.hpp index 29571e5..7c45e53 100644 --- a/src/LightClientAsyncWorker.hpp +++ b/src/LightClientAsyncWorker.hpp @@ -31,14 +31,11 @@ along with RandomX. If not, see. namespace RandomX { - class Cache; - using DatasetLine = std::array; - template class LightClientAsyncWorker : public ILightClientAsyncWorker { public: - LightClientAsyncWorker(const Cache*); + LightClientAsyncWorker(const Cache&); void prepareBlock(addr_t) final; void prepareBlocks(void* out, uint32_t startBlock, uint32_t blockCount) final; const uint64_t* getBlock(addr_t) final; diff --git a/src/VirtualMachine.cpp b/src/VirtualMachine.cpp index f39217e..d15bb4d 100644 --- a/src/VirtualMachine.cpp +++ b/src/VirtualMachine.cpp @@ -58,7 +58,7 @@ namespace RandomX { } VirtualMachine::VirtualMachine() { - mem.ds.dataset = nullptr; + mem.ds.dataset.memory = nullptr; } void VirtualMachine::resetRoundingMode() { @@ -84,6 +84,7 @@ namespace RandomX { readReg2 = 4 + (addressRegisters & 1); addressRegisters >>= 1; readReg3 = 6 + (addressRegisters & 1); + datasetBase = program.getEntropy(14) % datasetRange; } template diff --git a/src/VirtualMachine.hpp b/src/VirtualMachine.hpp index afeef37..00a14de 100644 --- a/src/VirtualMachine.hpp +++ b/src/VirtualMachine.hpp @@ -30,7 +30,7 @@ namespace RandomX { public: VirtualMachine(); virtual ~VirtualMachine() {} - virtual void setDataset(dataset_t ds) = 0; + virtual void setDataset(dataset_t ds, uint64_t size) = 0; void setScratchpad(void* ptr) { scratchpad = (uint8_t*)ptr; } @@ -51,5 +51,7 @@ namespace RandomX { MemoryRegisters mem; uint8_t* scratchpad; uint32_t readReg0, readReg1, readReg2, readReg3; + uint32_t datasetRange; + uint32_t datasetBase; }; } \ No newline at end of file diff --git a/src/common.hpp b/src/common.hpp index 3b2a3dc..118f053 100644 --- a/src/common.hpp +++ b/src/common.hpp @@ -29,7 +29,8 @@ namespace RandomX { static_assert((RANDOMX_ARGON_MEMORY & (RANDOMX_ARGON_MEMORY - 1)) == 0, "RANDOMX_ARGON_MEMORY must be a power of 2."); static_assert((RANDOMX_DATASET_SIZE & (RANDOMX_DATASET_SIZE - 1)) == 0, "RANDOMX_DATASET_SIZE must be a power of 2."); static_assert(RANDOMX_DATASET_SIZE <= 4294967296ULL, "RANDOMX_DATASET_SIZE must not exceed 4294967296."); - static_assert(RANDOMX_DS_GROWTH_RATE % 64 == 0, "RANDOMX_DS_GROWTH_RATE must be divisible by 64."); + static_assert(RANDOMX_DS_GROWTH % 64 == 0, "RANDOMX_DS_GROWTH must be divisible by 64."); + static_assert(RANDOMX_ARGON_GROWTH >= 0, "RANDOMX_ARGON_GROWTH must be greater than or equal to 0."); static_assert(RANDOMX_PROGRAM_SIZE > 0, "RANDOMX_PROGRAM_SIZE must be greater than 0"); static_assert(RANDOMX_PROGRAM_ITERATIONS > 0, "RANDOMX_PROGRAM_ITERATIONS must be greater than 0"); static_assert(RANDOMX_PROGRAM_COUNT > 0, "RANDOMX_PROGRAM_COUNT must be greater than 0"); @@ -54,6 +55,7 @@ namespace RandomX { constexpr int SeedSize = 32; constexpr int ResultSize = 64; + constexpr int ArgonBlockSize = 1024; constexpr int ArgonSaltSize = sizeof(RANDOMX_ARGON_SALT) - 1; constexpr int CacheLineSize = 64; constexpr uint32_t CacheLineAlignMask = (RANDOMX_DATASET_SIZE - 1) & ~(CacheLineSize - 1); @@ -94,7 +96,13 @@ namespace RandomX { constexpr int ScratchpadL3Mask64 = (ScratchpadL3 / 8 - 1) * 64; constexpr int RegistersCount = 8; - class Cache; + struct Cache { + uint8_t* memory; + uint64_t size; + }; + + struct Dataset : public Cache { + }; class ILightClientAsyncWorker { public: @@ -104,17 +112,17 @@ namespace RandomX { virtual const uint64_t* getBlock(addr_t) = 0; virtual void getBlocks(void* out, uint32_t startBlock, uint32_t blockCount) = 0; virtual void sync() = 0; - const Cache* getCache() { + const Cache& getCache() { return cache; } protected: - ILightClientAsyncWorker(const Cache* c) : cache(c) {} - const Cache* cache; + ILightClientAsyncWorker(const Cache& c) : cache(c) {} + const Cache& cache; }; union dataset_t { - uint8_t* dataset; - Cache* cache; + Dataset dataset; + Cache cache; ILightClientAsyncWorker* asyncWorker; }; diff --git a/src/configuration.h b/src/configuration.h index 72c4f5e..5a34217 100644 --- a/src/configuration.h +++ b/src/configuration.h @@ -22,6 +22,9 @@ along with RandomX. If not, see. //Cache size in KiB. Must be a power of 2. #define RANDOMX_ARGON_MEMORY (256 * 1024) +//Cache growth per epoch in KiB. +#define RANDOMX_ARGON_GROWTH 0 + //Number of Argon2d iterations for Cache initialization #define RANDOMX_ARGON_ITERATIONS 3 @@ -38,7 +41,10 @@ along with RandomX. If not, see. #define RANDOMX_DATASET_SIZE (4ULL * 1024 * 1024 * 1024) //Dataset growth per epoch in bytes. Must be divisible by 64. -#define RANDOMX_DS_GROWTH_RATE (2 * 1024 * 1024) +#define RANDOMX_DS_GROWTH (2 * 1024 * 1024) + +//Number of blocks per epoch +#define RANDOMX_EPOCH_BLOCKS 1024 //Number of instructions in a RandomX program #define RANDOMX_PROGRAM_SIZE 256 @@ -47,7 +53,7 @@ along with RandomX. If not, see. #define RANDOMX_PROGRAM_ITERATIONS 2048 //Number of chained VM executions per hash -#define RANDOMX_PROGRAM_COUNT 8 +#define RANDOMX_PROGRAM_COUNT 8 //Scratchpad L3 size in bytes. Must be a power of 2. #define RANDOMX_SCRATCHPAD_L3 (2 * 1024 * 1024) @@ -63,39 +69,42 @@ Instruction frequencies (per 256 opcodes) Total sum of frequencies must be 256 */ -#define RANDOMX_FREQ_IADD_R 12 -#define RANDOMX_FREQ_IADD_M 7 -#define RANDOMX_FREQ_IADD_RC 16 -#define RANDOMX_FREQ_ISUB_R 12 -#define RANDOMX_FREQ_ISUB_M 7 -#define RANDOMX_FREQ_IMUL_9C 9 -#define RANDOMX_FREQ_IMUL_R 16 -#define RANDOMX_FREQ_IMUL_M 4 -#define RANDOMX_FREQ_IMULH_R 4 -#define RANDOMX_FREQ_IMULH_M 1 -#define RANDOMX_FREQ_ISMULH_R 4 -#define RANDOMX_FREQ_ISMULH_M 1 -#define RANDOMX_FREQ_IMUL_RCP 8 -#define RANDOMX_FREQ_INEG_R 2 -#define RANDOMX_FREQ_IXOR_R 16 -#define RANDOMX_FREQ_IXOR_M 4 -#define RANDOMX_FREQ_IROR_R 10 -#define RANDOMX_FREQ_IROL_R 0 -#define RANDOMX_FREQ_ISWAP_R 4 +#define RANDOMX_FREQ_IADD_R 12 +#define RANDOMX_FREQ_IADD_M 7 +#define RANDOMX_FREQ_IADD_RC 16 +#define RANDOMX_FREQ_ISUB_R 12 +#define RANDOMX_FREQ_ISUB_M 7 +#define RANDOMX_FREQ_IMUL_9C 9 +#define RANDOMX_FREQ_IMUL_R 16 +#define RANDOMX_FREQ_IMUL_M 4 +#define RANDOMX_FREQ_IMULH_R 4 +#define RANDOMX_FREQ_IMULH_M 1 +#define RANDOMX_FREQ_ISMULH_R 4 +#define RANDOMX_FREQ_ISMULH_M 1 +#define RANDOMX_FREQ_IMUL_RCP 8 +#define RANDOMX_FREQ_INEG_R 2 +#define RANDOMX_FREQ_IXOR_R 16 +#define RANDOMX_FREQ_IXOR_M 4 +#define RANDOMX_FREQ_IROR_R 10 +#define RANDOMX_FREQ_IROL_R 0 +#define RANDOMX_FREQ_ISWAP_R 4 -#define RANDOMX_FREQ_FSWAP_R 8 -#define RANDOMX_FREQ_FADD_R 20 -#define RANDOMX_FREQ_FADD_M 5 -#define RANDOMX_FREQ_FSUB_R 20 -#define RANDOMX_FREQ_FSUB_M 5 -#define RANDOMX_FREQ_FSCAL_R 6 -#define RANDOMX_FREQ_FMUL_R 20 -#define RANDOMX_FREQ_FDIV_M 4 -#define RANDOMX_FREQ_FSQRT_R 6 +#define RANDOMX_FREQ_FSWAP_R 8 +#define RANDOMX_FREQ_FADD_R 20 +#define RANDOMX_FREQ_FADD_M 5 +#define RANDOMX_FREQ_FSUB_R 20 +#define RANDOMX_FREQ_FSUB_M 5 +#define RANDOMX_FREQ_FSCAL_R 6 +#define RANDOMX_FREQ_FMUL_R 20 +#define RANDOMX_FREQ_FDIV_M 4 +#define RANDOMX_FREQ_FSQRT_R 6 -#define RANDOMX_FREQ_COND_R 7 -#define RANDOMX_FREQ_COND_M 1 -#define RANDOMX_FREQ_CFROUND 1 -#define RANDOMX_FREQ_ISTORE 16 +#define RANDOMX_FREQ_COND_R 7 +#define RANDOMX_FREQ_COND_M 1 +#define RANDOMX_FREQ_CFROUND 1 +#define RANDOMX_FREQ_ISTORE 16 -#define RANDOMX_FREQ_NOP 0 +#define RANDOMX_FREQ_NOP 0 +/* ------ + 256 +*/ diff --git a/src/dataset.cpp b/src/dataset.cpp index b99ce8e..098a23c 100644 --- a/src/dataset.cpp +++ b/src/dataset.cpp @@ -21,6 +21,7 @@ along with RandomX. If not, see. #include #include #include +#include #include "common.hpp" #include "dataset.hpp" @@ -39,7 +40,7 @@ along with RandomX. If not, see. namespace RandomX { - void initBlock(const uint8_t* cache, uint8_t* out, uint32_t blockNumber) { + void initBlock(const Cache& cache, uint8_t* out, uint64_t blockNumber) { uint64_t c0, c1, c2, c3, c4, c5, c6, c7; c0 = 4ULL * blockNumber; @@ -48,7 +49,15 @@ namespace RandomX { constexpr uint32_t mask = (CacheSize - 1) & CacheLineAlignMask; for (auto i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) { - const uint8_t* mixBlock = cache + (c0 & mask); + const uint8_t* mixBlock; + if (RANDOMX_ARGON_GROWTH == 0) { + constexpr uint32_t mask = (RANDOMX_ARGON_MEMORY * ArgonBlockSize / CacheLineSize - 1); + mixBlock = cache.memory + (c0 & mask) * CacheLineSize; + } + else { + const uint32_t modulus = cache.size / CacheLineSize; + mixBlock = cache.memory + (c0 % modulus) * CacheLineSize; + } PREFETCHNTA(mixBlock); c0 = squareHash(c0); c0 ^= load64(mixBlock + 0); @@ -72,11 +81,11 @@ namespace RandomX { } void datasetRead(addr_t addr, MemoryRegisters& memory, RegisterFile& reg) { - uint64_t* datasetLine = (uint64_t*)(memory.ds.dataset + memory.ma); + uint64_t* datasetLine = (uint64_t*)(memory.ds.dataset.memory + memory.ma); memory.mx ^= addr; memory.mx &= -64; //align to cache line std::swap(memory.mx, memory.ma); - PREFETCHNTA(memory.ds.dataset + memory.ma); + PREFETCHNTA(memory.ds.dataset.memory + memory.ma); for (int i = 0; i < RegistersCount; ++i) reg.r[i] ^= datasetLine[i]; } @@ -84,9 +93,9 @@ namespace RandomX { void datasetReadLight(addr_t addr, MemoryRegisters& memory, int_reg_t (®)[RegistersCount]) { memory.mx ^= addr; memory.mx &= CacheLineAlignMask; //align to cache line - Cache* cache = memory.ds.cache; + Cache& cache = memory.ds.cache; uint64_t datasetLine[CacheLineSize / sizeof(uint64_t)]; - initBlock(cache->getCache(), (uint8_t*)datasetLine, memory.ma / CacheLineSize); + initBlock(cache, (uint8_t*)datasetLine, memory.ma / CacheLineSize); for (int i = 0; i < RegistersCount; ++i) reg[i] ^= datasetLine[i]; std::swap(memory.mx, memory.ma); @@ -103,28 +112,28 @@ namespace RandomX { aw->prepareBlock(memory.ma); } - void datasetAlloc(dataset_t& ds, uint64_t size, bool largePages) { - if (sizeof(size_t) <= 4) + void datasetAlloc(dataset_t& ds, bool largePages) { + if (std::numeric_limits::max() < RANDOMX_DATASET_SIZE) throw std::runtime_error("Platform doesn't support enough memory for the dataset"); if (largePages) { - ds.dataset = (uint8_t*)allocLargePagesMemory(size); + ds.dataset.memory = (uint8_t*)allocLargePagesMemory(ds.dataset.size); } else { - ds.dataset = (uint8_t*)_mm_malloc(size, 64); - if (ds.dataset == nullptr) { + ds.dataset.memory = (uint8_t*)_mm_malloc(ds.dataset.size, 64); + if (ds.dataset.memory == nullptr) { throw std::runtime_error("Dataset memory allocation failed. >4 GiB of free virtual memory is needed."); } } } - void datasetInit(Cache* cache, dataset_t ds, uint32_t startBlock, uint32_t blockCount) { - for (uint32_t i = startBlock; i < startBlock + blockCount; ++i) { - initBlock(cache->getCache(), ds.dataset + i * CacheLineSize, i); + void datasetInit(Cache& cache, Dataset& ds, uint32_t startBlock, uint32_t blockCount) { + for (uint64_t i = startBlock; i < startBlock + blockCount; ++i) { + initBlock(cache, ds.memory + i * CacheLineSize, i); } } void datasetInitCache(const void* seed, dataset_t& ds, bool largePages) { - ds.cache = new(Cache::alloc(largePages)) Cache(); - ds.cache->initialize(seed, SeedSize); + ds.cache.memory = allocCache(ds.cache.size, largePages); + argonFill(ds.cache, seed, SeedSize); } } diff --git a/src/dataset.hpp b/src/dataset.hpp index 6aba805..8ad47f7 100644 --- a/src/dataset.hpp +++ b/src/dataset.hpp @@ -25,14 +25,11 @@ along with RandomX. If not, see. namespace RandomX { - template - void initBlock(const uint8_t* in, uint8_t* out, uint32_t blockNumber); + void initBlock(const Cache& cache, uint8_t* out, uint64_t blockNumber); - void initBlock(const uint8_t* cache, uint8_t* block, uint32_t blockNumber); + void datasetAlloc(dataset_t& ds, bool largePages); - void datasetAlloc(dataset_t& ds, uint64_t size, bool largePages); - - void datasetInit(Cache* cache, dataset_t ds, uint32_t startBlock, uint32_t blockCount); + void datasetInit(Cache& cache, Dataset& ds, uint32_t startBlock, uint32_t blockCount); void datasetRead(addr_t addr, MemoryRegisters& memory, RegisterFile&); diff --git a/src/main.cpp b/src/main.cpp index f2df3fb..0d23841 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -203,7 +203,7 @@ void mine(RandomX::VirtualMachine* vm, std::atomic& atomicNonce, Atomi int main(int argc, char** argv) { bool softAes, genAsm, miningMode, verificationMode, help, largePages, async, genNative; - int programCount, threadCount, initThreadCount; + int programCount, threadCount, initThreadCount, epoch; readOption("--softAes", argc, argv, softAes); readOption("--genAsm", argc, argv, genAsm); @@ -212,6 +212,7 @@ int main(int argc, char** argv) { readIntOption("--threads", argc, argv, threadCount, 1); readIntOption("--nonces", argc, argv, programCount, 1000); readIntOption("--init", argc, argv, initThreadCount, 1); + readIntOption("--epoch", argc, argv, epoch, 0); readOption("--largePages", argc, argv, largePages); readOption("--async", argc, argv, async); readOption("--genNative", argc, argv, genNative); @@ -246,6 +247,9 @@ int main(int argc, char** argv) { std::vector vms; std::vector threads; RandomX::dataset_t dataset; + const uint64_t cacheSize = (RANDOMX_ARGON_MEMORY + RANDOMX_ARGON_GROWTH * epoch) * RandomX::ArgonBlockSize; + const uint64_t datasetSize = (RANDOMX_DATASET_SIZE + RANDOMX_DS_GROWTH * epoch); + dataset.cache.size = cacheSize; std::cout << "RandomX - " << (miningMode ? "mining" : "verification") << " mode" << std::endl; @@ -259,33 +263,34 @@ int main(int argc, char** argv) { RandomX::datasetInitCache(seed, dataset, largePages); if (RandomX::trace) { std::cout << "Cache: " << std::endl; - outputHex(std::cout, (char*)dataset.cache->getCache(), sizeof(__m128i)); + outputHex(std::cout, (char*)dataset.cache.memory, sizeof(__m128i)); std::cout << std::endl; } if (!miningMode) { - std::cout << "Cache (256 MiB) initialized in " << sw.getElapsed() << " s" << std::endl; + std::cout << "Cache (" << cacheSize << " bytes) initialized in " << sw.getElapsed() << " s" << std::endl; } else { - RandomX::Cache* cache = dataset.cache; - RandomX::datasetAlloc(dataset, RANDOMX_DATASET_SIZE, largePages); - const uint64_t datasetBlockCount = RANDOMX_DATASET_SIZE / RandomX::CacheLineSize; + auto cache = dataset.cache; + dataset.dataset.size = datasetSize; + RandomX::datasetAlloc(dataset, largePages); + const uint64_t datasetBlockCount = datasetSize / RandomX::CacheLineSize; if (initThreadCount > 1) { auto perThread = datasetBlockCount / initThreadCount; auto remainder = datasetBlockCount % initThreadCount; for (int i = 0; i < initThreadCount; ++i) { auto count = perThread + (i == initThreadCount - 1 ? remainder : 0); - threads.push_back(std::thread(&RandomX::datasetInit, cache, dataset, i * perThread, count)); + threads.push_back(std::thread(&RandomX::datasetInit, std::ref(cache), std::ref(dataset.dataset), i * perThread, count)); } for (unsigned i = 0; i < threads.size(); ++i) { threads[i].join(); } } else { - RandomX::datasetInit(cache, dataset, 0, datasetBlockCount); + RandomX::datasetInit(cache, dataset.dataset, 0, datasetBlockCount); } - RandomX::Cache::dealloc(cache, largePages); + RandomX::deallocCache(cache, largePages); threads.clear(); - std::cout << "Dataset (4 GiB) initialized in " << sw.getElapsed() << " s" << std::endl; + std::cout << "Dataset (" << datasetSize << " bytes) initialized in " << sw.getElapsed() << " s" << std::endl; } std::cout << "Initializing " << threadCount << " virtual machine(s) ..." << std::endl; for (int i = 0; i < threadCount; ++i) { @@ -296,7 +301,7 @@ int main(int argc, char** argv) { else { vm = new RandomX::InterpretedVirtualMachine(softAes, async); } - vm->setDataset(dataset); + vm->setDataset(dataset, datasetSize); vms.push_back(vm); } uint8_t* scratchpadMem; @@ -331,7 +336,7 @@ int main(int argc, char** argv) { std::cout << "Calculated result: "; result.print(std::cout); if(programCount == 1000) - std::cout << "Reference result: 128599cc10f9f6251e7917fa1d09ab2116ab4081bf1357149bd4054275dd8ee9" << std::endl; + std::cout << "Reference result: 9c28aa1b38c55233dfa8676838db77f2ed02415ea8f7052474ce8fcdee62dcc4" << std::endl; if (!miningMode) { std::cout << "Performance: " << 1000 * elapsed / programCount << " ms per hash" << std::endl; }