diff --git a/src/common.hpp b/src/common.hpp index 1cdc347..f7a6b1a 100644 --- a/src/common.hpp +++ b/src/common.hpp @@ -113,6 +113,10 @@ namespace randomx { typedef void(*DatasetReadFunc)(addr_t, MemoryRegisters&, int_reg_t(®)[RegistersCount]); typedef void(*ProgramFunc)(RegisterFile&, MemoryRegisters&, uint8_t* /* scratchpad */, uint64_t); typedef void(*DatasetInitFunc)(randomx_cache* cache, uint8_t* dataset, uint32_t startBlock, uint32_t endBlock); + + typedef void(*DatasetDeallocFunc)(randomx_dataset*); + typedef void(*CacheDeallocFunc)(randomx_cache*); + typedef void(*CacheInitializeFunc)(randomx_cache*, const void*, size_t); } std::ostream& operator<<(std::ostream& os, const randomx::RegisterFile& rf); diff --git a/src/dataset.cpp b/src/dataset.cpp index 0e292b8..8321797 100644 --- a/src/dataset.cpp +++ b/src/dataset.cpp @@ -40,126 +40,84 @@ along with RandomX. If not, see. #include "argon2.h" #include "argon2_core.h" -randomx_dataset::~randomx_dataset() { - -} - static_assert(RANDOMX_ARGON_MEMORY % (RANDOMX_ARGON_LANES * ARGON2_SYNC_POINTS) == 0, "RANDOMX_ARGON_MEMORY - invalid value"); static_assert(ARGON2_BLOCK_SIZE == randomx::ArgonBlockSize, "Unpexpected value of ARGON2_BLOCK_SIZE"); -void randomx_cache::initialize(const void *seed, size_t seedSize) { - uint32_t memory_blocks, segment_length; - argon2_instance_t instance; - argon2_context context; +namespace randomx { - context.out = nullptr; - context.outlen = 0; - context.pwd = CONST_CAST(uint8_t *)seed; - context.pwdlen = (uint32_t)seedSize; - context.salt = CONST_CAST(uint8_t *)RANDOMX_ARGON_SALT; - context.saltlen = (uint32_t)randomx::ArgonSaltSize; - context.secret = NULL; - context.secretlen = 0; - context.ad = NULL; - context.adlen = 0; - context.t_cost = RANDOMX_ARGON_ITERATIONS; - context.m_cost = RANDOMX_ARGON_MEMORY; - context.lanes = RANDOMX_ARGON_LANES; - context.threads = 1; - context.allocate_cbk = NULL; - context.free_cbk = NULL; - context.flags = ARGON2_DEFAULT_FLAGS; - context.version = ARGON2_VERSION_NUMBER; + void initCache(randomx_cache* cache, const void* seed, size_t seedSize) { + uint32_t memory_blocks, segment_length; + argon2_instance_t instance; + argon2_context context; - /* 2. Align memory size */ - /* Minimum memory_blocks = 8L blocks, where L is the number of lanes */ - memory_blocks = context.m_cost; + context.out = nullptr; + context.outlen = 0; + context.pwd = CONST_CAST(uint8_t *)seed; + context.pwdlen = (uint32_t)seedSize; + context.salt = CONST_CAST(uint8_t *)RANDOMX_ARGON_SALT; + context.saltlen = (uint32_t)randomx::ArgonSaltSize; + context.secret = NULL; + context.secretlen = 0; + context.ad = NULL; + context.adlen = 0; + context.t_cost = RANDOMX_ARGON_ITERATIONS; + context.m_cost = RANDOMX_ARGON_MEMORY; + context.lanes = RANDOMX_ARGON_LANES; + context.threads = 1; + context.allocate_cbk = NULL; + context.free_cbk = NULL; + context.flags = ARGON2_DEFAULT_FLAGS; + context.version = ARGON2_VERSION_NUMBER; - segment_length = memory_blocks / (context.lanes * ARGON2_SYNC_POINTS); + /* 2. Align memory size */ + /* Minimum memory_blocks = 8L blocks, where L is the number of lanes */ + memory_blocks = context.m_cost; - instance.version = context.version; - instance.memory = NULL; - instance.passes = context.t_cost; - instance.memory_blocks = memory_blocks; - instance.segment_length = segment_length; - instance.lane_length = segment_length * ARGON2_SYNC_POINTS; - instance.lanes = context.lanes; - instance.threads = context.threads; - instance.type = Argon2_d; - instance.memory = (block*)memory; + segment_length = memory_blocks / (context.lanes * ARGON2_SYNC_POINTS); - if (instance.threads > instance.lanes) { - instance.threads = instance.lanes; - } + instance.version = context.version; + instance.memory = NULL; + instance.passes = context.t_cost; + instance.memory_blocks = memory_blocks; + instance.segment_length = segment_length; + instance.lane_length = segment_length * ARGON2_SYNC_POINTS; + instance.lanes = context.lanes; + instance.threads = context.threads; + instance.type = Argon2_d; + instance.memory = (block*)cache->memory; - /* 3. Initialization: Hashing inputs, allocating memory, filling first - * blocks - */ - argon_initialize(&instance, &context); + if (instance.threads > instance.lanes) { + instance.threads = instance.lanes; + } - fill_memory_blocks(&instance); + /* 3. Initialization: Hashing inputs, allocating memory, filling first + * blocks + */ + argon_initialize(&instance, &context); - reciprocalCache.clear(); - randomx::Blake2Generator gen(seed, seedSize); - for (int i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) { - randomx::generateSuperscalar(programs[i], gen); - for (unsigned j = 0; j < programs[i].getSize(); ++j) { - auto& instr = programs[i](j); - if (instr.opcode == randomx::SuperscalarInstructionType::IMUL_RCP) { - auto rcp = randomx_reciprocal(instr.getImm32()); - instr.setImm32(reciprocalCache.size()); - reciprocalCache.push_back(rcp); + fill_memory_blocks(&instance); + + cache->reciprocalCache.clear(); + randomx::Blake2Generator gen(seed, seedSize); + for (int i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) { + randomx::generateSuperscalar(cache->programs[i], gen); + for (unsigned j = 0; j < cache->programs[i].getSize(); ++j) { + auto& instr = cache->programs[i](j); + if (instr.opcode == randomx::SuperscalarInstructionType::IMUL_RCP) { + auto rcp = randomx_reciprocal(instr.getImm32()); + instr.setImm32(cache->reciprocalCache.size()); + cache->reciprocalCache.push_back(rcp); + } } } } -} -namespace randomx { - - template - void Dataset::allocate() { - memory = (uint8_t*)Allocator::allocMemory(DatasetSize); + void initCacheCompile(randomx_cache* cache, const void* seed, size_t seedSize) { + initCache(cache, seed, seedSize); + cache->jit->generateSuperscalarHash(cache->programs, cache->reciprocalCache); + cache->jit->generateDatasetInitCode(); } - template - Dataset::~Dataset() { - Allocator::freeMemory(memory, DatasetSize); - } - - template - void Cache::allocate() { - memory = (uint8_t*)Allocator::allocMemory(CacheSize); - } - - template - Cache::~Cache() { - Allocator::freeMemory(memory, CacheSize); - } - - template - DatasetInitFunc Cache::getInitFunc() { - return &initDataset; - } - - template - DatasetInitFunc CacheWithJit::getInitFunc() { - return jit.getDatasetInitFunc(); - } - - template - void CacheWithJit::initialize(const void *seed, size_t seedSize) { - randomx_cache::initialize(seed, seedSize); - jit.generateSuperscalarHash(programs, reciprocalCache); - jit.generateDatasetInitCode(); - } - - template class Dataset>; - template class Dataset; - template class Cache>; - template class Cache; - template class CacheWithJit>; - template class CacheWithJit; - constexpr uint64_t superscalarMul0 = 6364136223846793005ULL; constexpr uint64_t superscalarAdd1 = 9298411001130361340ULL; constexpr uint64_t superscalarAdd2 = 12065312585734608966ULL; diff --git a/src/dataset.hpp b/src/dataset.hpp index a3c325a..4e072ff 100644 --- a/src/dataset.hpp +++ b/src/dataset.hpp @@ -21,6 +21,7 @@ along with RandomX. If not, see. #include #include +#include #include "common.hpp" #include "superscalar_program.hpp" #include "jit_compiler_x86.hpp" @@ -28,51 +29,45 @@ along with RandomX. If not, see. /* Global scope for C binding */ struct randomx_dataset { - virtual ~randomx_dataset() = 0; - virtual void allocate() = 0; uint8_t* memory = nullptr; + randomx::DatasetDeallocFunc dealloc; }; /* Global scope for C binding */ -struct randomx_cache : public randomx_dataset { - virtual randomx::DatasetInitFunc getInitFunc() = 0; - virtual void initialize(const void *seed, size_t seedSize); +struct randomx_cache { + uint8_t* memory = nullptr; + randomx::CacheDeallocFunc dealloc; + randomx::JitCompilerX86* jit; + randomx::CacheInitializeFunc initialize; + randomx::DatasetInitFunc datasetInit; randomx::SuperscalarProgram programs[RANDOMX_CACHE_ACCESSES]; std::vector reciprocalCache; }; +//A pointer to a standard-layout struct object points to its initial member +static_assert(std::is_standard_layout(), "randomx_dataset must be a standard-layout struct"); +static_assert(std::is_standard_layout(), "randomx_cache must be a standard-layout struct"); + namespace randomx { - template - struct Dataset : public randomx_dataset { - ~Dataset() override; - void allocate() override; - }; - - using DatasetDefault = Dataset>; - using DatasetLargePage = Dataset; + using DefaultAllocator = AlignedAllocator; template - struct Cache : public randomx_cache { - ~Cache() override; - void allocate() override; - DatasetInitFunc getInitFunc() override; - }; + void deallocDataset(randomx_dataset* dataset) { + if (dataset->memory != nullptr) + Allocator::freeMemory(dataset->memory, DatasetSize); + } template - struct CacheWithJit : public Cache { - using Cache::programs; - using Cache::reciprocalCache; - void initialize(const void *seed, size_t seedSize) override; - DatasetInitFunc getInitFunc() override; - JitCompilerX86 jit; - }; - - using CacheDefault = Cache>; - using CacheWithJitDefault = CacheWithJit>; - using CacheLargePage = Cache; - using CacheWithJitLargePage = CacheWithJit; + void deallocCache(randomx_cache* cache) { + if(cache->memory != nullptr) + Allocator::freeMemory(cache->memory, CacheSize); + if (cache->jit != nullptr) + delete cache->jit; + } + void initCache(randomx_cache*, const void*, size_t); + void initCacheCompile(randomx_cache*, const void*, size_t); void initDatasetItem(randomx_cache* cache, uint8_t* out, uint64_t blockNumber); void initDataset(randomx_cache* cache, uint8_t* dataset, uint32_t startBlock, uint32_t endBlock); } diff --git a/src/instruction.hpp b/src/instruction.hpp index 966a389..0dc382f 100644 --- a/src/instruction.hpp +++ b/src/instruction.hpp @@ -21,6 +21,7 @@ along with RandomX. If not, see. #include #include +#include #include "blake2/endian.h" namespace randomx { @@ -93,10 +94,9 @@ namespace randomx { uint8_t opcode; uint8_t dst; uint8_t src; - private: uint8_t mod; uint32_t imm32; - + private: void print(std::ostream&) const; static const char* names[256]; static InstructionFormatter engine[256]; @@ -136,5 +136,5 @@ namespace randomx { }; static_assert(sizeof(Instruction) == 8, "Invalid size of struct randomx::Instruction"); - + static_assert(std::is_standard_layout(), "randomx::Instruction must be a standard-layout struct"); } \ No newline at end of file diff --git a/src/jit_compiler_x86_static.S b/src/jit_compiler_x86_static.S index bd3ff1e..3b8e82e 100644 --- a/src/jit_compiler_x86_static.S +++ b/src/jit_compiler_x86_static.S @@ -89,7 +89,7 @@ DECL(randomx_dataset_init): push r13 push r14 push r15 - mov rdi, qword ptr [rdi+8] ;# after virtual method table pointer + mov rdi, qword ptr [rdi] ;# cache->memory ;# dataset in rsi mov rbp, rdx ;# block index push rcx ;# max. block index diff --git a/src/jit_compiler_x86_static.asm b/src/jit_compiler_x86_static.asm index de4dbb8..3153a8f 100644 --- a/src/jit_compiler_x86_static.asm +++ b/src/jit_compiler_x86_static.asm @@ -92,7 +92,7 @@ randomx_dataset_init PROC push r13 push r14 push r15 - mov rdi, qword ptr [rcx+8] ;# after virtual method table pointer + mov rdi, qword ptr [rcx] ;# cache->memory mov rsi, rdx ;# dataset mov rbp, r8 ;# block index push r9 ;# max. block index diff --git a/src/randomx.cpp b/src/randomx.cpp index e126687..a80d727 100644 --- a/src/randomx.cpp +++ b/src/randomx.cpp @@ -28,34 +28,48 @@ along with RandomX. If not, see. extern "C" { randomx_cache *randomx_alloc_cache(randomx_flags flags) { - randomx_cache *cache = nullptr; + randomx_cache *cache = new randomx_cache(); try { switch (flags & (RANDOMX_FLAG_JIT | RANDOMX_FLAG_LARGE_PAGES)) { case RANDOMX_FLAG_DEFAULT: - cache = new randomx::CacheDefault(); + cache->dealloc = &randomx::deallocCache; + cache->jit = nullptr; + cache->initialize = &randomx::initCache; + cache->datasetInit = &randomx::initDataset; + cache->memory = (uint8_t*)randomx::DefaultAllocator::allocMemory(randomx::CacheSize); break; case RANDOMX_FLAG_JIT: - cache = new randomx::CacheWithJitDefault(); + cache->dealloc = &randomx::deallocCache; + cache->jit = new randomx::JitCompilerX86(); + cache->initialize = &randomx::initCacheCompile; + cache->datasetInit = cache->jit->getDatasetInitFunc(); + cache->memory = (uint8_t*)randomx::DefaultAllocator::allocMemory(randomx::CacheSize); break; case RANDOMX_FLAG_LARGE_PAGES: - cache = new randomx::CacheLargePage(); + cache->dealloc = &randomx::deallocCache; + cache->jit = nullptr; + cache->initialize = &randomx::initCache; + cache->datasetInit = &randomx::initDataset; + cache->memory = (uint8_t*)randomx::LargePageAllocator::allocMemory(randomx::CacheSize); break; case RANDOMX_FLAG_JIT | RANDOMX_FLAG_LARGE_PAGES: - cache = new randomx::CacheWithJitLargePage(); + cache->dealloc = &randomx::deallocCache; + cache->jit = new randomx::JitCompilerX86(); + cache->initialize = &randomx::initCacheCompile; + cache->datasetInit = cache->jit->getDatasetInitFunc(); + cache->memory = (uint8_t*)randomx::LargePageAllocator::allocMemory(randomx::CacheSize); break; default: UNREACHABLE; } - - cache->allocate(); } catch (std::exception &ex) { - delete cache; + randomx_release_cache(cache); cache = nullptr; } @@ -63,27 +77,29 @@ extern "C" { } void randomx_init_cache(randomx_cache *cache, const void *seed, size_t seedSize) { - cache->initialize(seed, seedSize); + cache->initialize(cache, seed, seedSize); } void randomx_release_cache(randomx_cache* cache) { + cache->dealloc(cache); delete cache; } randomx_dataset *randomx_alloc_dataset(randomx_flags flags) { - randomx_dataset *dataset = nullptr; + randomx_dataset *dataset = new randomx_dataset(); try { if (flags & RANDOMX_FLAG_LARGE_PAGES) { - dataset = new randomx::DatasetLargePage(); + dataset->dealloc = &randomx::deallocDataset; + dataset->memory = (uint8_t*)randomx::LargePageAllocator::allocMemory(randomx::DatasetSize); } else { - dataset = new randomx::DatasetDefault(); + dataset->dealloc = &randomx::deallocDataset; + dataset->memory = (uint8_t*)randomx::DefaultAllocator::allocMemory(randomx::DatasetSize); } - dataset->allocate(); } catch (std::exception &ex) { - delete dataset; + randomx_release_dataset(dataset); dataset = nullptr; } @@ -95,8 +111,7 @@ extern "C" { } void randomx_init_dataset(randomx_dataset *dataset, randomx_cache *cache, unsigned long startItem, unsigned long itemCount) { - randomx::DatasetInitFunc dsfunc = cache->getInitFunc(); - dsfunc(cache, dataset->memory + startItem * randomx::CacheLineSize, startItem, startItem + itemCount); + cache->datasetInit(cache, dataset->memory + startItem * randomx::CacheLineSize, startItem, startItem + itemCount); } void *randomx_get_dataset_memory(randomx_dataset *dataset) { @@ -104,6 +119,7 @@ extern "C" { } void randomx_release_dataset(randomx_dataset *dataset) { + dataset->dealloc(dataset); delete dataset; } diff --git a/src/superscalar_program.hpp b/src/superscalar_program.hpp index 5d83a39..4f4e88f 100644 --- a/src/superscalar_program.hpp +++ b/src/superscalar_program.hpp @@ -46,6 +46,10 @@ namespace randomx { void setAddressRegister(uint32_t val) { addrReg = val; } + + Instruction programBuffer[RANDOMX_SUPERSCALAR_MAX_SIZE]; + uint32_t size; + int addrReg; double ipc; int codeSize; int macroOps; @@ -62,9 +66,6 @@ namespace randomx { os << instr; } } - Instruction programBuffer[RANDOMX_SUPERSCALAR_MAX_SIZE]; - uint32_t size; - int addrReg; }; } \ No newline at end of file diff --git a/src/tests/benchmark.cpp b/src/tests/benchmark.cpp index 3ea5525..e5014db 100644 --- a/src/tests/benchmark.cpp +++ b/src/tests/benchmark.cpp @@ -165,15 +165,13 @@ int main(int argc, char** argv) { Stopwatch sw(true); cache = randomx_alloc_cache(flags); if (cache == nullptr) { - std::cout << "ERROR: Cache allocation failed" << std::endl; - return 1; + throw std::runtime_error("Cache allocation failed"); } randomx_init_cache(cache, &seed, sizeof(seed)); if (miningMode) { dataset = randomx_alloc_dataset(flags); if (dataset == nullptr) { - std::cout << "ERROR: Dataset allocation failed" << std::endl; - return 1; + throw std::runtime_error("Dataset allocation failed"); } uint32_t datasetItemCount = randomx_dataset_item_count(); if (initThreadCount > 1) { @@ -200,8 +198,7 @@ int main(int argc, char** argv) { for (int i = 0; i < threadCount; ++i) { randomx_vm *vm = randomx_create_vm(flags, cache, dataset); if (vm == nullptr) { - std::cout << "ERROR: Unsupported virtual machine options" << std::endl; - return 1; + throw std::runtime_error("Unsupported virtual machine options"); } vms.push_back(vm); } @@ -221,7 +218,14 @@ int main(int argc, char** argv) { else { mine(vms[0], std::ref(atomicNonce), std::ref(result), noncesCount, 0); } + double elapsed = sw.getElapsed(); + for (unsigned i = 0; i < vms.size(); ++i) + randomx_destroy_vm(vms[i]); + if (miningMode) + randomx_release_dataset(dataset); + else + randomx_release_cache(cache); std::cout << "Calculated result: "; result.print(std::cout); if (noncesCount == 1000 && seedValue == 0) diff --git a/src/virtual_machine.cpp b/src/virtual_machine.cpp index 3881685..e97fad7 100644 --- a/src/virtual_machine.cpp +++ b/src/virtual_machine.cpp @@ -103,7 +103,7 @@ namespace randomx { template void VmBase::allocate() { - if (mem.memory == nullptr) + if (datasetPtr == nullptr) throw std::invalid_argument("Cache/Dataset not set"); if (!softAes) { //if hardware AES is not supported, it's better to fail now than to return a ticking bomb __m128i tmp = _mm_load_si128((const __m128i*)&aesDummy); diff --git a/src/virtual_machine.hpp b/src/virtual_machine.hpp index 69910b7..40e48e3 100644 --- a/src/virtual_machine.hpp +++ b/src/virtual_machine.hpp @@ -44,7 +44,10 @@ protected: alignas(16) randomx::ProgramConfiguration config; randomx::MemoryRegisters mem; uint8_t* scratchpad; - uint8_t* datasetBasePtr; + union { + randomx_cache* cachePtr = nullptr; + randomx_dataset* datasetPtr; + }; uint32_t datasetOffset; }; diff --git a/src/vm_compiled.cpp b/src/vm_compiled.cpp index 25232cb..a58901c 100644 --- a/src/vm_compiled.cpp +++ b/src/vm_compiled.cpp @@ -27,8 +27,7 @@ namespace randomx { template void CompiledVm::setDataset(randomx_dataset* dataset) { - mem.memory = dataset->memory; - datasetBasePtr = dataset->memory; + datasetPtr = dataset; } template @@ -36,7 +35,7 @@ namespace randomx { VmBase::generateProgram(seed); randomx_vm::initialize(); compiler.generateProgram(program, config); - mem.memory = datasetBasePtr + datasetOffset; + mem.memory = datasetPtr->memory + datasetOffset; execute(); } diff --git a/src/vm_compiled.hpp b/src/vm_compiled.hpp index f39b90a..e780d60 100644 --- a/src/vm_compiled.hpp +++ b/src/vm_compiled.hpp @@ -48,7 +48,7 @@ namespace randomx { using VmBase::config; using VmBase::reg; using VmBase::scratchpad; - using VmBase::datasetBasePtr; + using VmBase::datasetPtr; using VmBase::datasetOffset; protected: void execute(); diff --git a/src/vm_compiled_light.cpp b/src/vm_compiled_light.cpp index 2cec985..bf4d46a 100644 --- a/src/vm_compiled_light.cpp +++ b/src/vm_compiled_light.cpp @@ -25,6 +25,7 @@ namespace randomx { template void CompiledLightVm::setCache(randomx_cache* cache) { + cachePtr = cache; mem.memory = cache->memory; compiler.generateSuperscalarHash(cache->programs, cache->reciprocalCache); } diff --git a/src/vm_compiled_light.hpp b/src/vm_compiled_light.hpp index 1ac3629..eaf23d2 100644 --- a/src/vm_compiled_light.hpp +++ b/src/vm_compiled_light.hpp @@ -44,6 +44,7 @@ namespace randomx { using CompiledVm::compiler; using CompiledVm::program; using CompiledVm::config; + using CompiledVm::cachePtr; using CompiledVm::datasetOffset; }; diff --git a/src/vm_interpreted.cpp b/src/vm_interpreted.cpp index b508f3d..a5bba0f 100644 --- a/src/vm_interpreted.cpp +++ b/src/vm_interpreted.cpp @@ -45,6 +45,7 @@ namespace randomx { template void InterpretedVm::setDataset(randomx_dataset* dataset) { + datasetPtr = dataset; mem.memory = dataset->memory; } diff --git a/src/vm_interpreted.hpp b/src/vm_interpreted.hpp index 4b08b18..8a15785 100644 --- a/src/vm_interpreted.hpp +++ b/src/vm_interpreted.hpp @@ -57,7 +57,7 @@ namespace randomx { using VmBase::program; using VmBase::config; using VmBase::reg; - using VmBase::datasetBasePtr; + using VmBase::datasetPtr; using VmBase::datasetOffset; void* operator new(size_t size) { void* ptr = AlignedAllocator::allocMemory(size); diff --git a/src/vm_interpreted_light.cpp b/src/vm_interpreted_light.cpp index 06757d5..f15aac9 100644 --- a/src/vm_interpreted_light.cpp +++ b/src/vm_interpreted_light.cpp @@ -24,8 +24,8 @@ namespace randomx { template void InterpretedLightVm::setCache(randomx_cache* cache) { - mem.memory = cache->memory; cachePtr = cache; + mem.memory = cache->memory; } template diff --git a/src/vm_interpreted_light.hpp b/src/vm_interpreted_light.hpp index 9e68fee..735f48c 100644 --- a/src/vm_interpreted_light.hpp +++ b/src/vm_interpreted_light.hpp @@ -28,6 +28,7 @@ namespace randomx { class InterpretedLightVm : public InterpretedVm { public: using VmBase::mem; + using VmBase::cachePtr; void* operator new(size_t size) { void* ptr = AlignedAllocator::allocMemory(size); if (ptr == nullptr) @@ -41,8 +42,6 @@ namespace randomx { void setCache(randomx_cache* cache) override; protected: void datasetRead(uint32_t address, int_reg_t(&r)[8]) override; - private: - randomx_cache* cachePtr; }; using InterpretedLightVmDefault = InterpretedLightVm, true>;