From e65d9da66c35ee7b1e963a3391275e812bbf3412 Mon Sep 17 00:00:00 2001 From: tevador Date: Fri, 8 Mar 2019 15:34:34 +0100 Subject: [PATCH] Configurable parameters separated into configuration.h --- src/AssemblyGeneratorX86.cpp | 7 +-- src/Cache.cpp | 10 +-- src/CompiledVirtualMachine.cpp | 5 +- src/CompiledVirtualMachine.hpp | 4 ++ src/Instruction.cpp | 9 --- src/InterpretedVirtualMachine.cpp | 25 ++------ src/InterpretedVirtualMachine.hpp | 2 +- src/JitCompilerX86.cpp | 6 +- src/Program.cpp | 2 +- src/Program.hpp | 2 +- src/common.hpp | 71 ++++++++++----------- src/configuration.h | 101 ++++++++++++++++++++++++++++++ src/dataset.cpp | 8 +-- src/dataset.hpp | 2 +- src/instructionWeights.hpp | 63 +------------------ src/main.cpp | 35 ++++++----- 16 files changed, 182 insertions(+), 170 deletions(-) create mode 100644 src/configuration.h diff --git a/src/AssemblyGeneratorX86.cpp b/src/AssemblyGeneratorX86.cpp index efc1d77..5c91f14 100644 --- a/src/AssemblyGeneratorX86.cpp +++ b/src/AssemblyGeneratorX86.cpp @@ -47,7 +47,7 @@ namespace RandomX { void AssemblyGeneratorX86::generateProgram(Program& prog) { asmCode.str(std::string()); //clear - for (unsigned i = 0; i < ProgramLength; ++i) { + for (unsigned i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) { Instruction& instr = prog(i); instr.src %= RegistersCount; instr.dst %= RegistersCount; @@ -491,7 +491,6 @@ namespace RandomX { INST_HANDLE(ISMULH_R) INST_HANDLE(ISMULH_M) INST_HANDLE(IMUL_RCP) - INST_HANDLE(ISDIV_C) INST_HANDLE(INEG_R) INST_HANDLE(IXOR_R) INST_HANDLE(IXOR_M) @@ -511,8 +510,6 @@ namespace RandomX { //Floating point group E INST_HANDLE(FMUL_R) - INST_HANDLE(FMUL_M) - INST_HANDLE(FDIV_R) INST_HANDLE(FDIV_M) INST_HANDLE(FSQRT_R) @@ -520,9 +517,7 @@ namespace RandomX { INST_HANDLE(COND_R) INST_HANDLE(COND_M) INST_HANDLE(CFROUND) - INST_HANDLE(ISTORE) - INST_HANDLE(FSTORE) INST_HANDLE(NOP) }; diff --git a/src/Cache.cpp b/src/Cache.cpp index 60b7755..dece1e4 100644 --- a/src/Cache.cpp +++ b/src/Cache.cpp @@ -24,7 +24,7 
@@ along with RandomX. If not, see. namespace RandomX { - static_assert(ArgonMemorySize % (ArgonLanes * ARGON2_SYNC_POINTS) == 0, "ArgonMemorySize - invalid value"); + static_assert(RANDOMX_ARGON_MEMORY % (RANDOMX_ARGON_LANES * ARGON2_SYNC_POINTS) == 0, "RANDOMX_ARGON_MEMORY - invalid value"); void Cache::argonFill(const void* seed, size_t seedSize) { uint32_t memory_blocks, segment_length; @@ -35,15 +35,15 @@ namespace RandomX { context.outlen = 0; context.pwd = CONST_CAST(uint8_t *)seed; context.pwdlen = (uint32_t)seedSize; - context.salt = CONST_CAST(uint8_t *)ArgonSalt; + context.salt = CONST_CAST(uint8_t *)RANDOMX_ARGON_SALT; context.saltlen = (uint32_t)ArgonSaltSize; context.secret = NULL; context.secretlen = 0; context.ad = NULL; context.adlen = 0; - context.t_cost = ArgonIterations; - context.m_cost = ArgonMemorySize; - context.lanes = ArgonLanes; + context.t_cost = RANDOMX_ARGON_ITERATIONS; + context.m_cost = RANDOMX_ARGON_MEMORY; + context.lanes = RANDOMX_ARGON_LANES; context.threads = 1; context.allocate_cbk = NULL; context.free_cbk = NULL; diff --git a/src/CompiledVirtualMachine.cpp b/src/CompiledVirtualMachine.cpp index 7abf422..e6b9316 100644 --- a/src/CompiledVirtualMachine.cpp +++ b/src/CompiledVirtualMachine.cpp @@ -23,6 +23,9 @@ along with RandomX. If not, see. 
namespace RandomX { + static_assert(sizeof(MemoryRegisters) == 2 * sizeof(addr_t) + sizeof(uintptr_t), "Invalid alignment of struct RandomX::MemoryRegisters"); + static_assert(sizeof(RegisterFile) == 256, "Invalid alignment of struct RandomX::RegisterFile"); + CompiledVirtualMachine::CompiledVirtualMachine() { totalSize = 0; } @@ -39,7 +42,7 @@ namespace RandomX { void CompiledVirtualMachine::execute() { //executeProgram(reg, mem, scratchpad, InstructionCount); //totalSize += compiler.getCodeSize(); - compiler.getProgramFunc()(reg, mem, scratchpad, InstructionCount); + compiler.getProgramFunc()(reg, mem, scratchpad, RANDOMX_PROGRAM_ITERATIONS); #ifdef TRACEVM for (int32_t i = InstructionCount - 1; i >= 0; --i) { std::cout << std::hex << tracepad[i].u64 << std::endl; diff --git a/src/CompiledVirtualMachine.hpp b/src/CompiledVirtualMachine.hpp index e3b6bf0..3837589 100644 --- a/src/CompiledVirtualMachine.hpp +++ b/src/CompiledVirtualMachine.hpp @@ -26,6 +26,10 @@ along with RandomX. If not, see. 
namespace RandomX { + extern "C" { + void executeProgram(RegisterFile&, MemoryRegisters&, uint8_t* /* scratchpad */, uint64_t); + } + class CompiledVirtualMachine : public VirtualMachine { public: void* operator new(size_t size) { diff --git a/src/Instruction.cpp b/src/Instruction.cpp index 205aaaa..833a291 100644 --- a/src/Instruction.cpp +++ b/src/Instruction.cpp @@ -346,12 +346,10 @@ namespace RandomX { INST_NAME(ISMULH_R) INST_NAME(ISMULH_M) INST_NAME(IMUL_RCP) - INST_NAME(ISDIV_C) INST_NAME(INEG_R) INST_NAME(IXOR_R) INST_NAME(IXOR_M) INST_NAME(IROR_R) - INST_NAME(IROL_R) INST_NAME(ISWAP_R) //Common floating point @@ -366,8 +364,6 @@ namespace RandomX { //Floating point group E INST_NAME(FMUL_R) - INST_NAME(FMUL_M) - INST_NAME(FDIV_R) INST_NAME(FDIV_M) INST_NAME(FSQRT_R) @@ -377,7 +373,6 @@ namespace RandomX { INST_NAME(CFROUND) INST_NAME(ISTORE) - INST_NAME(FSTORE) INST_NAME(NOP) }; @@ -397,7 +392,6 @@ namespace RandomX { INST_HANDLE(ISMULH_R) INST_HANDLE(ISMULH_M) INST_HANDLE(IMUL_RCP) - INST_HANDLE(ISDIV_C) INST_HANDLE(INEG_R) INST_HANDLE(IXOR_R) INST_HANDLE(IXOR_M) @@ -417,8 +411,6 @@ namespace RandomX { //Floating point group E INST_HANDLE(FMUL_R) - INST_HANDLE(FMUL_M) - INST_HANDLE(FDIV_R) INST_HANDLE(FDIV_M) INST_HANDLE(FSQRT_R) @@ -428,7 +420,6 @@ namespace RandomX { INST_HANDLE(CFROUND) INST_HANDLE(ISTORE) - INST_HANDLE(FSTORE) INST_HANDLE(NOP) }; diff --git a/src/InterpretedVirtualMachine.cpp b/src/InterpretedVirtualMachine.cpp index 08d4536..2dd7c8e 100644 --- a/src/InterpretedVirtualMachine.cpp +++ b/src/InterpretedVirtualMachine.cpp @@ -68,7 +68,7 @@ namespace RandomX { void InterpretedVirtualMachine::initialize() { VirtualMachine::initialize(); - for (unsigned i = 0; i < ProgramLength; ++i) { + for (unsigned i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) { program(i).src %= RegistersCount; program(i).dst %= RegistersCount; } @@ -81,7 +81,7 @@ namespace RandomX { } template<> - void InterpretedVirtualMachine::executeBytecode(int_reg_t(&r)[8], __m128d 
(&f)[4], __m128d (&e)[4], __m128d (&a)[4]) { + void InterpretedVirtualMachine::executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) { } static void print(int_reg_t r) { @@ -299,7 +299,7 @@ namespace RandomX { printState(r, f, e, a); } - for(unsigned iter = 0; iter < InstructionCount; ++iter) { + for(unsigned ic = 0; ic < RANDOMX_PROGRAM_ITERATIONS; ++ic) { //std::cout << "Iteration " << iter << std::endl; uint64_t spMix = r[readReg0] ^ r[readReg1]; spAddr0 ^= spMix; @@ -326,7 +326,7 @@ namespace RandomX { e[3] = ieee_set_exponent<-240>(load_cvt_i32x2(scratchpad + spAddr1 + 56)); if (trace) { - std::cout << "iteration " << std::dec << iter << std::endl; + std::cout << "iteration " << std::dec << ic << std::endl; std::cout << "spAddr " << std::hex << std::setw(8) << std::setfill('0') << spAddr1 << " / " << std::setw(8) << std::setfill('0') << spAddr0 << std::endl; std::cout << "ma/mx " << std::hex << std::setw(8) << std::setfill('0') << mem.ma << std::setw(8) << std::setfill('0') << mem.mx << std::endl; printState(r, f, e, a); @@ -357,7 +357,7 @@ namespace RandomX { } if (trace) { - std::cout << "iteration " << std::dec << iter << std::endl; + std::cout << "iteration " << std::dec << ic << std::endl; std::cout << "spAddr " << std::hex << std::setw(8) << std::setfill('0') << spAddr1 << " / " << std::setw(8) << std::setfill('0') << spAddr0 << std::endl; std::cout << "ma/mx " << std::hex << std::setw(8) << std::setfill('0') << mem.ma << std::setw(8) << std::setfill('0') << mem.mx << std::endl; printState(r, f, e, a); @@ -421,7 +421,7 @@ namespace RandomX { #include "instructionWeights.hpp" void InterpretedVirtualMachine::precompileProgram(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) { - for (unsigned i = 0; i < ProgramLength; ++i) { + for (unsigned i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) { auto& instr = program(i); auto& ibc = byteCode[i]; switch (instr.opcode) { @@ -593,10 +593,6 @@ namespace RandomX { } } break; - 
CASE_REP(ISDIV_C) { - ibc.type = InstructionType::NOP; - } break; - CASE_REP(INEG_R) { auto dst = instr.dst % RegistersCount; ibc.type = InstructionType::INEG_R; @@ -731,12 +727,6 @@ namespace RandomX { ibc.fsrc = &a[src]; } break; - CASE_REP(FMUL_M) { - } break; - - CASE_REP(FDIV_R) { - } break; - CASE_REP(FDIV_M) { auto dst = instr.dst % 4; auto src = instr.src % 8; @@ -789,9 +779,6 @@ namespace RandomX { ibc.memMask = ((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask); } break; - CASE_REP(FSTORE) { - } break; - CASE_REP(NOP) { ibc.type = InstructionType::NOP; } break; diff --git a/src/InterpretedVirtualMachine.hpp b/src/InterpretedVirtualMachine.hpp index f29c20d..7ade73f 100644 --- a/src/InterpretedVirtualMachine.hpp +++ b/src/InterpretedVirtualMachine.hpp @@ -79,7 +79,7 @@ namespace RandomX { static InstructionHandler engine[256]; DatasetReadFunc readDataset; bool softAes, asyncWorker; - InstructionByteCode byteCode[ProgramLength]; + InstructionByteCode byteCode[RANDOMX_PROGRAM_SIZE]; #ifdef STATS int count_ADD_64 = 0; diff --git a/src/JitCompilerX86.cpp b/src/JitCompilerX86.cpp index f26d33a..4e27cd1 100644 --- a/src/JitCompilerX86.cpp +++ b/src/JitCompilerX86.cpp @@ -195,7 +195,7 @@ namespace RandomX { emitByte(0xc0 + readReg1); memcpy(code + codePos, codeLoopLoad, loopLoadSize); codePos += loopLoadSize; - for (unsigned i = 0; i < ProgramLength; ++i) { + for (unsigned i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) { Instruction& instr = prog(i); instr.src %= RegistersCount; instr.dst %= RegistersCount; @@ -658,7 +658,6 @@ namespace RandomX { INST_HANDLE(ISMULH_R) INST_HANDLE(ISMULH_M) INST_HANDLE(IMUL_RCP) - INST_HANDLE(ISDIV_C) INST_HANDLE(INEG_R) INST_HANDLE(IXOR_R) INST_HANDLE(IXOR_M) @@ -672,15 +671,12 @@ namespace RandomX { INST_HANDLE(FSUB_M) INST_HANDLE(FSCAL_R) INST_HANDLE(FMUL_R) - INST_HANDLE(FMUL_M) - INST_HANDLE(FDIV_R) INST_HANDLE(FDIV_M) INST_HANDLE(FSQRT_R) INST_HANDLE(COND_R) INST_HANDLE(COND_M) INST_HANDLE(CFROUND) INST_HANDLE(ISTORE) - 
INST_HANDLE(FSTORE) INST_HANDLE(NOP) }; diff --git a/src/Program.cpp b/src/Program.cpp index bb4e086..ebd271d 100644 --- a/src/Program.cpp +++ b/src/Program.cpp @@ -22,7 +22,7 @@ along with RandomX. If not, see. namespace RandomX { void Program::print(std::ostream& os) const { - for (int i = 0; i < RandomX::ProgramLength; ++i) { + for (int i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) { auto instr = programBuffer[i]; os << instr; } diff --git a/src/Program.hpp b/src/Program.hpp index 1f695a0..47a2fc5 100644 --- a/src/Program.hpp +++ b/src/Program.hpp @@ -41,7 +41,7 @@ namespace RandomX { private: void print(std::ostream&) const; uint64_t entropyBuffer[16]; - Instruction programBuffer[ProgramLength]; + Instruction programBuffer[RANDOMX_PROGRAM_SIZE]; }; static_assert(sizeof(Program) % 64 == 0, "Invalid size of class Program"); diff --git a/src/common.hpp b/src/common.hpp index 5642535..3b2a3dc 100644 --- a/src/common.hpp +++ b/src/common.hpp @@ -22,30 +22,43 @@ along with RandomX. If not, see. #include #include #include "blake2/endian.h" +#include "configuration.h" namespace RandomX { + static_assert((RANDOMX_ARGON_MEMORY & (RANDOMX_ARGON_MEMORY - 1)) == 0, "RANDOMX_ARGON_MEMORY must be a power of 2."); + static_assert((RANDOMX_DATASET_SIZE & (RANDOMX_DATASET_SIZE - 1)) == 0, "RANDOMX_DATASET_SIZE must be a power of 2."); + static_assert(RANDOMX_DATASET_SIZE <= 4294967296ULL, "RANDOMX_DATASET_SIZE must not exceed 4294967296."); + static_assert(RANDOMX_DS_GROWTH_RATE % 64 == 0, "RANDOMX_DS_GROWTH_RATE must be divisible by 64."); + static_assert(RANDOMX_PROGRAM_SIZE > 0, "RANDOMX_PROGRAM_SIZE must be greater than 0"); + static_assert(RANDOMX_PROGRAM_ITERATIONS > 0, "RANDOMX_PROGRAM_ITERATIONS must be greater than 0"); + static_assert(RANDOMX_PROGRAM_COUNT > 0, "RANDOMX_PROGRAM_COUNT must be greater than 0"); + static_assert((RANDOMX_SCRATCHPAD_L3 & (RANDOMX_SCRATCHPAD_L3 - 1)) == 0, "RANDOMX_SCRATCHPAD_L3 must be a power of 2."); + static_assert(RANDOMX_SCRATCHPAD_L3 >= 
RANDOMX_SCRATCHPAD_L2, "RANDOMX_SCRATCHPAD_L3 must be greater than or equal to RANDOMX_SCRATCHPAD_L2."); + static_assert((RANDOMX_SCRATCHPAD_L2 & (RANDOMX_SCRATCHPAD_L2 - 1)) == 0, "RANDOMX_SCRATCHPAD_L2 must be a power of 2."); + static_assert(RANDOMX_SCRATCHPAD_L2 >= RANDOMX_SCRATCHPAD_L1, "RANDOMX_SCRATCHPAD_L2 must be greater than or equal to RANDOMX_SCRATCHPAD_L1."); + static_assert((RANDOMX_SCRATCHPAD_L1 & (RANDOMX_SCRATCHPAD_L1 - 1)) == 0, "RANDOMX_SCRATCHPAD_L1 must be a power of 2."); + static_assert(RANDOMX_CACHE_ACCESSES > 1, "RANDOMX_CACHE_ACCESSES must be greater than 1"); + + constexpr int wtSum = RANDOMX_FREQ_IADD_R + RANDOMX_FREQ_IADD_M + RANDOMX_FREQ_IADD_RC + RANDOMX_FREQ_ISUB_R + \ + RANDOMX_FREQ_ISUB_M + RANDOMX_FREQ_IMUL_9C + RANDOMX_FREQ_IMUL_R + RANDOMX_FREQ_IMUL_M + RANDOMX_FREQ_IMULH_R + \ + RANDOMX_FREQ_IMULH_M + RANDOMX_FREQ_ISMULH_R + RANDOMX_FREQ_ISMULH_M + RANDOMX_FREQ_IMUL_RCP + \ + RANDOMX_FREQ_INEG_R + RANDOMX_FREQ_IXOR_R + RANDOMX_FREQ_IXOR_M + RANDOMX_FREQ_IROR_R + RANDOMX_FREQ_ISWAP_R + \ + RANDOMX_FREQ_FSWAP_R + RANDOMX_FREQ_FADD_R + RANDOMX_FREQ_FADD_M + RANDOMX_FREQ_FSUB_R + RANDOMX_FREQ_FSUB_M + \ + RANDOMX_FREQ_FSCAL_R + RANDOMX_FREQ_FMUL_R + RANDOMX_FREQ_FDIV_M + RANDOMX_FREQ_FSQRT_R + RANDOMX_FREQ_COND_R + \ + RANDOMX_FREQ_COND_M + RANDOMX_FREQ_CFROUND + RANDOMX_FREQ_ISTORE + RANDOMX_FREQ_NOP; + + static_assert(wtSum == 256, "Sum of instruction frequencies must be 256."); + using addr_t = uint32_t; constexpr int SeedSize = 32; constexpr int ResultSize = 64; - - constexpr int ArgonIterations = 3; - constexpr uint32_t ArgonMemorySize = 262144; //KiB - constexpr int ArgonLanes = 1; - const char ArgonSalt[] = "Monero\x1A$"; - constexpr int ArgonSaltSize = sizeof(ArgonSalt) - 1; - + constexpr int ArgonSaltSize = sizeof(RANDOMX_ARGON_SALT) - 1; constexpr int CacheLineSize = 64; - - constexpr uint64_t DatasetSize = 4ULL * 1024 * 1024 * 1024; //4 GiB - constexpr uint32_t CacheLineAlignMask = (DatasetSize - 1) & ~(CacheLineSize - 
1); - constexpr uint32_t CacheSize = ArgonMemorySize * 1024; + constexpr uint32_t CacheLineAlignMask = (RANDOMX_DATASET_SIZE - 1) & ~(CacheLineSize - 1); + constexpr uint32_t CacheSize = RANDOMX_ARGON_MEMORY * 1024; constexpr int CacheBlockCount = CacheSize / CacheLineSize; - constexpr int DatasetExpansionRatio = DatasetSize / CacheSize; - constexpr int DatasetBlockCount = DatasetExpansionRatio * CacheBlockCount; - constexpr int DatasetIterations = DatasetExpansionRatio; - #ifdef TRACE constexpr bool trace = true; @@ -70,29 +83,19 @@ namespace RandomX { double hi; }; - constexpr int ProgramLength = 256; - constexpr uint32_t InstructionCount = 2048; - constexpr int ChainLength = 8; - constexpr uint32_t ScratchpadSize = 2 * 1024 * 1024; - constexpr uint32_t ScratchpadLength = ScratchpadSize / sizeof(int_reg_t); - constexpr uint32_t ScratchpadL1 = ScratchpadSize / 128 / sizeof(int_reg_t); - constexpr uint32_t ScratchpadL2 = ScratchpadSize / 8 / sizeof(int_reg_t); - constexpr uint32_t ScratchpadL3 = ScratchpadSize / sizeof(int_reg_t); + constexpr uint32_t ScratchpadL1 = RANDOMX_SCRATCHPAD_L1 / sizeof(int_reg_t); + constexpr uint32_t ScratchpadL2 = RANDOMX_SCRATCHPAD_L2 / sizeof(int_reg_t); + constexpr uint32_t ScratchpadL3 = RANDOMX_SCRATCHPAD_L3 / sizeof(int_reg_t); constexpr int ScratchpadL1Mask = (ScratchpadL1 - 1) * 8; constexpr int ScratchpadL2Mask = (ScratchpadL2 - 1) * 8; constexpr int ScratchpadL1Mask16 = (ScratchpadL1 / 2 - 1) * 16; constexpr int ScratchpadL2Mask16 = (ScratchpadL2 / 2 - 1) * 16; - constexpr int ScratchpadL3Mask = (ScratchpadLength - 1) * 8; - constexpr int ScratchpadL3Mask64 = (ScratchpadLength / 8 - 1) * 64; - constexpr uint32_t TransformationCount = 90; + constexpr int ScratchpadL3Mask = (ScratchpadL3 - 1) * 8; + constexpr int ScratchpadL3Mask64 = (ScratchpadL3 / 8 - 1) * 64; constexpr int RegistersCount = 8; class Cache; - inline int wrapInstr(int i) { - return i % RandomX::ProgramLength; - } - class ILightClientAsyncWorker { public: 
virtual ~ILightClientAsyncWorker() {} @@ -120,8 +123,6 @@ namespace RandomX { dataset_t ds; }; - static_assert(sizeof(MemoryRegisters) == 2 * sizeof(addr_t) + sizeof(uintptr_t), "Invalid alignment of struct RandomX::MemoryRegisters"); - struct RegisterFile { int_reg_t r[RegistersCount]; fpu_reg_t f[RegistersCount / 2]; @@ -129,15 +130,9 @@ namespace RandomX { fpu_reg_t a[RegistersCount / 2]; }; - static_assert(sizeof(RegisterFile) == 256, "Invalid alignment of struct RandomX::RegisterFile"); - typedef void(*DatasetReadFunc)(addr_t, MemoryRegisters&, int_reg_t(&reg)[RegistersCount]); typedef void(*ProgramFunc)(RegisterFile&, MemoryRegisters&, uint8_t* /* scratchpad */, uint64_t); - - extern "C" { - void executeProgram(RegisterFile&, MemoryRegisters&, uint8_t* /* scratchpad */, uint64_t); - } } std::ostream& operator<<(std::ostream& os, const RandomX::RegisterFile& rf); diff --git a/src/configuration.h b/src/configuration.h new file mode 100644 index 0000000..72c4f5e --- /dev/null +++ b/src/configuration.h @@ -0,0 +1,101 @@ +/* +Copyright (c) 2019 tevador + +This file is part of RandomX. + +RandomX is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +RandomX is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with RandomX. If not, see. +*/ + +#pragma once + +//Cache size in KiB. Must be a power of 2. 
+#define RANDOMX_ARGON_MEMORY (256 * 1024) + +//Number of Argon2d iterations for Cache initialization +#define RANDOMX_ARGON_ITERATIONS 3 + +//Number of parallel lanes for Cache initialization +#define RANDOMX_ARGON_LANES 1 + +//Argon2d salt +#define RANDOMX_ARGON_SALT "RandomX\x03" + +//Number of random Cache accesses per Dataset block. Minimum is 2. +#define RANDOMX_CACHE_ACCESSES 16 + +//Dataset size in bytes. Must be a power of 2. +#define RANDOMX_DATASET_SIZE (4ULL * 1024 * 1024 * 1024) + +//Dataset growth per epoch in bytes. Must be divisible by 64. +#define RANDOMX_DS_GROWTH_RATE (2 * 1024 * 1024) + +//Number of instructions in a RandomX program +#define RANDOMX_PROGRAM_SIZE 256 + +//Number of iterations during VM execution +#define RANDOMX_PROGRAM_ITERATIONS 2048 + +//Number of chained VM executions per hash +#define RANDOMX_PROGRAM_COUNT 8 + +//Scratchpad L3 size in bytes. Must be a power of 2. +#define RANDOMX_SCRATCHPAD_L3 (2 * 1024 * 1024) + +//Scratchpad L2 size in bytes. Must be a power of two and less than or equal to RANDOMX_SCRATCHPAD_L3. +#define RANDOMX_SCRATCHPAD_L2 (256 * 1024) + +//Scratchpad L1 size in bytes. Must be a power of two and less than or equal to RANDOMX_SCRATCHPAD_L2. 
+#define RANDOMX_SCRATCHPAD_L1 (16 * 1024) + +/* +Instruction frequencies (per 256 opcodes) +Total sum of frequencies must be 256 +*/ + +#define RANDOMX_FREQ_IADD_R 12 +#define RANDOMX_FREQ_IADD_M 7 +#define RANDOMX_FREQ_IADD_RC 16 +#define RANDOMX_FREQ_ISUB_R 12 +#define RANDOMX_FREQ_ISUB_M 7 +#define RANDOMX_FREQ_IMUL_9C 9 +#define RANDOMX_FREQ_IMUL_R 16 +#define RANDOMX_FREQ_IMUL_M 4 +#define RANDOMX_FREQ_IMULH_R 4 +#define RANDOMX_FREQ_IMULH_M 1 +#define RANDOMX_FREQ_ISMULH_R 4 +#define RANDOMX_FREQ_ISMULH_M 1 +#define RANDOMX_FREQ_IMUL_RCP 8 +#define RANDOMX_FREQ_INEG_R 2 +#define RANDOMX_FREQ_IXOR_R 16 +#define RANDOMX_FREQ_IXOR_M 4 +#define RANDOMX_FREQ_IROR_R 10 +#define RANDOMX_FREQ_IROL_R 0 +#define RANDOMX_FREQ_ISWAP_R 4 + +#define RANDOMX_FREQ_FSWAP_R 8 +#define RANDOMX_FREQ_FADD_R 20 +#define RANDOMX_FREQ_FADD_M 5 +#define RANDOMX_FREQ_FSUB_R 20 +#define RANDOMX_FREQ_FSUB_M 5 +#define RANDOMX_FREQ_FSCAL_R 6 +#define RANDOMX_FREQ_FMUL_R 20 +#define RANDOMX_FREQ_FDIV_M 4 +#define RANDOMX_FREQ_FSQRT_R 6 + +#define RANDOMX_FREQ_COND_R 7 +#define RANDOMX_FREQ_COND_M 1 +#define RANDOMX_FREQ_CFROUND 1 +#define RANDOMX_FREQ_ISTORE 16 + +#define RANDOMX_FREQ_NOP 0 diff --git a/src/dataset.cpp b/src/dataset.cpp index a5132fd..b99ce8e 100644 --- a/src/dataset.cpp +++ b/src/dataset.cpp @@ -47,7 +47,7 @@ namespace RandomX { constexpr uint32_t mask = (CacheSize - 1) & CacheLineAlignMask; - for (auto i = 0; i < DatasetIterations; ++i) { + for (auto i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) { const uint8_t* mixBlock = cache + (c0 & mask); PREFETCHNTA(mixBlock); c0 = squareHash(c0); @@ -103,14 +103,14 @@ namespace RandomX { aw->prepareBlock(memory.ma); } - void datasetAlloc(dataset_t& ds, bool largePages) { + void datasetAlloc(dataset_t& ds, uint64_t size, bool largePages) { if (sizeof(size_t) <= 4) throw std::runtime_error("Platform doesn't support enough memory for the dataset"); if (largePages) { - ds.dataset = (uint8_t*)allocLargePagesMemory(DatasetSize); + 
ds.dataset = (uint8_t*)allocLargePagesMemory(size); } else { - ds.dataset = (uint8_t*)_mm_malloc(DatasetSize, 64); + ds.dataset = (uint8_t*)_mm_malloc(size, 64); if (ds.dataset == nullptr) { throw std::runtime_error("Dataset memory allocation failed. >4 GiB of free virtual memory is needed."); } diff --git a/src/dataset.hpp b/src/dataset.hpp index c01835a..6aba805 100644 --- a/src/dataset.hpp +++ b/src/dataset.hpp @@ -30,7 +30,7 @@ namespace RandomX { void initBlock(const uint8_t* cache, uint8_t* block, uint32_t blockNumber); - void datasetAlloc(dataset_t& ds, bool largePages); + void datasetAlloc(dataset_t& ds, uint64_t size, bool largePages); void datasetInit(Cache* cache, dataset_t ds, uint32_t startBlock, uint32_t blockCount); diff --git a/src/instructionWeights.hpp b/src/instructionWeights.hpp index 31f0c54..7fceb0b 100644 --- a/src/instructionWeights.hpp +++ b/src/instructionWeights.hpp @@ -19,67 +19,6 @@ along with RandomX. If not, see. #pragma once -//Integer -#define WT_IADD_R 12 -#define WT_IADD_M 7 -#define WT_IADD_RC 16 -#define WT_ISUB_R 12 -#define WT_ISUB_M 7 -#define WT_IMUL_9C 9 -#define WT_IMUL_R 16 -#define WT_IMUL_M 4 -#define WT_IMULH_R 4 -#define WT_IMULH_M 1 -#define WT_ISMULH_R 4 -#define WT_ISMULH_M 1 -#define WT_IMUL_RCP 8 -#define WT_ISDIV_C 0 -#define WT_INEG_R 2 -#define WT_IXOR_R 16 -#define WT_IXOR_M 4 -#define WT_IROR_R 10 -#define WT_IROL_R 0 -#define WT_ISWAP_R 4 - -//Common floating point -#define WT_FSWAP_R 8 - -//Floating point group F -#define WT_FADD_R 20 -#define WT_FADD_M 5 -#define WT_FSUB_R 20 -#define WT_FSUB_M 5 -#define WT_FSCAL_R 6 - -//Floating point group E -#define WT_FMUL_R 20 -#define WT_FMUL_M 0 -#define WT_FDIV_R 0 -#define WT_FDIV_M 4 -#define WT_FSQRT_R 6 - -//Control -#define WT_COND_R 7 -#define WT_COND_M 1 -#define WT_CFROUND 1 - -//Store -#define WT_ISTORE 16 -#define WT_FSTORE 0 - -#define WT_NOP 0 - -constexpr int wtSum = WT_IADD_R + WT_IADD_M + WT_IADD_RC + WT_ISUB_R + \ -WT_ISUB_M + WT_IMUL_9C + 
WT_IMUL_R + WT_IMUL_M + WT_IMULH_R + \ -WT_IMULH_M + WT_ISMULH_R + WT_ISMULH_M + WT_IMUL_RCP + WT_ISDIV_C + \ -WT_INEG_R + WT_IXOR_R + WT_IXOR_M + WT_IROR_R + WT_IROL_R + \ -WT_ISWAP_R + WT_FSWAP_R + WT_FADD_R + WT_FADD_M + WT_FSUB_R + WT_FSUB_M + \ -WT_FSCAL_R + WT_FMUL_R + WT_FMUL_M + WT_FDIV_R + WT_FDIV_M + \ -WT_FSQRT_R + WT_COND_R + WT_COND_M + WT_CFROUND + WT_ISTORE + WT_FSTORE + WT_NOP; - -static_assert(wtSum == 256, - "Sum of instruction weights must be 256"); - #define REP0(x) #define REP1(x) x, #define REP2(x) REP1(x) x, @@ -121,7 +60,7 @@ static_assert(wtSum == 256, #define REPNX(x,N) REP##N(x) #define REPN(x,N) REPNX(x,N) #define NUM(x) x -#define WT(x) NUM(WT_##x) +#define WT(x) NUM(RANDOMX_FREQ_##x) #define REPCASE0(x) #define REPCASE1(x) case __COUNTER__: diff --git a/src/main.cpp b/src/main.cpp index ad6e856..f2df3fb 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -132,8 +132,8 @@ void generateAsm(uint32_t nonce) { memcpy(blockTemplate, blockTemplate__, sizeof(blockTemplate)); store32(blockTemplate + 39, nonce); blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0); - uint8_t scratchpad[RandomX::ScratchpadSize]; - fillAes1Rx4((void*)hash, RandomX::ScratchpadSize, scratchpad); + uint8_t scratchpad[RANDOMX_SCRATCHPAD_L3]; + fillAes1Rx4((void*)hash, RANDOMX_SCRATCHPAD_L3, scratchpad); RandomX::AssemblyGeneratorX86 asmX86; RandomX::Program p; fillAes1Rx4(hash, sizeof(p), &p); @@ -148,11 +148,11 @@ void generateNative(uint32_t nonce) { memcpy(blockTemplate, blockTemplate__, sizeof(blockTemplate)); store32(blockTemplate + 39, nonce); blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0); - uint8_t scratchpad[RandomX::ScratchpadSize]; - fillAes1Rx4((void*)hash, RandomX::ScratchpadSize, scratchpad); + uint8_t scratchpad[RANDOMX_SCRATCHPAD_L3]; + fillAes1Rx4((void*)hash, RANDOMX_SCRATCHPAD_L3, scratchpad); alignas(16) RandomX::Program prog; fillAes1Rx4((void*)hash, sizeof(prog), &prog); - for (int i = 0; i < 
RandomX::ProgramLength; ++i) { + for (int i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) { prog(i).dst %= 8; prog(i).src %= 8; } @@ -171,11 +171,11 @@ void mine(RandomX::VirtualMachine* vm, std::atomic& atomicNonce, Atomi //std::cout << "Thread " << thread << " nonce " << nonce << std::endl; store32(noncePtr, nonce); blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0); - fillAes1Rx4((void*)hash, RandomX::ScratchpadSize, scratchpad); + fillAes1Rx4((void*)hash, RANDOMX_SCRATCHPAD_L3, scratchpad); vm->resetRoundingMode(); vm->setScratchpad(scratchpad); //dump((char*)scratchpad, RandomX::ScratchpadSize, "spad-before.txt"); - for (int chain = 0; chain < RandomX::ChainLength - 1; ++chain) { + for (int chain = 0; chain < RANDOMX_PROGRAM_COUNT - 1; ++chain) { fillAes1Rx4((void*)hash, sizeof(RandomX::Program), vm->getProgramBuffer()); vm->initialize(); vm->execute(); @@ -190,7 +190,7 @@ void mine(RandomX::VirtualMachine* vm, std::atomic& atomicNonce, Atomi std::cout << std::hex << std::setw(16) << std::setfill('0') << res << std::endl; } }*/ - vm->getResult(scratchpad, RandomX::ScratchpadSize, hash); + vm->getResult(scratchpad, RANDOMX_SCRATCHPAD_L3, hash); result.xorWith(hash); if (RandomX::trace) { std::cout << "Nonce: " << nonce << " "; @@ -267,10 +267,11 @@ int main(int argc, char** argv) { } else { RandomX::Cache* cache = dataset.cache; - RandomX::datasetAlloc(dataset, largePages); + RandomX::datasetAlloc(dataset, RANDOMX_DATASET_SIZE, largePages); + const uint64_t datasetBlockCount = RANDOMX_DATASET_SIZE / RandomX::CacheLineSize; if (initThreadCount > 1) { - auto perThread = RandomX::DatasetBlockCount / initThreadCount; - auto remainder = RandomX::DatasetBlockCount % initThreadCount; + auto perThread = datasetBlockCount / initThreadCount; + auto remainder = datasetBlockCount % initThreadCount; for (int i = 0; i < initThreadCount; ++i) { auto count = perThread + (i == initThreadCount - 1 ? 
remainder : 0); threads.push_back(std::thread(&RandomX::datasetInit, cache, dataset, i * perThread, count)); @@ -280,7 +281,7 @@ int main(int argc, char** argv) { } } else { - RandomX::datasetInit(cache, dataset, 0, RandomX::DatasetBlockCount); + RandomX::datasetInit(cache, dataset, 0, datasetBlockCount); } RandomX::Cache::dealloc(cache, largePages); threads.clear(); @@ -300,19 +301,19 @@ int main(int argc, char** argv) { } uint8_t* scratchpadMem; if (largePages) { - scratchpadMem = (uint8_t*)allocLargePagesMemory(threadCount * RandomX::ScratchpadSize); + scratchpadMem = (uint8_t*)allocLargePagesMemory(threadCount * RANDOMX_SCRATCHPAD_L3); } else { - scratchpadMem = (uint8_t*)_mm_malloc(threadCount * RandomX::ScratchpadSize, RandomX::CacheLineSize); + scratchpadMem = (uint8_t*)_mm_malloc(threadCount * RANDOMX_SCRATCHPAD_L3, RandomX::CacheLineSize); } std::cout << "Running benchmark (" << programCount << " nonces) ..." << std::endl; sw.restart(); if (threadCount > 1) { for (unsigned i = 0; i < vms.size(); ++i) { if (softAes) - threads.push_back(std::thread(&mine, vms[i], std::ref(atomicNonce), std::ref(result), programCount, i, scratchpadMem + RandomX::ScratchpadSize * i)); + threads.push_back(std::thread(&mine, vms[i], std::ref(atomicNonce), std::ref(result), programCount, i, scratchpadMem + RANDOMX_SCRATCHPAD_L3 * i)); else - threads.push_back(std::thread(&mine, vms[i], std::ref(atomicNonce), std::ref(result), programCount, i, scratchpadMem + RandomX::ScratchpadSize * i)); + threads.push_back(std::thread(&mine, vms[i], std::ref(atomicNonce), std::ref(result), programCount, i, scratchpadMem + RANDOMX_SCRATCHPAD_L3 * i)); } for (unsigned i = 0; i < threads.size(); ++i) { threads[i].join(); @@ -330,7 +331,7 @@ int main(int argc, char** argv) { std::cout << "Calculated result: "; result.print(std::cout); if(programCount == 1000) - std::cout << "Reference result: e1b4144293ff9ab5aa4c98f2389bb18950d8c3fd874891ac64628e028a286006" << std::endl; + std::cout << "Reference 
result: 128599cc10f9f6251e7917fa1d09ab2116ab4081bf1357149bd4054275dd8ee9" << std::endl; if (!miningMode) { std::cout << "Performance: " << 1000 * elapsed / programCount << " ms per hash" << std::endl; }