diff --git a/doc/configuration.md b/doc/configuration.md index 5090f91..a53b57e 100644 --- a/doc/configuration.md +++ b/doc/configuration.md @@ -24,7 +24,9 @@ These parameters can be modified in source file [configuration.h](../src/configu |`RANDOMX_SCRATCHPAD_L1`|Scratchpad L1 size in bytes|`16384`| |`RANDOMX_FREQ_*` (29x)|Instruction frequencies|multiple values| -Not all of the parameters can be changed safely and most parameters have some contraints on what values can be selected. Follow the guidelines below. +Not all of the parameters can be changed safely and most parameters have some constraints on what values can be selected (checked at compile-time). + +**Disclaimer: The compile-time checks only prevent obviously broken configurations. Passing the checks does not imply that the configuration is safe and will not cause crashes or other issues. We recommend that each non-standard configuration is thoroughly tested before being deployed.** ### RANDOMX_ARGON_MEMORY @@ -80,7 +82,7 @@ This value directly determines the performance ratio between the 'fast' and 'lig Target latency for SuperscalarHash, in cycles of the reference CPU. #### Permitted values -Any positive integer. +Integers in the range 1 - 10000. #### Notes The default value was tuned so that a high-performance superscalar CPU running at 2-4 GHz will execute SuperscalarHash in similar time it takes to load data from RAM (40-80 ns). Using a lower value will make Dataset generation (and light mode) more memory bound, while increasing this value will make Dataset generation (and light mode) more compute bound. @@ -110,7 +112,7 @@ This constant affects the memory requirements in fast mode. Some values are unsa The number of instructions in a RandomX program. #### Permitted values -Any positive integer divisible by 8. +Positive integers divisible by 8 in the range 8 - 32768 (inclusive). 
#### Notes Smaller values will make RandomX more DRAM-latency bound, while higher values will make RandomX more compute-bound. Some values are unsafe. See [Unsafe configurations](#unsafe-configurations). diff --git a/src/common.hpp b/src/common.hpp index d2b45a3..d349abd 100644 --- a/src/common.hpp +++ b/src/common.hpp @@ -46,6 +46,7 @@ namespace randomx { static_assert(RANDOMX_DATASET_EXTRA_SIZE % 64 == 0, "RANDOMX_DATASET_EXTRA_SIZE must be divisible by 64."); static_assert((uint64_t)RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE <= 17179869184, "Dataset size must not exceed 16 GiB."); static_assert(RANDOMX_PROGRAM_SIZE > 0, "RANDOMX_PROGRAM_SIZE must be greater than 0"); + static_assert(RANDOMX_PROGRAM_SIZE <= 32768, "RANDOMX_PROGRAM_SIZE must not exceed 32768"); static_assert(RANDOMX_PROGRAM_ITERATIONS > 0, "RANDOMX_PROGRAM_ITERATIONS must be greater than 0"); static_assert(RANDOMX_PROGRAM_COUNT > 0, "RANDOMX_PROGRAM_COUNT must be greater than 0"); static_assert((RANDOMX_SCRATCHPAD_L3 & (RANDOMX_SCRATCHPAD_L3 - 1)) == 0, "RANDOMX_SCRATCHPAD_L3 must be a power of 2."); @@ -56,6 +57,7 @@ namespace randomx { static_assert((RANDOMX_SCRATCHPAD_L1 & (RANDOMX_SCRATCHPAD_L1 - 1)) == 0, "RANDOMX_SCRATCHPAD_L1 must be a power of 2."); static_assert(RANDOMX_CACHE_ACCESSES > 1, "RANDOMX_CACHE_ACCESSES must be greater than 1"); static_assert(RANDOMX_SUPERSCALAR_LATENCY > 0, "RANDOMX_SUPERSCALAR_LATENCY must be greater than 0"); + static_assert(RANDOMX_SUPERSCALAR_LATENCY <= 10000, "RANDOMX_SUPERSCALAR_LATENCY must not exceed 10000"); static_assert(RANDOMX_JUMP_BITS > 0, "RANDOMX_JUMP_BITS must be greater than 0."); static_assert(RANDOMX_JUMP_OFFSET >= 0, "RANDOMX_JUMP_OFFSET must be greater than or equal to 0."); static_assert(RANDOMX_JUMP_BITS + RANDOMX_JUMP_OFFSET <= 16, "RANDOMX_JUMP_BITS + RANDOMX_JUMP_OFFSET must not exceed 16."); diff --git a/src/jit_compiler_x86.cpp b/src/jit_compiler_x86.cpp index 81e0ef7..81d8cda 100644 --- a/src/jit_compiler_x86.cpp +++ 
b/src/jit_compiler_x86.cpp @@ -76,6 +76,24 @@ namespace randomx { */ + //Calculate the required code buffer size that is sufficient for the largest possible program: + + constexpr size_t MaxRandomXInstrCodeSize = 32; //FDIV_M requires up to 32 bytes of x86 code + constexpr size_t MaxSuperscalarInstrSize = 14; //IMUL_RCP requires 14 bytes of x86 code + constexpr size_t SuperscalarProgramHeader = 128; //overhead per superscalar program + constexpr size_t CodeAlign = 4096; //align code size to a multiple of 4 KiB + constexpr size_t ReserveCodeSize = CodeAlign; //function prologue/epilogue + reserve + + constexpr size_t RandomXCodeSize = alignSize(ReserveCodeSize + MaxRandomXInstrCodeSize * RANDOMX_PROGRAM_SIZE, CodeAlign); + constexpr size_t SuperscalarSize = alignSize(ReserveCodeSize + (SuperscalarProgramHeader + MaxSuperscalarInstrSize * SuperscalarMaxSize) * RANDOMX_CACHE_ACCESSES, CodeAlign); + + static_assert(RandomXCodeSize < INT32_MAX / 2, "RandomXCodeSize is too large"); + static_assert(SuperscalarSize < INT32_MAX / 2, "SuperscalarSize is too large"); + + constexpr uint32_t CodeSize = RandomXCodeSize + SuperscalarSize; + + constexpr int32_t superScalarHashOffset = RandomXCodeSize; + const uint8_t* codePrologue = (uint8_t*)&randomx_program_prologue; const uint8_t* codeLoopBegin = (uint8_t*)&randomx_program_loop_begin; const uint8_t* codeLoopLoad = (uint8_t*)&randomx_program_loop_load; @@ -106,7 +124,6 @@ namespace randomx { const int32_t codeSshInitSize = codeProgramEnd - codeShhInit; const int32_t epilogueOffset = CodeSize - epilogueSize; - constexpr int32_t superScalarHashOffset = 32768; static const uint8_t REX_ADD_RR[] = { 0x4d, 0x03 }; static const uint8_t REX_ADD_RM[] = { 0x4c, 0x03 }; @@ -181,7 +198,7 @@ namespace randomx { static const uint8_t REX_TEST[] = { 0x49, 0xF7 }; static const uint8_t JZ[] = { 0x0f, 0x84 }; static const uint8_t RET = 0xc3; - static const uint8_t LEA_32[] = { 0x67, 0x41, 0x8d }; + static const uint8_t LEA_32[] = { 0x41, 0x8d }; 
static const uint8_t MOVNTI[] = { 0x4c, 0x0f, 0xc3 }; static const uint8_t ADD_EBX_I[] = { 0x81, 0xc3 }; @@ -197,7 +214,7 @@ namespace randomx { static const uint8_t* NOPX[] = { NOP1, NOP2, NOP3, NOP4, NOP5, NOP6, NOP7, NOP8 }; size_t JitCompilerX86::getCodeSize() { - return codePos < prologueSize ? 0 : codePos - prologueSize; + return CodeSize; } JitCompilerX86::JitCompilerX86() { diff --git a/src/jit_compiler_x86.hpp b/src/jit_compiler_x86.hpp index bd068c7..47d49a2 100644 --- a/src/jit_compiler_x86.hpp +++ b/src/jit_compiler_x86.hpp @@ -43,8 +43,6 @@ namespace randomx { typedef void(JitCompilerX86::*InstructionGeneratorX86)(Instruction&, int); - constexpr uint32_t CodeSize = 64 * 1024; - class JitCompilerX86 { public: JitCompilerX86(); diff --git a/src/jit_compiler_x86_static.S b/src/jit_compiler_x86_static.S index eab1b4c..b36cad0 100644 --- a/src/jit_compiler_x86_static.S +++ b/src/jit_compiler_x86_static.S @@ -60,6 +60,8 @@ #define RANDOMX_SCRATCHPAD_MASK (RANDOMX_SCRATCHPAD_L3-64) #define RANDOMX_DATASET_BASE_MASK (RANDOMX_DATASET_BASE_SIZE-64) #define RANDOMX_CACHE_MASK (RANDOMX_ARGON_MEMORY*16-1) +#define RANDOMX_ALIGN 4096 +#define SUPERSCALAR_OFFSET ((((RANDOMX_ALIGN + 32 * RANDOMX_PROGRAM_SIZE) - 1) / (RANDOMX_ALIGN) + 1) * (RANDOMX_ALIGN)) #define db .byte @@ -128,8 +130,7 @@ init_block_loop: prefetchw byte ptr [rsi] mov rbx, rbp .byte 232 ;# 0xE8 = call - ;# .set CALL_LOC, - .int 32768 - (call_offset - DECL(randomx_dataset_init)) + .int SUPERSCALAR_OFFSET - (call_offset - DECL(randomx_dataset_init)) call_offset: mov qword ptr [rsi+0], r8 mov qword ptr [rsi+8], r9 diff --git a/src/jit_compiler_x86_static.asm b/src/jit_compiler_x86_static.asm index b2fad7a..f1d2f95 100644 --- a/src/jit_compiler_x86_static.asm +++ b/src/jit_compiler_x86_static.asm @@ -51,6 +51,8 @@ include asm/configuration.asm RANDOMX_SCRATCHPAD_MASK EQU (RANDOMX_SCRATCHPAD_L3-64) RANDOMX_DATASET_BASE_MASK EQU (RANDOMX_DATASET_BASE_SIZE-64) RANDOMX_CACHE_MASK EQU 
(RANDOMX_ARGON_MEMORY*16-1) +RANDOMX_ALIGN EQU 4096 +SUPERSCALAR_OFFSET EQU ((((RANDOMX_ALIGN + 32 * RANDOMX_PROGRAM_SIZE) - 1) / (RANDOMX_ALIGN) + 1) * (RANDOMX_ALIGN)) ALIGN 64 randomx_program_prologue PROC @@ -115,7 +117,7 @@ init_block_loop: prefetchw byte ptr [rsi] mov rbx, rbp db 232 ;# 0xE8 = call - dd 32768 - distance + dd SUPERSCALAR_OFFSET - distance distance equ $ - offset randomx_dataset_init mov qword ptr [rsi+0], r8 mov qword ptr [rsi+8], r9 diff --git a/src/tests/jit-performance.cpp b/src/tests/jit-performance.cpp index 57e8c7d..71c0169 100644 --- a/src/tests/jit-performance.cpp +++ b/src/tests/jit-performance.cpp @@ -39,6 +39,6 @@ int main(int argc, char** argv) { std::cout << "Elapsed: " << sw.getElapsed() << " s" << std::endl; - dump((const char*)jit.getProgramFunc(), randomx::CodeSize, "program.bin"); + dump((const char*)jit.getProgramFunc(), jit.getCodeSize(), "program.bin"); return 0; } \ No newline at end of file diff --git a/src/virtual_memory.cpp b/src/virtual_memory.cpp index c27a536..3ddacdf 100644 --- a/src/virtual_memory.cpp +++ b/src/virtual_memory.cpp @@ -97,17 +97,13 @@ void* allocExecutableMemory(std::size_t bytes) { return mem; } -constexpr std::size_t align(std::size_t pos, std::size_t align) { - return ((pos - 1) / align + 1) * align; -} - void* allocLargePagesMemory(std::size_t bytes) { void* mem; #if defined(_WIN32) || defined(__CYGWIN__) setPrivilege("SeLockMemoryPrivilege", 1); auto pageMinimum = GetLargePageMinimum(); if (pageMinimum > 0) - mem = VirtualAlloc(NULL, align(bytes, pageMinimum), MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES, PAGE_READWRITE); + mem = VirtualAlloc(NULL, alignSize(bytes, pageMinimum), MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES, PAGE_READWRITE); else throw std::runtime_error("allocLargePagesMemory - Large pages are not supported"); if (mem == nullptr) diff --git a/src/virtual_memory.hpp b/src/virtual_memory.hpp index d3b31db..3d4956e 100644 --- a/src/virtual_memory.hpp +++ b/src/virtual_memory.hpp @@ 
-30,6 +30,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include <cstddef> +constexpr std::size_t alignSize(std::size_t pos, std::size_t align) { + return ((pos - 1) / align + 1) * align; +} + void* allocExecutableMemory(std::size_t); void* allocLargePagesMemory(std::size_t); void freePagedMemory(void*, std::size_t);