Fix: hardcoded JIT code buffer size (#98)

* code buffer size is calculated based on RandomX parameters
* added a maximum value constraint for program size and superscalar latency
* reduced the x86 code size of memory instructions by 1 byte
* disclaimer note in configuration documentation
This commit is contained in:
tevador 2019-07-09 20:27:10 +02:00 committed by GitHub
parent 47ade5e894
commit c433f6d3a8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 39 additions and 17 deletions

View file

@ -24,7 +24,9 @@ These parameters can be modified in source file [configuration.h](../src/configu
|`RANDOMX_SCRATCHPAD_L1`|Scratchpad L1 size in bytes|`16384`|
|`RANDOMX_FREQ_*` (29x)|Instruction frequencies|multiple values|
Not all of the parameters can be changed safely and most parameters have some constraints on what values can be selected. Follow the guidelines below.
Not all of the parameters can be changed safely and most parameters have some constraints on what values can be selected (checked at compile-time).
**Disclaimer: The compile-time checks only prevent obviously broken configurations. Passing the checks does not imply that the configuration is safe and will not cause crashes or other issues. We recommend that each non-standard configuration is thoroughly tested before being deployed.**
### RANDOMX_ARGON_MEMORY
@ -80,7 +82,7 @@ This value directly determines the performance ratio between the 'fast' and 'lig
Target latency for SuperscalarHash, in cycles of the reference CPU.
#### Permitted values
Any positive integer.
Integers in the range 1 - 10000.
#### Notes
The default value was tuned so that a high-performance superscalar CPU running at 2-4 GHz will execute SuperscalarHash in about the same time it takes to load data from RAM (40-80 ns). Using a lower value will make Dataset generation (and light mode) more memory bound, while increasing this value will make Dataset generation (and light mode) more compute bound.
@ -110,7 +112,7 @@ This constant affects the memory requirements in fast mode. Some values are unsa
The number of instructions in a RandomX program.
#### Permitted values
Any positive integer divisible by 8.
Positive integers divisible by 8 in the range 8 - 32768 (inclusive).
#### Notes
Smaller values will make RandomX more DRAM-latency bound, while higher values will make RandomX more compute-bound. Some values are unsafe. See [Unsafe configurations](#unsafe-configurations).

View file

@ -46,6 +46,7 @@ namespace randomx {
static_assert(RANDOMX_DATASET_EXTRA_SIZE % 64 == 0, "RANDOMX_DATASET_EXTRA_SIZE must be divisible by 64.");
static_assert((uint64_t)RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE <= 17179869184, "Dataset size must not exceed 16 GiB.");
static_assert(RANDOMX_PROGRAM_SIZE > 0, "RANDOMX_PROGRAM_SIZE must be greater than 0");
static_assert(RANDOMX_PROGRAM_SIZE <= 32768, "RANDOMX_PROGRAM_SIZE must not exceed 32768");
static_assert(RANDOMX_PROGRAM_ITERATIONS > 0, "RANDOMX_PROGRAM_ITERATIONS must be greater than 0");
static_assert(RANDOMX_PROGRAM_COUNT > 0, "RANDOMX_PROGRAM_COUNT must be greater than 0");
static_assert((RANDOMX_SCRATCHPAD_L3 & (RANDOMX_SCRATCHPAD_L3 - 1)) == 0, "RANDOMX_SCRATCHPAD_L3 must be a power of 2.");
@ -56,6 +57,7 @@ namespace randomx {
static_assert((RANDOMX_SCRATCHPAD_L1 & (RANDOMX_SCRATCHPAD_L1 - 1)) == 0, "RANDOMX_SCRATCHPAD_L1 must be a power of 2.");
static_assert(RANDOMX_CACHE_ACCESSES > 1, "RANDOMX_CACHE_ACCESSES must be greater than 1");
static_assert(RANDOMX_SUPERSCALAR_LATENCY > 0, "RANDOMX_SUPERSCALAR_LATENCY must be greater than 0");
static_assert(RANDOMX_SUPERSCALAR_LATENCY <= 10000, "RANDOMX_SUPERSCALAR_LATENCY must not exceed 10000");
static_assert(RANDOMX_JUMP_BITS > 0, "RANDOMX_JUMP_BITS must be greater than 0.");
static_assert(RANDOMX_JUMP_OFFSET >= 0, "RANDOMX_JUMP_OFFSET must be greater than or equal to 0.");
static_assert(RANDOMX_JUMP_BITS + RANDOMX_JUMP_OFFSET <= 16, "RANDOMX_JUMP_BITS + RANDOMX_JUMP_OFFSET must not exceed 16.");

View file

@ -76,6 +76,24 @@ namespace randomx {
*/
//Calculate the required code buffer size that is sufficient for the largest possible program:
//The buffer size is derived from the configuration at compile time: a worst-case
//per-instruction byte count is multiplied by the number of instructions, a fixed
//reserve is added and the result is rounded up to a multiple of CodeAlign.
constexpr size_t MaxRandomXInstrCodeSize = 32; //FDIV_M requires up to 32 bytes of x86 code
constexpr size_t MaxSuperscalarInstrSize = 14; //IMUL_RCP requires 14 bytes of x86 code
constexpr size_t SuperscalarProgramHeader = 128; //overhead per superscalar program
constexpr size_t CodeAlign = 4096; //align code size to a multiple of 4 KiB
constexpr size_t ReserveCodeSize = CodeAlign; //function prologue/epilogue + reserve
//Worst case for the main program: reserve + 32 bytes for each of the RANDOMX_PROGRAM_SIZE instructions.
constexpr size_t RandomXCodeSize = alignSize(ReserveCodeSize + MaxRandomXInstrCodeSize * RANDOMX_PROGRAM_SIZE, CodeAlign);
//Worst case for the superscalar code: reserve + (header + 14 bytes * SuperscalarMaxSize) for each of the
//RANDOMX_CACHE_ACCESSES programs. SuperscalarMaxSize is defined elsewhere (presumably the maximum
//number of instructions in one superscalar program - TODO confirm).
constexpr size_t SuperscalarSize = alignSize(ReserveCodeSize + (SuperscalarProgramHeader + MaxSuperscalarInstrSize * SuperscalarMaxSize) * RANDOMX_CACHE_ACCESSES, CodeAlign);
//Each part must stay below INT32_MAX / 2 so that their sum still fits into the
//32-bit types (uint32_t / int32_t) used for CodeSize and superScalarHashOffset below.
static_assert(RandomXCodeSize < INT32_MAX / 2, "RandomXCodeSize is too large");
static_assert(SuperscalarSize < INT32_MAX / 2, "SuperscalarSize is too large");
//Total size: the main program part followed by the superscalar part.
constexpr uint32_t CodeSize = RandomXCodeSize + SuperscalarSize;
//Equal to the size of the main program part (presumably the offset at which the
//superscalar code starts within the code buffer - TODO confirm against its users).
constexpr int32_t superScalarHashOffset = RandomXCodeSize;
const uint8_t* codePrologue = (uint8_t*)&randomx_program_prologue;
const uint8_t* codeLoopBegin = (uint8_t*)&randomx_program_loop_begin;
const uint8_t* codeLoopLoad = (uint8_t*)&randomx_program_loop_load;
@ -106,7 +124,6 @@ namespace randomx {
const int32_t codeSshInitSize = codeProgramEnd - codeShhInit;
const int32_t epilogueOffset = CodeSize - epilogueSize;
constexpr int32_t superScalarHashOffset = 32768;
static const uint8_t REX_ADD_RR[] = { 0x4d, 0x03 };
static const uint8_t REX_ADD_RM[] = { 0x4c, 0x03 };
@ -181,7 +198,7 @@ namespace randomx {
static const uint8_t REX_TEST[] = { 0x49, 0xF7 };
static const uint8_t JZ[] = { 0x0f, 0x84 };
static const uint8_t RET = 0xc3;
static const uint8_t LEA_32[] = { 0x67, 0x41, 0x8d };
static const uint8_t LEA_32[] = { 0x41, 0x8d };
static const uint8_t MOVNTI[] = { 0x4c, 0x0f, 0xc3 };
static const uint8_t ADD_EBX_I[] = { 0x81, 0xc3 };
@ -197,7 +214,7 @@ namespace randomx {
static const uint8_t* NOPX[] = { NOP1, NOP2, NOP3, NOP4, NOP5, NOP6, NOP7, NOP8 };
//Returns a code size in bytes.
//NOTE(review): two return statements are listed here, which looks like the removed and the
//added line of the change shown together. As written, only the first one executes
//(codePos - prologueSize, or 0 when codePos is smaller than prologueSize) and
//`return CodeSize;` is unreachable - confirm which of the two is intended.
size_t JitCompilerX86::getCodeSize() {
return codePos < prologueSize ? 0 : codePos - prologueSize;
return CodeSize;
}
JitCompilerX86::JitCompilerX86() {

View file

@ -43,8 +43,6 @@ namespace randomx {
typedef void(JitCompilerX86::*InstructionGeneratorX86)(Instruction&, int);
constexpr uint32_t CodeSize = 64 * 1024;
class JitCompilerX86 {
public:
JitCompilerX86();

View file

@ -60,6 +60,8 @@
#define RANDOMX_SCRATCHPAD_MASK (RANDOMX_SCRATCHPAD_L3-64)
#define RANDOMX_DATASET_BASE_MASK (RANDOMX_DATASET_BASE_SIZE-64)
#define RANDOMX_CACHE_MASK (RANDOMX_ARGON_MEMORY*16-1)
#define RANDOMX_ALIGN 4096
#define SUPERSCALAR_OFFSET ((((RANDOMX_ALIGN + 32 * RANDOMX_PROGRAM_SIZE) - 1) / (RANDOMX_ALIGN) + 1) * (RANDOMX_ALIGN))
#define db .byte
@ -128,8 +130,7 @@ init_block_loop:
prefetchw byte ptr [rsi]
mov rbx, rbp
.byte 232 ;# 0xE8 = call
;# .set CALL_LOC,
.int 32768 - (call_offset - DECL(randomx_dataset_init))
.int SUPERSCALAR_OFFSET - (call_offset - DECL(randomx_dataset_init))
call_offset:
mov qword ptr [rsi+0], r8
mov qword ptr [rsi+8], r9

View file

@ -51,6 +51,8 @@ include asm/configuration.asm
RANDOMX_SCRATCHPAD_MASK EQU (RANDOMX_SCRATCHPAD_L3-64)
RANDOMX_DATASET_BASE_MASK EQU (RANDOMX_DATASET_BASE_SIZE-64)
RANDOMX_CACHE_MASK EQU (RANDOMX_ARGON_MEMORY*16-1)
RANDOMX_ALIGN EQU 4096
SUPERSCALAR_OFFSET EQU ((((RANDOMX_ALIGN + 32 * RANDOMX_PROGRAM_SIZE) - 1) / (RANDOMX_ALIGN) + 1) * (RANDOMX_ALIGN))
ALIGN 64
randomx_program_prologue PROC
@ -115,7 +117,7 @@ init_block_loop:
prefetchw byte ptr [rsi]
mov rbx, rbp
db 232 ;# 0xE8 = call
dd 32768 - distance
dd SUPERSCALAR_OFFSET - distance
distance equ $ - offset randomx_dataset_init
mov qword ptr [rsi+0], r8
mov qword ptr [rsi+8], r9

View file

@ -39,6 +39,6 @@ int main(int argc, char** argv) {
std::cout << "Elapsed: " << sw.getElapsed() << " s" << std::endl;
dump((const char*)jit.getProgramFunc(), randomx::CodeSize, "program.bin");
dump((const char*)jit.getProgramFunc(), jit.getCodeSize(), "program.bin");
return 0;
}

View file

@ -97,17 +97,13 @@ void* allocExecutableMemory(std::size_t bytes) {
return mem;
}
//Rounds `pos` up to the nearest multiple of `align`.
//`align` must be non-zero because it is used as a divisor.
//A `pos` of 0 is returned unchanged: evaluating `pos - 1` for it would wrap around in
//unsigned arithmetic and produce a wrong, non-zero result when `align` is not a power of two.
constexpr std::size_t align(std::size_t pos, std::size_t align) {
	return pos == 0 ? 0 : ((pos - 1) / align + 1) * align;
}
void* allocLargePagesMemory(std::size_t bytes) {
void* mem;
#if defined(_WIN32) || defined(__CYGWIN__)
setPrivilege("SeLockMemoryPrivilege", 1);
auto pageMinimum = GetLargePageMinimum();
if (pageMinimum > 0)
mem = VirtualAlloc(NULL, align(bytes, pageMinimum), MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES, PAGE_READWRITE);
mem = VirtualAlloc(NULL, alignSize(bytes, pageMinimum), MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES, PAGE_READWRITE);
else
throw std::runtime_error("allocLargePagesMemory - Large pages are not supported");
if (mem == nullptr)

View file

@ -30,6 +30,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <cstddef>
//Rounds `pos` up to the nearest multiple of `align`.
//`align` must be non-zero because it is used as a divisor.
//A `pos` of 0 is returned unchanged: evaluating `pos - 1` for it would wrap around in
//unsigned arithmetic and produce a wrong, non-zero result when `align` is not a power of two.
constexpr std::size_t alignSize(std::size_t pos, std::size_t align) {
	return pos == 0 ? 0 : ((pos - 1) / align + 1) * align;
}
void* allocExecutableMemory(std::size_t);
void* allocLargePagesMemory(std::size_t);
void freePagedMemory(void*, std::size_t);