From 107270d93d99968c569561a33bd1387721b558d9 Mon Sep 17 00:00:00 2001 From: tevador Date: Fri, 22 Mar 2019 12:53:16 +0100 Subject: [PATCH] Reduced Dataset size to 2 GiB with 8 memory accesses per block Disabled Dataset growth --- src/InterpretedVirtualMachine.cpp | 1 + src/JitCompilerX86.cpp | 4 + src/asm/program_read_dataset.inc | 3 +- src/asm/program_read_dataset_light.inc | 1 + src/asm/program_read_dataset_light_sub.inc | 137 --------------------- src/configuration.h | 6 +- src/main.cpp | 2 +- 7 files changed, 12 insertions(+), 142 deletions(-) diff --git a/src/InterpretedVirtualMachine.cpp b/src/InterpretedVirtualMachine.cpp index 54dd7be..15a5049 100644 --- a/src/InterpretedVirtualMachine.cpp +++ b/src/InterpretedVirtualMachine.cpp @@ -349,6 +349,7 @@ namespace RandomX { executeBytecode(r, f, e, a); mem.mx ^= r[readReg2] ^ r[readReg3]; + mem.mx &= CacheLineAlignMask; Cache& cache = mem.ds.cache; uint64_t datasetLine[CacheLineSize / sizeof(uint64_t)]; initBlock(cache, (uint8_t*)datasetLine, datasetBase + mem.ma / CacheLineSize, RANDOMX_CACHE_ACCESSES / 8); diff --git a/src/JitCompilerX86.cpp b/src/JitCompilerX86.cpp index fef890d..5ddc382 100644 --- a/src/JitCompilerX86.cpp +++ b/src/JitCompilerX86.cpp @@ -204,6 +204,10 @@ namespace RandomX { } void JitCompilerX86::generateProgramLight(Program& prog) { + if (RANDOMX_CACHE_ACCESSES != 8) + throw std::runtime_error("JIT compiler: Unsupported value of RANDOMX_CACHE_ACCESSES"); + if (RANDOMX_ARGON_GROWTH != 0) + throw std::runtime_error("JIT compiler: Unsupported value of RANDOMX_ARGON_GROWTH"); generateProgramPrologue(prog); memcpy(code + codePos, codeReadDatasetLight, readDatasetLightSize); codePos += readDatasetLightSize; diff --git a/src/asm/program_read_dataset.inc b/src/asm/program_read_dataset.inc index bae4817..4cdabb9 100644 --- a/src/asm/program_read_dataset.inc +++ b/src/asm/program_read_dataset.inc @@ -1,9 +1,10 @@ xor rbp, rax ;# modify "mx" - and rbp, -64 ;# align "mx" to the start of a cache line 
mov edx, ebp ;# edx = mx + and edx, 2147483584 ;# align "mx" to the start of a cache line prefetchnta byte ptr [rdi+rdx] ror rbp, 32 ;# swap "ma" and "mx" mov edx, ebp ;# edx = ma + and edx, 2147483584 ;# align "ma" to the start of a cache line lea rcx, [rdi+rdx] ;# dataset cache line xor r8, qword ptr [rcx+0] xor r9, qword ptr [rcx+8] diff --git a/src/asm/program_read_dataset_light.inc b/src/asm/program_read_dataset_light.inc index 14395d8..65d2b8d 100644 --- a/src/asm/program_read_dataset_light.inc +++ b/src/asm/program_read_dataset_light.inc @@ -1,4 +1,5 @@ xor rbp, rax ;# modify "mx" ror rbp, 32 ;# swap "ma" and "mx" mov ecx, ebp ;# ecx = ma + and ecx, 2147483584 ;# align "ma" to the start of a cache line shr ecx, 6 ;# ecx = Dataset block number diff --git a/src/asm/program_read_dataset_light_sub.inc b/src/asm/program_read_dataset_light_sub.inc index 9c26fb2..6fe07f0 100644 --- a/src/asm/program_read_dataset_light_sub.inc +++ b/src/asm/program_read_dataset_light_sub.inc @@ -156,143 +156,6 @@ xor r13, qword ptr [rbx+40] xor r14, qword ptr [rbx+48] xor r15, qword ptr [rbx+56] - ;# iteration 1 - ;# c0 - mov rbx, r8 - and rbx, 4194303 - shl rbx, 6 - add rbx, rdi - prefetchnta byte ptr [rbx] - lea rcx, [r8+r9] - call squareHashSub - mov r9, rax - xor r8, qword ptr [rbx+0] - xor r9, qword ptr [rbx+8] - xor r10, qword ptr [rbx+16] - xor r11, qword ptr [rbx+24] - xor r12, qword ptr [rbx+32] - xor r13, qword ptr [rbx+40] - xor r14, qword ptr [rbx+48] - xor r15, qword ptr [rbx+56] - ;# c1 - mov rbx, r9 - and rbx, 4194303 - shl rbx, 6 - add rbx, rdi - prefetchnta byte ptr [rbx] - lea rcx, [r9+r10] - call squareHashSub - mov r10, rax - xor r8, qword ptr [rbx+0] - xor r9, qword ptr [rbx+8] - xor r10, qword ptr [rbx+16] - xor r11, qword ptr [rbx+24] - xor r12, qword ptr [rbx+32] - xor r13, qword ptr [rbx+40] - xor r14, qword ptr [rbx+48] - xor r15, qword ptr [rbx+56] - ;# c2 - mov rbx, r10 - and rbx, 4194303 - shl rbx, 6 - add rbx, rdi - prefetchnta byte ptr [rbx] - lea rcx, 
[r10+r11] - call squareHashSub - mov r11, rax - xor r8, qword ptr [rbx+0] - xor r9, qword ptr [rbx+8] - xor r10, qword ptr [rbx+16] - xor r11, qword ptr [rbx+24] - xor r12, qword ptr [rbx+32] - xor r13, qword ptr [rbx+40] - xor r14, qword ptr [rbx+48] - xor r15, qword ptr [rbx+56] - ;# c3 - mov rbx, r11 - and rbx, 4194303 - shl rbx, 6 - add rbx, rdi - prefetchnta byte ptr [rbx] - lea rcx, [r11+r12] - call squareHashSub - mov r12, rax - xor r8, qword ptr [rbx+0] - xor r9, qword ptr [rbx+8] - xor r10, qword ptr [rbx+16] - xor r11, qword ptr [rbx+24] - xor r12, qword ptr [rbx+32] - xor r13, qword ptr [rbx+40] - xor r14, qword ptr [rbx+48] - xor r15, qword ptr [rbx+56] - ;# c4 - mov rbx, r12 - and rbx, 4194303 - shl rbx, 6 - add rbx, rdi - prefetchnta byte ptr [rbx] - lea rcx, [r12+r13] - call squareHashSub - mov r13, rax - xor r8, qword ptr [rbx+0] - xor r9, qword ptr [rbx+8] - xor r10, qword ptr [rbx+16] - xor r11, qword ptr [rbx+24] - xor r12, qword ptr [rbx+32] - xor r13, qword ptr [rbx+40] - xor r14, qword ptr [rbx+48] - xor r15, qword ptr [rbx+56] - ;# c5 - mov rbx, r13 - and rbx, 4194303 - shl rbx, 6 - add rbx, rdi - prefetchnta byte ptr [rbx] - lea rcx, [r13+r14] - call squareHashSub - mov r14, rax - xor r8, qword ptr [rbx+0] - xor r9, qword ptr [rbx+8] - xor r10, qword ptr [rbx+16] - xor r11, qword ptr [rbx+24] - xor r12, qword ptr [rbx+32] - xor r13, qword ptr [rbx+40] - xor r14, qword ptr [rbx+48] - xor r15, qword ptr [rbx+56] - ;# c6 - mov rbx, r14 - and rbx, 4194303 - shl rbx, 6 - add rbx, rdi - prefetchnta byte ptr [rbx] - lea rcx, [r14+r15] - call squareHashSub - mov r15, rax - xor r8, qword ptr [rbx+0] - xor r9, qword ptr [rbx+8] - xor r10, qword ptr [rbx+16] - xor r11, qword ptr [rbx+24] - xor r12, qword ptr [rbx+32] - xor r13, qword ptr [rbx+40] - xor r14, qword ptr [rbx+48] - xor r15, qword ptr [rbx+56] - ;# c7 - mov rbx, r15 - and rbx, 4194303 - shl rbx, 6 - add rbx, rdi - prefetchnta byte ptr [rbx] - lea rcx, [r15+r8] - call squareHashSub - mov r8, 
rax - xor r8, qword ptr [rbx+0] - xor r9, qword ptr [rbx+8] - xor r10, qword ptr [rbx+16] - xor r11, qword ptr [rbx+24] - xor r12, qword ptr [rbx+32] - xor r13, qword ptr [rbx+40] - xor r14, qword ptr [rbx+48] - xor r15, qword ptr [rbx+56] ;# -------------------------- mov rbx, qword ptr [rsp+64] xor r8, qword ptr [rsp+56] diff --git a/src/configuration.h b/src/configuration.h index 4c30b59..bf982f9 100644 --- a/src/configuration.h +++ b/src/configuration.h @@ -35,13 +35,13 @@ along with RandomX. If not, see <http://www.gnu.org/licenses/>. #define RANDOMX_ARGON_SALT "RandomX\x03" //Number of random Cache accesses per Dataset block. Minimum is 2. -#define RANDOMX_CACHE_ACCESSES 16 +#define RANDOMX_CACHE_ACCESSES 8 //Dataset size in bytes. Must be a power of 2. -#define RANDOMX_DATASET_SIZE (4ULL * 1024 * 1024 * 1024) +#define RANDOMX_DATASET_SIZE (2ULL * 1024 * 1024 * 1024) //Dataset growth per epoch in bytes. Must be divisible by 64. -#define RANDOMX_DS_GROWTH (2 * 1024 * 1024) +#define RANDOMX_DS_GROWTH 0 //Number of blocks per epoch #define RANDOMX_EPOCH_BLOCKS 1024 diff --git a/src/main.cpp b/src/main.cpp index 740ec28..a28bc52 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -340,7 +340,7 @@ int main(int argc, char** argv) { std::cout << "Calculated result: "; result.print(std::cout); if(programCount == 1000) - std::cout << "Reference result: 9e636a04a2517f37d8ed40b67a7051e02a7577e878fbba5c4352996b2c653f90" << std::endl; + std::cout << "Reference result: 83875c55fb9ff4a75205a744b82926ebbe23219c6291889c9ee91603c845c597" << std::endl; if (!miningMode) { std::cout << "Performance: " << 1000 * elapsed / programCount << " ms per hash" << std::endl; }