Reduced Dataset size to 2 GiB with 8 Cache accesses per Dataset block

Disabled Dataset growth (RANDOMX_DS_GROWTH set to 0)
This commit is contained in:
tevador 2019-03-22 12:53:16 +01:00
parent 233af9f14f
commit 107270d93d
7 changed files with 12 additions and 142 deletions

View file

@ -349,6 +349,7 @@ namespace RandomX {
executeBytecode(r, f, e, a); executeBytecode(r, f, e, a);
mem.mx ^= r[readReg2] ^ r[readReg3]; mem.mx ^= r[readReg2] ^ r[readReg3];
mem.mx &= CacheLineAlignMask;
Cache& cache = mem.ds.cache; Cache& cache = mem.ds.cache;
uint64_t datasetLine[CacheLineSize / sizeof(uint64_t)]; uint64_t datasetLine[CacheLineSize / sizeof(uint64_t)];
initBlock(cache, (uint8_t*)datasetLine, datasetBase + mem.ma / CacheLineSize, RANDOMX_CACHE_ACCESSES / 8); initBlock(cache, (uint8_t*)datasetLine, datasetBase + mem.ma / CacheLineSize, RANDOMX_CACHE_ACCESSES / 8);

View file

@ -204,6 +204,10 @@ namespace RandomX {
} }
void JitCompilerX86::generateProgramLight(Program& prog) { void JitCompilerX86::generateProgramLight(Program& prog) {
if (RANDOMX_CACHE_ACCESSES != 8)
throw std::runtime_error("JIT compiler: Unsupported value of RANDOMX_CACHE_ACCESSES");
if (RANDOMX_ARGON_GROWTH != 0)
throw std::runtime_error("JIT compiler: Unsupported value of RANDOMX_ARGON_GROWTH");
generateProgramPrologue(prog); generateProgramPrologue(prog);
memcpy(code + codePos, codeReadDatasetLight, readDatasetLightSize); memcpy(code + codePos, codeReadDatasetLight, readDatasetLightSize);
codePos += readDatasetLightSize; codePos += readDatasetLightSize;

View file

@ -1,9 +1,10 @@
xor rbp, rax ;# modify "mx" xor rbp, rax ;# modify "mx"
and rbp, -64 ;# align "mx" to the start of a cache line
mov edx, ebp ;# edx = mx mov edx, ebp ;# edx = mx
and edx, 2147483584 ;# align "mx" to the start of a cache line
prefetchnta byte ptr [rdi+rdx] prefetchnta byte ptr [rdi+rdx]
ror rbp, 32 ;# swap "ma" and "mx" ror rbp, 32 ;# swap "ma" and "mx"
mov edx, ebp ;# edx = ma mov edx, ebp ;# edx = ma
and edx, 2147483584 ;# align "ma" to the start of a cache line
lea rcx, [rdi+rdx] ;# dataset cache line lea rcx, [rdi+rdx] ;# dataset cache line
xor r8, qword ptr [rcx+0] xor r8, qword ptr [rcx+0]
xor r9, qword ptr [rcx+8] xor r9, qword ptr [rcx+8]

View file

@ -1,4 +1,5 @@
xor rbp, rax ;# modify "mx" xor rbp, rax ;# modify "mx"
ror rbp, 32 ;# swap "ma" and "mx" ror rbp, 32 ;# swap "ma" and "mx"
mov ecx, ebp ;# ecx = ma mov ecx, ebp ;# ecx = ma
and ecx, 2147483584 ;# align "ma" to the start of a cache line
shr ecx, 6 ;# ecx = Dataset block number shr ecx, 6 ;# ecx = Dataset block number

View file

@ -156,143 +156,6 @@
xor r13, qword ptr [rbx+40] xor r13, qword ptr [rbx+40]
xor r14, qword ptr [rbx+48] xor r14, qword ptr [rbx+48]
xor r15, qword ptr [rbx+56] xor r15, qword ptr [rbx+56]
;# iteration 1
;# c0
mov rbx, r8
and rbx, 4194303
shl rbx, 6
add rbx, rdi
prefetchnta byte ptr [rbx]
lea rcx, [r8+r9]
call squareHashSub
mov r9, rax
xor r8, qword ptr [rbx+0]
xor r9, qword ptr [rbx+8]
xor r10, qword ptr [rbx+16]
xor r11, qword ptr [rbx+24]
xor r12, qword ptr [rbx+32]
xor r13, qword ptr [rbx+40]
xor r14, qword ptr [rbx+48]
xor r15, qword ptr [rbx+56]
;# c1
mov rbx, r9
and rbx, 4194303
shl rbx, 6
add rbx, rdi
prefetchnta byte ptr [rbx]
lea rcx, [r9+r10]
call squareHashSub
mov r10, rax
xor r8, qword ptr [rbx+0]
xor r9, qword ptr [rbx+8]
xor r10, qword ptr [rbx+16]
xor r11, qword ptr [rbx+24]
xor r12, qword ptr [rbx+32]
xor r13, qword ptr [rbx+40]
xor r14, qword ptr [rbx+48]
xor r15, qword ptr [rbx+56]
;# c2
mov rbx, r10
and rbx, 4194303
shl rbx, 6
add rbx, rdi
prefetchnta byte ptr [rbx]
lea rcx, [r10+r11]
call squareHashSub
mov r11, rax
xor r8, qword ptr [rbx+0]
xor r9, qword ptr [rbx+8]
xor r10, qword ptr [rbx+16]
xor r11, qword ptr [rbx+24]
xor r12, qword ptr [rbx+32]
xor r13, qword ptr [rbx+40]
xor r14, qword ptr [rbx+48]
xor r15, qword ptr [rbx+56]
;# c3
mov rbx, r11
and rbx, 4194303
shl rbx, 6
add rbx, rdi
prefetchnta byte ptr [rbx]
lea rcx, [r11+r12]
call squareHashSub
mov r12, rax
xor r8, qword ptr [rbx+0]
xor r9, qword ptr [rbx+8]
xor r10, qword ptr [rbx+16]
xor r11, qword ptr [rbx+24]
xor r12, qword ptr [rbx+32]
xor r13, qword ptr [rbx+40]
xor r14, qword ptr [rbx+48]
xor r15, qword ptr [rbx+56]
;# c4
mov rbx, r12
and rbx, 4194303
shl rbx, 6
add rbx, rdi
prefetchnta byte ptr [rbx]
lea rcx, [r12+r13]
call squareHashSub
mov r13, rax
xor r8, qword ptr [rbx+0]
xor r9, qword ptr [rbx+8]
xor r10, qword ptr [rbx+16]
xor r11, qword ptr [rbx+24]
xor r12, qword ptr [rbx+32]
xor r13, qword ptr [rbx+40]
xor r14, qword ptr [rbx+48]
xor r15, qword ptr [rbx+56]
;# c5
mov rbx, r13
and rbx, 4194303
shl rbx, 6
add rbx, rdi
prefetchnta byte ptr [rbx]
lea rcx, [r13+r14]
call squareHashSub
mov r14, rax
xor r8, qword ptr [rbx+0]
xor r9, qword ptr [rbx+8]
xor r10, qword ptr [rbx+16]
xor r11, qword ptr [rbx+24]
xor r12, qword ptr [rbx+32]
xor r13, qword ptr [rbx+40]
xor r14, qword ptr [rbx+48]
xor r15, qword ptr [rbx+56]
;# c6
mov rbx, r14
and rbx, 4194303
shl rbx, 6
add rbx, rdi
prefetchnta byte ptr [rbx]
lea rcx, [r14+r15]
call squareHashSub
mov r15, rax
xor r8, qword ptr [rbx+0]
xor r9, qword ptr [rbx+8]
xor r10, qword ptr [rbx+16]
xor r11, qword ptr [rbx+24]
xor r12, qword ptr [rbx+32]
xor r13, qword ptr [rbx+40]
xor r14, qword ptr [rbx+48]
xor r15, qword ptr [rbx+56]
;# c7
mov rbx, r15
and rbx, 4194303
shl rbx, 6
add rbx, rdi
prefetchnta byte ptr [rbx]
lea rcx, [r15+r8]
call squareHashSub
mov r8, rax
xor r8, qword ptr [rbx+0]
xor r9, qword ptr [rbx+8]
xor r10, qword ptr [rbx+16]
xor r11, qword ptr [rbx+24]
xor r12, qword ptr [rbx+32]
xor r13, qword ptr [rbx+40]
xor r14, qword ptr [rbx+48]
xor r15, qword ptr [rbx+56]
;# -------------------------- ;# --------------------------
mov rbx, qword ptr [rsp+64] mov rbx, qword ptr [rsp+64]
xor r8, qword ptr [rsp+56] xor r8, qword ptr [rsp+56]

View file

@ -35,13 +35,13 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
#define RANDOMX_ARGON_SALT "RandomX\x03" #define RANDOMX_ARGON_SALT "RandomX\x03"
//Number of random Cache accesses per Dataset block. Minimum is 2. //Number of random Cache accesses per Dataset block. Minimum is 2.
#define RANDOMX_CACHE_ACCESSES 16 #define RANDOMX_CACHE_ACCESSES 8
//Dataset size in bytes. Must be a power of 2. //Dataset size in bytes. Must be a power of 2.
#define RANDOMX_DATASET_SIZE (4ULL * 1024 * 1024 * 1024) #define RANDOMX_DATASET_SIZE (2ULL * 1024 * 1024 * 1024)
//Dataset growth per epoch in bytes. Must be divisible by 64. //Dataset growth per epoch in bytes. Must be divisible by 64.
#define RANDOMX_DS_GROWTH (2 * 1024 * 1024) #define RANDOMX_DS_GROWTH 0
//Number of blocks per epoch //Number of blocks per epoch
#define RANDOMX_EPOCH_BLOCKS 1024 #define RANDOMX_EPOCH_BLOCKS 1024

View file

@ -340,7 +340,7 @@ int main(int argc, char** argv) {
std::cout << "Calculated result: "; std::cout << "Calculated result: ";
result.print(std::cout); result.print(std::cout);
if(programCount == 1000) if(programCount == 1000)
std::cout << "Reference result: 9e636a04a2517f37d8ed40b67a7051e02a7577e878fbba5c4352996b2c653f90" << std::endl; std::cout << "Reference result: 83875c55fb9ff4a75205a744b82926ebbe23219c6291889c9ee91603c845c597" << std::endl;
if (!miningMode) { if (!miningMode) {
std::cout << "Performance: " << 1000 * elapsed / programCount << " ms per hash" << std::endl; std::cout << "Performance: " << 1000 * elapsed / programCount << " ms per hash" << std::endl;
} }