CompiledLightVirtualMachine

This commit is contained in:
tevador 2019-03-21 20:44:59 +01:00
parent 1617d8e34e
commit 73a11f5c01
12 changed files with 451 additions and 12 deletions

View file

@ -0,0 +1,40 @@
/*
Copyright (c) 2019 tevador
This file is part of RandomX.
RandomX is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
RandomX is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with RandomX. If not, see <http://www.gnu.org/licenses/>.
*/
#include "CompiledLightVirtualMachine.hpp"
#include "common.hpp"
#include <stdexcept>
namespace RandomX {

	// Empty constructor body: no additional state is set up here.
	CompiledLightVirtualMachine::CompiledLightVirtualMachine() {}

	// Stores the dataset handle in mem.ds and derives datasetRange from the
	// supplied size, expressed in units of CacheLineSize.
	//   ds   - dataset handle, copied into mem.ds
	//   size - size in bytes used for the range computation
	void CompiledLightVirtualMachine::setDataset(dataset_t ds, uint64_t size) {
		mem.ds = ds;
		// Bytes beyond RANDOMX_DATASET_SIZE, plus one extra cache line.
		const uint64_t spareBytes = size - RANDOMX_DATASET_SIZE + CacheLineSize;
		datasetRange = spareBytes / CacheLineSize;
		// Base-pointer bookkeeping is disabled for this VM:
		//datasetBasePtr = ds.dataset.memory;
	}

	// Runs the common VirtualMachine initialisation and then has the JIT
	// compiler emit the light-mode version of the current program.
	void CompiledLightVirtualMachine::initialize() {
		VirtualMachine::initialize();
		compiler.generateProgramLight(program);
		// Dataset base adjustment is disabled for this VM:
		//mem.ds.dataset.memory = datasetBasePtr + (datasetBase * CacheLineSize);
	}

}

View file

@ -0,0 +1,44 @@
/*
Copyright (c) 2019 tevador
This file is part of RandomX.
RandomX is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
RandomX is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with RandomX. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
//#define TRACEVM
#include <new>
#include "CompiledVirtualMachine.hpp"
#include "JitCompilerX86.hpp"
#include "intrinPortable.h"
namespace RandomX {

	// Variant of CompiledVirtualMachine that overrides setDataset() and
	// initialize(); see CompiledLightVirtualMachine.cpp for the definitions.
	class CompiledLightVirtualMachine : public CompiledVirtualMachine {
	public:
		// Instances are allocated with _mm_malloc on a 64-byte boundary.
		// Throws std::bad_alloc when the allocation fails.
		void* operator new(size_t size) {
			if (void* block = _mm_malloc(size, 64))
				return block;
			throw std::bad_alloc();
		}

		// Counterpart of operator new above: releases storage with _mm_free.
		void operator delete(void* ptr) {
			_mm_free(ptr);
		}

		CompiledLightVirtualMachine();
		void setDataset(dataset_t ds, uint64_t size) override;
		void initialize() override;
	};

}

View file

@ -48,10 +48,7 @@ namespace RandomX {
// Returns whatever compiler.getCode() yields, as an untyped pointer
// (presumably the start of the JIT code buffer - confirm in JitCompilerX86).
void* getProgram() {
	void* generated = compiler.getCode();
	return generated;
}
private:
#ifdef TRACEVM
convertible_t tracepad[InstructionCount];
#endif
protected:
JitCompilerX86 compiler;
uint8_t* datasetBasePtr;
};

View file

@ -24,6 +24,8 @@ PUBLIC randomx_program_loop_begin
PUBLIC randomx_program_loop_load
PUBLIC randomx_program_start
PUBLIC randomx_program_read_dataset
PUBLIC randomx_program_read_dataset_light
PUBLIC randomx_program_read_dataset_light_sub
PUBLIC randomx_program_loop_store
PUBLIC randomx_program_loop_end
PUBLIC randomx_program_epilogue
@ -54,6 +56,10 @@ randomx_program_read_dataset PROC
include asm/program_read_dataset.inc
randomx_program_read_dataset ENDP
;# Light-mode dataset read fragment (body lives in the included .inc file).
;# JitCompilerX86 measures this fragment as
;# (randomx_program_loop_store - randomx_program_read_dataset_light), so it has to
;# stay immediately in front of randomx_program_loop_store.
randomx_program_read_dataset_light PROC
include asm/program_read_dataset_light.inc
randomx_program_read_dataset_light ENDP
randomx_program_loop_store PROC
include asm/program_loop_store.inc
randomx_program_loop_store ENDP
@ -62,6 +68,13 @@ randomx_program_loop_end PROC
nop
randomx_program_loop_end ENDP
ALIGN 64
;# Subroutine used by the light-mode program, followed by the squareHash code it calls.
;# JitCompilerX86 measures this block as
;# (randomx_program_epilogue - randomx_program_read_dataset_light_sub), so the measured
;# size covers both includes plus any padding added by the ALIGN before the epilogue,
;# and the block has to stay directly in front of randomx_program_epilogue.
randomx_program_read_dataset_light_sub PROC
include asm/program_read_dataset_light_sub.inc
;# Label targeted by the "call squareHashSub" instructions in the include above
squareHashSub:
include asm/squareHash.inc
randomx_program_read_dataset_light_sub ENDP
ALIGN 64
randomx_program_epilogue PROC
include asm/program_epilogue_win64.inc

View file

@ -23,8 +23,10 @@ extern "C" {
void randomx_program_loop_load();
void randomx_program_start();
void randomx_program_read_dataset();
void randomx_program_read_dataset_light();
void randomx_program_loop_store();
void randomx_program_loop_end();
void randomx_program_read_dataset_light_sub();
void randomx_program_epilogue();
void randomx_program_end();
}

View file

@ -86,8 +86,10 @@ namespace RandomX {
// Start addresses of the static assembly fragments. The size constants that
// follow are computed by subtracting neighbouring addresses, so the order of
// these symbols is assumed to match their layout in the assembly file.
const uint8_t* codeLoopLoad = (uint8_t*)&randomx_program_loop_load;
const uint8_t* codeProgamStart = (uint8_t*)&randomx_program_start;
const uint8_t* codeReadDataset = (uint8_t*)&randomx_program_read_dataset;
const uint8_t* codeReadDatasetLight = (uint8_t*)&randomx_program_read_dataset_light;
const uint8_t* codeLoopStore = (uint8_t*)&randomx_program_loop_store;
const uint8_t* codeLoopEnd = (uint8_t*)&randomx_program_loop_end;
// Out-of-line subroutine used by light mode; located between loop_end and the epilogue
const uint8_t* codeReadDatasetLightSub = (uint8_t*)&randomx_program_read_dataset_light_sub;
const uint8_t* codeEpilogue = (uint8_t*)&randomx_program_epilogue;
const uint8_t* codeProgramEnd = (uint8_t*)&randomx_program_end;
@ -95,10 +97,13 @@ namespace RandomX {
// Byte sizes of the static assembly fragments, each obtained as the distance
// between a fragment's start symbol and the start of the fragment after it.
const int32_t epilogueSize = codeProgramEnd - codeEpilogue;
const int32_t loopLoadSize = codeProgamStart - codeLoopLoad;
// The regular dataset read ends where the light variant begins. Measuring it up
// to codeLoopStore would also include the light fragment, and a second definition
// of readDatasetSize would not compile, so only this definition is kept.
const int32_t readDatasetSize = codeReadDatasetLight - codeReadDataset;
const int32_t readDatasetLightSize = codeLoopStore - codeReadDatasetLight;
const int32_t loopStoreSize = codeLoopEnd - codeLoopStore;
const int32_t readDatasetLightSubSize = codeEpilogue - codeReadDatasetLightSub;

// Fixed positions inside the JIT buffer: the epilogue occupies the last
// epilogueSize bytes and the light-mode subroutine sits directly below it.
const int32_t epilogueOffset = CodeSize - epilogueSize;
const int32_t readDatasetLightSubOffset = epilogueOffset - readDatasetLightSubSize;
// REX prefix followed by opcode 0x03 (ADD r64, r/m64).
// 0x4d sets REX.W, REX.R and REX.B; 0x4c sets REX.W and REX.R only.
// Presumably the caller appends the ModRM byte - confirm at the use sites.
static const uint8_t REX_ADD_RR[] = { 0x4d, 0x03 };
static const uint8_t REX_ADD_RM[] = { 0x4c, 0x03 };
@ -168,6 +173,7 @@ namespace RandomX {
// Two complete instructions: andps xmm12, xmm13 followed by orps xmm12, xmm14
static const uint8_t REX_ANDPS_XMM12[] = { 0x45, 0x0F, 0x54, 0xE5, 0x45, 0x0F, 0x56, 0xE6 };
// Leading bytes shared by the PADD* encodings: 0x66 prefix, REX.R (0x44), 0x0F escape
static const uint8_t REX_PADD[] = { 0x66, 0x44, 0x0f };
// Final opcode byte, in order: paddb (0xfc), paddw (0xfd), paddd (0xfe), paddq (0xd4)
static const uint8_t PADD_OPCODES[] = { 0xfc, 0xfd, 0xfe, 0xd4 };
// Opcode of the near CALL that takes a 32-bit relative displacement
static const uint8_t CALL = 0xe8;
size_t JitCompilerX86::getCodeSize() {
return codePos - prologueSize;
@ -176,10 +182,27 @@ namespace RandomX {
// Allocates the executable buffer of CodeSize bytes and installs the parts of
// the program that never change between generated programs:
//   - the prologue at the start of the buffer,
//   - the epilogue in the last epilogueSize bytes,
//   - the light-mode dataset subroutine directly below the epilogue.
JitCompilerX86::JitCompilerX86() {
	code = (uint8_t*)allocExecutableMemory(CodeSize);
	memcpy(code, codePrologue, prologueSize);
	// epilogueOffset is defined as CodeSize - epilogueSize, so a single copy
	// places the epilogue at the tail; copying it a second time is redundant.
	memcpy(code + epilogueOffset, codeEpilogue, epilogueSize);
	memcpy(code + readDatasetLightSubOffset, codeReadDatasetLightSub, readDatasetLightSubSize);
}
// Emits a program that uses the regular dataset read fragment:
// prologue, a copy of codeReadDataset, then the epilogue.
void JitCompilerX86::generateProgram(Program& prog) {
	generateProgramPrologue(prog);

	// Append the static read fragment at the current write position.
	uint8_t* const writePtr = code + codePos;
	memcpy(writePtr, codeReadDataset, readDatasetSize);
	codePos += readDatasetSize;

	generateProgramEpilogue(prog);
}
// Emits the light-mode program: prologue, a copy of codeReadDatasetLight,
// a CALL to the subroutine stored at readDatasetLightSubOffset, then the epilogue.
void JitCompilerX86::generateProgramLight(Program& prog) {
	generateProgramPrologue(prog);

	// Append the static light-mode fragment at the current write position.
	uint8_t* const writePtr = code + codePos;
	memcpy(writePtr, codeReadDatasetLight, readDatasetLightSize);
	codePos += readDatasetLightSize;

	emitByte(CALL);
	// The displacement is relative to the end of the 4-byte operand,
	// i.e. the address of the instruction that follows the CALL.
	const int32_t afterCall = codePos + 4;
	emit32(readDatasetLightSubOffset - afterCall);

	generateProgramEpilogue(prog);
}
void JitCompilerX86::generateProgramPrologue(Program& prog) {
auto addressRegisters = prog.getEntropy(12);
uint32_t readReg0 = 0 + (addressRegisters & 1);
addressRegisters >>= 1;
@ -205,8 +228,9 @@ namespace RandomX {
emitByte(0xc0 + readReg2);
emit(REX_XOR_EAX);
emitByte(0xc0 + readReg3);
memcpy(code + codePos, codeReadDataset, readDatasetSize);
codePos += readDatasetSize;
}
void JitCompilerX86::generateProgramEpilogue(Program& prog) {
memcpy(code + codePos, codeLoopStore, loopStoreSize);
codePos += loopStoreSize;
emit(SUB_EBX);

View file

@ -37,6 +37,7 @@ namespace RandomX {
public:
JitCompilerX86();
void generateProgram(Program&);
void generateProgramLight(Program&);
// Exposes the start of the code buffer reinterpreted as a ProgramFunc.
ProgramFunc getProgramFunc() {
	return reinterpret_cast<ProgramFunc>(code);
}
@ -49,6 +50,8 @@ namespace RandomX {
uint8_t* code;
int32_t codePos;
void generateProgramPrologue(Program&);
void generateProgramEpilogue(Program&);
void genAddressReg(Instruction&, bool);
void genAddressRegDst(Instruction&, bool);
void genAddressImm(Instruction&);

View file

@ -1,3 +1,4 @@
xor eax, eax
pop rcx
mov qword ptr [rcx+0], r8
mov qword ptr [rcx+8], r9

View file

@ -1,5 +1,4 @@
xor rbp, rax ;# modify "mx"
xor eax, eax
and rbp, -64 ;# align "mx" to the start of a cache line
mov edx, ebp ;# edx = mx
prefetchnta byte ptr [rdi+rdx]

View file

@ -0,0 +1,4 @@
;# Light-mode dataset read fragment: updates rbp and leaves the dataset block
;# number in ecx. JitCompilerX86::generateProgramLight emits a CALL right after
;# this fragment; the called subroutine documents rcx as its block-number input.
xor rbp, rax ;# modify "mx"
ror rbp, 32 ;# swap "ma" and "mx"
;# writing ecx also clears the upper 32 bits of rcx
mov ecx, ebp ;# ecx = ma
shr ecx, 6 ;# ecx = Dataset block number

View file

@ -0,0 +1,308 @@
;# Light-mode dataset subroutine: builds eight qwords in r8-r15 from the block
;# number in rcx and the memory at rdi, then XORs them into the caller's r8-r15.
;# rdi -> Cache pointer
;# rcx -> Dataset block number
;# rax, rbx, rcx, rdx -> scratch registers
;# NOTE(review): rbx is listed as scratch but is saved below and restored before "ret".
;# Reserve nine qword slots: rbx plus the caller's r8-r15
sub rsp, 72
mov qword ptr [rsp+64], rbx
mov qword ptr [rsp+56], r8
mov qword ptr [rsp+48], r9
mov qword ptr [rsp+40], r10
mov qword ptr [rsp+32], r11
mov qword ptr [rsp+24], r12
mov qword ptr [rsp+16], r13
mov qword ptr [rsp+8], r14
mov qword ptr [rsp+0], r15
;# Initial state: r8 = block number, r9-r15 = 0
mov r8, rcx
xor r9, r9
xor r10, r10
xor r11, r11
xor r12, r12
xor r13, r13
xor r14, r14
xor r15, r15
;# iteration 0
;# Every step cN (N = 0..7) has the same shape, with rA = r(8+N) and rB the next
;# register (wrapping from r15 back to r8):
;#   rbx = rdi + (rA & 4194303) * 64     ;# 4194303 = 2^22 - 1, 64-byte lines
;#   prefetch [rbx]
;#   rcx = rA + rB, call squareHashSub   ;# result is taken from rax
;#   rB  = rax
;#   r8..r15 ^= the eight qwords at [rbx]
;# c0
mov rbx, r8
and rbx, 4194303
shl rbx, 6
add rbx, rdi
;# prefetch is issued before the call, the line itself is read after it
prefetchnta byte ptr [rbx]
lea rcx, [r8+r9]
call squareHashSub
mov r9, rax
xor r8, qword ptr [rbx+0]
xor r9, qword ptr [rbx+8]
xor r10, qword ptr [rbx+16]
xor r11, qword ptr [rbx+24]
xor r12, qword ptr [rbx+32]
xor r13, qword ptr [rbx+40]
xor r14, qword ptr [rbx+48]
xor r15, qword ptr [rbx+56]
;# c1
mov rbx, r9
and rbx, 4194303
shl rbx, 6
add rbx, rdi
prefetchnta byte ptr [rbx]
lea rcx, [r9+r10]
call squareHashSub
mov r10, rax
xor r8, qword ptr [rbx+0]
xor r9, qword ptr [rbx+8]
xor r10, qword ptr [rbx+16]
xor r11, qword ptr [rbx+24]
xor r12, qword ptr [rbx+32]
xor r13, qword ptr [rbx+40]
xor r14, qword ptr [rbx+48]
xor r15, qword ptr [rbx+56]
;# c2
mov rbx, r10
and rbx, 4194303
shl rbx, 6
add rbx, rdi
prefetchnta byte ptr [rbx]
lea rcx, [r10+r11]
call squareHashSub
mov r11, rax
xor r8, qword ptr [rbx+0]
xor r9, qword ptr [rbx+8]
xor r10, qword ptr [rbx+16]
xor r11, qword ptr [rbx+24]
xor r12, qword ptr [rbx+32]
xor r13, qword ptr [rbx+40]
xor r14, qword ptr [rbx+48]
xor r15, qword ptr [rbx+56]
;# c3
mov rbx, r11
and rbx, 4194303
shl rbx, 6
add rbx, rdi
prefetchnta byte ptr [rbx]
lea rcx, [r11+r12]
call squareHashSub
mov r12, rax
xor r8, qword ptr [rbx+0]
xor r9, qword ptr [rbx+8]
xor r10, qword ptr [rbx+16]
xor r11, qword ptr [rbx+24]
xor r12, qword ptr [rbx+32]
xor r13, qword ptr [rbx+40]
xor r14, qword ptr [rbx+48]
xor r15, qword ptr [rbx+56]
;# c4
mov rbx, r12
and rbx, 4194303
shl rbx, 6
add rbx, rdi
prefetchnta byte ptr [rbx]
lea rcx, [r12+r13]
call squareHashSub
mov r13, rax
xor r8, qword ptr [rbx+0]
xor r9, qword ptr [rbx+8]
xor r10, qword ptr [rbx+16]
xor r11, qword ptr [rbx+24]
xor r12, qword ptr [rbx+32]
xor r13, qword ptr [rbx+40]
xor r14, qword ptr [rbx+48]
xor r15, qword ptr [rbx+56]
;# c5
mov rbx, r13
and rbx, 4194303
shl rbx, 6
add rbx, rdi
prefetchnta byte ptr [rbx]
lea rcx, [r13+r14]
call squareHashSub
mov r14, rax
xor r8, qword ptr [rbx+0]
xor r9, qword ptr [rbx+8]
xor r10, qword ptr [rbx+16]
xor r11, qword ptr [rbx+24]
xor r12, qword ptr [rbx+32]
xor r13, qword ptr [rbx+40]
xor r14, qword ptr [rbx+48]
xor r15, qword ptr [rbx+56]
;# c6
mov rbx, r14
and rbx, 4194303
shl rbx, 6
add rbx, rdi
prefetchnta byte ptr [rbx]
lea rcx, [r14+r15]
call squareHashSub
mov r15, rax
xor r8, qword ptr [rbx+0]
xor r9, qword ptr [rbx+8]
xor r10, qword ptr [rbx+16]
xor r11, qword ptr [rbx+24]
xor r12, qword ptr [rbx+32]
xor r13, qword ptr [rbx+40]
xor r14, qword ptr [rbx+48]
xor r15, qword ptr [rbx+56]
;# c7
mov rbx, r15
and rbx, 4194303
shl rbx, 6
add rbx, rdi
prefetchnta byte ptr [rbx]
lea rcx, [r15+r8]
call squareHashSub
mov r8, rax
xor r8, qword ptr [rbx+0]
xor r9, qword ptr [rbx+8]
xor r10, qword ptr [rbx+16]
xor r11, qword ptr [rbx+24]
xor r12, qword ptr [rbx+32]
xor r13, qword ptr [rbx+40]
xor r14, qword ptr [rbx+48]
xor r15, qword ptr [rbx+56]
;# iteration 1
;# Second pass over the same eight steps c0..c7, continuing from the r8-r15
;# values left by the preceding pass. Per step: rbx = rdi + (rA & 4194303) * 64,
;# prefetch, rcx = rA + rB, call squareHashSub, rB = rax, r8..r15 ^= [rbx .. rbx+63].
;# c0
mov rbx, r8
and rbx, 4194303
shl rbx, 6
add rbx, rdi
prefetchnta byte ptr [rbx]
lea rcx, [r8+r9]
call squareHashSub
mov r9, rax
xor r8, qword ptr [rbx+0]
xor r9, qword ptr [rbx+8]
xor r10, qword ptr [rbx+16]
xor r11, qword ptr [rbx+24]
xor r12, qword ptr [rbx+32]
xor r13, qword ptr [rbx+40]
xor r14, qword ptr [rbx+48]
xor r15, qword ptr [rbx+56]
;# c1
mov rbx, r9
and rbx, 4194303
shl rbx, 6
add rbx, rdi
prefetchnta byte ptr [rbx]
lea rcx, [r9+r10]
call squareHashSub
mov r10, rax
xor r8, qword ptr [rbx+0]
xor r9, qword ptr [rbx+8]
xor r10, qword ptr [rbx+16]
xor r11, qword ptr [rbx+24]
xor r12, qword ptr [rbx+32]
xor r13, qword ptr [rbx+40]
xor r14, qword ptr [rbx+48]
xor r15, qword ptr [rbx+56]
;# c2
mov rbx, r10
and rbx, 4194303
shl rbx, 6
add rbx, rdi
prefetchnta byte ptr [rbx]
lea rcx, [r10+r11]
call squareHashSub
mov r11, rax
xor r8, qword ptr [rbx+0]
xor r9, qword ptr [rbx+8]
xor r10, qword ptr [rbx+16]
xor r11, qword ptr [rbx+24]
xor r12, qword ptr [rbx+32]
xor r13, qword ptr [rbx+40]
xor r14, qword ptr [rbx+48]
xor r15, qword ptr [rbx+56]
;# c3
mov rbx, r11
and rbx, 4194303
shl rbx, 6
add rbx, rdi
prefetchnta byte ptr [rbx]
lea rcx, [r11+r12]
call squareHashSub
mov r12, rax
xor r8, qword ptr [rbx+0]
xor r9, qword ptr [rbx+8]
xor r10, qword ptr [rbx+16]
xor r11, qword ptr [rbx+24]
xor r12, qword ptr [rbx+32]
xor r13, qword ptr [rbx+40]
xor r14, qword ptr [rbx+48]
xor r15, qword ptr [rbx+56]
;# c4
mov rbx, r12
and rbx, 4194303
shl rbx, 6
add rbx, rdi
prefetchnta byte ptr [rbx]
lea rcx, [r12+r13]
call squareHashSub
mov r13, rax
xor r8, qword ptr [rbx+0]
xor r9, qword ptr [rbx+8]
xor r10, qword ptr [rbx+16]
xor r11, qword ptr [rbx+24]
xor r12, qword ptr [rbx+32]
xor r13, qword ptr [rbx+40]
xor r14, qword ptr [rbx+48]
xor r15, qword ptr [rbx+56]
;# c5
mov rbx, r13
and rbx, 4194303
shl rbx, 6
add rbx, rdi
prefetchnta byte ptr [rbx]
lea rcx, [r13+r14]
call squareHashSub
mov r14, rax
xor r8, qword ptr [rbx+0]
xor r9, qword ptr [rbx+8]
xor r10, qword ptr [rbx+16]
xor r11, qword ptr [rbx+24]
xor r12, qword ptr [rbx+32]
xor r13, qword ptr [rbx+40]
xor r14, qword ptr [rbx+48]
xor r15, qword ptr [rbx+56]
;# c6
mov rbx, r14
and rbx, 4194303
shl rbx, 6
add rbx, rdi
prefetchnta byte ptr [rbx]
lea rcx, [r14+r15]
call squareHashSub
mov r15, rax
xor r8, qword ptr [rbx+0]
xor r9, qword ptr [rbx+8]
xor r10, qword ptr [rbx+16]
xor r11, qword ptr [rbx+24]
xor r12, qword ptr [rbx+32]
xor r13, qword ptr [rbx+40]
xor r14, qword ptr [rbx+48]
xor r15, qword ptr [rbx+56]
;# c7
mov rbx, r15
and rbx, 4194303
shl rbx, 6
add rbx, rdi
prefetchnta byte ptr [rbx]
lea rcx, [r15+r8]
call squareHashSub
mov r8, rax
xor r8, qword ptr [rbx+0]
xor r9, qword ptr [rbx+8]
xor r10, qword ptr [rbx+16]
xor r11, qword ptr [rbx+24]
xor r12, qword ptr [rbx+32]
xor r13, qword ptr [rbx+40]
xor r14, qword ptr [rbx+48]
xor r15, qword ptr [rbx+56]
;# --------------------------
;# Exit: rbx gets its saved value back, while r8-r15 are XORed with the values
;# saved on entry. On return each of r8-r15 therefore holds
;# (value at entry) XOR (qword computed above), not the value at entry.
mov rbx, qword ptr [rsp+64]
xor r8, qword ptr [rsp+56]
xor r9, qword ptr [rsp+48]
xor r10, qword ptr [rsp+40]
xor r11, qword ptr [rsp+32]
xor r12, qword ptr [rsp+24]
xor r13, qword ptr [rsp+16]
xor r14, qword ptr [rsp+8]
xor r15, qword ptr [rsp+0]
;# release the nine save slots reserved on entry
add rsp, 72
;# eax is deliberately not cleared here (instruction left disabled);
;# NOTE(review): presumably the caller's following code clears it - confirm.
;# xor eax, eax
ret

View file

@ -19,6 +19,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
//#define TRACE
#include "InterpretedVirtualMachine.hpp"
#include "CompiledVirtualMachine.hpp"
#include "CompiledLightVirtualMachine.hpp"
#include "AssemblyGeneratorX86.hpp"
#include "Stopwatch.hpp"
#include "blake2/blake2.h"
@ -202,7 +203,7 @@ void mine(RandomX::VirtualMachine* vm, std::atomic<uint32_t>& atomicNonce, Atomi
}
int main(int argc, char** argv) {
bool softAes, genAsm, miningMode, verificationMode, help, largePages, async, genNative;
bool softAes, genAsm, miningMode, verificationMode, help, largePages, async, genNative, jit;
int programCount, threadCount, initThreadCount, epoch;
readOption("--softAes", argc, argv, softAes);
@ -214,7 +215,7 @@ int main(int argc, char** argv) {
readIntOption("--init", argc, argv, initThreadCount, 1);
readIntOption("--epoch", argc, argv, epoch, 0);
readOption("--largePages", argc, argv, largePages);
readOption("--async", argc, argv, async);
readOption("--jit", argc, argv, jit);
readOption("--genNative", argc, argv, genNative);
readOption("--help", argc, argv, help);
@ -299,7 +300,10 @@ int main(int argc, char** argv) {
vm = new RandomX::CompiledVirtualMachine();
}
else {
vm = new RandomX::InterpretedVirtualMachine(softAes, async);
if (jit)
vm = new RandomX::CompiledLightVirtualMachine();
else
vm = new RandomX::InterpretedVirtualMachine(softAes, async);
}
vm->setDataset(dataset, datasetSize);
vms.push_back(vm);