4 scratchpad segments

This commit is contained in:
tevador 2019-01-20 00:44:01 +01:00
parent 16db607025
commit bd0dba88a8
10 changed files with 75 additions and 82 deletions

View file

@ -33,7 +33,7 @@ namespace RandomX {
mem.ds = ds;
}
void CompiledVirtualMachine::initializeScratchpad(uint32_t index) {
// Copies ScratchpadSize bytes from the dataset into the caller-supplied `scratchpad` buffer.
// The source starts ScratchpadSize * index bytes/elements past mem.ds.dataset, so `index`
// selects which scratchpad-sized slice of the dataset is used.
void CompiledVirtualMachine::initializeScratchpad(uint8_t* scratchpad, int32_t index) {
	const auto sliceStart = mem.ds.dataset + ScratchpadSize * index;
	memcpy(scratchpad, sliceStart, ScratchpadSize);
}
@ -42,6 +42,11 @@ namespace RandomX {
for (unsigned i = 0; i < sizeof(reg) / sizeof(Pcg32::result_type); ++i) {
*(((uint32_t*)&reg) + i) = gen();
}
FPINIT();
for (int i = 0; i < RegistersCount; ++i) {
reg.f[i].lo.f64 = (double)reg.f[i].lo.i64;
reg.f[i].hi.f64 = (double)reg.f[i].hi.i64;
}
compiler.generateProgram(gen);
mem.ma = (gen() ^ *(((uint32_t*)seed) + 4)) & ~7;
mem.mx = *(((uint32_t*)seed) + 5);

View file

@ -39,7 +39,7 @@ namespace RandomX {
}
CompiledVirtualMachine();
void setDataset(dataset_t ds) override;
void initializeScratchpad(uint32_t index) override;
void initializeScratchpad(uint8_t* scratchpad, int32_t index) override;
void initializeProgram(const void* seed) override;
virtual void execute() override;
void* getProgram() {

View file

@ -69,7 +69,7 @@ namespace RandomX {
}
}
void InterpretedVirtualMachine::initializeScratchpad(uint32_t index) {
void InterpretedVirtualMachine::initializeScratchpad(uint8_t* scratchpad, int32_t index) {
uint32_t startingBlock = (ScratchpadSize / CacheLineSize) * index;
if (asyncWorker) {
ILightClientAsyncWorker* worker = mem.ds.asyncWorker;

View file

@ -42,7 +42,7 @@ namespace RandomX {
InterpretedVirtualMachine(bool soft, bool async) : softAes(soft), asyncWorker(async) {}
~InterpretedVirtualMachine();
void setDataset(dataset_t ds) override;
void initializeScratchpad(uint32_t index) override;
void initializeScratchpad(uint8_t* scratchpad, int32_t index) override;
void initializeProgram(const void* seed) override;
void execute() override;
const Program& getProgam() {

View file

@ -182,17 +182,17 @@ namespace RandomX {
emitByte(0xe8); //xor rbp, rax
}
emitByte(0x25); //and eax,
if (instr.loca & 15) {
//if (instr.loca & 15) {
if (instr.loca & 3) {
emit(ScratchpadL1 - 1); //first 16 KiB of scratchpad
}
else {
emit(ScratchpadL2 - 1); //first 256 KiB of scratchpad
}
}
/*}
else {
emit(ScratchpadL3 - 1); //whole scratchpad
}
}*/
}
void JitCompilerX86::genar(Instruction& instr) {
@ -271,7 +271,7 @@ namespace RandomX {
}
void JitCompilerX86::gencr(Instruction& instr, bool rax = true) {
if (instr.locc & 16) { //write to register
if (instr.locc & 8) { //write to register
emit(uint16_t(0x8b4c)); //mov
if (rax) {
emitByte(0xc0 + 8 * (instr.regc % RegistersCount)); //regc, rax
@ -281,17 +281,17 @@ namespace RandomX {
}
}
else {
if (instr.locc & 15) {
if (instr.locc & 3) {
//if (instr.locc & 7) {
if (instr.locc & 1) {
scratchpadStoreR(instr, ScratchpadL1, rax);
}
else {
scratchpadStoreR(instr, ScratchpadL2, rax);
}
}
/*}
else {
scratchpadStoreR(instr, ScratchpadL3, rax);
}
}*/
}
}
@ -319,18 +319,18 @@ namespace RandomX {
}
emit(uint16_t(0x280f)); //movaps
emitByte(0xc0 + 8 * regc); // regc, xmm0
if (instr.locc & 16) { //write to scratchpad
if (instr.locc & 15) {
if (instr.locc & 3) { //C.LOC.W
if (instr.locc & 8) { //write to scratchpad
//if (instr.locc & 7) {
if (instr.locc & 1) { //C.LOC.W
scratchpadStoreF(instr, regc, ScratchpadL1, (instr.locc & 128)); //first 16 KiB of scratchpad
}
else {
scratchpadStoreF(instr, regc, ScratchpadL2, (instr.locc & 128)); //first 256 KiB of scratchpad
}
}
else {
//}
/*else {
scratchpadStoreF(instr, regc, ScratchpadL3, (instr.locc & 128)); //whole scratchpad
}
}*/
}
}

View file

@ -39,11 +39,16 @@ namespace RandomX {
mem.ds.dataset = nullptr;
}
void VirtualMachine::getResult(void* out) {
// Produces the final result in `out` by hashing a small state buffer with blake2b.
// The buffer starts with a copy of the register file; the 64 bytes at word offset 24
// are either filled by hashAes1Rx4 from the given scratchpad or zeroed when
// scratchpadSize is 0.
//   scratchpad     - memory passed to hashAes1Rx4 (unused when scratchpadSize is 0)
//   scratchpadSize - number of bytes of `scratchpad` to hash; 0 skips the scratchpad
//   out            - destination passed to blake2b with length ResultSize
void VirtualMachine::getResult(void* scratchpad, size_t scratchpadSize, void* out) {
// Buffer length in 64-bit words: the register file plus 8 additional words.
constexpr size_t smallStateLength = sizeof(RegisterFile) / sizeof(uint64_t) + 8;
alignas(16) uint64_t smallState[smallStateLength];
memcpy(smallState, &reg, sizeof(RegisterFile));
// NOTE(review): this unconditional call with the ScratchpadSize constant looks like the
// pre-change line shown by the diff view next to its conditional replacement below — confirm.
hashAes1Rx4<false>(scratchpad, ScratchpadSize, smallState + 24);
if (scratchpadSize > 0) {
// NOTE(review): the hard-coded offset 24 assumes sizeof(RegisterFile) is 192 bytes, so the
// output lands directly after the register copy — confirm against RegisterFile.
hashAes1Rx4<false>(scratchpad, scratchpadSize, smallState + 24);
}
else {
// No scratchpad to hash: zero the 64 bytes that hashAes1Rx4 would otherwise have been given.
memset(smallState + 24, 0, 64);
}
// Hash the whole buffer; the trailing nullptr/0 are presumably the (absent) key and its length.
blake2b(out, ResultSize, smallState, sizeof(smallState), nullptr, 0);
}
}

View file

@ -28,10 +28,13 @@ namespace RandomX {
VirtualMachine();
virtual ~VirtualMachine() {}
virtual void setDataset(dataset_t ds) = 0;
virtual void initializeScratchpad(uint32_t index) = 0;
virtual void initializeScratchpad(uint8_t* scratchpad, int32_t index) = 0;
void setScratchpad(void* ptr) {
scratchpad = (convertible_t*)ptr;
}
virtual void initializeProgram(const void* seed) = 0;
virtual void execute() = 0;
void getResult(void*);
void getResult(void*, size_t, void*);
const RegisterFile& getRegisterFile() {
return reg;
}
@ -39,6 +42,6 @@ namespace RandomX {
DatasetReadFunc readDataset;
alignas(16) RegisterFile reg;
MemoryRegisters mem;
alignas(64) convertible_t scratchpad[ScratchpadLength];
convertible_t* scratchpad;
};
}

View file

@ -12,12 +12,12 @@
mov qword ptr [rcx+40], r13
mov qword ptr [rcx+48], r14
mov qword ptr [rcx+56], r15
movdqa xmmword ptr [rcx+64], xmm8
movdqa xmmword ptr [rcx+80], xmm9
movdqa xmmword ptr [rcx+96], xmm2
movdqa xmmword ptr [rcx+112], xmm3
movapd xmmword ptr [rcx+64], xmm8
movapd xmmword ptr [rcx+80], xmm9
movapd xmmword ptr [rcx+96], xmm2
movapd xmmword ptr [rcx+112], xmm3
lea rcx, [rcx+64]
movdqa xmmword ptr [rcx+64], xmm4
movdqa xmmword ptr [rcx+80], xmm5
movdqa xmmword ptr [rcx+96], xmm6
movdqa xmmword ptr [rcx+112], xmm7
movapd xmmword ptr [rcx+64], xmm4
movapd xmmword ptr [rcx+80], xmm5
movapd xmmword ptr [rcx+96], xmm6
movapd xmmword ptr [rcx+112], xmm7

View file

@ -1,14 +1,10 @@
mov rdi, rsp ;# beginning of VM stack
mov ebx, 1048577 ;# number of VM instructions to execute + 1
mov ebx, 262145 ;# number of VM instructions to execute + 1
xorps xmm10, xmm10
cmpeqpd xmm10, xmm10
psrlq xmm10, 1 ;# mask for absolute value = 0x7fffffffffffffff7fffffffffffffff
;# reset rounding mode
mov dword ptr [rsp-8], 40896
ldmxcsr dword ptr [rsp-8]
;# load integer registers
mov r8, qword ptr [rcx+0]
mov r9, qword ptr [rcx+8]
@ -19,45 +15,13 @@
mov r14, qword ptr [rcx+48]
mov r15, qword ptr [rcx+56]
;# initialize floating point registers
xorps xmm8, xmm8
cvtsi2sd xmm8, qword ptr [rcx+72]
pslldq xmm8, 8
cvtsi2sd xmm8, qword ptr [rcx+64]
xorps xmm9, xmm9
cvtsi2sd xmm9, qword ptr [rcx+88]
pslldq xmm9, 8
cvtsi2sd xmm9, qword ptr [rcx+80]
xorps xmm2, xmm2
cvtsi2sd xmm2, qword ptr [rcx+104]
pslldq xmm2, 8
cvtsi2sd xmm2, qword ptr [rcx+96]
xorps xmm3, xmm3
cvtsi2sd xmm3, qword ptr [rcx+120]
pslldq xmm3, 8
cvtsi2sd xmm3, qword ptr [rcx+112]
;# load floating point registers
movapd xmm8, xmmword ptr [rcx+64]
movapd xmm9, xmmword ptr [rcx+80]
movapd xmm2, xmmword ptr [rcx+96]
movapd xmm3, xmmword ptr [rcx+112]
lea rcx, [rcx+64]
xorps xmm4, xmm4
cvtsi2sd xmm4, qword ptr [rcx+72]
pslldq xmm4, 8
cvtsi2sd xmm4, qword ptr [rcx+64]
xorps xmm5, xmm5
cvtsi2sd xmm5, qword ptr [rcx+88]
pslldq xmm5, 8
cvtsi2sd xmm5, qword ptr [rcx+80]
xorps xmm6, xmm6
cvtsi2sd xmm6, qword ptr [rcx+104]
pslldq xmm6, 8
cvtsi2sd xmm6, qword ptr [rcx+96]
xorps xmm7, xmm7
cvtsi2sd xmm7, qword ptr [rcx+120]
pslldq xmm7, 8
cvtsi2sd xmm7, qword ptr [rcx+112]
movapd xmm4, xmmword ptr [rcx+64]
movapd xmm5, xmmword ptr [rcx+80]
movapd xmm6, xmmword ptr [rcx+96]
movapd xmm7, xmmword ptr [rcx+112]

View file

@ -130,7 +130,7 @@ void generateAsm(int nonce) {
asmX86.printCode(std::cout);
}
void mine(RandomX::VirtualMachine* vm, std::atomic<int>& atomicNonce, AtomicHash& result, int noncesCount, int thread) {
void mine(RandomX::VirtualMachine* vm, std::atomic<int>& atomicNonce, AtomicHash& result, int noncesCount, int thread, uint8_t* scratchpad) {
uint64_t hash[4];
unsigned char blockTemplate[] = {
0x07, 0x07, 0xf7, 0xa4, 0xf0, 0xd6, 0x05, 0xb3, 0x03, 0x26, 0x08, 0x16, 0xba, 0x3f, 0x10, 0x90, 0x2e, 0x1a, 0x14,
@ -146,11 +146,20 @@ void mine(RandomX::VirtualMachine* vm, std::atomic<int>& atomicNonce, AtomicHash
*noncePtr = nonce;
blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0);
int spIndex = ((uint8_t*)hash)[24] | ((((uint8_t*)hash)[25] & 15) << 8);
vm->initializeScratchpad(spIndex);
vm->initializeScratchpad(scratchpad, spIndex);
vm->initializeProgram(hash);
//dump((char*)((RandomX::CompiledVirtualMachine*)vm)->getProgram(), RandomX::CodeSize, "code-1337-jmp.txt");
vm->setScratchpad(scratchpad + 3 * RandomX::ScratchpadSize / 4);
vm->execute();
vm->getResult(hash);
vm->setScratchpad(scratchpad + 2 * RandomX::ScratchpadSize / 4);
vm->execute();
vm->getResult(nullptr, 0, hash);
vm->initializeProgram(hash);
vm->setScratchpad(scratchpad + 1 * RandomX::ScratchpadSize / 4);
vm->execute();
vm->setScratchpad(scratchpad + 0 * RandomX::ScratchpadSize / 4);
vm->execute();
vm->getResult(scratchpad, RandomX::ScratchpadSize, hash);
result.xorWith(hash);
if (RandomX::trace) {
std::cout << "Nonce: " << nonce << " ";
@ -274,18 +283,25 @@ int main(int argc, char** argv) {
vm->setDataset(dataset);
vms.push_back(vm);
}
uint8_t* scratchpadMem;
if (largePages) {
scratchpadMem = (uint8_t*)allocLargePagesMemory(RandomX::ScratchpadSize * (threadCount + 1) / 2);
}
else {
scratchpadMem = (uint8_t*)_mm_malloc(threadCount * RandomX::ScratchpadSize, RandomX::CacheLineSize);
}
std::cout << "Running benchmark (" << programCount << " programs) ..." << std::endl;
sw.restart();
if (threadCount > 1) {
for (int i = 0; i < vms.size(); ++i) {
threads.push_back(std::thread(&mine, vms[i], std::ref(atomicNonce), std::ref(result), programCount, i));
threads.push_back(std::thread(&mine, vms[i], std::ref(atomicNonce), std::ref(result), programCount, i, scratchpadMem + RandomX::ScratchpadSize * i));
}
for (int i = 0; i < threads.size(); ++i) {
threads[i].join();
}
}
else {
mine(vms[0], std::ref(atomicNonce), std::ref(result), programCount, 0);
mine(vms[0], std::ref(atomicNonce), std::ref(result), programCount, 0, scratchpadMem);
if (compiled)
std::cout << "Average program size: " << ((RandomX::CompiledVirtualMachine*)vms[0])->getTotalSize() / programCount << std::endl;
}