From 8c37d4aac36b6a2c0a6dadfc0220dd2f4fe8dfc8 Mon Sep 17 00:00:00 2001 From: tevador Date: Fri, 12 Apr 2019 19:36:08 +0200 Subject: [PATCH] More refactoring --- src/AssemblyGeneratorX86.cpp | 6 +- src/AssemblyGeneratorX86.hpp | 6 +- src/Blake2Generator.cpp | 51 ++++ src/Blake2Generator.hpp | 36 +++ src/CompiledLightVirtualMachine.cpp | 6 +- src/CompiledLightVirtualMachine.hpp | 2 +- src/CompiledVirtualMachine.cpp | 2 +- src/CompiledVirtualMachine.hpp | 2 +- src/InterpretedVirtualMachine.cpp | 15 +- src/InterpretedVirtualMachine.hpp | 8 +- src/JitCompilerX86.cpp | 12 +- src/JitCompilerX86.hpp | 4 +- src/LightClientAsyncWorker.cpp | 113 ------- src/LightClientAsyncWorker.hpp | 57 ---- src/LightProgramGenerator.hpp | 58 ---- src/Program.hpp | 17 +- src/VirtualMachine.hpp | 2 +- src/main.cpp | 8 +- ...Generator.cpp => superscalarGenerator.cpp} | 283 ++++++++---------- src/superscalarGenerator.hpp | 47 +++ src/tests/superscalar-avalanche.cpp | 7 +- src/tests/superscalar-init.cpp | 2 +- vcxproj/randomx.vcxproj | 8 +- vcxproj/randomx.vcxproj.filters | 24 +- vcxproj/superscalar-avalanche.vcxproj | 3 +- vcxproj/superscalar-avalanche.vcxproj.filters | 9 +- vcxproj/superscalar-init.vcxproj | 3 +- vcxproj/superscalar-init.vcxproj.filters | 9 +- 28 files changed, 347 insertions(+), 453 deletions(-) create mode 100644 src/Blake2Generator.cpp create mode 100644 src/Blake2Generator.hpp delete mode 100644 src/LightClientAsyncWorker.cpp delete mode 100644 src/LightClientAsyncWorker.hpp delete mode 100644 src/LightProgramGenerator.hpp rename src/{LightProgramGenerator.cpp => superscalarGenerator.cpp} (76%) create mode 100644 src/superscalarGenerator.hpp diff --git a/src/AssemblyGeneratorX86.cpp b/src/AssemblyGeneratorX86.cpp index c4e009c..b3511c1 100644 --- a/src/AssemblyGeneratorX86.cpp +++ b/src/AssemblyGeneratorX86.cpp @@ -23,7 +23,7 @@ along with RandomX. If not, see. 
#include "common.hpp" #include "reciprocal.h" #include "Program.hpp" -#include "./LightProgramGenerator.hpp" +#include "superscalarGenerator.hpp" namespace RandomX { @@ -62,7 +62,7 @@ namespace RandomX { } } - void AssemblyGeneratorX86::generateAsm(LightProgram& prog) { + void AssemblyGeneratorX86::generateAsm(SuperscalarProgram& prog) { asmCode.str(std::string()); //clear asmCode << "ALIGN 16" << std::endl; for (unsigned i = 0; i < prog.getSize(); ++i) { @@ -126,7 +126,7 @@ namespace RandomX { } } - void AssemblyGeneratorX86::generateC(LightProgram& prog) { + void AssemblyGeneratorX86::generateC(SuperscalarProgram& prog) { asmCode.str(std::string()); //clear asmCode << "#include " << std::endl; asmCode << "#if defined(__SIZEOF_INT128__)" << std::endl; diff --git a/src/AssemblyGeneratorX86.hpp b/src/AssemblyGeneratorX86.hpp index 8688cd4..4b777e6 100644 --- a/src/AssemblyGeneratorX86.hpp +++ b/src/AssemblyGeneratorX86.hpp @@ -27,7 +27,7 @@ along with RandomX. If not, see. namespace RandomX { class Program; - class LightProgram; + class SuperscalarProgram; class AssemblyGeneratorX86; typedef void(AssemblyGeneratorX86::*InstructionGenerator)(Instruction&, int); @@ -35,8 +35,8 @@ namespace RandomX { class AssemblyGeneratorX86 { public: void generateProgram(Program& prog); - void generateAsm(LightProgram& prog); - void generateC(LightProgram& prog); + void generateAsm(SuperscalarProgram& prog); + void generateC(SuperscalarProgram& prog); void printCode(std::ostream& os) { os << asmCode.rdbuf(); } diff --git a/src/Blake2Generator.cpp b/src/Blake2Generator.cpp new file mode 100644 index 0000000..2879088 --- /dev/null +++ b/src/Blake2Generator.cpp @@ -0,0 +1,51 @@ +/* +Copyright (c) 2019 tevador + +This file is part of RandomX. + +RandomX is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. 
+ +RandomX is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with RandomX. If not, see. +*/ + +#include "blake2/blake2.h" +#include "blake2/endian.h" +#include "Blake2Generator.hpp" +#include "common.hpp" + +namespace RandomX { + + Blake2Generator::Blake2Generator(const void* seed, int nonce) : dataIndex(sizeof(data)) { + memset(data, 0, sizeof(data)); + memcpy(data, seed, SeedSize); + store32(&data[60], nonce); + } + + uint8_t Blake2Generator::getByte() { + checkData(1); + return data[dataIndex++]; + } + + uint32_t Blake2Generator::getInt32() { + checkData(4); + auto ret = load32(&data[dataIndex]); + dataIndex += 4; + return ret; + } + + void Blake2Generator::checkData(const size_t bytesNeeded) { + if (dataIndex + bytesNeeded > sizeof(data)) { + blake2b(data, sizeof(data), data, sizeof(data), nullptr, 0); + dataIndex = 0; + } + } +} \ No newline at end of file diff --git a/src/Blake2Generator.hpp b/src/Blake2Generator.hpp new file mode 100644 index 0000000..24f2fca --- /dev/null +++ b/src/Blake2Generator.hpp @@ -0,0 +1,36 @@ +/* +Copyright (c) 2019 tevador + +This file is part of RandomX. + +RandomX is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +RandomX is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with RandomX. If not, see. 
+*/ + +#pragma once +#include + +namespace RandomX { + + class Blake2Generator { + public: + Blake2Generator(const void* seed, int nonce); + uint8_t getByte(); + uint32_t getInt32(); + private: + uint8_t data[64]; + size_t dataIndex; + + void checkData(const size_t); + }; +} \ No newline at end of file diff --git a/src/CompiledLightVirtualMachine.cpp b/src/CompiledLightVirtualMachine.cpp index 760842a..11bedf8 100644 --- a/src/CompiledLightVirtualMachine.cpp +++ b/src/CompiledLightVirtualMachine.cpp @@ -24,7 +24,7 @@ along with RandomX. If not, see. namespace RandomX { template - void CompiledLightVirtualMachine::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) { + void CompiledLightVirtualMachine::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]) { mem.ds = ds; datasetRange = (size - RANDOMX_DATASET_SIZE + CacheLineSize) / CacheLineSize; if(superscalar) @@ -32,8 +32,8 @@ namespace RandomX { //datasetBasePtr = ds.dataset.memory; } - template void CompiledLightVirtualMachine::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]); - template void CompiledLightVirtualMachine::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]); + template void CompiledLightVirtualMachine::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]); + template void CompiledLightVirtualMachine::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]); template void CompiledLightVirtualMachine::initialize() { diff --git a/src/CompiledLightVirtualMachine.hpp b/src/CompiledLightVirtualMachine.hpp index 9493c58..1d4b78e 100644 --- a/src/CompiledLightVirtualMachine.hpp +++ b/src/CompiledLightVirtualMachine.hpp @@ -39,7 +39,7 @@ namespace RandomX { _mm_free(ptr); } CompiledLightVirtualMachine() {} - void setDataset(dataset_t ds, uint64_t size, 
LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) override; + void setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]) override; void initialize() override; }; } \ No newline at end of file diff --git a/src/CompiledVirtualMachine.cpp b/src/CompiledVirtualMachine.cpp index 4984938..3e44476 100644 --- a/src/CompiledVirtualMachine.cpp +++ b/src/CompiledVirtualMachine.cpp @@ -29,7 +29,7 @@ namespace RandomX { CompiledVirtualMachine::CompiledVirtualMachine() { } - void CompiledVirtualMachine::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) { + void CompiledVirtualMachine::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]) { mem.ds = ds; datasetRange = (size - RANDOMX_DATASET_SIZE + CacheLineSize) / CacheLineSize; datasetBasePtr = ds.dataset.memory; diff --git a/src/CompiledVirtualMachine.hpp b/src/CompiledVirtualMachine.hpp index 65b1885..a2866ca 100644 --- a/src/CompiledVirtualMachine.hpp +++ b/src/CompiledVirtualMachine.hpp @@ -42,7 +42,7 @@ namespace RandomX { _mm_free(ptr); } CompiledVirtualMachine(); - void setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) override; + void setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]) override; void initialize() override; virtual void execute() override; void* getProgram() { diff --git a/src/InterpretedVirtualMachine.cpp b/src/InterpretedVirtualMachine.cpp index 673fecf..132a2c9 100644 --- a/src/InterpretedVirtualMachine.cpp +++ b/src/InterpretedVirtualMachine.cpp @@ -22,7 +22,6 @@ along with RandomX. If not, see. #include "InterpretedVirtualMachine.hpp" #include "dataset.hpp" #include "Cache.hpp" -#include "LightClientAsyncWorker.hpp" #include #include #include @@ -36,7 +35,7 @@ along with RandomX. If not, see. 
#ifdef STATS #include #endif -#include "LightProgramGenerator.hpp" +#include "superscalarGenerator.hpp" #ifdef FPUCHECK constexpr bool fpuCheck = true; @@ -47,7 +46,7 @@ constexpr bool fpuCheck = false; namespace RandomX { template - void InterpretedVirtualMachine::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) { + void InterpretedVirtualMachine::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]) { mem.ds = ds; readDataset = &datasetReadLight; datasetRange = (size - RANDOMX_DATASET_SIZE + CacheLineSize) / CacheLineSize; @@ -55,8 +54,8 @@ namespace RandomX { precompileSuperscalar(programs); } - template void InterpretedVirtualMachine::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]); - template void InterpretedVirtualMachine::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]); + template void InterpretedVirtualMachine::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]); + template void InterpretedVirtualMachine::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]); template void InterpretedVirtualMachine::initialize() { @@ -475,7 +474,7 @@ namespace RandomX { } template - void InterpretedVirtualMachine::executeSuperscalar(int_reg_t(&r)[8], LightProgram& prog, std::vector& reciprocals) { + void InterpretedVirtualMachine::executeSuperscalar(int_reg_t(&r)[8], SuperscalarProgram& prog, std::vector& reciprocals) { for (unsigned j = 0; j < prog.getSize(); ++j) { Instruction& instr = prog(j); switch (instr.opcode) @@ -539,7 +538,7 @@ namespace RandomX { Cache& cache = mem.ds.cache; for (unsigned i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) { mixBlock = getMixBlock(registerValue, cache); - LightProgram& prog = superScalarPrograms[i]; + SuperscalarProgram& prog = superScalarPrograms[i]; executeSuperscalar(rl, prog, reciprocals); @@ 
-554,7 +553,7 @@ namespace RandomX { } template - void InterpretedVirtualMachine::precompileSuperscalar(LightProgram* programs) { + void InterpretedVirtualMachine::precompileSuperscalar(SuperscalarProgram* programs) { memcpy(superScalarPrograms, programs, sizeof(superScalarPrograms)); reciprocals.clear(); for (unsigned i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) { diff --git a/src/InterpretedVirtualMachine.hpp b/src/InterpretedVirtualMachine.hpp index ddefa67..3632112 100644 --- a/src/InterpretedVirtualMachine.hpp +++ b/src/InterpretedVirtualMachine.hpp @@ -70,17 +70,17 @@ namespace RandomX { } InterpretedVirtualMachine(bool soft) : softAes(soft) {} ~InterpretedVirtualMachine() {} - void setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) override; + void setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]) override; void initialize() override; void execute() override; - static void executeSuperscalar(int_reg_t(&r)[8], LightProgram& prog, std::vector& reciprocals); + static void executeSuperscalar(int_reg_t(&r)[8], SuperscalarProgram& prog, std::vector& reciprocals); private: static InstructionHandler engine[256]; DatasetReadFunc readDataset; bool softAes; InstructionByteCode byteCode[RANDOMX_PROGRAM_SIZE]; std::vector reciprocals; - alignas(64) LightProgram superScalarPrograms[RANDOMX_CACHE_ACCESSES]; + alignas(64) SuperscalarProgram superScalarPrograms[RANDOMX_CACHE_ACCESSES]; #ifdef STATS int count_ADD_64 = 0; int count_ADD_32 = 0; @@ -128,7 +128,7 @@ namespace RandomX { int datasetAccess[256] = { 0 }; #endif void precompileProgram(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]); - void precompileSuperscalar(LightProgram*); + void precompileSuperscalar(SuperscalarProgram*); void executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]); void executeBytecode(int& i, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]); void 
executeSuperscalar(uint32_t blockNumber, int_reg_t(&r)[8]); diff --git a/src/JitCompilerX86.cpp b/src/JitCompilerX86.cpp index 8e15e15..ad7c85a 100644 --- a/src/JitCompilerX86.cpp +++ b/src/JitCompilerX86.cpp @@ -87,7 +87,7 @@ namespace RandomX { */ #include "JitCompilerX86-static.hpp" -#include "LightProgramGenerator.hpp" +#include "superscalarGenerator.hpp" #define NOP_TEST true @@ -261,16 +261,16 @@ namespace RandomX { template void JitCompilerX86::generateProgramLight(Program& prog); template - void JitCompilerX86::generateSuperScalarHash(LightProgram(&programs)[N]) { + void JitCompilerX86::generateSuperScalarHash(SuperscalarProgram(&programs)[N]) { memcpy(code + superScalarHashOffset, codeShhInit, codeSshInitSize); codePos = superScalarHashOffset + codeSshInitSize; for (unsigned j = 0; j < N; ++j) { - LightProgram& prog = programs[j]; + SuperscalarProgram& prog = programs[j]; for (unsigned i = 0; i < prog.getSize(); ++i) { Instruction& instr = prog(i); instr.src %= RegistersCount; instr.dst %= RegistersCount; - generateCode(instr, i); + generateCode(instr, i); } emit(codeShhLoad, codeSshLoadSize); if (j < N - 1) { @@ -290,7 +290,7 @@ namespace RandomX { } template - void JitCompilerX86::generateSuperScalarHash(LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]); + void JitCompilerX86::generateSuperScalarHash(SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]); void JitCompilerX86::generateDatasetInitCode() { memcpy(code, codeDatasetInit, datasetInitSize); @@ -345,7 +345,7 @@ namespace RandomX { } template<> - void JitCompilerX86::generateCode(Instruction& instr, int i) { + void JitCompilerX86::generateCode(Instruction& instr, int i) { switch (instr.opcode) { case RandomX::SuperscalarInstructionType::ISUB_R: diff --git a/src/JitCompilerX86.hpp b/src/JitCompilerX86.hpp index 9240cfe..2908b04 100644 --- a/src/JitCompilerX86.hpp +++ b/src/JitCompilerX86.hpp @@ -27,7 +27,7 @@ along with RandomX. If not, see. 
namespace RandomX { class Program; - class LightProgram; + class SuperscalarProgram; class JitCompilerX86; typedef void(JitCompilerX86::*InstructionGeneratorX86)(Instruction&, int); @@ -42,7 +42,7 @@ namespace RandomX { template void generateProgramLight(Program&); template - void generateSuperScalarHash(LightProgram (&programs)[N]); + void generateSuperScalarHash(SuperscalarProgram (&programs)[N]); ProgramFunc getProgramFunc() { return (ProgramFunc)code; } diff --git a/src/LightClientAsyncWorker.cpp b/src/LightClientAsyncWorker.cpp deleted file mode 100644 index fbba713..0000000 --- a/src/LightClientAsyncWorker.cpp +++ /dev/null @@ -1,113 +0,0 @@ -/* -Copyright (c) 2019 tevador - -This file is part of RandomX. - -RandomX is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -RandomX is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with RandomX. If not, see. 
-*/ - -#include "LightClientAsyncWorker.hpp" -#include "dataset.hpp" -#include "Cache.hpp" - -namespace RandomX { - - LightClientAsyncWorker::LightClientAsyncWorker(const Cache& c) : ILightClientAsyncWorker(c), output(nullptr), hasWork(false), -#ifdef TRACE - sw(true), -#endif - workerThread(&LightClientAsyncWorker::runWorker, this) { - - } - - void LightClientAsyncWorker::prepareBlock(addr_t addr) { -#ifdef TRACE - std::cout << sw.getElapsed() << ": prepareBlock-enter " << addr / CacheLineSize << std::endl; -#endif - { - std::lock_guard lk(mutex); - startBlock = addr / CacheLineSize; - blockCount = 1; - output = currentLine.data(); - hasWork = true; - } -#ifdef TRACE - std::cout << sw.getElapsed() << ": prepareBlock-notify " << startBlock << "/" << blockCount << std::endl; -#endif - notifier.notify_one(); - } - - const uint64_t* LightClientAsyncWorker::getBlock(addr_t addr) { -#ifdef TRACE - std::cout << sw.getElapsed() << ": getBlock-enter " << addr / CacheLineSize << std::endl; -#endif - uint32_t currentBlock = addr / CacheLineSize; - if (currentBlock != startBlock || output != currentLine.data()) { - initBlock(cache, (uint8_t*)currentLine.data(), currentBlock, RANDOMX_CACHE_ACCESSES / 8); - } - else { - sync(); - } -#ifdef TRACE - std::cout << sw.getElapsed() << ": getBlock-return " << addr / CacheLineSize << std::endl; -#endif - return currentLine.data(); - } - - void LightClientAsyncWorker::prepareBlocks(void* out, uint32_t startBlock, uint32_t blockCount) { -#ifdef TRACE - std::cout << sw.getElapsed() << ": prepareBlocks-enter " << startBlock << "/" << blockCount << std::endl; -#endif - { - std::lock_guard lk(mutex); - this->startBlock = startBlock; - this->blockCount = blockCount; - output = out; - hasWork = true; - notifier.notify_one(); - } - } - - void LightClientAsyncWorker::getBlocks(void* out, uint32_t startBlock, uint32_t blockCount) { - for (uint32_t i = 0; i < blockCount; ++i) { - initBlock(cache, (uint8_t*)out + CacheLineSize * i, startBlock + i, 
RANDOMX_CACHE_ACCESSES / 8); - } - } - - void LightClientAsyncWorker::sync() { - std::unique_lock lk(mutex); - notifier.wait(lk, [this] { return !hasWork; }); - } - - void LightClientAsyncWorker::runWorker() { -#ifdef TRACE - std::cout << sw.getElapsed() << ": runWorker-enter " << std::endl; -#endif - for (;;) { - std::unique_lock lk(mutex); - notifier.wait(lk, [this] { return hasWork; }); -#ifdef TRACE - std::cout << sw.getElapsed() << ": runWorker-getBlocks " << startBlock << "/" << blockCount << std::endl; -#endif - //getBlocks(output, startBlock, blockCount); - initBlock(cache, (uint8_t*)output, startBlock, RANDOMX_CACHE_ACCESSES / 8); - hasWork = false; -#ifdef TRACE - std::cout << sw.getElapsed() << ": runWorker-finished " << startBlock << "/" << blockCount << std::endl; -#endif - lk.unlock(); - notifier.notify_one(); - } - } -} \ No newline at end of file diff --git a/src/LightClientAsyncWorker.hpp b/src/LightClientAsyncWorker.hpp deleted file mode 100644 index 7c45e53..0000000 --- a/src/LightClientAsyncWorker.hpp +++ /dev/null @@ -1,57 +0,0 @@ -/* -Copyright (c) 2019 tevador - -This file is part of RandomX. - -RandomX is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -RandomX is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with RandomX. If not, see. 
-*/ - -//#define TRACE -#include "common.hpp" - -#include -#include -#include -#include -#ifdef TRACE -#include "Stopwatch.hpp" -#include -#endif - -namespace RandomX { - - using DatasetLine = std::array; - - class LightClientAsyncWorker : public ILightClientAsyncWorker { - public: - LightClientAsyncWorker(const Cache&); - void prepareBlock(addr_t) final; - void prepareBlocks(void* out, uint32_t startBlock, uint32_t blockCount) final; - const uint64_t* getBlock(addr_t) final; - void getBlocks(void* out, uint32_t startBlock, uint32_t blockCount) final; - void sync() final; - private: - void runWorker(); - std::condition_variable notifier; - std::mutex mutex; - alignas(16) DatasetLine currentLine; - void* output; - uint32_t startBlock, blockCount; - bool hasWork; -#ifdef TRACE - Stopwatch sw; -#endif - std::thread workerThread; - }; -} \ No newline at end of file diff --git a/src/LightProgramGenerator.hpp b/src/LightProgramGenerator.hpp deleted file mode 100644 index beb7974..0000000 --- a/src/LightProgramGenerator.hpp +++ /dev/null @@ -1,58 +0,0 @@ -/* -Copyright (c) 2019 tevador - -This file is part of RandomX. - -RandomX is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -RandomX is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with RandomX. If not, see. 
-*/ - -#include "Program.hpp" - -namespace RandomX { - - // Intel Ivy Bridge reference - namespace SuperscalarInstructionType { //uOPs (decode) execution ports latency code size - constexpr int ISUB_R = 0; //1 p015 1 3 - constexpr int IXOR_R = 1; //1 p015 1 3 - constexpr int IADD_RS = 2; //1 p01 1 4 - constexpr int IMUL_R = 3; //1 p1 3 4 - constexpr int IROR_C = 4; //1 p05 1 4 - constexpr int IADD_C7 = 5; //1 p015 1 7 - constexpr int IXOR_C7 = 6; //1 p015 1 7 - constexpr int IADD_C8 = 7; //1+0 p015 1 8 - constexpr int IXOR_C8 = 8; //1+0 p015 1 8 - constexpr int IADD_C9 = 9; //1+0 p015 1 9 - constexpr int IXOR_C9 = 10; //1+0 p015 1 9 - constexpr int IMULH_R = 11; //1+2+1 0+(p1,p5)+0 3 3+3+3 - constexpr int ISMULH_R = 12; //1+2+1 0+(p1,p5)+0 3 3+3+3 - constexpr int IMUL_RCP = 13; //1+1 p015+p1 4 10+4 - - constexpr int COUNT = 14; - constexpr int INVALID = -1; - } - - class Blake2Generator { - public: - Blake2Generator(const void* seed, int nonce); - uint8_t getByte(); - uint32_t getInt32(); - private: - uint8_t data[64]; - size_t dataIndex; - - void checkData(const size_t); - }; - - double generateSuperscalar(LightProgram& prog, Blake2Generator& gen); -} \ No newline at end of file diff --git a/src/Program.hpp b/src/Program.hpp index 37c8303..2f2a402 100644 --- a/src/Program.hpp +++ b/src/Program.hpp @@ -53,12 +53,14 @@ namespace RandomX { Instruction programBuffer[RANDOMX_PROGRAM_SIZE]; }; - class LightProgram { + static_assert(sizeof(Program) % 64 == 0, "Invalid size of class Program"); + + class SuperscalarProgram { public: Instruction& operator()(int pc) { return programBuffer[pc]; } - friend std::ostream& operator<<(std::ostream& os, const LightProgram& p) { + friend std::ostream& operator<<(std::ostream& os, const SuperscalarProgram& p) { p.print(os); return os; } @@ -74,6 +76,15 @@ namespace RandomX { void setAddressRegister(uint32_t val) { addrReg = val; } + double ipc; + int codeSize; + int macroOps; + int decodeCycles; + int cpuLatency; + int asicLatency; + 
int mulCount; + int cpuLatencies[8]; + int asicLatencies[8]; private: void print(std::ostream& os) const { for (unsigned i = 0; i < size; ++i) { @@ -85,6 +96,4 @@ namespace RandomX { uint32_t size; int addrReg; }; - - static_assert(sizeof(Program) % 64 == 0, "Invalid size of class Program"); } diff --git a/src/VirtualMachine.hpp b/src/VirtualMachine.hpp index 1edacdb..7352933 100644 --- a/src/VirtualMachine.hpp +++ b/src/VirtualMachine.hpp @@ -28,7 +28,7 @@ namespace RandomX { public: VirtualMachine(); virtual ~VirtualMachine() {} - virtual void setDataset(dataset_t ds, uint64_t size, LightProgram (&programs)[RANDOMX_CACHE_ACCESSES]) = 0; + virtual void setDataset(dataset_t ds, uint64_t size, SuperscalarProgram (&programs)[RANDOMX_CACHE_ACCESSES]) = 0; void setScratchpad(void* ptr) { scratchpad = (uint8_t*)ptr; } diff --git a/src/main.cpp b/src/main.cpp index a120cf9..42dc15f 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -36,7 +36,7 @@ along with RandomX. If not, see. #include "dataset.hpp" #include "Cache.hpp" #include "hashAes1Rx4.hpp" -#include "LightProgramGenerator.hpp" +#include "superscalarGenerator.hpp" #include "JitCompilerX86.hpp" const uint8_t seed[32] = { 191, 182, 222, 175, 249, 89, 134, 104, 241, 68, 191, 62, 162, 166, 61, 64, 123, 191, 227, 193, 118, 60, 188, 53, 223, 133, 175, 24, 123, 230, 55, 74 }; @@ -226,13 +226,13 @@ int main(int argc, char** argv) { readOption("--legacy", argc, argv, legacy); if (genSuperscalar) { - RandomX::LightProgram p; + RandomX::SuperscalarProgram p; RandomX::Blake2Generator gen(seed, programCount); RandomX::generateSuperscalar(p, gen); RandomX::AssemblyGeneratorX86 asmX86; asmX86.generateAsm(p); //std::ofstream file("lightProg2.asm"); - //asmX86.printCode(std::cout); + asmX86.printCode(std::cout); return 0; } @@ -268,7 +268,7 @@ int main(int argc, char** argv) { const uint64_t cacheSize = (RANDOMX_ARGON_MEMORY + RANDOMX_ARGON_GROWTH * epoch) * RandomX::ArgonBlockSize; const uint64_t datasetSize = (RANDOMX_DATASET_SIZE 
+ RANDOMX_DS_GROWTH * epoch); dataset.cache.size = cacheSize; - RandomX::LightProgram programs[RANDOMX_CACHE_ACCESSES]; + RandomX::SuperscalarProgram programs[RANDOMX_CACHE_ACCESSES]; std::cout << "RandomX - " << (miningMode ? "mining" : "verification") << " mode" << std::endl; diff --git a/src/LightProgramGenerator.cpp b/src/superscalarGenerator.cpp similarity index 76% rename from src/LightProgramGenerator.cpp rename to src/superscalarGenerator.cpp index 40a767b..d4fd32a 100644 --- a/src/LightProgramGenerator.cpp +++ b/src/superscalarGenerator.cpp @@ -18,7 +18,6 @@ along with RandomX. If not, see. */ #include -#include "blake2/blake2.h" #include "configuration.h" #include "Program.hpp" #include "blake2/endian.h" @@ -27,7 +26,7 @@ along with RandomX. If not, see. #include #include #include -#include "LightProgramGenerator.hpp" +#include "superscalarGenerator.hpp" namespace RandomX { @@ -35,6 +34,7 @@ namespace RandomX { return type == SuperscalarInstructionType::IMUL_R || type == SuperscalarInstructionType::IMULH_R || type == SuperscalarInstructionType::ISMULH_R || type == SuperscalarInstructionType::IMUL_RCP; } + //uOPs (micro-ops) are represented only by the execution port they can go to namespace ExecutionPort { using type = int; constexpr type Null = 0; @@ -46,40 +46,9 @@ namespace RandomX { constexpr type P015 = P0 | P1 | P5; } - Blake2Generator::Blake2Generator(const void* seed, int nonce) : dataIndex(sizeof(data)) { - memset(data, 0, sizeof(data)); - memcpy(data, seed, SeedSize); - store32(&data[60], nonce); - } - - uint8_t Blake2Generator::getByte() { - checkData(1); - return data[dataIndex++]; - } - - uint32_t Blake2Generator::getInt32() { - checkData(4); - auto ret = load32(&data[dataIndex]); - dataIndex += 4; - return ret; - } - - void Blake2Generator::checkData(const size_t bytesNeeded) { - if (dataIndex + bytesNeeded > sizeof(data)) { - blake2b(data, sizeof(data), data, sizeof(data), nullptr, 0); - dataIndex = 0; - } - } - - class RegisterInfo { - 
public: - RegisterInfo() : latency(0), lastOpGroup(-1), lastOpPar(-1), value(0) {} - int latency; - int lastOpGroup; - int lastOpPar; - int value; - }; - + //Macro-operation as output of the x86 decoder + //Usually one macro-op = one x86 instruction, but 2 instructions are sometimes fused into 1 macro-op + //Macro-op can consist of 1 or 2 uOPs. class MacroOp { public: MacroOp(const char* name, int size) @@ -137,10 +106,7 @@ namespace RandomX { int latency_; ExecutionPort::type uop1_; ExecutionPort::type uop2_; - int cycle_; bool dependent_ = false; - MacroOp* depDst_ = nullptr; - MacroOp* depSrc_ = nullptr; }; //Size: 3 bytes @@ -174,7 +140,7 @@ namespace RandomX { const MacroOp ISMULH_R_ops_array[] = { MacroOp::Mov_rr, MacroOp::Imul_r, MacroOp::Mov_rr }; const MacroOp IMUL_RCP_ops_array[] = { MacroOp::Mov_ri64, MacroOp(MacroOp::Imul_rr, true) }; - class LightInstructionInfo { + class SuperscalarInstructionInfo { public: const char* getName() const { return name_; @@ -203,21 +169,21 @@ namespace RandomX { int getSrcOp() const { return srcOp_; } - static const LightInstructionInfo ISUB_R; - static const LightInstructionInfo IXOR_R; - static const LightInstructionInfo IADD_RS; - static const LightInstructionInfo IMUL_R; - static const LightInstructionInfo IROR_C; - static const LightInstructionInfo IADD_C7; - static const LightInstructionInfo IXOR_C7; - static const LightInstructionInfo IADD_C8; - static const LightInstructionInfo IXOR_C8; - static const LightInstructionInfo IADD_C9; - static const LightInstructionInfo IXOR_C9; - static const LightInstructionInfo IMULH_R; - static const LightInstructionInfo ISMULH_R; - static const LightInstructionInfo IMUL_RCP; - static const LightInstructionInfo NOP; + static const SuperscalarInstructionInfo ISUB_R; + static const SuperscalarInstructionInfo IXOR_R; + static const SuperscalarInstructionInfo IADD_RS; + static const SuperscalarInstructionInfo IMUL_R; + static const SuperscalarInstructionInfo IROR_C; + static const 
SuperscalarInstructionInfo IADD_C7; + static const SuperscalarInstructionInfo IXOR_C7; + static const SuperscalarInstructionInfo IADD_C8; + static const SuperscalarInstructionInfo IXOR_C8; + static const SuperscalarInstructionInfo IADD_C9; + static const SuperscalarInstructionInfo IXOR_C9; + static const SuperscalarInstructionInfo IMULH_R; + static const SuperscalarInstructionInfo ISMULH_R; + static const SuperscalarInstructionInfo IMUL_RCP; + static const SuperscalarInstructionInfo NOP; private: const char* name_; int type_; @@ -227,14 +193,14 @@ namespace RandomX { int dstOp_ = 0; int srcOp_; - LightInstructionInfo(const char* name) + SuperscalarInstructionInfo(const char* name) : name_(name), type_(-1), latency_(0) {} - LightInstructionInfo(const char* name, int type, const MacroOp& op, int srcOp) + SuperscalarInstructionInfo(const char* name, int type, const MacroOp& op, int srcOp) : name_(name), type_(type), latency_(op.getLatency()), srcOp_(srcOp) { ops_.push_back(MacroOp(op)); } template - LightInstructionInfo(const char* name, int type, const MacroOp(&arr)[N], int resultOp, int dstOp, int srcOp) + SuperscalarInstructionInfo(const char* name, int type, const MacroOp(&arr)[N], int resultOp, int dstOp, int srcOp) : name_(name), type_(type), latency_(0), resultOp_(resultOp), dstOp_(dstOp), srcOp_(srcOp) { for (unsigned i = 0; i < N; ++i) { ops_.push_back(MacroOp(arr[i])); @@ -244,24 +210,34 @@ namespace RandomX { } }; - const LightInstructionInfo LightInstructionInfo::ISUB_R = LightInstructionInfo("ISUB_R", SuperscalarInstructionType::ISUB_R, MacroOp::Sub_rr, 0); - const LightInstructionInfo LightInstructionInfo::IXOR_R = LightInstructionInfo("IXOR_R", SuperscalarInstructionType::IXOR_R, MacroOp::Xor_rr, 0); - const LightInstructionInfo LightInstructionInfo::IADD_RS = LightInstructionInfo("IADD_RS", SuperscalarInstructionType::IADD_RS, MacroOp::Lea_sib, 0); - const LightInstructionInfo LightInstructionInfo::IMUL_R = LightInstructionInfo("IMUL_R", 
SuperscalarInstructionType::IMUL_R, MacroOp::Imul_rr, 0); - const LightInstructionInfo LightInstructionInfo::IROR_C = LightInstructionInfo("IROR_C", SuperscalarInstructionType::IROR_C, MacroOp::Ror_ri, -1); + const SuperscalarInstructionInfo SuperscalarInstructionInfo::ISUB_R = SuperscalarInstructionInfo("ISUB_R", SuperscalarInstructionType::ISUB_R, MacroOp::Sub_rr, 0); + const SuperscalarInstructionInfo SuperscalarInstructionInfo::IXOR_R = SuperscalarInstructionInfo("IXOR_R", SuperscalarInstructionType::IXOR_R, MacroOp::Xor_rr, 0); + const SuperscalarInstructionInfo SuperscalarInstructionInfo::IADD_RS = SuperscalarInstructionInfo("IADD_RS", SuperscalarInstructionType::IADD_RS, MacroOp::Lea_sib, 0); + const SuperscalarInstructionInfo SuperscalarInstructionInfo::IMUL_R = SuperscalarInstructionInfo("IMUL_R", SuperscalarInstructionType::IMUL_R, MacroOp::Imul_rr, 0); + const SuperscalarInstructionInfo SuperscalarInstructionInfo::IROR_C = SuperscalarInstructionInfo("IROR_C", SuperscalarInstructionType::IROR_C, MacroOp::Ror_ri, -1); - const LightInstructionInfo LightInstructionInfo::IADD_C7 = LightInstructionInfo("IADD_C7", SuperscalarInstructionType::IADD_C7, MacroOp::Add_ri, -1); - const LightInstructionInfo LightInstructionInfo::IXOR_C7 = LightInstructionInfo("IXOR_C7", SuperscalarInstructionType::IXOR_C7, MacroOp::Xor_ri, -1); - const LightInstructionInfo LightInstructionInfo::IADD_C8 = LightInstructionInfo("IADD_C8", SuperscalarInstructionType::IADD_C8, MacroOp::Add_ri, -1); - const LightInstructionInfo LightInstructionInfo::IXOR_C8 = LightInstructionInfo("IXOR_C8", SuperscalarInstructionType::IXOR_C8, MacroOp::Xor_ri, -1); - const LightInstructionInfo LightInstructionInfo::IADD_C9 = LightInstructionInfo("IADD_C9", SuperscalarInstructionType::IADD_C9, MacroOp::Add_ri, -1); - const LightInstructionInfo LightInstructionInfo::IXOR_C9 = LightInstructionInfo("IXOR_C9", SuperscalarInstructionType::IXOR_C9, MacroOp::Xor_ri, -1); + const SuperscalarInstructionInfo 
SuperscalarInstructionInfo::IADD_C7 = SuperscalarInstructionInfo("IADD_C7", SuperscalarInstructionType::IADD_C7, MacroOp::Add_ri, -1); + const SuperscalarInstructionInfo SuperscalarInstructionInfo::IXOR_C7 = SuperscalarInstructionInfo("IXOR_C7", SuperscalarInstructionType::IXOR_C7, MacroOp::Xor_ri, -1); + const SuperscalarInstructionInfo SuperscalarInstructionInfo::IADD_C8 = SuperscalarInstructionInfo("IADD_C8", SuperscalarInstructionType::IADD_C8, MacroOp::Add_ri, -1); + const SuperscalarInstructionInfo SuperscalarInstructionInfo::IXOR_C8 = SuperscalarInstructionInfo("IXOR_C8", SuperscalarInstructionType::IXOR_C8, MacroOp::Xor_ri, -1); + const SuperscalarInstructionInfo SuperscalarInstructionInfo::IADD_C9 = SuperscalarInstructionInfo("IADD_C9", SuperscalarInstructionType::IADD_C9, MacroOp::Add_ri, -1); + const SuperscalarInstructionInfo SuperscalarInstructionInfo::IXOR_C9 = SuperscalarInstructionInfo("IXOR_C9", SuperscalarInstructionType::IXOR_C9, MacroOp::Xor_ri, -1); - const LightInstructionInfo LightInstructionInfo::IMULH_R = LightInstructionInfo("IMULH_R", SuperscalarInstructionType::IMULH_R, IMULH_R_ops_array, 1, 0, 1); - const LightInstructionInfo LightInstructionInfo::ISMULH_R = LightInstructionInfo("ISMULH_R", SuperscalarInstructionType::ISMULH_R, ISMULH_R_ops_array, 1, 0, 1); - const LightInstructionInfo LightInstructionInfo::IMUL_RCP = LightInstructionInfo("IMUL_RCP", SuperscalarInstructionType::IMUL_RCP, IMUL_RCP_ops_array, 1, 1, -1); + const SuperscalarInstructionInfo SuperscalarInstructionInfo::IMULH_R = SuperscalarInstructionInfo("IMULH_R", SuperscalarInstructionType::IMULH_R, IMULH_R_ops_array, 1, 0, 1); + const SuperscalarInstructionInfo SuperscalarInstructionInfo::ISMULH_R = SuperscalarInstructionInfo("ISMULH_R", SuperscalarInstructionType::ISMULH_R, ISMULH_R_ops_array, 1, 0, 1); + const SuperscalarInstructionInfo SuperscalarInstructionInfo::IMUL_RCP = SuperscalarInstructionInfo("IMUL_RCP", SuperscalarInstructionType::IMUL_RCP, IMUL_RCP_ops_array, 
1, 1, -1); - const LightInstructionInfo LightInstructionInfo::NOP = LightInstructionInfo("NOP"); + const SuperscalarInstructionInfo SuperscalarInstructionInfo::NOP = SuperscalarInstructionInfo("NOP"); + + //these are some of the options how to split a 16-byte window into 3 or 4 x86 instructions. + //RandomX uses instructions with a native size of 3 (sub, xor, mul, mov), 4 (lea, mul), 7 (xor, add immediate) or 10 bytes (mov 64-bit immediate). + //Slots with sizes of 8 or 9 bytes need to be padded with a nop instruction. + const int buffer0[] = { 4, 8, 4 }; + const int buffer1[] = { 7, 3, 3, 3 }; + const int buffer2[] = { 3, 7, 3, 3 }; + const int buffer3[] = { 4, 9, 3 }; + const int buffer4[] = { 4, 4, 4, 4 }; + const int buffer5[] = { 3, 3, 10 }; class DecoderBuffer { public: @@ -318,16 +294,6 @@ namespace RandomX { } }; - //these are some of the options how to split a 16-byte window into 3 or 4 x86 instructions. - //RandomX uses instructions with a native size of 3 (sub, xor, mul, mov), 4 (lea, mul), 7 (xor, add immediate) or 10 bytes (mov 64-bit immediate). - //Slots with sizes of 8 or 9 bytes need to be padded with a nop instruction. 
- const int buffer0[] = { 4, 8, 4 }; - const int buffer1[] = { 7, 3, 3, 3 }; - const int buffer2[] = { 3, 7, 3, 3 }; - const int buffer3[] = { 4, 9, 3 }; - const int buffer4[] = { 4, 4, 4, 4 }; - const int buffer5[] = { 3, 3, 10 }; - const DecoderBuffer DecoderBuffer::decodeBuffer484 = DecoderBuffer("4,8,4", 0, buffer0); const DecoderBuffer DecoderBuffer::decodeBuffer7333 = DecoderBuffer("7,3,3,3", 1, buffer1); const DecoderBuffer DecoderBuffer::decodeBuffer3733 = DecoderBuffer("3,7,3,3", 2, buffer2); @@ -344,13 +310,13 @@ namespace RandomX { const DecoderBuffer DecoderBuffer::Default = DecoderBuffer(); - const LightInstructionInfo* slot_3[] = { &LightInstructionInfo::ISUB_R, &LightInstructionInfo::IXOR_R }; - const LightInstructionInfo* slot_3L[] = { &LightInstructionInfo::ISUB_R, &LightInstructionInfo::IXOR_R, &LightInstructionInfo::IMULH_R, &LightInstructionInfo::ISMULH_R }; - const LightInstructionInfo* slot_4[] = { &LightInstructionInfo::IROR_C, &LightInstructionInfo::IADD_RS }; - const LightInstructionInfo* slot_7[] = { &LightInstructionInfo::IXOR_C7, &LightInstructionInfo::IADD_C7 }; - const LightInstructionInfo* slot_8[] = { &LightInstructionInfo::IXOR_C8, &LightInstructionInfo::IADD_C8 }; - const LightInstructionInfo* slot_9[] = { &LightInstructionInfo::IXOR_C9, &LightInstructionInfo::IADD_C9 }; - const LightInstructionInfo* slot_10 = &LightInstructionInfo::IMUL_RCP; + const SuperscalarInstructionInfo* slot_3[] = { &SuperscalarInstructionInfo::ISUB_R, &SuperscalarInstructionInfo::IXOR_R }; + const SuperscalarInstructionInfo* slot_3L[] = { &SuperscalarInstructionInfo::ISUB_R, &SuperscalarInstructionInfo::IXOR_R, &SuperscalarInstructionInfo::IMULH_R, &SuperscalarInstructionInfo::ISMULH_R }; + const SuperscalarInstructionInfo* slot_4[] = { &SuperscalarInstructionInfo::IROR_C, &SuperscalarInstructionInfo::IADD_RS }; + const SuperscalarInstructionInfo* slot_7[] = { &SuperscalarInstructionInfo::IXOR_C7, &SuperscalarInstructionInfo::IADD_C7 }; + const 
SuperscalarInstructionInfo* slot_8[] = { &SuperscalarInstructionInfo::IXOR_C8, &SuperscalarInstructionInfo::IADD_C8 }; + const SuperscalarInstructionInfo* slot_9[] = { &SuperscalarInstructionInfo::IXOR_C9, &SuperscalarInstructionInfo::IADD_C9 }; + const SuperscalarInstructionInfo* slot_10 = &SuperscalarInstructionInfo::IMUL_RCP; static bool selectRegister(std::vector& availableRegisters, Blake2Generator& gen, int& reg) { int index; @@ -367,9 +333,19 @@ namespace RandomX { return true; } - class LightInstruction { + class RegisterInfo { public: - void toInstr(Instruction& instr) { + RegisterInfo() : latency(0), lastOpGroup(-1), lastOpPar(-1), value(0) {} + int latency; + int lastOpGroup; + int lastOpPar; + int value; + }; + + //"SuperscalarInstruction" consists of one or more macro-ops + class SuperscalarInstruction { + public: + void toInstr(Instruction& instr) { //translate to a RandomX instruction format instr.opcode = getType(); instr.dst = dst_; instr.src = src_ >= 0 ? src_ : dst_; @@ -392,7 +368,7 @@ namespace RandomX { case 4: //if this is the 4-4-4-4 buffer, issue multiplications as the first 3 instructions if (fetchType == 4 && !isLast) { - create(&LightInstructionInfo::IMUL_R, gen); + create(&SuperscalarInstructionInfo::IMUL_R, gen); } else { create(slot_4[gen.getByte() & 1], gen); @@ -415,7 +391,7 @@ namespace RandomX { } } - void create(const LightInstructionInfo* info, Blake2Generator& gen) { + void create(const SuperscalarInstructionInfo* info, Blake2Generator& gen) { info_ = info; reset(); switch (info->getType()) @@ -445,7 +421,7 @@ namespace RandomX { mod_ = 0; imm32_ = 0; opGroup_ = SuperscalarInstructionType::IMUL_R; - opGroupPar_ = -1; + groupParIsSource_ = true; } break; case SuperscalarInstructionType::IROR_C: { @@ -505,18 +481,22 @@ namespace RandomX { } } - bool selectDestination(int cycle, RegisterInfo (&registers)[8], Blake2Generator& gen) { + bool selectDestination(int cycle, bool allowChainedMul, RegisterInfo (&registers)[8], Blake2Generator&
gen) { + /*if (allowChainedMultiplication && opGroup_ == SuperscalarInstructionType::IMUL_R) + std::cout << "Selecting destination with chained MUL enabled" << std::endl;*/ std::vector availableRegisters; //Conditions for the destination register: // * value must be ready at the required cycle // * cannot be the same as the source register unless the instruction allows it // - this avoids optimizable instructions such as "xor r, r" or "sub r, r" + // * register cannot be multiplied twice in a row unless allowChainedMul is true + // - this avoids accumulation of trailing zeroes in registers due to excessive multiplication + // - allowChainedMul is set to true if an attempt to find source/destination registers failed (this is quite rare, but prevents a catastrophic failure of the generator) // * either the last instruction applied to the register or its source must be different than this instruction // - this avoids optimizable instruction sequences such as "xor r1, r2; xor r1, r2" or "ror r, C1; ror r, C2" or "add r, C1; add r, C2" - // - it also avoids accumulation of trailing zeroes in registers due to excessive multiplication // * register r5 cannot be the destination of the IADD_RS instruction (limitation of the x86 lea instruction) for (unsigned i = 0; i < 8; ++i) { - if (registers[i].latency <= cycle && (canReuse_ || i != src_) && (registers[i].lastOpGroup != opGroup_ || registers[i].lastOpPar != opGroupPar_) && (info_->getType() != SuperscalarInstructionType::IADD_RS || i != LimitedAddressRegister)) + if (registers[i].latency <= cycle && (canReuse_ || i != src_) && (allowChainedMul || opGroup_ != SuperscalarInstructionType::IMUL_R || registers[i].lastOpGroup != SuperscalarInstructionType::IMUL_R) && (registers[i].lastOpGroup != opGroup_ || registers[i].lastOpPar != opGroupPar_) && (info_->getType() != SuperscalarInstructionType::IADD_RS || i != LimitedAddressRegister)) availableRegisters.push_back(i); } return selectRegister(availableRegisters, gen, dst_); @@ 
-560,14 +540,14 @@ namespace RandomX { return opGroupPar_; } - const LightInstructionInfo& getInfo() const { + const SuperscalarInstructionInfo& getInfo() const { return *info_; } - static const LightInstruction Null; + static const SuperscalarInstruction Null; private: - const LightInstructionInfo* info_; + const SuperscalarInstructionInfo* info_; int src_ = -1; int dst_ = -1; int mod_; @@ -582,15 +562,16 @@ namespace RandomX { canReuse_ = groupParIsSource_ = false; } - LightInstruction(const LightInstructionInfo* info) : info_(info) { + SuperscalarInstruction(const SuperscalarInstructionInfo* info) : info_(info) { } }; - const LightInstruction LightInstruction::Null = LightInstruction(&LightInstructionInfo::NOP); + const SuperscalarInstruction SuperscalarInstruction::Null = SuperscalarInstruction(&SuperscalarInstructionInfo::NOP); - constexpr int CYCLE_MAP_SIZE = RANDOMX_SUPERSCALAR_LATENCY + 3; + constexpr int CYCLE_MAP_SIZE = RANDOMX_SUPERSCALAR_LATENCY + 4; constexpr int LOOK_FORWARD_CYCLES = 4; constexpr int MAX_THROWAWAY_COUNT = 256; + #ifndef _DEBUG constexpr bool TRACE = false; constexpr bool INFO = false; @@ -602,7 +583,7 @@ namespace RandomX { template static int scheduleUop(ExecutionPort::type uop, ExecutionPort::type(&portBusy)[CYCLE_MAP_SIZE][3], int cycle) { //The scheduling here is done optimistically by checking port availability in order P5 -> P0 -> P1 to not overload - //P1 (multiplication port) by instructions that can go to any port. + //port P1 (multiplication) by instructions that can go to any port. 
for (; cycle < CYCLE_MAP_SIZE; ++cycle) { if ((uop & ExecutionPort::P5) != 0 && !portBusy[cycle][2]) { if (commit) { @@ -666,14 +647,14 @@ namespace RandomX { return -1; } - double generateSuperscalar(LightProgram& prog, Blake2Generator& gen) { + void generateSuperscalar(SuperscalarProgram& prog, Blake2Generator& gen) { ExecutionPort::type portBusy[CYCLE_MAP_SIZE][3]; memset(portBusy, 0, sizeof(portBusy)); RegisterInfo registers[8]; const DecoderBuffer* decodeBuffer = &DecoderBuffer::Default; - LightInstruction currentInstruction = LightInstruction::Null; + SuperscalarInstruction currentInstruction = SuperscalarInstruction::Null; int macroOpIndex = 0; int codeSize = 0; int macroOpCount = 0; @@ -719,7 +700,9 @@ namespace RandomX { int scheduleCycle = scheduleMop(mop, portBusy, cycle, depCycle); if (scheduleCycle < 0) { /*if (TRACE)*/ std::cout << "Unable to map operation '" << mop.getName() << "' to execution port (cycle " << cycle << ")" << std::endl; - return 0; + //__debugbreak(); + portsSaturated = true; + break; } //find a source register (if applicable) that will be ready when this instruction executes @@ -737,20 +720,20 @@ namespace RandomX { throwAwayCount++; macroOpIndex = currentInstruction.getInfo().getSize(); if (TRACE) std::cout << "; THROW away " << currentInstruction.getInfo().getName() << std::endl; + //cycle = topCycle; continue; } //abort this decode buffer - /*if (TRACE)*/ std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - source registers not available" << std::endl; - currentInstruction = LightInstruction::Null; + /*if (TRACE)*/ std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - source registers not available for operation " << currentInstruction.getInfo().getName() << std::endl; + currentInstruction = SuperscalarInstruction::Null; break; } if (TRACE) std::cout << "; src = r" << currentInstruction.getSource() << std::endl; } - throwAwayCount = 
0; //find a destination register that will be ready when this instruction executes if (macroOpIndex == currentInstruction.getInfo().getDstOp()) { int forward; - for (forward = 0; forward < LOOK_FORWARD_CYCLES && !currentInstruction.selectDestination(scheduleCycle, registers, gen); ++forward) { + for (forward = 0; forward < LOOK_FORWARD_CYCLES && !currentInstruction.selectDestination(scheduleCycle, throwAwayCount > 0, registers, gen); ++forward) { if (TRACE) std::cout << "; dst STALL at cycle " << cycle << std::endl; ++scheduleCycle; ++cycle; @@ -760,16 +743,18 @@ namespace RandomX { throwAwayCount++; macroOpIndex = currentInstruction.getInfo().getSize(); if (TRACE) std::cout << "; THROW away " << currentInstruction.getInfo().getName() << std::endl; + //cycle = topCycle; continue; } //abort this decode buffer /*if (TRACE)*/ std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - destination registers not available" << std::endl; - currentInstruction = LightInstruction::Null; + currentInstruction = SuperscalarInstruction::Null; break; } if (TRACE) std::cout << "; dst = r" << currentInstruction.getDestination() << std::endl; } throwAwayCount = 0; + //recalculate when the instruction can be scheduled for execution based on operand availability scheduleCycle = scheduleMop(mop, portBusy, scheduleCycle, scheduleCycle); @@ -809,67 +794,53 @@ namespace RandomX { ++cycle; } - if(INFO) std::cout << "; ALU port utilization:" << std::endl; - if (INFO) std::cout << "; (* = in use, _ = idle)" << std::endl; - - int portCycles = 0; - for (int i = 0; i < CYCLE_MAP_SIZE; ++i) { - //std::cout << "; " << std::setw(3) << i << " "; - for (int j = 0; j < 3; ++j) { - //std::cout << (portBusy[i][j] ? 
'*' : '_'); - portCycles += !!portBusy[i][j]; - } - //std::cout << std::endl; - } - double ipc = (macroOpCount / (double)retireCycle); - if (INFO) std::cout << "; code size " << codeSize << " bytes" << std::endl; - if (INFO) std::cout << "; x86 macro-ops: " << macroOpCount << std::endl; - if (INFO) std::cout << "; fetch cycles: " << decodeCycle << std::endl; - if (INFO) std::cout << "; RandomX instructions: " << programSize << std::endl; - if (INFO) std::cout << "; Execution time: " << retireCycle << " cycles" << std::endl; - if (INFO) std::cout << "; IPC = " << ipc << std::endl; - if (INFO) std::cout << "; Port-cycles: " << portCycles << std::endl; - if (INFO) std::cout << "; Multiplications: " << mulCount << std::endl; - - int asicLatency[8]; - memset(asicLatency, 0, sizeof(asicLatency)); + memset(prog.asicLatencies, 0, sizeof(prog.asicLatencies)); //Calculate ASIC latency: //Assumes 1 cycle latency for all operations and unlimited parallelization. for (int i = 0; i < programSize; ++i) { Instruction& instr = prog(i); - int latDst = asicLatency[instr.dst] + 1; - int latSrc = instr.dst != instr.src ? asicLatency[instr.src] + 1 : 0; - asicLatency[instr.dst] = std::max(latDst, latSrc); + int latDst = prog.asicLatencies[instr.dst] + 1; + int latSrc = instr.dst != instr.src ? 
prog.asicLatencies[instr.src] + 1 : 0; + prog.asicLatencies[instr.dst] = std::max(latDst, latSrc); } //address register is the register with the highest ASIC latency int asicLatencyMax = 0; int addressReg = 0; for (int i = 0; i < 8; ++i) { - if (asicLatency[i] > asicLatencyMax) { - asicLatencyMax = asicLatency[i]; + if (prog.asicLatencies[i] > asicLatencyMax) { + asicLatencyMax = prog.asicLatencies[i]; addressReg = i; } - } - - if (INFO) std::cout << "; ASIC latency: " << asicLatencyMax << std::endl; - - if (INFO) { - std::cout << "; ASIC latency:" << std::endl; - for (int i = 0; i < 8; ++i) { - std::cout << "; r" << i << " = " << asicLatency[i] << std::endl; - } - if (INFO) std::cout << "; CPU latency:" << std::endl; - for (int i = 0; i < 8; ++i) { - std::cout << "; r" << i << " = " << registers[i].latency << std::endl; - } + prog.cpuLatencies[i] = registers[i].latency; } prog.setSize(programSize); prog.setAddressRegister(addressReg); - return ipc; + + prog.cpuLatency = retireCycle; + prog.asicLatency = asicLatencyMax; + prog.codeSize = codeSize; + prog.macroOps = macroOpCount; + prog.decodeCycles = decodeCycle; + prog.ipc = ipc; + prog.mulCount = mulCount; + + + /*if(INFO) std::cout << "; ALU port utilization:" << std::endl; + if (INFO) std::cout << "; (* = in use, _ = idle)" << std::endl; + + int portCycles = 0; + for (int i = 0; i < CYCLE_MAP_SIZE; ++i) { + std::cout << "; " << std::setw(3) << i << " "; + for (int j = 0; j < 3; ++j) { + std::cout << (portBusy[i][j] ? '*' : '_'); + portCycles += !!portBusy[i][j]; + } + std::cout << std::endl; + }*/ } } \ No newline at end of file diff --git a/src/superscalarGenerator.hpp b/src/superscalarGenerator.hpp new file mode 100644 index 0000000..a64e80d --- /dev/null +++ b/src/superscalarGenerator.hpp @@ -0,0 +1,47 @@ +/* +Copyright (c) 2019 tevador + +This file is part of RandomX. 
+ +RandomX is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +RandomX is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with RandomX. If not, see. +*/ + +#pragma once +#include "Program.hpp" +#include "Blake2Generator.hpp" + +namespace RandomX { + // Intel Ivy Bridge reference + namespace SuperscalarInstructionType { //uOPs (decode) execution ports latency code size + constexpr int ISUB_R = 0; //1 p015 1 3 (sub) + constexpr int IXOR_R = 1; //1 p015 1 3 (xor) + constexpr int IADD_RS = 2; //1 p01 1 4 (lea) + constexpr int IMUL_R = 3; //1 p1 3 4 (imul) + constexpr int IROR_C = 4; //1 p05 1 4 (ror) + constexpr int IADD_C7 = 5; //1 p015 1 7 (add) + constexpr int IXOR_C7 = 6; //1 p015 1 7 (xor) + constexpr int IADD_C8 = 7; //1+0 p015 1 7+1 (add+nop) + constexpr int IXOR_C8 = 8; //1+0 p015 1 7+1 (xor+nop) + constexpr int IADD_C9 = 9; //1+0 p015 1 7+2 (add+nop) + constexpr int IXOR_C9 = 10; //1+0 p015 1 7+2 (xor+nop) + constexpr int IMULH_R = 11; //1+2+1 0+(p1,p5)+0 3 3+3+3 (mov+mul+mov) + constexpr int ISMULH_R = 12; //1+2+1 0+(p1,p5)+0 3 3+3+3 (mov+imul+mov) + constexpr int IMUL_RCP = 13; //1+1 p015+p1 4 10+4 (mov+imul) + + constexpr int COUNT = 14; + constexpr int INVALID = -1; + } + + void generateSuperscalar(SuperscalarProgram& prog, Blake2Generator& gen); +} \ No newline at end of file diff --git a/src/tests/superscalar-avalanche.cpp b/src/tests/superscalar-avalanche.cpp index 9c91a88..9fa1613 100644 --- a/src/tests/superscalar-avalanche.cpp +++ b/src/tests/superscalar-avalanche.cpp @@ -20,9 +20,10 @@ along with RandomX. If not, see. 
#include #include #include -#include "../LightProgramGenerator.hpp" +#include "../superscalarGenerator.hpp" #include "../InterpretedVirtualMachine.hpp" #include "../intrinPortable.h" +#include "../Blake2Generator.hpp" const uint8_t seed[32] = { 191, 182, 222, 175, 249, 89, 134, 104, 241, 68, 191, 62, 162, 166, 61, 64, 123, 191, 227, 193, 118, 60, 188, 53, 223, 133, 175, 24, 123, 230, 55, 74 }; @@ -45,9 +46,9 @@ int main() { uint64_t rb[8]; memcpy(rb, ra, sizeof rb); rb[0] ^= (1ULL << bit); - RandomX::LightProgram p; + RandomX::SuperscalarProgram p; RandomX::Blake2Generator gen(seed, i); - RandomX::generateLightProg2(p, gen); + RandomX::generateSuperscalar(p, gen); RandomX::InterpretedVirtualMachine::executeSuperscalar(ra, p, dummy); RandomX::InterpretedVirtualMachine::executeSuperscalar(rb, p, dummy); uint64_t diff = 0; diff --git a/src/tests/superscalar-init.cpp b/src/tests/superscalar-init.cpp index b366355..a7c1208 100644 --- a/src/tests/superscalar-init.cpp +++ b/src/tests/superscalar-init.cpp @@ -21,7 +21,7 @@ along with RandomX. If not, see. 
#include #include #include -#include "../LightProgramGenerator.hpp" +#include "../superscalarGenerator.hpp" #include "../InterpretedVirtualMachine.hpp" #include "../intrinPortable.h" #include "../configuration.h" diff --git a/vcxproj/randomx.vcxproj b/vcxproj/randomx.vcxproj index 1c1cae0..d646143 100644 --- a/vcxproj/randomx.vcxproj +++ b/vcxproj/randomx.vcxproj @@ -127,6 +127,7 @@ + @@ -137,8 +138,7 @@ - - + @@ -153,6 +153,7 @@ + @@ -167,8 +168,7 @@ - - + diff --git a/vcxproj/randomx.vcxproj.filters b/vcxproj/randomx.vcxproj.filters index 5b821c8..77939bd 100644 --- a/vcxproj/randomx.vcxproj.filters +++ b/vcxproj/randomx.vcxproj.filters @@ -54,12 +54,6 @@ Source Files - - Source Files - - - Source Files - Source Files @@ -75,6 +69,12 @@ Source Files + + Source Files + + + Source Files + @@ -136,12 +136,6 @@ Header Files - - Header Files - - - Header Files - Header Files @@ -166,5 +160,11 @@ Header Files + + Header Files + + + Header Files + \ No newline at end of file diff --git a/vcxproj/superscalar-avalanche.vcxproj b/vcxproj/superscalar-avalanche.vcxproj index dab0311..1cac62b 100644 --- a/vcxproj/superscalar-avalanche.vcxproj +++ b/vcxproj/superscalar-avalanche.vcxproj @@ -118,6 +118,7 @@ + @@ -125,9 +126,9 @@ - + diff --git a/vcxproj/superscalar-avalanche.vcxproj.filters b/vcxproj/superscalar-avalanche.vcxproj.filters index 9984ed1..93b3838 100644 --- a/vcxproj/superscalar-avalanche.vcxproj.filters +++ b/vcxproj/superscalar-avalanche.vcxproj.filters @@ -45,9 +45,6 @@ Source Files - - Source Files - Source Files @@ -60,6 +57,12 @@ Source Files + + Source Files + + + Source Files + diff --git a/vcxproj/superscalar-init.vcxproj b/vcxproj/superscalar-init.vcxproj index 4c4794c..d765f85 100644 --- a/vcxproj/superscalar-init.vcxproj +++ b/vcxproj/superscalar-init.vcxproj @@ -118,6 +118,7 @@ + @@ -125,9 +126,9 @@ - + diff --git a/vcxproj/superscalar-init.vcxproj.filters b/vcxproj/superscalar-init.vcxproj.filters index 4666d07..cad6e2b 100644 --- 
a/vcxproj/superscalar-init.vcxproj.filters +++ b/vcxproj/superscalar-init.vcxproj.filters @@ -42,9 +42,6 @@ Source Files - - Source Files - Source Files @@ -60,6 +57,12 @@ Source Files + + Source Files + + + Source Files +