More refactoring

This commit is contained in:
tevador 2019-04-12 19:36:08 +02:00
parent 9404516dd8
commit 8c37d4aac3
28 changed files with 347 additions and 453 deletions

View File

@ -23,7 +23,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
#include "common.hpp"
#include "reciprocal.h"
#include "Program.hpp"
#include "./LightProgramGenerator.hpp"
#include "superscalarGenerator.hpp"
namespace RandomX {
@ -62,7 +62,7 @@ namespace RandomX {
}
}
void AssemblyGeneratorX86::generateAsm(LightProgram& prog) {
void AssemblyGeneratorX86::generateAsm(SuperscalarProgram& prog) {
asmCode.str(std::string()); //clear
asmCode << "ALIGN 16" << std::endl;
for (unsigned i = 0; i < prog.getSize(); ++i) {
@ -126,7 +126,7 @@ namespace RandomX {
}
}
void AssemblyGeneratorX86::generateC(LightProgram& prog) {
void AssemblyGeneratorX86::generateC(SuperscalarProgram& prog) {
asmCode.str(std::string()); //clear
asmCode << "#include <stdint.h>" << std::endl;
asmCode << "#if defined(__SIZEOF_INT128__)" << std::endl;

View File

@ -27,7 +27,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
namespace RandomX {
class Program;
class LightProgram;
class SuperscalarProgram;
class AssemblyGeneratorX86;
typedef void(AssemblyGeneratorX86::*InstructionGenerator)(Instruction&, int);
@ -35,8 +35,8 @@ namespace RandomX {
class AssemblyGeneratorX86 {
public:
void generateProgram(Program& prog);
void generateAsm(LightProgram& prog);
void generateC(LightProgram& prog);
void generateAsm(SuperscalarProgram& prog);
void generateC(SuperscalarProgram& prog);
void printCode(std::ostream& os) {
os << asmCode.rdbuf();
}

51
src/Blake2Generator.cpp Normal file
View File

@ -0,0 +1,51 @@
/*
Copyright (c) 2019 tevador
This file is part of RandomX.
RandomX is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
RandomX is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
*/
#include "blake2/blake2.h"
#include "blake2/endian.h"
#include "Blake2Generator.hpp"
#include "common.hpp"
namespace RandomX {
// Seeds the generator state. The whole buffer is zeroed, the first SeedSize
// bytes are copied from `seed`, and `nonce` is written with store32 into the
// last four bytes of the buffer. dataIndex starts at sizeof(data), i.e. no
// buffered byte is considered unread, so the first getByte()/getInt32() call
// goes through the refill in checkData() before returning anything.
Blake2Generator::Blake2Generator(const void* seed, int nonce)
	: dataIndex(sizeof(data))
{
	// the nonce occupies the trailing 32-bit word of the buffer
	const size_t nonceOffset = sizeof(data) - sizeof(uint32_t);
	memset(data, 0, sizeof(data));
	memcpy(data, seed, SeedSize);
	store32(&data[nonceOffset], nonce);
}
// Returns the next byte of the stream. checkData() is asked for one byte
// first, so the buffer is refilled if nothing unread is left.
uint8_t Blake2Generator::getByte() {
	checkData(sizeof(uint8_t));
	const uint8_t next = data[dataIndex];
	++dataIndex;
	return next;
}
// Returns the next 32-bit value of the stream, decoded with load32 from the
// four bytes at the current read position. checkData() guarantees that four
// unread bytes are available before they are consumed.
uint32_t Blake2Generator::getInt32() {
	constexpr size_t width = sizeof(uint32_t);
	checkData(width);
	const auto value = load32(&data[dataIndex]);
	dataIndex += width;
	return value;
}
// Makes sure `bytesNeeded` unread bytes are available in the buffer.
// If the request would run past the end of `data`, the buffer is replaced by
// the blake2b hash of its own current contents (no key is supplied) and the
// read position restarts at 0; any unread tail bytes are dropped.
void Blake2Generator::checkData(const size_t bytesNeeded) {
	const bool enoughBuffered = dataIndex + bytesNeeded <= sizeof(data);
	if (enoughBuffered)
		return;
	// the same buffer is passed as both output and input
	blake2b(data, sizeof(data), data, sizeof(data), nullptr, 0);
	dataIndex = 0;
}
}

36
src/Blake2Generator.hpp Normal file
View File

@ -0,0 +1,36 @@
/*
Copyright (c) 2019 tevador
This file is part of RandomX.
RandomX is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
RandomX is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
*/
#pragma once
#include <cstdint>
namespace RandomX {
// Stream of bytes / 32-bit words derived from a seed and a nonce.
// Values are served from a 64-byte internal buffer that is regenerated
// whenever a request does not fit into the unread remainder.
class Blake2Generator {
public:
	// seed:  pointer to the seed bytes copied into the buffer
	// nonce: 32-bit value stored in the last four bytes of the buffer
	Blake2Generator(const void* seed, int nonce);
	// Next 8-bit value of the stream.
	uint8_t getByte();
	// Next 32-bit value of the stream.
	uint32_t getInt32();
private:
	// Ensures the requested number of unread bytes is buffered.
	void checkData(size_t bytesNeeded);
	uint8_t data[64];   // generator state / output buffer
	size_t dataIndex;   // offset of the next unread byte in `data`
};
}

View File

@ -24,7 +24,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
namespace RandomX {
template<bool superscalar>
void CompiledLightVirtualMachine<superscalar>::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) {
void CompiledLightVirtualMachine<superscalar>::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]) {
mem.ds = ds;
datasetRange = (size - RANDOMX_DATASET_SIZE + CacheLineSize) / CacheLineSize;
if(superscalar)
@ -32,8 +32,8 @@ namespace RandomX {
//datasetBasePtr = ds.dataset.memory;
}
template void CompiledLightVirtualMachine<true>::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]);
template void CompiledLightVirtualMachine<false>::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]);
template void CompiledLightVirtualMachine<true>::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]);
template void CompiledLightVirtualMachine<false>::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]);
template<bool superscalar>
void CompiledLightVirtualMachine<superscalar>::initialize() {

View File

@ -39,7 +39,7 @@ namespace RandomX {
_mm_free(ptr);
}
CompiledLightVirtualMachine() {}
void setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) override;
void setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]) override;
void initialize() override;
};
}

View File

@ -29,7 +29,7 @@ namespace RandomX {
CompiledVirtualMachine::CompiledVirtualMachine() {
}
void CompiledVirtualMachine::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) {
void CompiledVirtualMachine::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]) {
mem.ds = ds;
datasetRange = (size - RANDOMX_DATASET_SIZE + CacheLineSize) / CacheLineSize;
datasetBasePtr = ds.dataset.memory;

View File

@ -42,7 +42,7 @@ namespace RandomX {
_mm_free(ptr);
}
CompiledVirtualMachine();
void setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) override;
void setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]) override;
void initialize() override;
virtual void execute() override;
void* getProgram() {

View File

@ -22,7 +22,6 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
#include "InterpretedVirtualMachine.hpp"
#include "dataset.hpp"
#include "Cache.hpp"
#include "LightClientAsyncWorker.hpp"
#include <iostream>
#include <iomanip>
#include <stdexcept>
@ -36,7 +35,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
#ifdef STATS
#include <algorithm>
#endif
#include "LightProgramGenerator.hpp"
#include "superscalarGenerator.hpp"
#ifdef FPUCHECK
constexpr bool fpuCheck = true;
@ -47,7 +46,7 @@ constexpr bool fpuCheck = false;
namespace RandomX {
template<bool superscalar>
void InterpretedVirtualMachine<superscalar>::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) {
void InterpretedVirtualMachine<superscalar>::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]) {
mem.ds = ds;
readDataset = &datasetReadLight;
datasetRange = (size - RANDOMX_DATASET_SIZE + CacheLineSize) / CacheLineSize;
@ -55,8 +54,8 @@ namespace RandomX {
precompileSuperscalar(programs);
}
template void InterpretedVirtualMachine<true>::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]);
template void InterpretedVirtualMachine<false>::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]);
template void InterpretedVirtualMachine<true>::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]);
template void InterpretedVirtualMachine<false>::setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]);
template<bool superscalar>
void InterpretedVirtualMachine<superscalar>::initialize() {
@ -475,7 +474,7 @@ namespace RandomX {
}
template<bool superscalar>
void InterpretedVirtualMachine<superscalar>::executeSuperscalar(int_reg_t(&r)[8], LightProgram& prog, std::vector<uint64_t>& reciprocals) {
void InterpretedVirtualMachine<superscalar>::executeSuperscalar(int_reg_t(&r)[8], SuperscalarProgram& prog, std::vector<uint64_t>& reciprocals) {
for (unsigned j = 0; j < prog.getSize(); ++j) {
Instruction& instr = prog(j);
switch (instr.opcode)
@ -539,7 +538,7 @@ namespace RandomX {
Cache& cache = mem.ds.cache;
for (unsigned i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) {
mixBlock = getMixBlock(registerValue, cache);
LightProgram& prog = superScalarPrograms[i];
SuperscalarProgram& prog = superScalarPrograms[i];
executeSuperscalar(rl, prog, reciprocals);
@ -554,7 +553,7 @@ namespace RandomX {
}
template<bool superscalar>
void InterpretedVirtualMachine<superscalar>::precompileSuperscalar(LightProgram* programs) {
void InterpretedVirtualMachine<superscalar>::precompileSuperscalar(SuperscalarProgram* programs) {
memcpy(superScalarPrograms, programs, sizeof(superScalarPrograms));
reciprocals.clear();
for (unsigned i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) {

View File

@ -70,17 +70,17 @@ namespace RandomX {
}
InterpretedVirtualMachine(bool soft) : softAes(soft) {}
~InterpretedVirtualMachine() {}
void setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) override;
void setDataset(dataset_t ds, uint64_t size, SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]) override;
void initialize() override;
void execute() override;
static void executeSuperscalar(int_reg_t(&r)[8], LightProgram& prog, std::vector<uint64_t>& reciprocals);
static void executeSuperscalar(int_reg_t(&r)[8], SuperscalarProgram& prog, std::vector<uint64_t>& reciprocals);
private:
static InstructionHandler<superscalar> engine[256];
DatasetReadFunc readDataset;
bool softAes;
InstructionByteCode byteCode[RANDOMX_PROGRAM_SIZE];
std::vector<uint64_t> reciprocals;
alignas(64) LightProgram superScalarPrograms[RANDOMX_CACHE_ACCESSES];
alignas(64) SuperscalarProgram superScalarPrograms[RANDOMX_CACHE_ACCESSES];
#ifdef STATS
int count_ADD_64 = 0;
int count_ADD_32 = 0;
@ -128,7 +128,7 @@ namespace RandomX {
int datasetAccess[256] = { 0 };
#endif
void precompileProgram(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
void precompileSuperscalar(LightProgram*);
void precompileSuperscalar(SuperscalarProgram*);
void executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
void executeBytecode(int& i, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
void executeSuperscalar(uint32_t blockNumber, int_reg_t(&r)[8]);

View File

@ -87,7 +87,7 @@ namespace RandomX {
*/
#include "JitCompilerX86-static.hpp"
#include "LightProgramGenerator.hpp"
#include "superscalarGenerator.hpp"
#define NOP_TEST true
@ -261,16 +261,16 @@ namespace RandomX {
template void JitCompilerX86::generateProgramLight<false>(Program& prog);
template<size_t N>
void JitCompilerX86::generateSuperScalarHash(LightProgram(&programs)[N]) {
void JitCompilerX86::generateSuperScalarHash(SuperscalarProgram(&programs)[N]) {
memcpy(code + superScalarHashOffset, codeShhInit, codeSshInitSize);
codePos = superScalarHashOffset + codeSshInitSize;
for (unsigned j = 0; j < N; ++j) {
LightProgram& prog = programs[j];
SuperscalarProgram& prog = programs[j];
for (unsigned i = 0; i < prog.getSize(); ++i) {
Instruction& instr = prog(i);
instr.src %= RegistersCount;
instr.dst %= RegistersCount;
generateCode<LightProgram>(instr, i);
generateCode<SuperscalarProgram>(instr, i);
}
emit(codeShhLoad, codeSshLoadSize);
if (j < N - 1) {
@ -290,7 +290,7 @@ namespace RandomX {
}
template
void JitCompilerX86::generateSuperScalarHash(LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]);
void JitCompilerX86::generateSuperScalarHash(SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES]);
void JitCompilerX86::generateDatasetInitCode() {
memcpy(code, codeDatasetInit, datasetInitSize);
@ -345,7 +345,7 @@ namespace RandomX {
}
template<>
void JitCompilerX86::generateCode<LightProgram>(Instruction& instr, int i) {
void JitCompilerX86::generateCode<SuperscalarProgram>(Instruction& instr, int i) {
switch (instr.opcode)
{
case RandomX::SuperscalarInstructionType::ISUB_R:

View File

@ -27,7 +27,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
namespace RandomX {
class Program;
class LightProgram;
class SuperscalarProgram;
class JitCompilerX86;
typedef void(JitCompilerX86::*InstructionGeneratorX86)(Instruction&, int);
@ -42,7 +42,7 @@ namespace RandomX {
template<bool superscalar>
void generateProgramLight(Program&);
template<size_t N>
void generateSuperScalarHash(LightProgram (&programs)[N]);
void generateSuperScalarHash(SuperscalarProgram (&programs)[N]);
ProgramFunc getProgramFunc() {
return (ProgramFunc)code;
}

View File

@ -1,113 +0,0 @@
/*
Copyright (c) 2019 tevador
This file is part of RandomX.
RandomX is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
RandomX is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
*/
#include "LightClientAsyncWorker.hpp"
#include "dataset.hpp"
#include "Cache.hpp"
namespace RandomX {
// Passes the cache to the ILightClientAsyncWorker base, starts with no output
// buffer and no pending work, and launches workerThread running runWorker()
// on this object. When TRACE is defined the stopwatch `sw` is constructed too.
// NOTE(review): members are initialized in class declaration order, not in the
// order written here; workerThread has to stay the last declared member so the
// thread never runs against uninitialized state - confirm when editing the class.
LightClientAsyncWorker::LightClientAsyncWorker(const Cache& c) : ILightClientAsyncWorker(c), output(nullptr), hasWork(false),
#ifdef TRACE
sw(true),
#endif
workerThread(&LightClientAsyncWorker::runWorker, this) {
}
// Queues a single-block request for the worker thread: the block containing
// `addr` is to be written into the internal line buffer (currentLine).
// The request is published under the mutex and the worker is notified after
// the mutex has been released.
void LightClientAsyncWorker::prepareBlock(addr_t addr) {
	const auto requestedBlock = addr / CacheLineSize;
#ifdef TRACE
	std::cout << sw.getElapsed() << ": prepareBlock-enter " << requestedBlock << std::endl;
#endif
	{
		std::lock_guard<std::mutex> guard(mutex);
		output = currentLine.data();
		startBlock = requestedBlock;
		blockCount = 1;
		hasWork = true;
	}
#ifdef TRACE
	std::cout << sw.getElapsed() << ": prepareBlock-notify " << startBlock << "/" << blockCount << std::endl;
#endif
	notifier.notify_one();
}
// Returns a pointer to the internal line buffer holding the block that
// contains `addr`. If that block is the one most recently requested into the
// internal buffer, the call only waits for the worker to finish (sync());
// otherwise the block is computed synchronously on the calling thread.
const uint64_t* LightClientAsyncWorker::getBlock(addr_t addr) {
#ifdef TRACE
	std::cout << sw.getElapsed() << ": getBlock-enter " << addr / CacheLineSize << std::endl;
#endif
	uint32_t wantedBlock = addr / CacheLineSize;
	const bool alreadyRequested = (wantedBlock == startBlock) && (output == currentLine.data());
	if (alreadyRequested) {
		sync();
	}
	else {
		initBlock(cache, (uint8_t*)currentLine.data(), wantedBlock, RANDOMX_CACHE_ACCESSES / 8);
	}
#ifdef TRACE
	std::cout << sw.getElapsed() << ": getBlock-return " << addr / CacheLineSize << std::endl;
#endif
	return currentLine.data();
}
// Queues a request for the worker thread to produce `blockCount` blocks,
// starting at block index `startBlock`, into the caller-supplied buffer `out`.
// The caller is expected to use sync() to wait for completion.
//   out        - destination buffer owned by the caller
//   startBlock - index of the first block to produce
//   blockCount - number of consecutive blocks requested
void LightClientAsyncWorker::prepareBlocks(void* out, uint32_t startBlock, uint32_t blockCount) {
#ifdef TRACE
	std::cout << sw.getElapsed() << ": prepareBlocks-enter " << startBlock << "/" << blockCount << std::endl;
#endif
	{
		std::lock_guard<std::mutex> lk(mutex);
		this->startBlock = startBlock;
		this->blockCount = blockCount;
		output = out;
		hasWork = true;
	}
	// Notify only after the mutex is released (same as prepareBlock), so the
	// woken worker does not immediately block on a mutex we still hold.
	notifier.notify_one();
}
// Synchronously computes `blockCount` consecutive blocks, beginning with
// block index `startBlock`, on the calling thread. Block i is written to
// `out` at byte offset CacheLineSize * i.
void LightClientAsyncWorker::getBlocks(void* out, uint32_t startBlock, uint32_t blockCount) {
	uint32_t offset = 0;
	while (offset < blockCount) {
		initBlock(cache, (uint8_t*)out + CacheLineSize * offset, startBlock + offset, RANDOMX_CACHE_ACCESSES / 8);
		++offset;
	}
}
// Blocks the caller until the worker has no pending request (hasWork is
// false). Returns immediately if nothing is pending.
void LightClientAsyncWorker::sync() {
	std::unique_lock<std::mutex> lock(mutex);
	// explicit form of the predicate wait: re-check after every wake-up
	while (hasWork) {
		notifier.wait(lock);
	}
}
void LightClientAsyncWorker::runWorker() {
#ifdef TRACE
std::cout << sw.getElapsed() << ": runWorker-enter " << std::endl;
#endif
for (;;) {
std::unique_lock<std::mutex> lk(mutex);
notifier.wait(lk, [this] { return hasWork; });
#ifdef TRACE
std::cout << sw.getElapsed() << ": runWorker-getBlocks " << startBlock << "/" << blockCount << std::endl;
#endif
//getBlocks(output, startBlock, blockCount);
initBlock(cache, (uint8_t*)output, startBlock, RANDOMX_CACHE_ACCESSES / 8);
hasWork = false;
#ifdef TRACE
std::cout << sw.getElapsed() << ": runWorker-finished " << startBlock << "/" << blockCount << std::endl;
#endif
lk.unlock();
notifier.notify_one();
}
}
}

View File

@ -1,57 +0,0 @@
/*
Copyright (c) 2019 tevador
This file is part of RandomX.
RandomX is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
RandomX is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
*/
//#define TRACE
#include "common.hpp"
#include <thread>
#include <mutex>
#include <condition_variable>
#include <array>
#ifdef TRACE
#include "Stopwatch.hpp"
#include <iostream>
#endif
namespace RandomX {
// One dataset line: CacheLineSize bytes stored as 64-bit words.
using DatasetLine = std::array<uint64_t, CacheLineSize / sizeof(uint64_t)>;
// ILightClientAsyncWorker implementation backed by one background thread.
// Requests are published with prepareBlock()/prepareBlocks() and awaited with
// sync(); getBlock()/getBlocks() can also compute blocks on the calling thread.
// NOTE(review): no destructor is declared and workerThread is never joined or
// detached in the visible code - confirm how instances are expected to be
// destroyed.
class LightClientAsyncWorker : public ILightClientAsyncWorker {
public:
LightClientAsyncWorker(const Cache&);
// Requests the block containing the given address into the internal line buffer.
void prepareBlock(addr_t) final;
// Requests blockCount blocks starting at startBlock into the caller's buffer.
void prepareBlocks(void* out, uint32_t startBlock, uint32_t blockCount) final;
// Returns the internal line buffer filled with the block containing the address.
const uint64_t* getBlock(addr_t) final;
// Computes blockCount blocks starting at startBlock into `out` synchronously.
void getBlocks(void* out, uint32_t startBlock, uint32_t blockCount) final;
// Waits until the worker has no pending request.
void sync() final;
private:
// Thread entry point; processes published requests in an endless loop.
void runWorker();
// Used in both directions: request published -> worker, request done -> sync().
std::condition_variable notifier;
// Guards the request state below (output, startBlock, blockCount, hasWork).
std::mutex mutex;
// Internal single-line buffer used by prepareBlock()/getBlock().
alignas(16) DatasetLine currentLine;
// Destination of the current request.
void* output;
uint32_t startBlock, blockCount;
// True while a published request has not been completed by the worker.
bool hasWork;
#ifdef TRACE
Stopwatch sw;
#endif
// Declared last so that it is initialized (and the thread started) after all
// other members.
std::thread workerThread;
};
}

View File

@ -1,58 +0,0 @@
/*
Copyright (c) 2019 tevador
This file is part of RandomX.
RandomX is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
RandomX is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
*/
#include "Program.hpp"
namespace RandomX {
// Intel Ivy Bridge reference
// Numeric identifiers of the superscalar instruction types (0..13).
// COUNT is the number of valid types; INVALID (-1) denotes no valid type.
// The trailing comment on each constant follows the column header on the
// namespace line: uOPs (decode), execution ports, latency, code size
// (code size is presumably in bytes - confirm).
namespace SuperscalarInstructionType { //uOPs (decode) execution ports latency code size
constexpr int ISUB_R = 0; //1 p015 1 3
constexpr int IXOR_R = 1; //1 p015 1 3
constexpr int IADD_RS = 2; //1 p01 1 4
constexpr int IMUL_R = 3; //1 p1 3 4
constexpr int IROR_C = 4; //1 p05 1 4
constexpr int IADD_C7 = 5; //1 p015 1 7
constexpr int IXOR_C7 = 6; //1 p015 1 7
constexpr int IADD_C8 = 7; //1+0 p015 1 8
constexpr int IXOR_C8 = 8; //1+0 p015 1 8
constexpr int IADD_C9 = 9; //1+0 p015 1 9
constexpr int IXOR_C9 = 10; //1+0 p015 1 9
constexpr int IMULH_R = 11; //1+2+1 0+(p1,p5)+0 3 3+3+3
constexpr int ISMULH_R = 12; //1+2+1 0+(p1,p5)+0 3 3+3+3
constexpr int IMUL_RCP = 13; //1+1 p015+p1 4 10+4
constexpr int COUNT = 14;
constexpr int INVALID = -1;
}
// Stream of bytes / 32-bit words derived from a seed and a nonce, served
// from a 64-byte internal buffer.
class Blake2Generator {
public:
	// seed:  pointer to the seed bytes
	// nonce: 32-bit value combined with the seed
	Blake2Generator(const void* seed, int nonce);
	// Next 8-bit value of the stream.
	uint8_t getByte();
	// Next 32-bit value of the stream.
	uint32_t getInt32();
private:
	// Ensures the requested number of unread bytes is buffered.
	void checkData(size_t bytesNeeded);
	uint8_t data[64];   // generator state / output buffer
	size_t dataIndex;   // offset of the next unread byte in `data`
};
// Generates a superscalar program into `prog`, drawing values from `gen`.
// NOTE(review): the meaning of the returned double is not visible from this
// declaration - confirm against the definition.
double generateSuperscalar(LightProgram& prog, Blake2Generator& gen);
}

View File

@ -53,12 +53,14 @@ namespace RandomX {
Instruction programBuffer[RANDOMX_PROGRAM_SIZE];
};
class LightProgram {
static_assert(sizeof(Program) % 64 == 0, "Invalid size of class Program");
class SuperscalarProgram {
public:
Instruction& operator()(int pc) {
return programBuffer[pc];
}
friend std::ostream& operator<<(std::ostream& os, const LightProgram& p) {
friend std::ostream& operator<<(std::ostream& os, const SuperscalarProgram& p) {
p.print(os);
return os;
}
@ -74,6 +76,15 @@ namespace RandomX {
void setAddressRegister(uint32_t val) {
addrReg = val;
}
double ipc;
int codeSize;
int macroOps;
int decodeCycles;
int cpuLatency;
int asicLatency;
int mulCount;
int cpuLatencies[8];
int asicLatencies[8];
private:
void print(std::ostream& os) const {
for (unsigned i = 0; i < size; ++i) {
@ -85,6 +96,4 @@ namespace RandomX {
uint32_t size;
int addrReg;
};
static_assert(sizeof(Program) % 64 == 0, "Invalid size of class Program");
}

View File

@ -28,7 +28,7 @@ namespace RandomX {
public:
VirtualMachine();
virtual ~VirtualMachine() {}
virtual void setDataset(dataset_t ds, uint64_t size, LightProgram (&programs)[RANDOMX_CACHE_ACCESSES]) = 0;
virtual void setDataset(dataset_t ds, uint64_t size, SuperscalarProgram (&programs)[RANDOMX_CACHE_ACCESSES]) = 0;
void setScratchpad(void* ptr) {
scratchpad = (uint8_t*)ptr;
}

View File

@ -36,7 +36,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
#include "dataset.hpp"
#include "Cache.hpp"
#include "hashAes1Rx4.hpp"
#include "LightProgramGenerator.hpp"
#include "superscalarGenerator.hpp"
#include "JitCompilerX86.hpp"
const uint8_t seed[32] = { 191, 182, 222, 175, 249, 89, 134, 104, 241, 68, 191, 62, 162, 166, 61, 64, 123, 191, 227, 193, 118, 60, 188, 53, 223, 133, 175, 24, 123, 230, 55, 74 };
@ -226,13 +226,13 @@ int main(int argc, char** argv) {
readOption("--legacy", argc, argv, legacy);
if (genSuperscalar) {
RandomX::LightProgram p;
RandomX::SuperscalarProgram p;
RandomX::Blake2Generator gen(seed, programCount);
RandomX::generateSuperscalar(p, gen);
RandomX::AssemblyGeneratorX86 asmX86;
asmX86.generateAsm(p);
//std::ofstream file("lightProg2.asm");
//asmX86.printCode(std::cout);
asmX86.printCode(std::cout);
return 0;
}
@ -268,7 +268,7 @@ int main(int argc, char** argv) {
const uint64_t cacheSize = (RANDOMX_ARGON_MEMORY + RANDOMX_ARGON_GROWTH * epoch) * RandomX::ArgonBlockSize;
const uint64_t datasetSize = (RANDOMX_DATASET_SIZE + RANDOMX_DS_GROWTH * epoch);
dataset.cache.size = cacheSize;
RandomX::LightProgram programs[RANDOMX_CACHE_ACCESSES];
RandomX::SuperscalarProgram programs[RANDOMX_CACHE_ACCESSES];
std::cout << "RandomX - " << (miningMode ? "mining" : "verification") << " mode" << std::endl;

View File

@ -18,7 +18,6 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
*/
#include <stddef.h>
#include "blake2/blake2.h"
#include "configuration.h"
#include "Program.hpp"
#include "blake2/endian.h"
@ -27,7 +26,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
#include <algorithm>
#include <stdexcept>
#include <iomanip>
#include "LightProgramGenerator.hpp"
#include "superscalarGenerator.hpp"
namespace RandomX {
@ -35,6 +34,7 @@ namespace RandomX {
return type == SuperscalarInstructionType::IMUL_R || type == SuperscalarInstructionType::IMULH_R || type == SuperscalarInstructionType::ISMULH_R || type == SuperscalarInstructionType::IMUL_RCP;
}
//uOPs (micro-ops) are represented only by the execution port they can go to
namespace ExecutionPort {
using type = int;
constexpr type Null = 0;
@ -46,40 +46,9 @@ namespace RandomX {
constexpr type P015 = P0 | P1 | P5;
}
// Seeds the generator state. The whole buffer is zeroed, the first SeedSize
// bytes are copied from `seed`, and `nonce` is written with store32 into the
// last four bytes of the buffer. dataIndex starts at sizeof(data), so the
// first read goes through the refill in checkData().
Blake2Generator::Blake2Generator(const void* seed, int nonce)
	: dataIndex(sizeof(data))
{
	// the nonce occupies the trailing 32-bit word of the buffer
	const size_t nonceOffset = sizeof(data) - sizeof(uint32_t);
	memset(data, 0, sizeof(data));
	memcpy(data, seed, SeedSize);
	store32(&data[nonceOffset], nonce);
}
// Returns the next byte of the stream, refilling the buffer first if no
// unread byte is left.
uint8_t Blake2Generator::getByte() {
	checkData(sizeof(uint8_t));
	const uint8_t next = data[dataIndex];
	++dataIndex;
	return next;
}
// Returns the next 32-bit value of the stream, decoded with load32 from the
// four bytes at the current read position.
uint32_t Blake2Generator::getInt32() {
	constexpr size_t width = sizeof(uint32_t);
	checkData(width);
	const auto value = load32(&data[dataIndex]);
	dataIndex += width;
	return value;
}
// Makes sure `bytesNeeded` unread bytes are available. If the request would
// run past the end of `data`, the buffer is replaced by the blake2b hash of
// its own contents (no key is supplied) and reading restarts at offset 0.
void Blake2Generator::checkData(const size_t bytesNeeded) {
	const bool enoughBuffered = dataIndex + bytesNeeded <= sizeof(data);
	if (enoughBuffered)
		return;
	// the same buffer is passed as both output and input
	blake2b(data, sizeof(data), data, sizeof(data), nullptr, 0);
	dataIndex = 0;
}
// Per-register bookkeeping record. A freshly constructed record has zero
// latency and value, and -1 in both "last operation" fields.
class RegisterInfo {
public:
	// user-provided on purpose: initial values come from the member initializers below
	RegisterInfo() {}
	int latency = 0;
	int lastOpGroup = -1;
	int lastOpPar = -1;
	int value = 0;
};
//Macro-operation as output of the x86 decoder
//Usually one macro-op = one x86 instruction, but 2 instructions are sometimes fused into 1 macro-op
//Macro-op can consist of 1 or 2 uOPs.
class MacroOp {
public:
MacroOp(const char* name, int size)
@ -137,10 +106,7 @@ namespace RandomX {
int latency_;
ExecutionPort::type uop1_;
ExecutionPort::type uop2_;
int cycle_;
bool dependent_ = false;
MacroOp* depDst_ = nullptr;
MacroOp* depSrc_ = nullptr;
};
//Size: 3 bytes
@ -174,7 +140,7 @@ namespace RandomX {
const MacroOp ISMULH_R_ops_array[] = { MacroOp::Mov_rr, MacroOp::Imul_r, MacroOp::Mov_rr };
const MacroOp IMUL_RCP_ops_array[] = { MacroOp::Mov_ri64, MacroOp(MacroOp::Imul_rr, true) };
class LightInstructionInfo {
class SuperscalarInstructionInfo {
public:
const char* getName() const {
return name_;
@ -203,21 +169,21 @@ namespace RandomX {
int getSrcOp() const {
return srcOp_;
}
static const LightInstructionInfo ISUB_R;
static const LightInstructionInfo IXOR_R;
static const LightInstructionInfo IADD_RS;
static const LightInstructionInfo IMUL_R;
static const LightInstructionInfo IROR_C;
static const LightInstructionInfo IADD_C7;
static const LightInstructionInfo IXOR_C7;
static const LightInstructionInfo IADD_C8;
static const LightInstructionInfo IXOR_C8;
static const LightInstructionInfo IADD_C9;
static const LightInstructionInfo IXOR_C9;
static const LightInstructionInfo IMULH_R;
static const LightInstructionInfo ISMULH_R;
static const LightInstructionInfo IMUL_RCP;
static const LightInstructionInfo NOP;
static const SuperscalarInstructionInfo ISUB_R;
static const SuperscalarInstructionInfo IXOR_R;
static const SuperscalarInstructionInfo IADD_RS;
static const SuperscalarInstructionInfo IMUL_R;
static const SuperscalarInstructionInfo IROR_C;
static const SuperscalarInstructionInfo IADD_C7;
static const SuperscalarInstructionInfo IXOR_C7;
static const SuperscalarInstructionInfo IADD_C8;
static const SuperscalarInstructionInfo IXOR_C8;
static const SuperscalarInstructionInfo IADD_C9;
static const SuperscalarInstructionInfo IXOR_C9;
static const SuperscalarInstructionInfo IMULH_R;
static const SuperscalarInstructionInfo ISMULH_R;
static const SuperscalarInstructionInfo IMUL_RCP;
static const SuperscalarInstructionInfo NOP;
private:
const char* name_;
int type_;
@ -227,14 +193,14 @@ namespace RandomX {
int dstOp_ = 0;
int srcOp_;
LightInstructionInfo(const char* name)
SuperscalarInstructionInfo(const char* name)
: name_(name), type_(-1), latency_(0) {}
LightInstructionInfo(const char* name, int type, const MacroOp& op, int srcOp)
SuperscalarInstructionInfo(const char* name, int type, const MacroOp& op, int srcOp)
: name_(name), type_(type), latency_(op.getLatency()), srcOp_(srcOp) {
ops_.push_back(MacroOp(op));
}
template <size_t N>
LightInstructionInfo(const char* name, int type, const MacroOp(&arr)[N], int resultOp, int dstOp, int srcOp)
SuperscalarInstructionInfo(const char* name, int type, const MacroOp(&arr)[N], int resultOp, int dstOp, int srcOp)
: name_(name), type_(type), latency_(0), resultOp_(resultOp), dstOp_(dstOp), srcOp_(srcOp) {
for (unsigned i = 0; i < N; ++i) {
ops_.push_back(MacroOp(arr[i]));
@ -244,24 +210,34 @@ namespace RandomX {
}
};
const LightInstructionInfo LightInstructionInfo::ISUB_R = LightInstructionInfo("ISUB_R", SuperscalarInstructionType::ISUB_R, MacroOp::Sub_rr, 0);
const LightInstructionInfo LightInstructionInfo::IXOR_R = LightInstructionInfo("IXOR_R", SuperscalarInstructionType::IXOR_R, MacroOp::Xor_rr, 0);
const LightInstructionInfo LightInstructionInfo::IADD_RS = LightInstructionInfo("IADD_RS", SuperscalarInstructionType::IADD_RS, MacroOp::Lea_sib, 0);
const LightInstructionInfo LightInstructionInfo::IMUL_R = LightInstructionInfo("IMUL_R", SuperscalarInstructionType::IMUL_R, MacroOp::Imul_rr, 0);
const LightInstructionInfo LightInstructionInfo::IROR_C = LightInstructionInfo("IROR_C", SuperscalarInstructionType::IROR_C, MacroOp::Ror_ri, -1);
const SuperscalarInstructionInfo SuperscalarInstructionInfo::ISUB_R = SuperscalarInstructionInfo("ISUB_R", SuperscalarInstructionType::ISUB_R, MacroOp::Sub_rr, 0);
const SuperscalarInstructionInfo SuperscalarInstructionInfo::IXOR_R = SuperscalarInstructionInfo("IXOR_R", SuperscalarInstructionType::IXOR_R, MacroOp::Xor_rr, 0);
const SuperscalarInstructionInfo SuperscalarInstructionInfo::IADD_RS = SuperscalarInstructionInfo("IADD_RS", SuperscalarInstructionType::IADD_RS, MacroOp::Lea_sib, 0);
const SuperscalarInstructionInfo SuperscalarInstructionInfo::IMUL_R = SuperscalarInstructionInfo("IMUL_R", SuperscalarInstructionType::IMUL_R, MacroOp::Imul_rr, 0);
const SuperscalarInstructionInfo SuperscalarInstructionInfo::IROR_C = SuperscalarInstructionInfo("IROR_C", SuperscalarInstructionType::IROR_C, MacroOp::Ror_ri, -1);
const LightInstructionInfo LightInstructionInfo::IADD_C7 = LightInstructionInfo("IADD_C7", SuperscalarInstructionType::IADD_C7, MacroOp::Add_ri, -1);
const LightInstructionInfo LightInstructionInfo::IXOR_C7 = LightInstructionInfo("IXOR_C7", SuperscalarInstructionType::IXOR_C7, MacroOp::Xor_ri, -1);
const LightInstructionInfo LightInstructionInfo::IADD_C8 = LightInstructionInfo("IADD_C8", SuperscalarInstructionType::IADD_C8, MacroOp::Add_ri, -1);
const LightInstructionInfo LightInstructionInfo::IXOR_C8 = LightInstructionInfo("IXOR_C8", SuperscalarInstructionType::IXOR_C8, MacroOp::Xor_ri, -1);
const LightInstructionInfo LightInstructionInfo::IADD_C9 = LightInstructionInfo("IADD_C9", SuperscalarInstructionType::IADD_C9, MacroOp::Add_ri, -1);
const LightInstructionInfo LightInstructionInfo::IXOR_C9 = LightInstructionInfo("IXOR_C9", SuperscalarInstructionType::IXOR_C9, MacroOp::Xor_ri, -1);
const SuperscalarInstructionInfo SuperscalarInstructionInfo::IADD_C7 = SuperscalarInstructionInfo("IADD_C7", SuperscalarInstructionType::IADD_C7, MacroOp::Add_ri, -1);
const SuperscalarInstructionInfo SuperscalarInstructionInfo::IXOR_C7 = SuperscalarInstructionInfo("IXOR_C7", SuperscalarInstructionType::IXOR_C7, MacroOp::Xor_ri, -1);
const SuperscalarInstructionInfo SuperscalarInstructionInfo::IADD_C8 = SuperscalarInstructionInfo("IADD_C8", SuperscalarInstructionType::IADD_C8, MacroOp::Add_ri, -1);
const SuperscalarInstructionInfo SuperscalarInstructionInfo::IXOR_C8 = SuperscalarInstructionInfo("IXOR_C8", SuperscalarInstructionType::IXOR_C8, MacroOp::Xor_ri, -1);
const SuperscalarInstructionInfo SuperscalarInstructionInfo::IADD_C9 = SuperscalarInstructionInfo("IADD_C9", SuperscalarInstructionType::IADD_C9, MacroOp::Add_ri, -1);
const SuperscalarInstructionInfo SuperscalarInstructionInfo::IXOR_C9 = SuperscalarInstructionInfo("IXOR_C9", SuperscalarInstructionType::IXOR_C9, MacroOp::Xor_ri, -1);
const LightInstructionInfo LightInstructionInfo::IMULH_R = LightInstructionInfo("IMULH_R", SuperscalarInstructionType::IMULH_R, IMULH_R_ops_array, 1, 0, 1);
const LightInstructionInfo LightInstructionInfo::ISMULH_R = LightInstructionInfo("ISMULH_R", SuperscalarInstructionType::ISMULH_R, ISMULH_R_ops_array, 1, 0, 1);
const LightInstructionInfo LightInstructionInfo::IMUL_RCP = LightInstructionInfo("IMUL_RCP", SuperscalarInstructionType::IMUL_RCP, IMUL_RCP_ops_array, 1, 1, -1);
const SuperscalarInstructionInfo SuperscalarInstructionInfo::IMULH_R = SuperscalarInstructionInfo("IMULH_R", SuperscalarInstructionType::IMULH_R, IMULH_R_ops_array, 1, 0, 1);
const SuperscalarInstructionInfo SuperscalarInstructionInfo::ISMULH_R = SuperscalarInstructionInfo("ISMULH_R", SuperscalarInstructionType::ISMULH_R, ISMULH_R_ops_array, 1, 0, 1);
const SuperscalarInstructionInfo SuperscalarInstructionInfo::IMUL_RCP = SuperscalarInstructionInfo("IMUL_RCP", SuperscalarInstructionType::IMUL_RCP, IMUL_RCP_ops_array, 1, 1, -1);
const LightInstructionInfo LightInstructionInfo::NOP = LightInstructionInfo("NOP");
const SuperscalarInstructionInfo SuperscalarInstructionInfo::NOP = SuperscalarInstructionInfo("NOP");
//Decoder buffer layouts: some of the possible ways to split a 16-byte window into 3 or 4 x86 instructions.
//Each array lists the slot sizes in bytes, in order; the sizes of every layout add up to 16.
//RandomX uses instructions with a native size of 3 (sub, xor, mul, mov), 4 (lea, mul), 7 (xor, add immediate) or 10 bytes (mov 64-bit immediate).
//Slots with sizes of 8 or 9 bytes need to be padded with a nop instruction.
const int buffer0[] = { 4, 8, 4 };
const int buffer1[] = { 7, 3, 3, 3 };
const int buffer2[] = { 3, 7, 3, 3 };
const int buffer3[] = { 4, 9, 3 };
const int buffer4[] = { 4, 4, 4, 4 };
const int buffer5[] = { 3, 3, 10 };
class DecoderBuffer {
public:
@ -318,16 +294,6 @@ namespace RandomX {
}
};
//these are some of the options how to split a 16-byte window into 3 or 4 x86 instructions.
//RandomX uses instructions with a native size of 3 (sub, xor, mul, mov), 4 (lea, mul), 7 (xor, add immediate) or 10 bytes (mov 64-bit immediate).
//Slots with sizes of 8 or 9 bytes need to be padded with a nop instruction.
const int buffer0[] = { 4, 8, 4 };
const int buffer1[] = { 7, 3, 3, 3 };
const int buffer2[] = { 3, 7, 3, 3 };
const int buffer3[] = { 4, 9, 3 };
const int buffer4[] = { 4, 4, 4, 4 };
const int buffer5[] = { 3, 3, 10 };
const DecoderBuffer DecoderBuffer::decodeBuffer484 = DecoderBuffer("4,8,4", 0, buffer0);
const DecoderBuffer DecoderBuffer::decodeBuffer7333 = DecoderBuffer("7,3,3,3", 1, buffer1);
const DecoderBuffer DecoderBuffer::decodeBuffer3733 = DecoderBuffer("3,7,3,3", 2, buffer2);
@ -344,13 +310,13 @@ namespace RandomX {
const DecoderBuffer DecoderBuffer::Default = DecoderBuffer();
const LightInstructionInfo* slot_3[] = { &LightInstructionInfo::ISUB_R, &LightInstructionInfo::IXOR_R };
const LightInstructionInfo* slot_3L[] = { &LightInstructionInfo::ISUB_R, &LightInstructionInfo::IXOR_R, &LightInstructionInfo::IMULH_R, &LightInstructionInfo::ISMULH_R };
const LightInstructionInfo* slot_4[] = { &LightInstructionInfo::IROR_C, &LightInstructionInfo::IADD_RS };
const LightInstructionInfo* slot_7[] = { &LightInstructionInfo::IXOR_C7, &LightInstructionInfo::IADD_C7 };
const LightInstructionInfo* slot_8[] = { &LightInstructionInfo::IXOR_C8, &LightInstructionInfo::IADD_C8 };
const LightInstructionInfo* slot_9[] = { &LightInstructionInfo::IXOR_C9, &LightInstructionInfo::IADD_C9 };
const LightInstructionInfo* slot_10 = &LightInstructionInfo::IMUL_RCP;
//Candidate instructions for each decoder slot; the number in the name is the slot size in bytes.
//slot_3L holds the slot_3 candidates plus IMULH_R and ISMULH_R.
//NOTE(review): the 'L' suffix presumably stands for "last slot" - confirm at the place where these tables are indexed.
//slot_10 has a single candidate (IMUL_RCP), so it is a plain pointer rather than an array.
const SuperscalarInstructionInfo* slot_3[] = { &SuperscalarInstructionInfo::ISUB_R, &SuperscalarInstructionInfo::IXOR_R };
const SuperscalarInstructionInfo* slot_3L[] = { &SuperscalarInstructionInfo::ISUB_R, &SuperscalarInstructionInfo::IXOR_R, &SuperscalarInstructionInfo::IMULH_R, &SuperscalarInstructionInfo::ISMULH_R };
const SuperscalarInstructionInfo* slot_4[] = { &SuperscalarInstructionInfo::IROR_C, &SuperscalarInstructionInfo::IADD_RS };
const SuperscalarInstructionInfo* slot_7[] = { &SuperscalarInstructionInfo::IXOR_C7, &SuperscalarInstructionInfo::IADD_C7 };
const SuperscalarInstructionInfo* slot_8[] = { &SuperscalarInstructionInfo::IXOR_C8, &SuperscalarInstructionInfo::IADD_C8 };
const SuperscalarInstructionInfo* slot_9[] = { &SuperscalarInstructionInfo::IXOR_C9, &SuperscalarInstructionInfo::IADD_C9 };
const SuperscalarInstructionInfo* slot_10 = &SuperscalarInstructionInfo::IMUL_RCP;
static bool selectRegister(std::vector<int>& availableRegisters, Blake2Generator& gen, int& reg) {
int index;
@ -367,9 +333,19 @@ namespace RandomX {
return true;
}
class LightInstruction {
//Scheduling state the generator keeps for one register.
class RegisterInfo {
public:
	RegisterInfo() = default;

	//a register is eligible as a destination once latency <= the scheduling cycle
	int latency = 0;
	//operation group of the last instruction applied to this register (-1 = none yet)
	int lastOpGroup = -1;
	//group parameter of that last instruction (-1 = none yet)
	int lastOpPar = -1;
	int value = 0;
};
//"SuperscalarInstruction" consists of one or more macro-ops
class SuperscalarInstruction {
public:
void toInstr(Instruction& instr) { //translate to a RandomX instruction format
instr.opcode = getType();
instr.dst = dst_;
instr.src = src_ >= 0 ? src_ : dst_;
@ -392,7 +368,7 @@ namespace RandomX {
case 4:
//if this is the 4-4-4-4 buffer, issue multiplications as the first 3 instructions
if (fetchType == 4 && !isLast) {
create(&LightInstructionInfo::IMUL_R, gen);
create(&SuperscalarInstructionInfo::IMUL_R, gen);
}
else {
create(slot_4[gen.getByte() & 1], gen);
@ -415,7 +391,7 @@ namespace RandomX {
}
}
void create(const LightInstructionInfo* info, Blake2Generator& gen) {
void create(const SuperscalarInstructionInfo* info, Blake2Generator& gen) {
info_ = info;
reset();
switch (info->getType())
@ -445,7 +421,7 @@ namespace RandomX {
mod_ = 0;
imm32_ = 0;
opGroup_ = SuperscalarInstructionType::IMUL_R;
opGroupPar_ = -1;
groupParIsSource_ = true;
} break;
case SuperscalarInstructionType::IROR_C: {
@ -505,18 +481,22 @@ namespace RandomX {
}
}
bool selectDestination(int cycle, RegisterInfo (&registers)[8], Blake2Generator& gen) {
bool selectDestination(int cycle, bool allowChainedMul, RegisterInfo (&registers)[8], Blake2Generator& gen) {
/*if (allowChainedMultiplication && opGroup_ == SuperscalarInstructionType::IMUL_R)
std::cout << "Selecting destination with chained MUL enabled" << std::endl;*/
std::vector<int> availableRegisters;
//Conditions for the destination register:
// * value must be ready at the required cycle
// * cannot be the same as the source register unless the instruction allows it
// - this avoids optimizable instructions such as "xor r, r" or "sub r, r"
// * register cannot be multiplied twice in a row unless allowChainedMul is true
// - this avoids accumulation of trailing zeroes in registers due to excessive multiplication
// - allowChainedMul is set to true if an attempt to find source/destination registers failed (this is quite rare, but prevents a catastrophic failure of the generator)
// * either the last instruction applied to the register or its source must be different than this instruction
// - this avoids optimizable instruction sequences such as "xor r1, r2; xor r1, r2" or "ror r, C1; ror r, C2" or "add r, C1; add r, C2"
// - it also avoids accumulation of trailing zeroes in registers due to excessive multiplication
// * register r5 cannot be the destination of the IADD_RS instruction (limitation of the x86 lea instruction)
for (unsigned i = 0; i < 8; ++i) {
if (registers[i].latency <= cycle && (canReuse_ || i != src_) && (registers[i].lastOpGroup != opGroup_ || registers[i].lastOpPar != opGroupPar_) && (info_->getType() != SuperscalarInstructionType::IADD_RS || i != LimitedAddressRegister))
if (registers[i].latency <= cycle && (canReuse_ || i != src_) && (allowChainedMul || opGroup_ != SuperscalarInstructionType::IMUL_R || registers[i].lastOpGroup != SuperscalarInstructionType::IMUL_R) && (registers[i].lastOpGroup != opGroup_ || registers[i].lastOpPar != opGroupPar_) && (info_->getType() != SuperscalarInstructionType::IADD_RS || i != LimitedAddressRegister))
availableRegisters.push_back(i);
}
return selectRegister(availableRegisters, gen, dst_);
@ -560,14 +540,14 @@ namespace RandomX {
return opGroupPar_;
}
const LightInstructionInfo& getInfo() const {
const SuperscalarInstructionInfo& getInfo() const {
return *info_;
}
static const LightInstruction Null;
static const SuperscalarInstruction Null;
private:
const LightInstructionInfo* info_;
const SuperscalarInstructionInfo* info_;
int src_ = -1;
int dst_ = -1;
int mod_;
@ -582,15 +562,16 @@ namespace RandomX {
canReuse_ = groupParIsSource_ = false;
}
LightInstruction(const LightInstructionInfo* info) : info_(info) {
SuperscalarInstruction(const SuperscalarInstructionInfo* info) : info_(info) {
}
};
const LightInstruction LightInstruction::Null = LightInstruction(&LightInstructionInfo::NOP);
const SuperscalarInstruction SuperscalarInstruction::Null = SuperscalarInstruction(&SuperscalarInstructionInfo::NOP);
constexpr int CYCLE_MAP_SIZE = RANDOMX_SUPERSCALAR_LATENCY + 3;
constexpr int CYCLE_MAP_SIZE = RANDOMX_SUPERSCALAR_LATENCY + 4;
constexpr int LOOK_FORWARD_CYCLES = 4;
constexpr int MAX_THROWAWAY_COUNT = 256;
#ifndef _DEBUG
constexpr bool TRACE = false;
constexpr bool INFO = false;
@ -602,7 +583,7 @@ namespace RandomX {
template<bool commit>
static int scheduleUop(ExecutionPort::type uop, ExecutionPort::type(&portBusy)[CYCLE_MAP_SIZE][3], int cycle) {
//The scheduling here is done optimistically by checking port availability in order P5 -> P0 -> P1 to not overload
//P1 (multiplication port) by instructions that can go to any port.
//port P1 (multiplication) by instructions that can go to any port.
for (; cycle < CYCLE_MAP_SIZE; ++cycle) {
if ((uop & ExecutionPort::P5) != 0 && !portBusy[cycle][2]) {
if (commit) {
@ -666,14 +647,14 @@ namespace RandomX {
return -1;
}
double generateSuperscalar(LightProgram& prog, Blake2Generator& gen) {
void generateSuperscalar(SuperscalarProgram& prog, Blake2Generator& gen) {
ExecutionPort::type portBusy[CYCLE_MAP_SIZE][3];
memset(portBusy, 0, sizeof(portBusy));
RegisterInfo registers[8];
const DecoderBuffer* decodeBuffer = &DecoderBuffer::Default;
LightInstruction currentInstruction = LightInstruction::Null;
SuperscalarInstruction currentInstruction = SuperscalarInstruction::Null;
int macroOpIndex = 0;
int codeSize = 0;
int macroOpCount = 0;
@ -719,7 +700,9 @@ namespace RandomX {
int scheduleCycle = scheduleMop<false>(mop, portBusy, cycle, depCycle);
if (scheduleCycle < 0) {
/*if (TRACE)*/ std::cout << "Unable to map operation '" << mop.getName() << "' to execution port (cycle " << cycle << ")" << std::endl;
return 0;
//__debugbreak();
portsSaturated = true;
break;
}
//find a source register (if applicable) that will be ready when this instruction executes
@ -737,20 +720,20 @@ namespace RandomX {
throwAwayCount++;
macroOpIndex = currentInstruction.getInfo().getSize();
if (TRACE) std::cout << "; THROW away " << currentInstruction.getInfo().getName() << std::endl;
//cycle = topCycle;
continue;
}
//abort this decode buffer
/*if (TRACE)*/ std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - source registers not available" << std::endl;
currentInstruction = LightInstruction::Null;
/*if (TRACE)*/ std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - source registers not available for operation " << currentInstruction.getInfo().getName() << std::endl;
currentInstruction = SuperscalarInstruction::Null;
break;
}
if (TRACE) std::cout << "; src = r" << currentInstruction.getSource() << std::endl;
}
throwAwayCount = 0;
//find a destination register that will be ready when this instruction executes
if (macroOpIndex == currentInstruction.getInfo().getDstOp()) {
int forward;
for (forward = 0; forward < LOOK_FORWARD_CYCLES && !currentInstruction.selectDestination(scheduleCycle, registers, gen); ++forward) {
for (forward = 0; forward < LOOK_FORWARD_CYCLES && !currentInstruction.selectDestination(scheduleCycle, throwAwayCount > 0, registers, gen); ++forward) {
if (TRACE) std::cout << "; dst STALL at cycle " << cycle << std::endl;
++scheduleCycle;
++cycle;
@ -760,16 +743,18 @@ namespace RandomX {
throwAwayCount++;
macroOpIndex = currentInstruction.getInfo().getSize();
if (TRACE) std::cout << "; THROW away " << currentInstruction.getInfo().getName() << std::endl;
//cycle = topCycle;
continue;
}
//abort this decode buffer
/*if (TRACE)*/ std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - destination registers not available" << std::endl;
currentInstruction = LightInstruction::Null;
currentInstruction = SuperscalarInstruction::Null;
break;
}
if (TRACE) std::cout << "; dst = r" << currentInstruction.getDestination() << std::endl;
}
throwAwayCount = 0;
//recalculate when the instruction can be scheduled for execution based on operand availability
scheduleCycle = scheduleMop<true>(mop, portBusy, scheduleCycle, scheduleCycle);
@ -809,67 +794,53 @@ namespace RandomX {
++cycle;
}
if(INFO) std::cout << "; ALU port utilization:" << std::endl;
if (INFO) std::cout << "; (* = in use, _ = idle)" << std::endl;
int portCycles = 0;
for (int i = 0; i < CYCLE_MAP_SIZE; ++i) {
//std::cout << "; " << std::setw(3) << i << " ";
for (int j = 0; j < 3; ++j) {
//std::cout << (portBusy[i][j] ? '*' : '_');
portCycles += !!portBusy[i][j];
}
//std::cout << std::endl;
}
double ipc = (macroOpCount / (double)retireCycle);
if (INFO) std::cout << "; code size " << codeSize << " bytes" << std::endl;
if (INFO) std::cout << "; x86 macro-ops: " << macroOpCount << std::endl;
if (INFO) std::cout << "; fetch cycles: " << decodeCycle << std::endl;
if (INFO) std::cout << "; RandomX instructions: " << programSize << std::endl;
if (INFO) std::cout << "; Execution time: " << retireCycle << " cycles" << std::endl;
if (INFO) std::cout << "; IPC = " << ipc << std::endl;
if (INFO) std::cout << "; Port-cycles: " << portCycles << std::endl;
if (INFO) std::cout << "; Multiplications: " << mulCount << std::endl;
int asicLatency[8];
memset(asicLatency, 0, sizeof(asicLatency));
memset(prog.asicLatencies, 0, sizeof(prog.asicLatencies));
//Calculate ASIC latency:
//Assumes 1 cycle latency for all operations and unlimited parallelization.
for (int i = 0; i < programSize; ++i) {
Instruction& instr = prog(i);
int latDst = asicLatency[instr.dst] + 1;
int latSrc = instr.dst != instr.src ? asicLatency[instr.src] + 1 : 0;
asicLatency[instr.dst] = std::max(latDst, latSrc);
int latDst = prog.asicLatencies[instr.dst] + 1;
int latSrc = instr.dst != instr.src ? prog.asicLatencies[instr.src] + 1 : 0;
prog.asicLatencies[instr.dst] = std::max(latDst, latSrc);
}
//address register is the register with the highest ASIC latency
int asicLatencyMax = 0;
int addressReg = 0;
for (int i = 0; i < 8; ++i) {
if (asicLatency[i] > asicLatencyMax) {
asicLatencyMax = asicLatency[i];
if (prog.asicLatencies[i] > asicLatencyMax) {
asicLatencyMax = prog.asicLatencies[i];
addressReg = i;
}
}
if (INFO) std::cout << "; ASIC latency: " << asicLatencyMax << std::endl;
if (INFO) {
std::cout << "; ASIC latency:" << std::endl;
for (int i = 0; i < 8; ++i) {
std::cout << "; r" << i << " = " << asicLatency[i] << std::endl;
}
if (INFO) std::cout << "; CPU latency:" << std::endl;
for (int i = 0; i < 8; ++i) {
std::cout << "; r" << i << " = " << registers[i].latency << std::endl;
}
prog.cpuLatencies[i] = registers[i].latency;
}
prog.setSize(programSize);
prog.setAddressRegister(addressReg);
return ipc;
prog.cpuLatency = retireCycle;
prog.asicLatency = asicLatencyMax;
prog.codeSize = codeSize;
prog.macroOps = macroOpCount;
prog.decodeCycles = decodeCycle;
prog.ipc = ipc;
prog.mulCount = mulCount;
/*if(INFO) std::cout << "; ALU port utilization:" << std::endl;
if (INFO) std::cout << "; (* = in use, _ = idle)" << std::endl;
int portCycles = 0;
for (int i = 0; i < CYCLE_MAP_SIZE; ++i) {
std::cout << "; " << std::setw(3) << i << " ";
for (int j = 0; j < 3; ++j) {
std::cout << (portBusy[i][j] ? '*' : '_');
portCycles += !!portBusy[i][j];
}
std::cout << std::endl;
}*/
}
}

View File

@ -0,0 +1,47 @@
/*
Copyright (c) 2019 tevador
This file is part of RandomX.
RandomX is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
RandomX is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
*/
#pragma once
#include "Program.hpp"
#include "Blake2Generator.hpp"
namespace RandomX {
// Intel Ivy Bridge reference
// Integer identifiers of the instruction types that can appear in a superscalar program.
// The trailing comment on each constant lists, in this order:
//   number of uOPs (decode), execution ports, latency, code size in bytes, (x86 instruction(s) used).
// For types made of several x86 instructions the per-instruction values are joined with '+'.
namespace SuperscalarInstructionType { //uOPs (decode) execution ports latency code size
constexpr int ISUB_R = 0; //1 p015 1 3 (sub)
constexpr int IXOR_R = 1; //1 p015 1 3 (xor)
constexpr int IADD_RS = 2; //1 p01 1 4 (lea)
constexpr int IMUL_R = 3; //1 p1 3 4 (imul)
constexpr int IROR_C = 4; //1 p05 1 4 (ror)
constexpr int IADD_C7 = 5; //1 p015 1 7 (add)
constexpr int IXOR_C7 = 6; //1 p015 1 7 (xor)
constexpr int IADD_C8 = 7; //1+0 p015 1 7+1 (add+nop)
constexpr int IXOR_C8 = 8; //1+0 p015 1 7+1 (xor+nop)
constexpr int IADD_C9 = 9; //1+0 p015 1 7+2 (add+nop)
constexpr int IXOR_C9 = 10; //1+0 p015 1 7+2 (xor+nop)
constexpr int IMULH_R = 11; //1+2+1 0+(p1,p5)+0 3 3+3+3 (mov+mul+mov)
constexpr int ISMULH_R = 12; //1+2+1 0+(p1,p5)+0 3 3+3+3 (mov+imul+mov)
constexpr int IMUL_RCP = 13; //1+1 p015+p1 4 10+4 (mov+imul)
constexpr int COUNT = 14; //number of instruction types defined above (0..13)
constexpr int INVALID = -1; //lies outside the 0..COUNT-1 range of the types above
}
//Generates a superscalar program into 'prog', taking its random choices from 'gen'.
//Besides the instructions, the definition stores in 'prog' the program size, the address register
//(the register with the highest ASIC latency), the per-register CPU/ASIC latencies and the statistics
//cpuLatency, asicLatency, codeSize, macroOps, decodeCycles, ipc and mulCount.
void generateSuperscalar(SuperscalarProgram& prog, Blake2Generator& gen);
}

View File

@ -20,9 +20,10 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
#include <iostream>
#include <cstdint>
#include <vector>
#include "../LightProgramGenerator.hpp"
#include "../superscalarGenerator.hpp"
#include "../InterpretedVirtualMachine.hpp"
#include "../intrinPortable.h"
#include "../Blake2Generator.hpp"
const uint8_t seed[32] = { 191, 182, 222, 175, 249, 89, 134, 104, 241, 68, 191, 62, 162, 166, 61, 64, 123, 191, 227, 193, 118, 60, 188, 53, 223, 133, 175, 24, 123, 230, 55, 74 };
@ -45,9 +46,9 @@ int main() {
uint64_t rb[8];
memcpy(rb, ra, sizeof rb);
rb[0] ^= (1ULL << bit);
RandomX::LightProgram p;
RandomX::SuperscalarProgram p;
RandomX::Blake2Generator gen(seed, i);
RandomX::generateLightProg2(p, gen);
RandomX::generateSuperscalar(p, gen);
RandomX::InterpretedVirtualMachine<false>::executeSuperscalar(ra, p, dummy);
RandomX::InterpretedVirtualMachine<false>::executeSuperscalar(rb, p, dummy);
uint64_t diff = 0;

View File

@ -21,7 +21,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
#include <cstdint>
#include <vector>
#include <unordered_set>
#include "../LightProgramGenerator.hpp"
#include "../superscalarGenerator.hpp"
#include "../InterpretedVirtualMachine.hpp"
#include "../intrinPortable.h"
#include "../configuration.h"

View File

@ -127,6 +127,7 @@
<ClCompile Include="..\src\argon2_core.c" />
<ClCompile Include="..\src\argon2_ref.c" />
<ClCompile Include="..\src\AssemblyGeneratorX86.cpp" />
<ClCompile Include="..\src\Blake2Generator.cpp" />
<ClCompile Include="..\src\blake2\blake2b.c" />
<ClCompile Include="..\src\Cache.cpp" />
<ClCompile Include="..\src\CompiledLightVirtualMachine.cpp" />
@ -137,8 +138,7 @@
<ClCompile Include="..\src\instructionsPortable.cpp" />
<ClCompile Include="..\src\InterpretedVirtualMachine.cpp" />
<ClCompile Include="..\src\JitCompilerX86.cpp" />
<ClCompile Include="..\src\LightClientAsyncWorker.cpp" />
<ClCompile Include="..\src\LightProgramGenerator.cpp" />
<ClCompile Include="..\src\superscalarGenerator.cpp" />
<ClCompile Include="..\src\main.cpp" />
<ClCompile Include="..\src\reciprocal.c" />
<ClCompile Include="..\src\softAes.cpp" />
@ -153,6 +153,7 @@
<ClInclude Include="..\src\argon2.h" />
<ClInclude Include="..\src\argon2_core.h" />
<ClInclude Include="..\src\AssemblyGeneratorX86.hpp" />
<ClInclude Include="..\src\Blake2Generator.hpp" />
<ClInclude Include="..\src\Cache.hpp" />
<ClInclude Include="..\src\catch.hpp" />
<ClInclude Include="..\src\common.hpp" />
@ -167,8 +168,7 @@
<ClInclude Include="..\src\intrinPortable.h" />
<ClInclude Include="..\src\JitCompilerX86-static.hpp" />
<ClInclude Include="..\src\JitCompilerX86.hpp" />
<ClInclude Include="..\src\LightClientAsyncWorker.hpp" />
<ClInclude Include="..\src\LightProgramGenerator.hpp" />
<ClInclude Include="..\src\superscalarGenerator.hpp" />
<ClInclude Include="..\src\Program.hpp" />
<ClInclude Include="..\src\reciprocal.h" />
<ClInclude Include="..\src\softAes.h" />

View File

@ -54,12 +54,6 @@
<ClCompile Include="..\src\JitCompilerX86.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\src\LightClientAsyncWorker.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\src\LightProgramGenerator.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\src\main.cpp">
<Filter>Source Files</Filter>
</ClCompile>
@ -75,6 +69,12 @@
<ClCompile Include="..\src\blake2\blake2b.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\src\Blake2Generator.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\src\superscalarGenerator.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<MASM Include="..\src\JitCompilerX86-static.asm">
@ -136,12 +136,6 @@
<ClInclude Include="..\src\JitCompilerX86-static.hpp">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\src\LightClientAsyncWorker.hpp">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\src\LightProgramGenerator.hpp">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\src\Program.hpp">
<Filter>Header Files</Filter>
</ClInclude>
@ -166,5 +160,11 @@
<ClInclude Include="..\src\virtualMemory.hpp">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\src\Blake2Generator.hpp">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\src\superscalarGenerator.hpp">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
</Project>

View File

@ -118,6 +118,7 @@
<ItemGroup>
<ClCompile Include="..\src\argon2_core.c" />
<ClCompile Include="..\src\argon2_ref.c" />
<ClCompile Include="..\src\Blake2Generator.cpp" />
<ClCompile Include="..\src\blake2\blake2b.c" />
<ClCompile Include="..\src\Cache.cpp" />
<ClCompile Include="..\src\dataset.cpp" />
@ -125,9 +126,9 @@
<ClCompile Include="..\src\Instruction.cpp" />
<ClCompile Include="..\src\instructionsPortable.cpp" />
<ClCompile Include="..\src\InterpretedVirtualMachine.cpp" />
<ClCompile Include="..\src\LightProgramGenerator.cpp" />
<ClCompile Include="..\src\reciprocal.c" />
<ClCompile Include="..\src\softAes.cpp" />
<ClCompile Include="..\src\superscalarGenerator.cpp" />
<ClCompile Include="..\src\tests\superscalar-avalanche.cpp" />
<ClCompile Include="..\src\VirtualMachine.cpp" />
<ClCompile Include="..\src\virtualMemory.cpp" />

View File

@ -45,9 +45,6 @@
<ClCompile Include="..\src\blake2\blake2b.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\src\LightProgramGenerator.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\src\hashAes1Rx4.cpp">
<Filter>Source Files</Filter>
</ClCompile>
@ -60,6 +57,12 @@
<ClCompile Include="..\src\virtualMemory.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\src\superscalarGenerator.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\src\Blake2Generator.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<MASM Include="..\src\squareHash.asm">

View File

@ -118,6 +118,7 @@
<ItemGroup>
<ClCompile Include="..\src\argon2_core.c" />
<ClCompile Include="..\src\argon2_ref.c" />
<ClCompile Include="..\src\Blake2Generator.cpp" />
<ClCompile Include="..\src\blake2\blake2b.c" />
<ClCompile Include="..\src\Cache.cpp" />
<ClCompile Include="..\src\dataset.cpp" />
@ -125,9 +126,9 @@
<ClCompile Include="..\src\Instruction.cpp" />
<ClCompile Include="..\src\instructionsPortable.cpp" />
<ClCompile Include="..\src\InterpretedVirtualMachine.cpp" />
<ClCompile Include="..\src\LightProgramGenerator.cpp" />
<ClCompile Include="..\src\reciprocal.c" />
<ClCompile Include="..\src\softAes.cpp" />
<ClCompile Include="..\src\superscalarGenerator.cpp" />
<ClCompile Include="..\src\tests\superscalar-init.cpp" />
<ClCompile Include="..\src\VirtualMachine.cpp" />
<ClCompile Include="..\src\virtualMemory.cpp" />

View File

@ -42,9 +42,6 @@
<ClCompile Include="..\src\InterpretedVirtualMachine.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\src\LightProgramGenerator.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\src\reciprocal.c">
<Filter>Source Files</Filter>
</ClCompile>
@ -60,6 +57,12 @@
<ClCompile Include="..\src\virtualMemory.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\src\superscalarGenerator.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\src\Blake2Generator.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<MASM Include="..\src\squareHash.asm">