mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2024-08-15 00:23:14 +00:00
SuperscalarHash interpreter
Linux assembly code
This commit is contained in:
parent
b4c02051fa
commit
2132e5fef5
11 changed files with 310 additions and 74 deletions
5
makefile
5
makefile
|
@ -9,7 +9,7 @@ OBJDIR=obj
|
||||||
LDFLAGS=-lpthread
|
LDFLAGS=-lpthread
|
||||||
CPPSRC=src/argon2_core.c src/Cache.cpp src/divideByConstantCodegen.c src/Instruction.cpp src/JitCompilerX86.cpp src/Program.cpp src/VirtualMachine.cpp src/argon2_ref.c src/CompiledVirtualMachine.cpp src/executeProgram-linux.cpp src/instructionsPortable.cpp src/LightClientAsyncWorker.cpp src/softAes.cpp src/virtualMemory.cpp src/AssemblyGeneratorX86.cpp src/dataset.cpp src/hashAes1Rx4.cpp src/InterpretedVirtualMachine.cpp src/main.cpp src/TestAluFpu.cpp src/blake2/blake2b.c
|
CPPSRC=src/argon2_core.c src/Cache.cpp src/divideByConstantCodegen.c src/Instruction.cpp src/JitCompilerX86.cpp src/Program.cpp src/VirtualMachine.cpp src/argon2_ref.c src/CompiledVirtualMachine.cpp src/executeProgram-linux.cpp src/instructionsPortable.cpp src/LightClientAsyncWorker.cpp src/softAes.cpp src/virtualMemory.cpp src/AssemblyGeneratorX86.cpp src/dataset.cpp src/hashAes1Rx4.cpp src/InterpretedVirtualMachine.cpp src/main.cpp src/TestAluFpu.cpp src/blake2/blake2b.c
|
||||||
TOBJS=$(addprefix $(OBJDIR)/,instructionsPortable.o TestAluFpu.o)
|
TOBJS=$(addprefix $(OBJDIR)/,instructionsPortable.o TestAluFpu.o)
|
||||||
ROBJS=$(addprefix $(OBJDIR)/,argon2_core.o argon2_ref.o AssemblyGeneratorX86.o blake2b.o CompiledVirtualMachine.o CompiledLightVirtualMachine.o dataset.o JitCompilerX86.o instructionsPortable.o Instruction.o InterpretedVirtualMachine.o main.o Program.o softAes.o VirtualMachine.o Cache.o virtualMemory.o reciprocal.o LightClientAsyncWorker.o hashAes1Rx4.o)
|
ROBJS=$(addprefix $(OBJDIR)/,argon2_core.o argon2_ref.o AssemblyGeneratorX86.o blake2b.o CompiledVirtualMachine.o CompiledLightVirtualMachine.o dataset.o JitCompilerX86.o instructionsPortable.o Instruction.o InterpretedVirtualMachine.o main.o softAes.o VirtualMachine.o Cache.o virtualMemory.o reciprocal.o LightClientAsyncWorker.o hashAes1Rx4.o LightProgramGenerator.o)
|
||||||
ifeq ($(PLATFORM),amd64)
|
ifeq ($(PLATFORM),amd64)
|
||||||
ROBJS += $(OBJDIR)/JitCompilerX86-static.o $(OBJDIR)/squareHash.o
|
ROBJS += $(OBJDIR)/JitCompilerX86-static.o $(OBJDIR)/squareHash.o
|
||||||
CXXFLAGS += -maes
|
CXXFLAGS += -maes
|
||||||
|
@ -100,6 +100,9 @@ $(OBJDIR)/InterpretedVirtualMachine.o: $(addprefix $(SRCDIR)/,InterpretedVirtual
|
||||||
$(OBJDIR)/LightClientAsyncWorker.o: $(addprefix $(SRCDIR)/,LightClientAsyncWorker.cpp LightClientAsyncWorker.hpp common.hpp) | $(OBJDIR)
|
$(OBJDIR)/LightClientAsyncWorker.o: $(addprefix $(SRCDIR)/,LightClientAsyncWorker.cpp LightClientAsyncWorker.hpp common.hpp) | $(OBJDIR)
|
||||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/LightClientAsyncWorker.cpp -o $@
|
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/LightClientAsyncWorker.cpp -o $@
|
||||||
|
|
||||||
|
$(OBJDIR)/LightProgramGenerator.o: $(addprefix $(SRCDIR)/,LightProgramGenerator.cpp LightProgramGenerator.hpp Program.hpp blake2/blake2.h blake2/endian.h configuration.h) | $(OBJDIR)
|
||||||
|
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/LightProgramGenerator.cpp -o $@
|
||||||
|
|
||||||
$(OBJDIR)/main.o: $(addprefix $(SRCDIR)/,main.cpp InterpretedVirtualMachine.hpp Stopwatch.hpp blake2/blake2.h VirtualMachine.hpp common.hpp blake2/endian.h Program.hpp Instruction.hpp intrinPortable.h CompiledVirtualMachine.hpp JitCompilerX86.hpp AssemblyGeneratorX86.hpp dataset.hpp Cache.hpp virtualMemory.hpp hashAes1Rx4.hpp softAes.h configuration.h) | $(OBJDIR)
|
$(OBJDIR)/main.o: $(addprefix $(SRCDIR)/,main.cpp InterpretedVirtualMachine.hpp Stopwatch.hpp blake2/blake2.h VirtualMachine.hpp common.hpp blake2/endian.h Program.hpp Instruction.hpp intrinPortable.h CompiledVirtualMachine.hpp JitCompilerX86.hpp AssemblyGeneratorX86.hpp dataset.hpp Cache.hpp virtualMemory.hpp hashAes1Rx4.hpp softAes.h configuration.h) | $(OBJDIR)
|
||||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/main.cpp -o $@
|
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/main.cpp -o $@
|
||||||
|
|
||||||
|
|
|
@ -30,7 +30,7 @@ namespace RandomX {
|
||||||
typedef void(Instruction::*InstructionVisualizer)(std::ostream&) const;
|
typedef void(Instruction::*InstructionVisualizer)(std::ostream&) const;
|
||||||
|
|
||||||
namespace InstructionType {
|
namespace InstructionType {
|
||||||
constexpr int IADD_R = 0;
|
constexpr int IADD_RS = 0;
|
||||||
constexpr int IADD_M = 1;
|
constexpr int IADD_M = 1;
|
||||||
constexpr int IADD_RC = 2;
|
constexpr int IADD_RC = 2;
|
||||||
constexpr int ISUB_R = 3;
|
constexpr int ISUB_R = 3;
|
||||||
|
|
|
@ -36,6 +36,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||||
#ifdef STATS
|
#ifdef STATS
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#endif
|
#endif
|
||||||
|
#include "LightProgramGenerator.hpp"
|
||||||
|
|
||||||
#ifdef FPUCHECK
|
#ifdef FPUCHECK
|
||||||
constexpr bool fpuCheck = true;
|
constexpr bool fpuCheck = true;
|
||||||
|
@ -45,17 +46,20 @@ constexpr bool fpuCheck = false;
|
||||||
|
|
||||||
namespace RandomX {
|
namespace RandomX {
|
||||||
|
|
||||||
InterpretedVirtualMachine::~InterpretedVirtualMachine() {
|
template<bool superscalar>
|
||||||
|
void InterpretedVirtualMachine<superscalar>::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) {
|
||||||
}
|
|
||||||
|
|
||||||
void InterpretedVirtualMachine::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) {
|
|
||||||
mem.ds = ds;
|
mem.ds = ds;
|
||||||
readDataset = &datasetReadLight;
|
readDataset = &datasetReadLight;
|
||||||
datasetRange = (size - RANDOMX_DATASET_SIZE + CacheLineSize) / CacheLineSize;
|
datasetRange = (size - RANDOMX_DATASET_SIZE + CacheLineSize) / CacheLineSize;
|
||||||
|
if(superscalar)
|
||||||
|
precompileSuperscalar(programs);
|
||||||
}
|
}
|
||||||
|
|
||||||
void InterpretedVirtualMachine::initialize() {
|
template void InterpretedVirtualMachine<true>::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]);
|
||||||
|
template void InterpretedVirtualMachine<false>::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]);
|
||||||
|
|
||||||
|
template<bool superscalar>
|
||||||
|
void InterpretedVirtualMachine<superscalar>::initialize() {
|
||||||
VirtualMachine::initialize();
|
VirtualMachine::initialize();
|
||||||
for (unsigned i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) {
|
for (unsigned i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) {
|
||||||
program(i).src %= RegistersCount;
|
program(i).src %= RegistersCount;
|
||||||
|
@ -63,12 +67,19 @@ namespace RandomX {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void InterpretedVirtualMachine::executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
|
template void InterpretedVirtualMachine<true>::initialize();
|
||||||
|
template void InterpretedVirtualMachine<false>::initialize();
|
||||||
|
|
||||||
|
template<bool superscalar>
|
||||||
|
void InterpretedVirtualMachine<superscalar>::executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
|
||||||
for (int ic = 0; ic < RANDOMX_PROGRAM_SIZE; ++ic) {
|
for (int ic = 0; ic < RANDOMX_PROGRAM_SIZE; ++ic) {
|
||||||
executeBytecode(ic, r, f, e, a);
|
executeBytecode(ic, r, f, e, a);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template void InterpretedVirtualMachine<true>::executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
|
||||||
|
template void InterpretedVirtualMachine<false>::executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
|
||||||
|
|
||||||
static void print(int_reg_t r) {
|
static void print(int_reg_t r) {
|
||||||
std::cout << std::hex << std::setw(16) << std::setfill('0') << r << std::endl;
|
std::cout << std::hex << std::setw(16) << std::setfill('0') << r << std::endl;
|
||||||
}
|
}
|
||||||
|
@ -98,14 +109,15 @@ namespace RandomX {
|
||||||
return std::fpclassify(x) == FP_SUBNORMAL;
|
return std::fpclassify(x) == FP_SUBNORMAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
FORCE_INLINE void InterpretedVirtualMachine::executeBytecode(int& ic, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
|
template<bool superscalar>
|
||||||
|
FORCE_INLINE void InterpretedVirtualMachine<superscalar>::executeBytecode(int& ic, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
|
||||||
auto& ibc = byteCode[ic];
|
auto& ibc = byteCode[ic];
|
||||||
if (trace) std::cout << std::dec << std::setw(3) << ic << " " << program(ic);
|
if (trace) std::cout << std::dec << std::setw(3) << ic << " " << program(ic);
|
||||||
//if(trace) printState(r, f, e, a);
|
//if(trace) printState(r, f, e, a);
|
||||||
switch (ibc.type)
|
switch (ibc.type)
|
||||||
{
|
{
|
||||||
case InstructionType::IADD_R: {
|
case InstructionType::IADD_RS: {
|
||||||
*ibc.idst += *ibc.isrc;
|
*ibc.idst += (*ibc.isrc << ibc.shift) + ibc.imm;
|
||||||
} break;
|
} break;
|
||||||
|
|
||||||
case InstructionType::IADD_M: {
|
case InstructionType::IADD_M: {
|
||||||
|
@ -289,7 +301,8 @@ namespace RandomX {
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void InterpretedVirtualMachine::execute() {
|
template<bool superscalar>
|
||||||
|
void InterpretedVirtualMachine<superscalar>::execute() {
|
||||||
int_reg_t r[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
|
int_reg_t r[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
|
||||||
__m128d f[4];
|
__m128d f[4];
|
||||||
__m128d e[4];
|
__m128d e[4];
|
||||||
|
@ -350,11 +363,16 @@ namespace RandomX {
|
||||||
|
|
||||||
mem.mx ^= r[readReg2] ^ r[readReg3];
|
mem.mx ^= r[readReg2] ^ r[readReg3];
|
||||||
mem.mx &= CacheLineAlignMask;
|
mem.mx &= CacheLineAlignMask;
|
||||||
|
if (superscalar) {
|
||||||
|
executeSuperscalar(datasetBase + mem.ma / CacheLineSize, r);
|
||||||
|
}
|
||||||
|
else {
|
||||||
Cache& cache = mem.ds.cache;
|
Cache& cache = mem.ds.cache;
|
||||||
uint64_t datasetLine[CacheLineSize / sizeof(uint64_t)];
|
uint64_t datasetLine[CacheLineSize / sizeof(uint64_t)];
|
||||||
initBlock(cache, (uint8_t*)datasetLine, datasetBase + mem.ma / CacheLineSize, RANDOMX_CACHE_ACCESSES / 8);
|
initBlock(cache, (uint8_t*)datasetLine, datasetBase + mem.ma / CacheLineSize, RANDOMX_CACHE_ACCESSES / 8);
|
||||||
for (int i = 0; i < RegistersCount; ++i)
|
for (int i = 0; i < RegistersCount; ++i)
|
||||||
r[i] ^= datasetLine[i];
|
r[i] ^= datasetLine[i];
|
||||||
|
}
|
||||||
std::swap(mem.mx, mem.ma);
|
std::swap(mem.mx, mem.ma);
|
||||||
|
|
||||||
if (trace) {
|
if (trace) {
|
||||||
|
@ -419,6 +437,9 @@ namespace RandomX {
|
||||||
_mm_store_pd(&reg.e[3].lo, e[3]);
|
_mm_store_pd(&reg.e[3].lo, e[3]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template void InterpretedVirtualMachine<true>::execute();
|
||||||
|
template void InterpretedVirtualMachine<false>::execute();
|
||||||
|
|
||||||
static int getConditionRegister(int(&registerUsage)[8]) {
|
static int getConditionRegister(int(&registerUsage)[8]) {
|
||||||
int min = INT_MAX;
|
int min = INT_MAX;
|
||||||
int minIndex;
|
int minIndex;
|
||||||
|
@ -431,9 +452,118 @@ namespace RandomX {
|
||||||
return minIndex;
|
return minIndex;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
constexpr uint64_t superscalarMul0 = 6364136223846793005ULL;
|
||||||
|
constexpr uint64_t superscalarAdd1 = 9298410992540426048ULL;
|
||||||
|
constexpr uint64_t superscalarAdd2 = 12065312585734608966ULL;
|
||||||
|
constexpr uint64_t superscalarAdd3 = 9306329213124610396ULL;
|
||||||
|
constexpr uint64_t superscalarAdd4 = 5281919268842080866ULL;
|
||||||
|
constexpr uint64_t superscalarAdd5 = 10536153434571861004ULL;
|
||||||
|
constexpr uint64_t superscalarAdd6 = 3398623926847679864ULL;
|
||||||
|
constexpr uint64_t superscalarAdd7 = 9549104520008361294ULL;
|
||||||
|
|
||||||
|
// Returns a pointer into cache.memory selected by registerValue.
// The value is reduced to a line index and then scaled by CacheLineSize to
// obtain a byte offset from the start of cache.memory.
//   registerValue - 64-bit value used to choose the line
//   cache         - cache whose `memory` (and, in the growth case, `size`) is read
static uint8_t* getMixBlock(uint64_t registerValue, Cache& cache) {
|
||||||
|
uint8_t* mixBlock;
|
||||||
|
// No growth configured: the index is obtained with a compile-time AND mask.
if (RANDOMX_ARGON_GROWTH == 0) {
|
||||||
|
// NOTE(review): masking equals a modulo only if
// RANDOMX_ARGON_MEMORY * ArgonBlockSize / CacheLineSize is a power of two - confirm against the configuration.
constexpr uint32_t mask = (RANDOMX_ARGON_MEMORY * ArgonBlockSize / CacheLineSize - 1);
|
||||||
|
mixBlock = cache.memory + (registerValue & mask) * CacheLineSize;
|
||||||
|
}
|
||||||
|
// Growth configured: the line count is derived from cache.size at run time and a modulo is used.
else {
|
||||||
|
// NOTE(review): the quotient is stored in 32 bits; presumably cache.size / CacheLineSize always fits - confirm.
const uint32_t modulus = cache.size / CacheLineSize;
|
||||||
|
mixBlock = cache.memory + (registerValue % modulus) * CacheLineSize;
|
||||||
|
}
|
||||||
|
return mixBlock;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Interprets the RANDOMX_CACHE_ACCESSES stored superscalar programs for one block
// and XORs the resulting eight 64-bit values into r.
//   blockNumber - seeds the local registers and selects the first cache line
//   r           - eight integer registers; each is XORed with the computed value
// Steps: seed rl[0..7] from blockNumber, then for each program pick a cache line
// with getMixBlock, run the program on rl, XOR the cache line into rl and take the
// next line selector from the program's address register.
template<bool superscalar>
|
||||||
|
void InterpretedVirtualMachine<superscalar>::executeSuperscalar(uint32_t blockNumber, int_reg_t(&r)[8]) {
|
||||||
|
// Local register file; the caller's registers are only touched at the very end.
int_reg_t rl[8];
|
||||||
|
uint8_t* mixBlock;
|
||||||
|
// The first cache line is chosen by the block number itself.
uint64_t registerValue = blockNumber;
|
||||||
|
// NOTE(review): blockNumber + 1 is evaluated in blockNumber's 32-bit type before the
// 64-bit multiply; confirm that wrap-around at UINT32_MAX cannot occur for valid block numbers.
rl[0] = (blockNumber + 1) * superscalarMul0;
|
||||||
|
// Despite the "Add" in their names, the remaining constants are combined with XOR.
rl[1] = rl[0] ^ superscalarAdd1;
|
||||||
|
rl[2] = rl[0] ^ superscalarAdd2;
|
||||||
|
rl[3] = rl[0] ^ superscalarAdd3;
|
||||||
|
rl[4] = rl[0] ^ superscalarAdd4;
|
||||||
|
rl[5] = rl[0] ^ superscalarAdd5;
|
||||||
|
rl[6] = rl[0] ^ superscalarAdd6;
|
||||||
|
rl[7] = rl[0] ^ superscalarAdd7;
|
||||||
|
Cache& cache = mem.ds.cache;
|
||||||
|
// One stored program and one cache line per iteration.
for (unsigned i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) {
|
||||||
|
// The line is selected before the program runs, from the previous iteration's selector.
mixBlock = getMixBlock(registerValue, cache);
|
||||||
|
LightProgram& prog = superScalarPrograms[i];
|
||||||
|
for (unsigned j = 0; j < prog.getSize(); ++j) {
|
||||||
|
Instruction& instr = prog(j);
|
||||||
|
// instr.dst / instr.src index rl directly (no modulo is applied here).
switch (instr.opcode)
|
||||||
|
{
|
||||||
|
case RandomX::LightInstructionType::ISUB_R:
|
||||||
|
rl[instr.dst] -= rl[instr.src];
|
||||||
|
break;
|
||||||
|
case RandomX::LightInstructionType::IXOR_R:
|
||||||
|
rl[instr.dst] ^= rl[instr.src];
|
||||||
|
break;
|
||||||
|
case RandomX::LightInstructionType::IADD_RS:
|
||||||
|
// Source is shifted left by 0..3 bits (instr.mod % 4) before being added.
rl[instr.dst] += rl[instr.src] << (instr.mod % 4);
|
||||||
|
break;
|
||||||
|
case RandomX::LightInstructionType::IMUL_R:
|
||||||
|
rl[instr.dst] *= rl[instr.src];
|
||||||
|
break;
|
||||||
|
case RandomX::LightInstructionType::IROR_C:
|
||||||
|
// Rotate right by the immediate.
rl[instr.dst] = rotr(rl[instr.dst], instr.getImm32());
|
||||||
|
break;
|
||||||
|
// The three IADD_C* opcodes share one handler: add the sign-extended 32-bit immediate.
case RandomX::LightInstructionType::IADD_C7:
|
||||||
|
case RandomX::LightInstructionType::IADD_C8:
|
||||||
|
case RandomX::LightInstructionType::IADD_C9:
|
||||||
|
rl[instr.dst] += signExtend2sCompl(instr.getImm32());
|
||||||
|
break;
|
||||||
|
// Likewise the three IXOR_C* opcodes: XOR with the sign-extended 32-bit immediate.
case RandomX::LightInstructionType::IXOR_C7:
|
||||||
|
case RandomX::LightInstructionType::IXOR_C8:
|
||||||
|
case RandomX::LightInstructionType::IXOR_C9:
|
||||||
|
rl[instr.dst] ^= signExtend2sCompl(instr.getImm32());
|
||||||
|
break;
|
||||||
|
case RandomX::LightInstructionType::IMULH_R:
|
||||||
|
rl[instr.dst] = mulh(rl[instr.dst], rl[instr.src]);
|
||||||
|
break;
|
||||||
|
case RandomX::LightInstructionType::ISMULH_R:
|
||||||
|
rl[instr.dst] = smulh(rl[instr.dst], rl[instr.src]);
|
||||||
|
break;
|
||||||
|
case RandomX::LightInstructionType::IMUL_RCP:
|
||||||
|
// The immediate is used as an index into `reciprocals` here, not as a divisor;
// precompileSuperscalar rewrites the immediate of IMUL_RCP instructions to that index.
rl[instr.dst] *= reciprocals[instr.getImm32()];
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
UNREACHABLE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Mix the selected cache line into the registers: eight 64-bit words at 8-byte steps.
for(unsigned q = 0; q < 8; ++q)
|
||||||
|
rl[q] ^= load64(mixBlock + 8 * q);
|
||||||
|
|
||||||
|
// The program's address register selects the cache line for the next iteration.
registerValue = rl[prog.getAddressRegister()];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fold the result into the caller's registers.
for (unsigned q = 0; q < 8; ++q)
|
||||||
|
r[q] ^= rl[q];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copies the given programs into superScalarPrograms and prepares them for
// executeSuperscalar: every IMUL_RCP instruction has its reciprocal computed once
// and its immediate replaced by the index of that value in `reciprocals`.
//   programs - source programs; sizeof(superScalarPrograms) bytes are read from it
template<bool superscalar>
|
||||||
|
void InterpretedVirtualMachine<superscalar>::precompileSuperscalar(LightProgram* programs) {
|
||||||
|
// NOTE(review): a raw byte copy - assumes LightProgram is trivially copyable and that
// `programs` points to at least RANDOMX_CACHE_ACCESSES elements; confirm.
memcpy(superScalarPrograms, programs, sizeof(superScalarPrograms));
|
||||||
|
// Start from an empty table so the stored indices are valid for this set of programs.
reciprocals.clear();
|
||||||
|
for (unsigned i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) {
|
||||||
|
for (unsigned j = 0; j < superScalarPrograms[i].getSize(); ++j) {
|
||||||
|
// Operates on the private copy, so the caller's programs are left unmodified.
Instruction& instr = superScalarPrograms[i](j);
|
||||||
|
if (instr.opcode == LightInstructionType::IMUL_RCP) {
|
||||||
|
// Compute from the original immediate before it is overwritten below.
auto rcp = reciprocal(instr.getImm32());
|
||||||
|
// size() before push_back is the index the new entry will occupy.
instr.setImm32(reciprocals.size());
|
||||||
|
reciprocals.push_back(rcp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#include "instructionWeights.hpp"
|
#include "instructionWeights.hpp"
|
||||||
|
|
||||||
void InterpretedVirtualMachine::precompileProgram(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
|
template<bool superscalar>
|
||||||
|
void InterpretedVirtualMachine<superscalar>::precompileProgram(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
|
||||||
int registerUsage[8];
|
int registerUsage[8];
|
||||||
for (unsigned i = 0; i < 8; ++i) {
|
for (unsigned i = 0; i < 8; ++i) {
|
||||||
registerUsage[i] = -1;
|
registerUsage[i] = -1;
|
||||||
|
@ -445,14 +575,17 @@ namespace RandomX {
|
||||||
CASE_REP(IADD_RS) {
|
CASE_REP(IADD_RS) {
|
||||||
auto dst = instr.dst % RegistersCount;
|
auto dst = instr.dst % RegistersCount;
|
||||||
auto src = instr.src % RegistersCount;
|
auto src = instr.src % RegistersCount;
|
||||||
ibc.type = InstructionType::IADD_R;
|
ibc.type = InstructionType::IADD_RS;
|
||||||
ibc.idst = &r[dst];
|
ibc.idst = &r[dst];
|
||||||
if (src != dst) {
|
if (dst != 5) {
|
||||||
ibc.isrc = &r[src];
|
ibc.isrc = &r[src];
|
||||||
|
ibc.shift = instr.mod % 4;
|
||||||
|
ibc.imm = 0;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
ibc.isrc = &r[src];
|
||||||
|
ibc.shift = instr.mod % 4;
|
||||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||||
ibc.isrc = &ibc.imm;
|
|
||||||
}
|
}
|
||||||
registerUsage[instr.dst] = i;
|
registerUsage[instr.dst] = i;
|
||||||
} break;
|
} break;
|
||||||
|
|
|
@ -23,23 +23,17 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||||
#include "VirtualMachine.hpp"
|
#include "VirtualMachine.hpp"
|
||||||
#include "Program.hpp"
|
#include "Program.hpp"
|
||||||
#include "intrinPortable.h"
|
#include "intrinPortable.h"
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
namespace RandomX {
|
namespace RandomX {
|
||||||
|
|
||||||
class ITransform {
|
|
||||||
public:
|
|
||||||
virtual int32_t apply(int32_t) const = 0;
|
|
||||||
virtual const char* getName() const = 0;
|
|
||||||
virtual std::ostream& printAsm(std::ostream&) const = 0;
|
|
||||||
virtual std::ostream& printCxx(std::ostream&) const = 0;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct InstructionByteCode;
|
struct InstructionByteCode;
|
||||||
class InterpretedVirtualMachine;
|
template<bool superscalar> class InterpretedVirtualMachine;
|
||||||
|
|
||||||
typedef void(InterpretedVirtualMachine::*InstructionHandler)(Instruction&);
|
template<bool superscalar>
|
||||||
|
using InstructionHandler = void(InterpretedVirtualMachine<superscalar>::*)(Instruction&);
|
||||||
|
|
||||||
struct alignas(8) InstructionByteCode {
|
struct InstructionByteCode {
|
||||||
union {
|
union {
|
||||||
int_reg_t* idst;
|
int_reg_t* idst;
|
||||||
__m128d* fdst;
|
__m128d* fdst;
|
||||||
|
@ -62,6 +56,7 @@ namespace RandomX {
|
||||||
|
|
||||||
constexpr int asedwfagdewsa = sizeof(InstructionByteCode);
|
constexpr int asedwfagdewsa = sizeof(InstructionByteCode);
|
||||||
|
|
||||||
|
template<bool superscalar>
|
||||||
class InterpretedVirtualMachine : public VirtualMachine {
|
class InterpretedVirtualMachine : public VirtualMachine {
|
||||||
public:
|
public:
|
||||||
void* operator new(size_t size) {
|
void* operator new(size_t size) {
|
||||||
|
@ -74,16 +69,17 @@ namespace RandomX {
|
||||||
_mm_free(ptr);
|
_mm_free(ptr);
|
||||||
}
|
}
|
||||||
InterpretedVirtualMachine(bool soft) : softAes(soft) {}
|
InterpretedVirtualMachine(bool soft) : softAes(soft) {}
|
||||||
~InterpretedVirtualMachine();
|
~InterpretedVirtualMachine() {}
|
||||||
void setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) override;
|
void setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) override;
|
||||||
void initialize() override;
|
void initialize() override;
|
||||||
void execute() override;
|
void execute() override;
|
||||||
private:
|
private:
|
||||||
static InstructionHandler engine[256];
|
static InstructionHandler<superscalar> engine[256];
|
||||||
DatasetReadFunc readDataset;
|
DatasetReadFunc readDataset;
|
||||||
bool softAes;
|
bool softAes;
|
||||||
InstructionByteCode byteCode[RANDOMX_PROGRAM_SIZE];
|
InstructionByteCode byteCode[RANDOMX_PROGRAM_SIZE];
|
||||||
|
std::vector<uint64_t> reciprocals;
|
||||||
|
alignas(64) LightProgram superScalarPrograms[RANDOMX_CACHE_ACCESSES];
|
||||||
#ifdef STATS
|
#ifdef STATS
|
||||||
int count_ADD_64 = 0;
|
int count_ADD_64 = 0;
|
||||||
int count_ADD_32 = 0;
|
int count_ADD_32 = 0;
|
||||||
|
@ -131,7 +127,9 @@ namespace RandomX {
|
||||||
int datasetAccess[256] = { 0 };
|
int datasetAccess[256] = { 0 };
|
||||||
#endif
|
#endif
|
||||||
void precompileProgram(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
|
void precompileProgram(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
|
||||||
|
void precompileSuperscalar(LightProgram*);
|
||||||
void executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
|
void executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
|
||||||
void executeBytecode(int& i, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
|
void executeBytecode(int& i, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
|
||||||
|
void executeSuperscalar(uint32_t blockNumber, int_reg_t(&r)[8]);
|
||||||
};
|
};
|
||||||
}
|
}
|
|
@ -32,10 +32,18 @@
|
||||||
.global DECL(randomx_program_start)
|
.global DECL(randomx_program_start)
|
||||||
.global DECL(randomx_program_read_dataset)
|
.global DECL(randomx_program_read_dataset)
|
||||||
.global DECL(randomx_program_read_dataset_light)
|
.global DECL(randomx_program_read_dataset_light)
|
||||||
|
.global DECL(randomx_program_read_dataset_sshash_init)
|
||||||
|
.global DECL(randomx_program_read_dataset_sshash_fin)
|
||||||
|
.global DECL(randomx_program_read_dataset_light_sub)
|
||||||
|
.global DECL(randomx_dataset_init)
|
||||||
.global DECL(randomx_program_loop_store)
|
.global DECL(randomx_program_loop_store)
|
||||||
.global DECL(randomx_program_loop_end)
|
.global DECL(randomx_program_loop_end)
|
||||||
.global DECL(randomx_program_read_dataset_light_sub)
|
.global DECL(randomx_program_read_dataset_light_sub)
|
||||||
.global DECL(randomx_program_epilogue)
|
.global DECL(randomx_program_epilogue)
|
||||||
|
.global DECL(randomx_sshash_load)
|
||||||
|
.global DECL(randomx_sshash_prefetch)
|
||||||
|
.global DECL(randomx_sshash_end)
|
||||||
|
.global DECL(randomx_sshash_init)
|
||||||
.global DECL(randomx_program_end)
|
.global DECL(randomx_program_end)
|
||||||
|
|
||||||
#define db .byte
|
#define db .byte
|
||||||
|
@ -63,6 +71,12 @@ DECL(randomx_program_read_dataset):
|
||||||
DECL(randomx_program_read_dataset_light):
|
DECL(randomx_program_read_dataset_light):
|
||||||
#include "asm/program_read_dataset_light.inc"
|
#include "asm/program_read_dataset_light.inc"
|
||||||
|
|
||||||
|
DECL(randomx_program_read_dataset_sshash_init):
|
||||||
|
#include "asm/program_read_dataset_sshash_init.inc"
|
||||||
|
|
||||||
|
DECL(randomx_program_read_dataset_sshash_fin):
|
||||||
|
#include "asm/program_read_dataset_sshash_fin.inc"
|
||||||
|
|
||||||
DECL(randomx_program_loop_store):
|
DECL(randomx_program_loop_store):
|
||||||
#include "asm/program_loop_store.inc"
|
#include "asm/program_loop_store.inc"
|
||||||
|
|
||||||
|
@ -75,10 +89,84 @@ DECL(randomx_program_read_dataset_light_sub):
|
||||||
squareHashSub:
|
squareHashSub:
|
||||||
#include "asm/squareHash.inc"
|
#include "asm/squareHash.inc"
|
||||||
|
|
||||||
|
.balign 64
|
||||||
|
;# Fills consecutive 64-byte dataset blocks, one per loop iteration.
;# Arguments as noted below: rdi = cache, rsi = dataset, rdx = first block index,
;# rcx = max. block index (the loop runs while the index is below it).
;# Each iteration calls the code located 32768 bytes after this entry point and
;# stores r8-r15 to the current block.
DECL(randomx_dataset_init):
|
||||||
|
;# Save the registers that are restored before returning.
push rbx
|
||||||
|
push rbp
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
push r14
|
||||||
|
push r15
|
||||||
|
;# cache in rdi
|
||||||
|
;# dataset in rsi
|
||||||
|
mov rbp, rdx ;# block index
|
||||||
|
push rcx ;# max. block index
|
||||||
|
init_block_loop:
|
||||||
|
prefetchw byte ptr [rsi]
|
||||||
|
;# Pass the current block index to the called code in rbx.
mov rbx, rbp
|
||||||
|
;# Hand-encoded "call rel32": the opcode byte is followed by a displacement relative to
;# call_offset (the next instruction), chosen so the target is randomx_dataset_init + 32768.
;# NOTE(review): presumably generated code is placed at that offset at run time - confirm against the JIT compiler.
.byte 232 ;# 0xE8 = call
|
||||||
|
;# .set CALL_LOC,
|
||||||
|
.int 32768 - (call_offset - DECL(randomx_dataset_init))
|
||||||
|
call_offset:
|
||||||
|
;# Store r8-r15 as the 64-byte block at [rsi]; presumably these hold the called code's result.
mov qword ptr [rsi+0], r8
|
||||||
|
mov qword ptr [rsi+8], r9
|
||||||
|
mov qword ptr [rsi+16], r10
|
||||||
|
mov qword ptr [rsi+24], r11
|
||||||
|
mov qword ptr [rsi+32], r12
|
||||||
|
mov qword ptr [rsi+40], r13
|
||||||
|
mov qword ptr [rsi+48], r14
|
||||||
|
mov qword ptr [rsi+56], r15
|
||||||
|
;# Advance to the next block index and the next 64-byte output slot.
add rbp, 1
|
||||||
|
add rsi, 64
|
||||||
|
;# [rsp] holds the max. block index pushed from rcx above; unsigned comparison.
cmp rbp, qword ptr [rsp]
|
||||||
|
jb init_block_loop
|
||||||
|
;# Discard the saved limit, then restore registers in reverse push order.
pop rcx
|
||||||
|
pop r15
|
||||||
|
pop r14
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
pop rbp
|
||||||
|
pop rbx
|
||||||
|
ret
|
||||||
|
|
||||||
.balign 64
|
.balign 64
|
||||||
DECL(randomx_program_epilogue):
|
DECL(randomx_program_epilogue):
|
||||||
#include "asm/program_epilogue_linux.inc"
|
#include "asm/program_epilogue_linux.inc"
|
||||||
|
|
||||||
|
.balign 64
|
||||||
|
DECL(randomx_sshash_load):
|
||||||
|
#include "asm/program_sshash_load.inc"
|
||||||
|
|
||||||
|
DECL(randomx_sshash_prefetch):
|
||||||
|
#include "asm/program_sshash_prefetch.inc"
|
||||||
|
|
||||||
|
DECL(randomx_sshash_end):
|
||||||
|
nop
|
||||||
|
|
||||||
|
.balign 64
|
||||||
|
;# Seeds r8-r15 from rbx: r8 = (rbx + 1) * r0_mul, and rN = rN_add XOR r8 for r9-r15.
;# NOTE(review): rbx presumably holds the dataset block number (randomx_dataset_init
;# copies its block index into rbx before calling out) - confirm.
DECL(randomx_sshash_init):
|
||||||
|
;# 64-bit rbx + 1 without modifying rbx or the flags.
lea r8, [rbx+1]
|
||||||
|
#include "asm/program_sshash_prefetch.inc"
|
||||||
|
imul r8, qword ptr r0_mul[rip]
|
||||||
|
;# Each remaining register is its constant XORed with r8 (the constants are named *_add but are XORed in).
mov r9, qword ptr r1_add[rip]
|
||||||
|
xor r9, r8
|
||||||
|
mov r10, qword ptr r2_add[rip]
|
||||||
|
xor r10, r8
|
||||||
|
mov r11, qword ptr r3_add[rip]
|
||||||
|
xor r11, r8
|
||||||
|
mov r12, qword ptr r4_add[rip]
|
||||||
|
xor r12, r8
|
||||||
|
mov r13, qword ptr r5_add[rip]
|
||||||
|
xor r13, r8
|
||||||
|
mov r14, qword ptr r6_add[rip]
|
||||||
|
xor r14, r8
|
||||||
|
mov r15, qword ptr r7_add[rip]
|
||||||
|
xor r15, r8
|
||||||
|
;# NOTE(review): control continues at randomx_program_end; presumably the JIT places code there - confirm.
jmp DECL(randomx_program_end)
|
||||||
|
|
||||||
|
.balign 64
|
||||||
|
#include "asm/program_sshash_constants.inc"
|
||||||
|
|
||||||
.balign 64
|
.balign 64
|
||||||
DECL(randomx_program_end):
|
DECL(randomx_program_end):
|
||||||
nop
|
nop
|
||||||
|
|
|
@ -68,35 +68,11 @@ randomx_program_read_dataset_light PROC
|
||||||
randomx_program_read_dataset_light ENDP
|
randomx_program_read_dataset_light ENDP
|
||||||
|
|
||||||
randomx_program_read_dataset_sshash_init PROC
|
randomx_program_read_dataset_sshash_init PROC
|
||||||
sub rsp, 72
|
include asm/program_read_dataset_sshash_init.inc
|
||||||
mov qword ptr [rsp+64], rbx
|
|
||||||
mov qword ptr [rsp+56], r8
|
|
||||||
mov qword ptr [rsp+48], r9
|
|
||||||
mov qword ptr [rsp+40], r10
|
|
||||||
mov qword ptr [rsp+32], r11
|
|
||||||
mov qword ptr [rsp+24], r12
|
|
||||||
mov qword ptr [rsp+16], r13
|
|
||||||
mov qword ptr [rsp+8], r14
|
|
||||||
mov qword ptr [rsp+0], r15
|
|
||||||
xor rbp, rax ;# modify "mx"
|
|
||||||
ror rbp, 32 ;# swap "ma" and "mx"
|
|
||||||
mov ebx, ebp ;# ecx = ma
|
|
||||||
and ebx, 2147483584 ;# align "ma" to the start of a cache line
|
|
||||||
shr ebx, 6 ;# ebx = Dataset block number
|
|
||||||
;# call 32768
|
|
||||||
randomx_program_read_dataset_sshash_init ENDP
|
randomx_program_read_dataset_sshash_init ENDP
|
||||||
|
|
||||||
randomx_program_read_dataset_sshash_fin PROC
|
randomx_program_read_dataset_sshash_fin PROC
|
||||||
mov rbx, qword ptr [rsp+64]
|
include asm/program_read_dataset_sshash_fin.inc
|
||||||
xor r8, qword ptr [rsp+56]
|
|
||||||
xor r9, qword ptr [rsp+48]
|
|
||||||
xor r10, qword ptr [rsp+40]
|
|
||||||
xor r11, qword ptr [rsp+32]
|
|
||||||
xor r12, qword ptr [rsp+24]
|
|
||||||
xor r13, qword ptr [rsp+16]
|
|
||||||
xor r14, qword ptr [rsp+8]
|
|
||||||
xor r15, qword ptr [rsp+0]
|
|
||||||
add rsp, 72
|
|
||||||
randomx_program_read_dataset_sshash_fin ENDP
|
randomx_program_read_dataset_sshash_fin ENDP
|
||||||
|
|
||||||
randomx_program_loop_store PROC
|
randomx_program_loop_store PROC
|
||||||
|
|
|
@ -17,10 +17,11 @@ You should have received a copy of the GNU General Public License
|
||||||
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include <stddef.h>
|
||||||
#include "blake2/blake2.h"
|
#include "blake2/blake2.h"
|
||||||
#include "configuration.h"
|
#include "configuration.h"
|
||||||
#include "Program.hpp"
|
#include "Program.hpp"
|
||||||
#include "blake2/endian.h";
|
#include "blake2/endian.h"
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
@ -793,7 +794,7 @@ namespace RandomX {
|
||||||
mop.setCycle(scheduleCycle);
|
mop.setCycle(scheduleCycle);
|
||||||
if (scheduleCycle < 0) {
|
if (scheduleCycle < 0) {
|
||||||
if (TRACE) std::cout << "; Failed at cycle " << cycle << std::endl;
|
if (TRACE) std::cout << "; Failed at cycle " << cycle << std::endl;
|
||||||
return DBL_MIN;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (instrIndex == currentInstruction.getInfo().getSrcOp()) {
|
if (instrIndex == currentInstruction.getInfo().getSrcOp()) {
|
||||||
|
|
10
src/asm/program_read_dataset_sshash_fin.inc
Normal file
10
src/asm/program_read_dataset_sshash_fin.inc
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
;# Reloads rbx from [rsp+64], XORs r8-r15 with the eight values stored at
;# [rsp+56] .. [rsp+0], and releases the 72-byte stack area.
;# The offsets match the layout written by program_read_dataset_sshash_init.inc.
mov rbx, qword ptr [rsp+64]
|
||||||
|
;# XOR (not move): the stored values are combined with the current r8-r15.
xor r8, qword ptr [rsp+56]
|
||||||
|
xor r9, qword ptr [rsp+48]
|
||||||
|
xor r10, qword ptr [rsp+40]
|
||||||
|
xor r11, qword ptr [rsp+32]
|
||||||
|
xor r12, qword ptr [rsp+24]
|
||||||
|
xor r13, qword ptr [rsp+16]
|
||||||
|
xor r14, qword ptr [rsp+8]
|
||||||
|
xor r15, qword ptr [rsp+0]
|
||||||
|
add rsp, 72
|
16
src/asm/program_read_dataset_sshash_init.inc
Normal file
16
src/asm/program_read_dataset_sshash_init.inc
Normal file
|
@ -0,0 +1,16 @@
|
||||||
|
;# Reserves 72 bytes of stack, saves rbx and r8-r15 there, updates "mx"/"ma" in rbp
;# and leaves the dataset block number in ebx.
sub rsp, 72
|
||||||
|
;# Slot layout: rbx at +64, then r8 at +56 down to r15 at +0.
mov qword ptr [rsp+64], rbx
|
||||||
|
mov qword ptr [rsp+56], r8
|
||||||
|
mov qword ptr [rsp+48], r9
|
||||||
|
mov qword ptr [rsp+40], r10
|
||||||
|
mov qword ptr [rsp+32], r11
|
||||||
|
mov qword ptr [rsp+24], r12
|
||||||
|
mov qword ptr [rsp+16], r13
|
||||||
|
mov qword ptr [rsp+8], r14
|
||||||
|
mov qword ptr [rsp+0], r15
|
||||||
|
xor rbp, rax ;# modify "mx"
|
||||||
|
ror rbp, 32 ;# swap "ma" and "mx"
|
||||||
|
mov ebx, ebp ;# ebx = ma
|
||||||
|
and ebx, 2147483584 ;# align "ma" to the start of a cache line (mask = 0x7FFFFFC0)
|
||||||
|
shr ebx, 6 ;# ebx = Dataset block number
|
||||||
|
;# call 32768
|
|
@ -1,16 +1,24 @@
|
||||||
r0_mul: ;# 6364136223846793005
|
r0_mul:
|
||||||
|
;#/ 6364136223846793005
|
||||||
db 45, 127, 149, 76, 45, 244, 81, 88
|
db 45, 127, 149, 76, 45, 244, 81, 88
|
||||||
r1_add: ;# 9298410992540426048
|
r1_add:
|
||||||
|
;#/ 9298410992540426048
|
||||||
db 64, 159, 245, 89, 136, 151, 10, 129
|
db 64, 159, 245, 89, 136, 151, 10, 129
|
||||||
r2_add: ;# 12065312585734608966
|
r2_add:
|
||||||
|
;#/ 12065312585734608966
|
||||||
db 70, 216, 194, 56, 223, 153, 112, 167
|
db 70, 216, 194, 56, 223, 153, 112, 167
|
||||||
r3_add: ;# 9306329213124610396
|
r3_add:
|
||||||
|
;#/ 9306329213124610396
|
||||||
db 92, 9, 34, 191, 28, 185, 38, 129
|
db 92, 9, 34, 191, 28, 185, 38, 129
|
||||||
r4_add: ;# 5281919268842080866
|
r4_add:
|
||||||
|
;#/ 5281919268842080866
|
||||||
db 98, 138, 159, 23, 151, 37, 77, 73
|
db 98, 138, 159, 23, 151, 37, 77, 73
|
||||||
r5_add: ;# 10536153434571861004
|
r5_add:
|
||||||
|
;#/ 10536153434571861004
|
||||||
db 12, 236, 170, 206, 185, 239, 55, 146
|
db 12, 236, 170, 206, 185, 239, 55, 146
|
||||||
r6_add: ;# 3398623926847679864
|
r6_add:
|
||||||
|
;#/ 3398623926847679864
|
||||||
db 120, 45, 230, 108, 116, 86, 42, 47
|
db 120, 45, 230, 108, 116, 86, 42, 47
|
||||||
r7_add: ;# 9549104520008361294
|
r7_add:
|
||||||
|
;#/ 9549104520008361294
|
||||||
db 78, 229, 44, 182, 247, 59, 133, 132
|
db 78, 229, 44, 182, 247, 59, 133, 132
|
|
@ -301,6 +301,7 @@ int main(int argc, char** argv) {
|
||||||
RandomX::JitCompilerX86 jit86;
|
RandomX::JitCompilerX86 jit86;
|
||||||
jit86.generateSuperScalarHash(programs);
|
jit86.generateSuperScalarHash(programs);
|
||||||
jit86.getDatasetInitFunc()(cache.memory, dataset.dataset.memory, 0, datasetBlockCount);
|
jit86.getDatasetInitFunc()(cache.memory, dataset.dataset.memory, 0, datasetBlockCount);
|
||||||
|
//dump((const char*)dataset.dataset.memory, RANDOMX_DATASET_SIZE, "dataset.dat");
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if (initThreadCount > 1) {
|
if (initThreadCount > 1) {
|
||||||
|
@ -333,8 +334,10 @@ int main(int argc, char** argv) {
|
||||||
vm = new RandomX::CompiledLightVirtualMachine<true>();
|
vm = new RandomX::CompiledLightVirtualMachine<true>();
|
||||||
else if (jit)
|
else if (jit)
|
||||||
vm = new RandomX::CompiledLightVirtualMachine<false>();
|
vm = new RandomX::CompiledLightVirtualMachine<false>();
|
||||||
|
else if (useSuperscalar)
|
||||||
|
vm = new RandomX::InterpretedVirtualMachine<true>(softAes);
|
||||||
else
|
else
|
||||||
vm = new RandomX::InterpretedVirtualMachine(softAes);
|
vm = new RandomX::InterpretedVirtualMachine<false>(softAes);
|
||||||
}
|
}
|
||||||
vm->setDataset(dataset, datasetSize, programs);
|
vm->setDataset(dataset, datasetSize, programs);
|
||||||
vms.push_back(vm);
|
vms.push_back(vm);
|
||||||
|
|
Loading…
Reference in a new issue