diff --git a/makefile b/makefile
index cd49f88..7dde5ae 100644
--- a/makefile
+++ b/makefile
@@ -9,7 +9,7 @@ OBJDIR=obj
LDFLAGS=-lpthread
CPPSRC=src/argon2_core.c src/Cache.cpp src/divideByConstantCodegen.c src/Instruction.cpp src/JitCompilerX86.cpp src/Program.cpp src/VirtualMachine.cpp src/argon2_ref.c src/CompiledVirtualMachine.cpp src/executeProgram-linux.cpp src/instructionsPortable.cpp src/LightClientAsyncWorker.cpp src/softAes.cpp src/virtualMemory.cpp src/AssemblyGeneratorX86.cpp src/dataset.cpp src/hashAes1Rx4.cpp src/InterpretedVirtualMachine.cpp src/main.cpp src/TestAluFpu.cpp src/blake2/blake2b.c
TOBJS=$(addprefix $(OBJDIR)/,instructionsPortable.o TestAluFpu.o)
-ROBJS=$(addprefix $(OBJDIR)/,argon2_core.o argon2_ref.o AssemblyGeneratorX86.o blake2b.o CompiledVirtualMachine.o CompiledLightVirtualMachine.o dataset.o JitCompilerX86.o instructionsPortable.o Instruction.o InterpretedVirtualMachine.o main.o Program.o softAes.o VirtualMachine.o Cache.o virtualMemory.o reciprocal.o LightClientAsyncWorker.o hashAes1Rx4.o)
+ROBJS=$(addprefix $(OBJDIR)/,argon2_core.o argon2_ref.o AssemblyGeneratorX86.o blake2b.o CompiledVirtualMachine.o CompiledLightVirtualMachine.o dataset.o JitCompilerX86.o instructionsPortable.o Instruction.o InterpretedVirtualMachine.o main.o softAes.o VirtualMachine.o Cache.o virtualMemory.o reciprocal.o LightClientAsyncWorker.o hashAes1Rx4.o LightProgramGenerator.o)
ifeq ($(PLATFORM),amd64)
ROBJS += $(OBJDIR)/JitCompilerX86-static.o $(OBJDIR)/squareHash.o
CXXFLAGS += -maes
@@ -99,6 +99,9 @@ $(OBJDIR)/InterpretedVirtualMachine.o: $(addprefix $(SRCDIR)/,InterpretedVirtual
$(OBJDIR)/LightClientAsyncWorker.o: $(addprefix $(SRCDIR)/,LightClientAsyncWorker.cpp LightClientAsyncWorker.hpp common.hpp) | $(OBJDIR)
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/LightClientAsyncWorker.cpp -o $@
+
+$(OBJDIR)/LightProgramGenerator.o: $(addprefix $(SRCDIR)/,LightProgramGenerator.cpp LightProgramGenerator.hpp Program.hpp blake2/blake2.h blake2/endian.h configuration.h) | $(OBJDIR)
+ $(CXX) $(CXXFLAGS) -c $(SRCDIR)/LightProgramGenerator.cpp -o $@
$(OBJDIR)/main.o: $(addprefix $(SRCDIR)/,main.cpp InterpretedVirtualMachine.hpp Stopwatch.hpp blake2/blake2.h VirtualMachine.hpp common.hpp blake2/endian.h Program.hpp Instruction.hpp intrinPortable.h CompiledVirtualMachine.hpp JitCompilerX86.hpp AssemblyGeneratorX86.hpp dataset.hpp Cache.hpp virtualMemory.hpp hashAes1Rx4.hpp softAes.h configuration.h) | $(OBJDIR)
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/main.cpp -o $@
diff --git a/src/Instruction.hpp b/src/Instruction.hpp
index 65d1c8a..9baf8ce 100644
--- a/src/Instruction.hpp
+++ b/src/Instruction.hpp
@@ -30,7 +30,7 @@ namespace RandomX {
typedef void(Instruction::*InstructionVisualizer)(std::ostream&) const;
namespace InstructionType {
- constexpr int IADD_R = 0;
+ constexpr int IADD_RS = 0;
constexpr int IADD_M = 1;
constexpr int IADD_RC = 2;
constexpr int ISUB_R = 3;
diff --git a/src/InterpretedVirtualMachine.cpp b/src/InterpretedVirtualMachine.cpp
index 636b95b..7ee00ba 100644
--- a/src/InterpretedVirtualMachine.cpp
+++ b/src/InterpretedVirtualMachine.cpp
@@ -36,6 +36,7 @@ along with RandomX. If not, see.
#ifdef STATS
#include
#endif
+#include "LightProgramGenerator.hpp"
#ifdef FPUCHECK
constexpr bool fpuCheck = true;
@@ -45,17 +46,20 @@ constexpr bool fpuCheck = false;
namespace RandomX {
- InterpretedVirtualMachine::~InterpretedVirtualMachine() {
-
- }
-
- void InterpretedVirtualMachine::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) {
+ // Stores the dataset handle, selects datasetReadLight as the read function and
+ // derives datasetRange from the given size. When the superscalar template flag
+ // is set, the supplied programs are additionally handed to precompileSuperscalar.
+ template<bool superscalar>
+ void InterpretedVirtualMachine<superscalar>::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) {
mem.ds = ds;
readDataset = &datasetReadLight;
datasetRange = (size - RANDOMX_DATASET_SIZE + CacheLineSize) / CacheLineSize;
+ if(superscalar)
+     precompileSuperscalar(programs);
}
- void InterpretedVirtualMachine::initialize() {
+ // Explicit instantiations: the member is defined in this .cpp, so both flag values are emitted here.
+ template void InterpretedVirtualMachine<true>::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]);
+ template void InterpretedVirtualMachine<false>::setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]);
+
+ template
+ void InterpretedVirtualMachine::initialize() {
VirtualMachine::initialize();
for (unsigned i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) {
program(i).src %= RegistersCount;
@@ -63,12 +67,19 @@ namespace RandomX {
}
}
- void InterpretedVirtualMachine::executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
+ // Explicit instantiations: the member is defined in this .cpp, so both flag values are emitted here.
+ template void InterpretedVirtualMachine<true>::initialize();
+ template void InterpretedVirtualMachine<false>::initialize();
+
+ // Runs the precompiled bytecode once: calls the per-instruction overload for
+ // each index from 0 up to RANDOMX_PROGRAM_SIZE. The index is passed by
+ // reference to that overload (it takes int&).
+ template<bool superscalar>
+ void InterpretedVirtualMachine<superscalar>::executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
for (int ic = 0; ic < RANDOMX_PROGRAM_SIZE; ++ic) {
executeBytecode(ic, r, f, e, a);
}
}
+ // Explicit instantiations: the member is defined in this .cpp, so both flag values are emitted here.
+ template void InterpretedVirtualMachine<true>::executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
+ template void InterpretedVirtualMachine<false>::executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
+
static void print(int_reg_t r) {
std::cout << std::hex << std::setw(16) << std::setfill('0') << r << std::endl;
}
@@ -98,14 +109,15 @@ namespace RandomX {
return std::fpclassify(x) == FP_SUBNORMAL;
}
- FORCE_INLINE void InterpretedVirtualMachine::executeBytecode(int& ic, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
+ template
+ FORCE_INLINE void InterpretedVirtualMachine::executeBytecode(int& ic, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
auto& ibc = byteCode[ic];
if (trace) std::cout << std::dec << std::setw(3) << ic << " " << program(ic);
//if(trace) printState(r, f, e, a);
switch (ibc.type)
{
- case InstructionType::IADD_R: {
- *ibc.idst += *ibc.isrc;
+ case InstructionType::IADD_RS: {
+ *ibc.idst += (*ibc.isrc << ibc.shift) + ibc.imm;
} break;
case InstructionType::IADD_M: {
@@ -289,7 +301,8 @@ namespace RandomX {
#endif
}
- void InterpretedVirtualMachine::execute() {
+ template
+ void InterpretedVirtualMachine::execute() {
int_reg_t r[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
__m128d f[4];
__m128d e[4];
@@ -350,11 +363,16 @@ namespace RandomX {
mem.mx ^= r[readReg2] ^ r[readReg3];
mem.mx &= CacheLineAlignMask;
- Cache& cache = mem.ds.cache;
- uint64_t datasetLine[CacheLineSize / sizeof(uint64_t)];
- initBlock(cache, (uint8_t*)datasetLine, datasetBase + mem.ma / CacheLineSize, RANDOMX_CACHE_ACCESSES / 8);
- for (int i = 0; i < RegistersCount; ++i)
- r[i] ^= datasetLine[i];
+ if (superscalar) {
+ executeSuperscalar(datasetBase + mem.ma / CacheLineSize, r);
+ }
+ else {
+ Cache& cache = mem.ds.cache;
+ uint64_t datasetLine[CacheLineSize / sizeof(uint64_t)];
+ initBlock(cache, (uint8_t*)datasetLine, datasetBase + mem.ma / CacheLineSize, RANDOMX_CACHE_ACCESSES / 8);
+ for (int i = 0; i < RegistersCount; ++i)
+ r[i] ^= datasetLine[i];
+ }
std::swap(mem.mx, mem.ma);
if (trace) {
@@ -419,6 +437,9 @@ namespace RandomX {
_mm_store_pd(®.e[3].lo, e[3]);
}
+ // Explicit instantiations: the member is defined in this .cpp, so both flag values are emitted here.
+ template void InterpretedVirtualMachine<true>::execute();
+ template void InterpretedVirtualMachine<false>::execute();
+
static int getConditionRegister(int(®isterUsage)[8]) {
int min = INT_MAX;
int minIndex;
@@ -431,9 +452,118 @@ namespace RandomX {
return minIndex;
}
+ // Seed constants for executeSuperscalar: rl[0] = (blockNumber + 1) * superscalarMul0,
+ // and rl[1..7] = rl[0] XOR superscalarAdd1..7 (the values are XORed despite the "Add" names).
+ // The same numbers are listed as r0_mul and r1_add..r7_add in asm/program_sshash_constants.inc.
+ constexpr uint64_t superscalarMul0 = 6364136223846793005ULL;
+ constexpr uint64_t superscalarAdd1 = 9298410992540426048ULL;
+ constexpr uint64_t superscalarAdd2 = 12065312585734608966ULL;
+ constexpr uint64_t superscalarAdd3 = 9306329213124610396ULL;
+ constexpr uint64_t superscalarAdd4 = 5281919268842080866ULL;
+ constexpr uint64_t superscalarAdd5 = 10536153434571861004ULL;
+ constexpr uint64_t superscalarAdd6 = 3398623926847679864ULL;
+ constexpr uint64_t superscalarAdd7 = 9549104520008361294ULL;
+
+ // Returns a pointer into cache.memory at a multiple of CacheLineSize chosen by
+ // registerValue. With RANDOMX_ARGON_GROWTH == 0 the line index is obtained by
+ // masking with a compile-time constant; otherwise it is registerValue modulo
+ // cache.size / CacheLineSize.
+ static uint8_t* getMixBlock(uint64_t registerValue, Cache& cache) {
+     if (RANDOMX_ARGON_GROWTH != 0) {
+         const uint32_t lineCount = cache.size / CacheLineSize;
+         return cache.memory + (registerValue % lineCount) * CacheLineSize;
+     }
+     constexpr uint32_t lineMask = (RANDOMX_ARGON_MEMORY * ArgonBlockSize / CacheLineSize - 1);
+     return cache.memory + (registerValue & lineMask) * CacheLineSize;
+ }
+
+ // Computes one dataset line in the interpreter and XORs it into r.
+ //
+ // Eight 64-bit working registers are seeded from blockNumber. Then, for each
+ // of the RANDOMX_CACHE_ACCESSES stored programs: a cache line is selected from
+ // the current registerValue, the program is executed on the working registers,
+ // the selected cache line is XORed into them, and the next registerValue is
+ // read from the register named by the program's getAddressRegister().
+ //
+ // blockNumber - index of the dataset line to compute
+ // r           - integer register file; receives r[q] ^= result[q]
+ template<bool superscalar>
+ void InterpretedVirtualMachine<superscalar>::executeSuperscalar(uint32_t blockNumber, int_reg_t(&r)[8]) {
+     int_reg_t rl[8];
+     uint8_t* mixBlock;
+     uint64_t registerValue = blockNumber;
+     // Seed: rl[0] from the block number, rl[1..7] as rl[0] XOR a per-register constant.
+     rl[0] = (blockNumber + 1) * superscalarMul0;
+     rl[1] = rl[0] ^ superscalarAdd1;
+     rl[2] = rl[0] ^ superscalarAdd2;
+     rl[3] = rl[0] ^ superscalarAdd3;
+     rl[4] = rl[0] ^ superscalarAdd4;
+     rl[5] = rl[0] ^ superscalarAdd5;
+     rl[6] = rl[0] ^ superscalarAdd6;
+     rl[7] = rl[0] ^ superscalarAdd7;
+     Cache& cache = mem.ds.cache;
+     for (unsigned i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) {
+         // The cache line is chosen before the program runs, from the previous round's value.
+         mixBlock = getMixBlock(registerValue, cache);
+         LightProgram& prog = superScalarPrograms[i];
+         for (unsigned j = 0; j < prog.getSize(); ++j) {
+             Instruction& instr = prog(j);
+             switch (instr.opcode)
+             {
+             case RandomX::LightInstructionType::ISUB_R:
+                 rl[instr.dst] -= rl[instr.src];
+                 break;
+             case RandomX::LightInstructionType::IXOR_R:
+                 rl[instr.dst] ^= rl[instr.src];
+                 break;
+             case RandomX::LightInstructionType::IADD_RS:
+                 // Shift count is the low two bits of mod (0..3).
+                 rl[instr.dst] += rl[instr.src] << (instr.mod % 4);
+                 break;
+             case RandomX::LightInstructionType::IMUL_R:
+                 rl[instr.dst] *= rl[instr.src];
+                 break;
+             case RandomX::LightInstructionType::IROR_C:
+                 rl[instr.dst] = rotr(rl[instr.dst], instr.getImm32());
+                 break;
+             case RandomX::LightInstructionType::IADD_C7:
+             case RandomX::LightInstructionType::IADD_C8:
+             case RandomX::LightInstructionType::IADD_C9:
+                 rl[instr.dst] += signExtend2sCompl(instr.getImm32());
+                 break;
+             case RandomX::LightInstructionType::IXOR_C7:
+             case RandomX::LightInstructionType::IXOR_C8:
+             case RandomX::LightInstructionType::IXOR_C9:
+                 rl[instr.dst] ^= signExtend2sCompl(instr.getImm32());
+                 break;
+             case RandomX::LightInstructionType::IMULH_R:
+                 rl[instr.dst] = mulh(rl[instr.dst], rl[instr.src]);
+                 break;
+             case RandomX::LightInstructionType::ISMULH_R:
+                 rl[instr.dst] = smulh(rl[instr.dst], rl[instr.src]);
+                 break;
+             case RandomX::LightInstructionType::IMUL_RCP:
+                 // precompileSuperscalar replaced imm32 with an index into reciprocals.
+                 rl[instr.dst] *= reciprocals[instr.getImm32()];
+                 break;
+             default:
+                 UNREACHABLE;
+             }
+         }
+
+         // Mix the selected cache line (8 x 64 bits) into the working registers.
+         for(unsigned q = 0; q < 8; ++q)
+             rl[q] ^= load64(mixBlock + 8 * q);
+
+         registerValue = rl[prog.getAddressRegister()];
+     }
+
+     for (unsigned q = 0; q < 8; ++q)
+         r[q] ^= rl[q];
+ }
+
+ // Copies the given programs into superScalarPrograms (sizeof(superScalarPrograms)
+ // bytes, so the caller must supply RANDOMX_CACHE_ACCESSES programs) and
+ // prepares IMUL_RCP instructions for executeSuperscalar: the reciprocal of each
+ // such instruction's imm32 is computed once and stored in reciprocals, and the
+ // instruction's imm32 is overwritten with the index of that entry.
+ template<bool superscalar>
+ void InterpretedVirtualMachine<superscalar>::precompileSuperscalar(LightProgram* programs) {
+     memcpy(superScalarPrograms, programs, sizeof(superScalarPrograms));
+     // Drop entries left over from a previous call before rebuilding the table.
+     reciprocals.clear();
+     for (unsigned i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) {
+         for (unsigned j = 0; j < superScalarPrograms[i].getSize(); ++j) {
+             Instruction& instr = superScalarPrograms[i](j);
+             if (instr.opcode == LightInstructionType::IMUL_RCP) {
+                 auto rcp = reciprocal(instr.getImm32());
+                 // Index is taken before the push, so it refers to the new entry.
+                 instr.setImm32(reciprocals.size());
+                 reciprocals.push_back(rcp);
+             }
+         }
+     }
+ }
+
#include "instructionWeights.hpp"
- void InterpretedVirtualMachine::precompileProgram(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
+ template
+ void InterpretedVirtualMachine::precompileProgram(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
int registerUsage[8];
for (unsigned i = 0; i < 8; ++i) {
registerUsage[i] = -1;
@@ -445,14 +575,17 @@ namespace RandomX {
CASE_REP(IADD_RS) {
auto dst = instr.dst % RegistersCount;
auto src = instr.src % RegistersCount;
- ibc.type = InstructionType::IADD_R;
+ ibc.type = InstructionType::IADD_RS;
ibc.idst = &r[dst];
- if (src != dst) {
+ if (dst != 5) {
ibc.isrc = &r[src];
+ ibc.shift = instr.mod % 4;
+ ibc.imm = 0;
}
else {
+ ibc.isrc = &r[src];
+ ibc.shift = instr.mod % 4;
ibc.imm = signExtend2sCompl(instr.getImm32());
- ibc.isrc = &ibc.imm;
}
registerUsage[instr.dst] = i;
} break;
diff --git a/src/InterpretedVirtualMachine.hpp b/src/InterpretedVirtualMachine.hpp
index 49178bc..24bb9c6 100644
--- a/src/InterpretedVirtualMachine.hpp
+++ b/src/InterpretedVirtualMachine.hpp
@@ -23,23 +23,17 @@ along with RandomX. If not, see.
#include "VirtualMachine.hpp"
#include "Program.hpp"
#include "intrinPortable.h"
+#include
namespace RandomX {
- class ITransform {
- public:
- virtual int32_t apply(int32_t) const = 0;
- virtual const char* getName() const = 0;
- virtual std::ostream& printAsm(std::ostream&) const = 0;
- virtual std::ostream& printCxx(std::ostream&) const = 0;
- };
-
struct InstructionByteCode;
- class InterpretedVirtualMachine;
+ // Forward declaration of the interpreter, parameterized by the superscalar flag.
+ template<bool superscalar> class InterpretedVirtualMachine;
- typedef void(InterpretedVirtualMachine::*InstructionHandler)(Instruction&);
+ // Pointer to an InterpretedVirtualMachine member function taking an Instruction&.
+ template<bool superscalar>
+ using InstructionHandler = void(InterpretedVirtualMachine<superscalar>::*)(Instruction&);
- struct alignas(8) InstructionByteCode {
+ struct InstructionByteCode {
union {
int_reg_t* idst;
__m128d* fdst;
@@ -62,6 +56,7 @@ namespace RandomX {
constexpr int asedwfagdewsa = sizeof(InstructionByteCode);
+ template
class InterpretedVirtualMachine : public VirtualMachine {
public:
void* operator new(size_t size) {
@@ -74,16 +69,17 @@ namespace RandomX {
_mm_free(ptr);
}
InterpretedVirtualMachine(bool soft) : softAes(soft) {}
- ~InterpretedVirtualMachine();
+ ~InterpretedVirtualMachine() {}
void setDataset(dataset_t ds, uint64_t size, LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]) override;
void initialize() override;
void execute() override;
private:
- static InstructionHandler engine[256];
+ static InstructionHandler engine[256];
DatasetReadFunc readDataset;
bool softAes;
InstructionByteCode byteCode[RANDOMX_PROGRAM_SIZE];
-
+ std::vector reciprocals;
+ alignas(64) LightProgram superScalarPrograms[RANDOMX_CACHE_ACCESSES];
#ifdef STATS
int count_ADD_64 = 0;
int count_ADD_32 = 0;
@@ -131,7 +127,9 @@ namespace RandomX {
int datasetAccess[256] = { 0 };
#endif
void precompileProgram(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
+ void precompileSuperscalar(LightProgram*);
void executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
void executeBytecode(int& i, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
+ void executeSuperscalar(uint32_t blockNumber, int_reg_t(&r)[8]);
};
}
\ No newline at end of file
diff --git a/src/JitCompilerX86-static.S b/src/JitCompilerX86-static.S
index 9ccdb16..e78dbe7 100644
--- a/src/JitCompilerX86-static.S
+++ b/src/JitCompilerX86-static.S
@@ -32,10 +32,18 @@
.global DECL(randomx_program_start)
.global DECL(randomx_program_read_dataset)
.global DECL(randomx_program_read_dataset_light)
+.global DECL(randomx_program_read_dataset_sshash_init)
+.global DECL(randomx_program_read_dataset_sshash_fin)
+.global DECL(randomx_dataset_init)
.global DECL(randomx_program_loop_store)
.global DECL(randomx_program_loop_end)
.global DECL(randomx_program_read_dataset_light_sub)
.global DECL(randomx_program_epilogue)
+.global DECL(randomx_sshash_load)
+.global DECL(randomx_sshash_prefetch)
+.global DECL(randomx_sshash_end)
+.global DECL(randomx_sshash_init)
.global DECL(randomx_program_end)
#define db .byte
@@ -63,6 +71,12 @@ DECL(randomx_program_read_dataset):
DECL(randomx_program_read_dataset_light):
#include "asm/program_read_dataset_light.inc"
+DECL(randomx_program_read_dataset_sshash_init):
+ #include "asm/program_read_dataset_sshash_init.inc"
+
+DECL(randomx_program_read_dataset_sshash_fin):
+ #include "asm/program_read_dataset_sshash_fin.inc"
+
DECL(randomx_program_loop_store):
#include "asm/program_loop_store.inc"
@@ -75,10 +89,84 @@ DECL(randomx_program_read_dataset_light_sub):
squareHashSub:
#include "asm/squareHash.inc"
+.balign 64
+DECL(randomx_dataset_init):
+ ;# Fills consecutive 64-byte dataset lines: for each block index it calls the
+ ;# code located 32768 bytes after this label and stores r8-r15 to the dataset.
+ push rbx ;# save registers that are restored before ret
+ push rbp
+ push r12
+ push r13
+ push r14
+ push r15
+ ;# cache in rdi
+ ;# dataset in rsi
+ mov rbp, rdx ;# block index
+ push rcx ;# max. block index
+init_block_loop:
+ prefetchw byte ptr [rsi] ;# prefetch the destination line for writing
+ mov rbx, rbp ;# rbx = current block index, read by the called code
+ .byte 232 ;# 0xE8 = call
+ ;# hand-encoded rel32: call target = randomx_dataset_init + 32768
+ ;# NOTE(review): presumably the generated hash code is placed there - confirm
+ .int 32768 - (call_offset - DECL(randomx_dataset_init))
+call_offset:
+ mov qword ptr [rsi+0], r8 ;# store r8-r15 as one 64-byte dataset line
+ mov qword ptr [rsi+8], r9
+ mov qword ptr [rsi+16], r10
+ mov qword ptr [rsi+24], r11
+ mov qword ptr [rsi+32], r12
+ mov qword ptr [rsi+40], r13
+ mov qword ptr [rsi+48], r14
+ mov qword ptr [rsi+56], r15
+ add rbp, 1 ;# next block index
+ add rsi, 64 ;# next dataset line
+ cmp rbp, qword ptr [rsp] ;# [rsp] holds the max. block index pushed above
+ jb init_block_loop ;# repeat while block index < max (body runs at least once)
+ pop rcx ;# discard the saved max. block index
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbp
+ pop rbx
+ ret
+
.balign 64
DECL(randomx_program_epilogue):
#include "asm/program_epilogue_linux.inc"
+.balign 64
+DECL(randomx_sshash_load):
+ #include "asm/program_sshash_load.inc"
+
+DECL(randomx_sshash_prefetch):
+ #include "asm/program_sshash_prefetch.inc"
+
+DECL(randomx_sshash_end):
+ nop
+
+.balign 64
+DECL(randomx_sshash_init):
+ ;# Seeds r8-r15 from the block number in rbx: r8 = (rbx + 1) * r0_mul,
+ ;# r9..r15 = r8 XOR r1_add..r7_add (labels from program_sshash_constants.inc).
+ lea r8, [rbx+1] ;# r8 = block number + 1
+ #include "asm/program_sshash_prefetch.inc"
+ imul r8, qword ptr r0_mul[rip] ;# r8 *= r0_mul
+ mov r9, qword ptr r1_add[rip]
+ xor r9, r8 ;# r9 = r8 ^ r1_add
+ mov r10, qword ptr r2_add[rip]
+ xor r10, r8 ;# r10 = r8 ^ r2_add
+ mov r11, qword ptr r3_add[rip]
+ xor r11, r8 ;# r11 = r8 ^ r3_add
+ mov r12, qword ptr r4_add[rip]
+ xor r12, r8 ;# r12 = r8 ^ r4_add
+ mov r13, qword ptr r5_add[rip]
+ xor r13, r8 ;# r13 = r8 ^ r5_add
+ mov r14, qword ptr r6_add[rip]
+ xor r14, r8 ;# r14 = r8 ^ r6_add
+ mov r15, qword ptr r7_add[rip]
+ xor r15, r8 ;# r15 = r8 ^ r7_add
+ jmp DECL(randomx_program_end)
+
+.balign 64
+ #include "asm/program_sshash_constants.inc"
+
.balign 64
DECL(randomx_program_end):
nop
diff --git a/src/JitCompilerX86-static.asm b/src/JitCompilerX86-static.asm
index f149655..ab29312 100644
--- a/src/JitCompilerX86-static.asm
+++ b/src/JitCompilerX86-static.asm
@@ -68,35 +68,11 @@ randomx_program_read_dataset_light PROC
randomx_program_read_dataset_light ENDP
randomx_program_read_dataset_sshash_init PROC
- sub rsp, 72
- mov qword ptr [rsp+64], rbx
- mov qword ptr [rsp+56], r8
- mov qword ptr [rsp+48], r9
- mov qword ptr [rsp+40], r10
- mov qword ptr [rsp+32], r11
- mov qword ptr [rsp+24], r12
- mov qword ptr [rsp+16], r13
- mov qword ptr [rsp+8], r14
- mov qword ptr [rsp+0], r15
- xor rbp, rax ;# modify "mx"
- ror rbp, 32 ;# swap "ma" and "mx"
- mov ebx, ebp ;# ecx = ma
- and ebx, 2147483584 ;# align "ma" to the start of a cache line
- shr ebx, 6 ;# ebx = Dataset block number
- ;# call 32768
+ include asm/program_read_dataset_sshash_init.inc
randomx_program_read_dataset_sshash_init ENDP
randomx_program_read_dataset_sshash_fin PROC
- mov rbx, qword ptr [rsp+64]
- xor r8, qword ptr [rsp+56]
- xor r9, qword ptr [rsp+48]
- xor r10, qword ptr [rsp+40]
- xor r11, qword ptr [rsp+32]
- xor r12, qword ptr [rsp+24]
- xor r13, qword ptr [rsp+16]
- xor r14, qword ptr [rsp+8]
- xor r15, qword ptr [rsp+0]
- add rsp, 72
+ include asm/program_read_dataset_sshash_fin.inc
randomx_program_read_dataset_sshash_fin ENDP
randomx_program_loop_store PROC
diff --git a/src/LightProgramGenerator.cpp b/src/LightProgramGenerator.cpp
index eeb09de..97fbb91 100644
--- a/src/LightProgramGenerator.cpp
+++ b/src/LightProgramGenerator.cpp
@@ -17,10 +17,11 @@ You should have received a copy of the GNU General Public License
along with RandomX. If not, see.
*/
+#include
#include "blake2/blake2.h"
#include "configuration.h"
#include "Program.hpp"
-#include "blake2/endian.h";
+#include "blake2/endian.h"
#include
#include
#include
@@ -793,7 +794,7 @@ namespace RandomX {
mop.setCycle(scheduleCycle);
if (scheduleCycle < 0) {
if (TRACE) std::cout << "; Failed at cycle " << cycle << std::endl;
- return DBL_MIN;
+ return 0;
}
if (instrIndex == currentInstruction.getInfo().getSrcOp()) {
diff --git a/src/asm/program_read_dataset_sshash_fin.inc b/src/asm/program_read_dataset_sshash_fin.inc
new file mode 100644
index 0000000..f5a067d
--- /dev/null
+++ b/src/asm/program_read_dataset_sshash_fin.inc
@@ -0,0 +1,10 @@
+ ;# Counterpart of program_read_dataset_sshash_init.inc: combines r8-r15 with the
+ ;# values that file saved on the stack and releases the 72-byte frame.
+ mov rbx, qword ptr [rsp+64] ;# restore rbx
+ xor r8, qword ptr [rsp+56] ;# r8-r15 ^= their saved values
+ xor r9, qword ptr [rsp+48]
+ xor r10, qword ptr [rsp+40]
+ xor r11, qword ptr [rsp+32]
+ xor r12, qword ptr [rsp+24]
+ xor r13, qword ptr [rsp+16]
+ xor r14, qword ptr [rsp+8]
+ xor r15, qword ptr [rsp+0]
+ add rsp, 72 ;# release the 9-qword save area
\ No newline at end of file
diff --git a/src/asm/program_read_dataset_sshash_init.inc b/src/asm/program_read_dataset_sshash_init.inc
new file mode 100644
index 0000000..a186d2e
--- /dev/null
+++ b/src/asm/program_read_dataset_sshash_init.inc
@@ -0,0 +1,16 @@
+ ;# Saves rbx and r8-r15 on the stack, updates and swaps "mx"/"ma" in rbp, and
+ ;# leaves the dataset block number in ebx.
+ sub rsp, 72 ;# reserve 9 qwords
+ mov qword ptr [rsp+64], rbx ;# save rbx
+ mov qword ptr [rsp+56], r8 ;# save r8-r15
+ mov qword ptr [rsp+48], r9
+ mov qword ptr [rsp+40], r10
+ mov qword ptr [rsp+32], r11
+ mov qword ptr [rsp+24], r12
+ mov qword ptr [rsp+16], r13
+ mov qword ptr [rsp+8], r14
+ mov qword ptr [rsp+0], r15
+ xor rbp, rax ;# modify "mx"
+ ror rbp, 32 ;# swap "ma" and "mx"
+ mov ebx, ebp ;# ebx = ma
+ and ebx, 2147483584 ;# align "ma" to the start of a cache line
+ shr ebx, 6 ;# ebx = Dataset block number
+ ;# call 32768
\ No newline at end of file
diff --git a/src/asm/program_sshash_constants.inc b/src/asm/program_sshash_constants.inc
index a25a90e..77b4ecd 100644
--- a/src/asm/program_sshash_constants.inc
+++ b/src/asm/program_sshash_constants.inc
@@ -1,16 +1,24 @@
-r0_mul: ;# 6364136223846793005
+r0_mul:
+ ;#/ 6364136223846793005
db 45, 127, 149, 76, 45, 244, 81, 88
-r1_add: ;# 9298410992540426048
+r1_add:
+ ;#/ 9298410992540426048
db 64, 159, 245, 89, 136, 151, 10, 129
-r2_add: ;# 12065312585734608966
+r2_add:
+ ;#/ 12065312585734608966
db 70, 216, 194, 56, 223, 153, 112, 167
-r3_add: ;# 9306329213124610396
+r3_add:
+ ;#/ 9306329213124610396
db 92, 9, 34, 191, 28, 185, 38, 129
-r4_add: ;# 5281919268842080866
+r4_add:
+ ;#/ 5281919268842080866
db 98, 138, 159, 23, 151, 37, 77, 73
-r5_add: ;# 10536153434571861004
+r5_add:
+ ;#/ 10536153434571861004
db 12, 236, 170, 206, 185, 239, 55, 146
-r6_add: ;# 3398623926847679864
+r6_add:
+ ;#/ 3398623926847679864
db 120, 45, 230, 108, 116, 86, 42, 47
-r7_add: ;# 9549104520008361294
+r7_add:
+ ;#/ 9549104520008361294
db 78, 229, 44, 182, 247, 59, 133, 132
\ No newline at end of file
diff --git a/src/main.cpp b/src/main.cpp
index 9410881..36cd800 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -301,6 +301,7 @@ int main(int argc, char** argv) {
RandomX::JitCompilerX86 jit86;
jit86.generateSuperScalarHash(programs);
jit86.getDatasetInitFunc()(cache.memory, dataset.dataset.memory, 0, datasetBlockCount);
+ //dump((const char*)dataset.dataset.memory, RANDOMX_DATASET_SIZE, "dataset.dat");
}
else {
if (initThreadCount > 1) {
@@ -331,10 +332,12 @@ int main(int argc, char** argv) {
else {
if (jit && useSuperscalar)
vm = new RandomX::CompiledLightVirtualMachine();
- else if(jit)
+ else if (jit)
vm = new RandomX::CompiledLightVirtualMachine();
+ else if (useSuperscalar)
+ vm = new RandomX::InterpretedVirtualMachine(softAes);
else
- vm = new RandomX::InterpretedVirtualMachine(softAes);
+ vm = new RandomX::InterpretedVirtualMachine(softAes);
}
vm->setDataset(dataset, datasetSize, programs);
vms.push_back(vm);