diff --git a/.gitignore b/.gitignore
index 35c1e9a..dd437d1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,4 +3,6 @@ obj/
*.user
*.suo
.vs
-x64
+x64/
+Release/
+Debug/
\ No newline at end of file
diff --git a/makefile b/makefile
index 5585b2b..3b39f4b 100644
--- a/makefile
+++ b/makefile
@@ -3,7 +3,7 @@
AR=gcc-ar
PLATFORM=$(shell uname -m)
CXXFLAGS=-std=c++11
-CCFLAGS=
+CCFLAGS=-std=c99
ARFLAGS=rcs
BINDIR=bin
SRCDIR=src
@@ -80,7 +80,8 @@ $(OBJDIR)/dataset.o: $(SRCDIR)/dataset.cpp $(SRCDIR)/common.hpp $(SRCDIR)/blake2
$(SRCDIR)/configuration.h $(SRCDIR)/randomx.h $(SRCDIR)/dataset.hpp \
$(SRCDIR)/superscalar_program.hpp $(SRCDIR)/instruction.hpp $(SRCDIR)/jit_compiler_x86.hpp \
$(SRCDIR)/allocator.hpp $(SRCDIR)/virtual_memory.hpp $(SRCDIR)/superscalar.hpp \
- $(SRCDIR)/blake2_generator.hpp $(SRCDIR)/reciprocal.h $(SRCDIR)/argon2.h $(SRCDIR)/argon2_core.h
+ $(SRCDIR)/blake2_generator.hpp $(SRCDIR)/reciprocal.h $(SRCDIR)/argon2.h $(SRCDIR)/argon2_core.h \
+ $(SRCDIR)/intrin_portable.h
$(OBJDIR)/jit_compiler_x86.o: $(SRCDIR)/jit_compiler_x86.cpp $(SRCDIR)/jit_compiler_x86.hpp \
$(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h \
$(SRCDIR)/jit_compiler_x86_static.hpp $(SRCDIR)/superscalar.hpp \
@@ -90,7 +91,6 @@ $(OBJDIR)/jit_compiler_x86.o: $(SRCDIR)/jit_compiler_x86.cpp $(SRCDIR)/jit_compi
$(OBJDIR)/jit_compiler_x86_static.o: $(SRCDIR)/jit_compiler_x86_static.S \
$(SRCDIR)/asm/program_prologue_linux.inc $(SRCDIR)/asm/program_xmm_constants.inc \
$(SRCDIR)/asm/program_loop_load.inc $(SRCDIR)/asm/program_read_dataset.inc \
- $(SRCDIR)/asm/program_read_dataset_light.inc \
$(SRCDIR)/asm/program_read_dataset_sshash_init.inc \
$(SRCDIR)/asm/program_read_dataset_sshash_fin.inc \
$(SRCDIR)/asm/program_loop_store.inc $(SRCDIR)/asm/program_epilogue_linux.inc \
diff --git a/src/asm/program_read_dataset_light.inc b/src/asm/program_read_dataset_light.inc
deleted file mode 100644
index 65d2b8d..0000000
--- a/src/asm/program_read_dataset_light.inc
+++ /dev/null
@@ -1,5 +0,0 @@
- xor rbp, rax ;# modify "mx"
- ror rbp, 32 ;# swap "ma" and "mx"
- mov ecx, ebp ;# ecx = ma
- and ecx, 2147483584 ;# align "ma" to the start of a cache line
- shr ecx, 6 ;# ecx = Dataset block number
diff --git a/src/assembly_generator_x86.cpp b/src/assembly_generator_x86.cpp
index b73f3a8..165d016 100644
--- a/src/assembly_generator_x86.cpp
+++ b/src/assembly_generator_x86.cpp
@@ -27,12 +27,12 @@ along with RandomX. If not, see <http://www.gnu.org/licenses/>.
namespace randomx {
- static const char* regR[8] = { "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" };
- static const char* regR32[8] = { "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d" };
- static const char* regFE[8] = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" };
- static const char* regF[4] = { "xmm0", "xmm1", "xmm2", "xmm3" };
- static const char* regE[4] = { "xmm4", "xmm5", "xmm6", "xmm7" };
- static const char* regA[4] = { "xmm8", "xmm9", "xmm10", "xmm11" };
+ static const char* regR[] = { "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" };
+ static const char* regR32[] = { "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d" };
+ static const char* regFE[] = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" };
+ static const char* regF[] = { "xmm0", "xmm1", "xmm2", "xmm3" };
+ static const char* regE[] = { "xmm4", "xmm5", "xmm6", "xmm7" };
+ static const char* regA[] = { "xmm8", "xmm9", "xmm10", "xmm11" };
static const char* tempRegx = "xmm12";
static const char* mantissaMask = "xmm13";
@@ -49,7 +49,9 @@ namespace randomx {
}
asmCode.str(std::string()); //clear
for (unsigned i = 0; i < prog.getSize(); ++i) {
+#if RANDOMX_JUMP
asmCode << "randomx_isn_" << i << ":" << std::endl;
+#endif
Instruction& instr = prog(i);
instr.src %= RegistersCount;
instr.dst %= RegistersCount;
@@ -469,14 +471,14 @@ namespace randomx {
}
void AssemblyGeneratorX86::h_FADD_R(Instruction& instr, int i) {
- instr.dst %= 4;
- instr.src %= 4;
+ instr.dst %= RegisterCountFlt;
+ instr.src %= RegisterCountFlt;
asmCode << "\taddpd " << regF[instr.dst] << ", " << regA[instr.src] << std::endl;
traceflt(instr);
}
void AssemblyGeneratorX86::h_FADD_M(Instruction& instr, int i) {
- instr.dst %= 4;
+ instr.dst %= RegisterCountFlt;
genAddressReg(instr);
asmCode << "\tcvtdq2pd " << tempRegx << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl;
asmCode << "\taddpd " << regF[instr.dst] << ", " << tempRegx << std::endl;
@@ -484,14 +486,14 @@ namespace randomx {
}
void AssemblyGeneratorX86::h_FSUB_R(Instruction& instr, int i) {
- instr.dst %= 4;
- instr.src %= 4;
+ instr.dst %= RegisterCountFlt;
+ instr.src %= RegisterCountFlt;
asmCode << "\tsubpd " << regF[instr.dst] << ", " << regA[instr.src] << std::endl;
traceflt(instr);
}
void AssemblyGeneratorX86::h_FSUB_M(Instruction& instr, int i) {
- instr.dst %= 4;
+ instr.dst %= RegisterCountFlt;
genAddressReg(instr);
asmCode << "\tcvtdq2pd " << tempRegx << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl;
asmCode << "\tsubpd " << regF[instr.dst] << ", " << tempRegx << std::endl;
@@ -499,20 +501,20 @@ namespace randomx {
}
void AssemblyGeneratorX86::h_FSCAL_R(Instruction& instr, int i) {
- instr.dst %= 4;
+ instr.dst %= RegisterCountFlt;
asmCode << "\txorps " << regF[instr.dst] << ", " << scaleMask << std::endl;
traceflt(instr);
}
void AssemblyGeneratorX86::h_FMUL_R(Instruction& instr, int i) {
- instr.dst %= 4;
- instr.src %= 4;
+ instr.dst %= RegisterCountFlt;
+ instr.src %= RegisterCountFlt;
asmCode << "\tmulpd " << regE[instr.dst] << ", " << regA[instr.src] << std::endl;
traceflt(instr);
}
void AssemblyGeneratorX86::h_FDIV_M(Instruction& instr, int i) {
- instr.dst %= 4;
+ instr.dst %= RegisterCountFlt;
genAddressReg(instr);
asmCode << "\tcvtdq2pd " << tempRegx << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl;
asmCode << "\tandps " << tempRegx << ", " << mantissaMask << std::endl;
@@ -522,7 +524,7 @@ namespace randomx {
}
void AssemblyGeneratorX86::h_FSQRT_R(Instruction& instr, int i) {
- instr.dst %= 4;
+ instr.dst %= RegisterCountFlt;
asmCode << "\tsqrtpd " << regE[instr.dst] << ", " << regE[instr.dst] << std::endl;
traceflt(instr);
}
@@ -566,7 +568,7 @@ namespace randomx {
void AssemblyGeneratorX86::handleCondition(Instruction& instr, int i) {
const int shift = instr.getModShift();
- const int conditionMask = ((1 << RANDOMX_CONDITION_BITS) - 1) << shift;
+ const int conditionMask = ((1 << RANDOMX_JUMP_BITS) - 1) << shift;
int reg = getConditionRegister();
int target = registerUsage[reg] + 1;
registerUsage[reg] = i;
@@ -579,7 +581,9 @@ namespace randomx {
}
void AssemblyGeneratorX86::h_COND_R(Instruction& instr, int i) {
+#if RANDOMX_JUMP
handleCondition(instr, i);
+#endif
asmCode << "\txor ecx, ecx" << std::endl;
asmCode << "\tcmp " << regR32[instr.src] << ", " << (int32_t)instr.getImm32() << std::endl;
asmCode << "\tset" << condition(instr) << " cl" << std::endl;
@@ -602,7 +606,6 @@ namespace randomx {
#define INST_HANDLE(x) REPN(&AssemblyGeneratorX86::h_##x, WT(x))
InstructionGenerator AssemblyGeneratorX86::engine[256] = {
- //Integer
INST_HANDLE(IADD_RS)
INST_HANDLE(IADD_M)
INST_HANDLE(ISUB_R)
@@ -620,27 +623,18 @@ namespace randomx {
INST_HANDLE(IROR_R)
INST_HANDLE(IROL_R)
INST_HANDLE(ISWAP_R)
-
- //Common floating point
INST_HANDLE(FSWAP_R)
-
- //Floating point group F
INST_HANDLE(FADD_R)
INST_HANDLE(FADD_M)
INST_HANDLE(FSUB_R)
INST_HANDLE(FSUB_M)
INST_HANDLE(FSCAL_R)
-
- //Floating point group E
INST_HANDLE(FMUL_R)
INST_HANDLE(FDIV_M)
INST_HANDLE(FSQRT_R)
-
- //Control
INST_HANDLE(COND_R)
INST_HANDLE(CFROUND)
INST_HANDLE(ISTORE)
-
INST_HANDLE(NOP)
};
}
\ No newline at end of file
diff --git a/src/assembly_generator_x86.hpp b/src/assembly_generator_x86.hpp
index 60ea7ab..1c27364 100644
--- a/src/assembly_generator_x86.hpp
+++ b/src/assembly_generator_x86.hpp
@@ -19,6 +19,7 @@ along with RandomX. If not, see <http://www.gnu.org/licenses/>.
#pragma once
+#include "common.hpp"
#include <sstream>
namespace randomx {
@@ -48,40 +49,40 @@ namespace randomx {
void traceint(Instruction&);
void traceflt(Instruction&);
void tracenop(Instruction&);
- void h_IADD_RS(Instruction&, int);
- void h_IADD_M(Instruction&, int);
- void h_ISUB_R(Instruction&, int);
- void h_ISUB_M(Instruction&, int);
- void h_IMUL_R(Instruction&, int);
- void h_IMUL_M(Instruction&, int);
- void h_IMULH_R(Instruction&, int);
- void h_IMULH_M(Instruction&, int);
- void h_ISMULH_R(Instruction&, int);
- void h_ISMULH_M(Instruction&, int);
- void h_IMUL_RCP(Instruction&, int);
- void h_ISDIV_C(Instruction&, int);
- void h_INEG_R(Instruction&, int);
- void h_IXOR_R(Instruction&, int);
- void h_IXOR_M(Instruction&, int);
- void h_IROR_R(Instruction&, int);
- void h_IROL_R(Instruction&, int);
- void h_ISWAP_R(Instruction&, int);
- void h_FSWAP_R(Instruction&, int);
- void h_FADD_R(Instruction&, int);
- void h_FADD_M(Instruction&, int);
- void h_FSUB_R(Instruction&, int);
- void h_FSUB_M(Instruction&, int);
- void h_FSCAL_R(Instruction&, int);
- void h_FMUL_R(Instruction&, int);
- void h_FDIV_M(Instruction&, int);
- void h_FSQRT_R(Instruction&, int);
- void h_COND_R(Instruction&, int);
- void h_CFROUND(Instruction&, int);
- void h_ISTORE(Instruction&, int);
- void h_NOP(Instruction&, int);
+ void h_IADD_RS(Instruction&, int);
+ void h_IADD_M(Instruction&, int);
+ void h_ISUB_R(Instruction&, int);
+ void h_ISUB_M(Instruction&, int);
+ void h_IMUL_R(Instruction&, int);
+ void h_IMUL_M(Instruction&, int);
+ void h_IMULH_R(Instruction&, int);
+ void h_IMULH_M(Instruction&, int);
+ void h_ISMULH_R(Instruction&, int);
+ void h_ISMULH_M(Instruction&, int);
+ void h_IMUL_RCP(Instruction&, int);
+ void h_ISDIV_C(Instruction&, int);
+ void h_INEG_R(Instruction&, int);
+ void h_IXOR_R(Instruction&, int);
+ void h_IXOR_M(Instruction&, int);
+ void h_IROR_R(Instruction&, int);
+ void h_IROL_R(Instruction&, int);
+ void h_ISWAP_R(Instruction&, int);
+ void h_FSWAP_R(Instruction&, int);
+ void h_FADD_R(Instruction&, int);
+ void h_FADD_M(Instruction&, int);
+ void h_FSUB_R(Instruction&, int);
+ void h_FSUB_M(Instruction&, int);
+ void h_FSCAL_R(Instruction&, int);
+ void h_FMUL_R(Instruction&, int);
+ void h_FDIV_M(Instruction&, int);
+ void h_FSQRT_R(Instruction&, int);
+ void h_COND_R(Instruction&, int);
+ void h_CFROUND(Instruction&, int);
+ void h_ISTORE(Instruction&, int);
+ void h_NOP(Instruction&, int);
static InstructionGenerator engine[256];
std::stringstream asmCode;
- int registerUsage[8];
+ int registerUsage[RegistersCount];
};
}
\ No newline at end of file
diff --git a/src/common.hpp b/src/common.hpp
index f7a6b1a..3c483bf 100644
--- a/src/common.hpp
+++ b/src/common.hpp
@@ -51,8 +51,6 @@ namespace randomx {
static_assert(wtSum == 256, "Sum of instruction frequencies must be 256.");
- using addr_t = uint32_t;
-
constexpr int ArgonBlockSize = 1024;
constexpr int ArgonSaltSize = sizeof(RANDOMX_ARGON_SALT) - 1;
constexpr int CacheLineSize = RANDOMX_DATASET_ITEM_SIZE;
@@ -78,6 +76,10 @@ namespace randomx {
#endif
#endif
+#define RANDOMX_JUMP (RANDOMX_JUMP_BITS > 0)
+
+ using addr_t = uint32_t;
+
using int_reg_t = uint64_t;
struct fpu_reg_t {
@@ -95,6 +97,7 @@ namespace randomx {
constexpr int ScratchpadL3Mask = (ScratchpadL3 - 1) * 8;
constexpr int ScratchpadL3Mask64 = (ScratchpadL3 / 8 - 1) * 64;
constexpr int RegistersCount = 8;
+ constexpr int RegisterCountFlt = RegistersCount / 2;
constexpr int RegisterNeedsDisplacement = 5; //x86 r13 register
constexpr int RegisterNeedsSib = 4; //x86 r12 register
@@ -118,5 +121,3 @@ namespace randomx {
typedef void(*CacheDeallocFunc)(randomx_cache*);
typedef void(*CacheInitializeFunc)(randomx_cache*, const void*, size_t);
}
-
-std::ostream& operator<<(std::ostream& os, const randomx::RegisterFile& rf);
diff --git a/src/configuration.h b/src/configuration.h
index e25b061..d155e4e 100644
--- a/src/configuration.h
+++ b/src/configuration.h
@@ -34,7 +34,10 @@ along with RandomX. If not, see <http://www.gnu.org/licenses/>.
//Number of random Cache accesses per Dataset item. Minimum is 2.
#define RANDOMX_CACHE_ACCESSES 8
+//Target latency for SuperscalarHash (in cycles of the reference CPU).
#define RANDOMX_SUPERSCALAR_LATENCY 170
+
+//The maximum size of a SuperscalarHash program (number of instructions).
#define RANDOMX_SUPERSCALAR_MAX_SIZE 512
//Dataset base size in bytes. Must be a power of 2.
@@ -61,8 +64,8 @@ along with RandomX. If not, see <http://www.gnu.org/licenses/>.
//Scratchpad L1 size in bytes. Must be a power of two and less than or equal to RANDOMX_SCRATCHPAD_L2.
#define RANDOMX_SCRATCHPAD_L1 (16 * 1024)
-//How many register bits must be zero for a jump condition to be triggered
-#define RANDOMX_CONDITION_BITS 7
+//How many register bits must be zero for a jump condition to be triggered. If set to 0, jumps are disabled.
+#define RANDOMX_JUMP_BITS 7
/*
Instruction frequencies (per 256 opcodes)
diff --git a/src/dataset.cpp b/src/dataset.cpp
index 8321797..31c2adb 100644
--- a/src/dataset.cpp
+++ b/src/dataset.cpp
@@ -39,6 +39,8 @@ along with RandomX. If not, see <http://www.gnu.org/licenses/>.
#include "blake2/endian.h"
#include "argon2.h"
#include "argon2_core.h"
+#include "jit_compiler_x86.hpp"
+#include "intrin_portable.h"
static_assert(RANDOMX_ARGON_MEMORY % (RANDOMX_ARGON_LANES * ARGON2_SYNC_POINTS) == 0, "RANDOMX_ARGON_MEMORY - invalid value");
static_assert(ARGON2_BLOCK_SIZE == randomx::ArgonBlockSize, "Unpexpected value of ARGON2_BLOCK_SIZE");
@@ -146,6 +148,7 @@ namespace randomx {
rl[7] = rl[0] ^ superscalarAdd7;
for (unsigned i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) {
mixBlock = getMixBlock(registerValue, cache->memory);
+ PREFETCHNTA(mixBlock);
SuperscalarProgram& prog = cache->programs[i];
executeSuperscalar(rl, prog, &cache->reciprocalCache);
diff --git a/src/dataset.hpp b/src/dataset.hpp
index 4e072ff..4458017 100644
--- a/src/dataset.hpp
+++ b/src/dataset.hpp
@@ -24,7 +24,6 @@ along with RandomX. If not, see <http://www.gnu.org/licenses/>.
#include
#include "common.hpp"
#include "superscalar_program.hpp"
-#include "jit_compiler_x86.hpp"
#include "allocator.hpp"
/* Global scope for C binding */
@@ -33,6 +32,10 @@ struct randomx_dataset {
randomx::DatasetDeallocFunc dealloc;
};
+namespace randomx {
+ class JitCompilerX86;
+}
+
/* Global scope for C binding */
struct randomx_cache {
uint8_t* memory = nullptr;
diff --git a/src/instruction.cpp b/src/instruction.cpp
index 9f1b681..e1dc557 100644
--- a/src/instruction.cpp
+++ b/src/instruction.cpp
@@ -29,12 +29,12 @@ namespace randomx {
}
void Instruction::genAddressReg(std::ostream& os) const {
- os << ((mod % 4) ? "L1" : "L2") << "[r" << (int)src << std::showpos << (int32_t)getImm32() << std::noshowpos << "]";
+ os << (getModMem() ? "L1" : "L2") << "[r" << (int)src << std::showpos << (int32_t)getImm32() << std::noshowpos << "]";
}
void Instruction::genAddressRegDst(std::ostream& os) const {
if (getModCond())
- os << ((mod % 4) ? "L1" : "L2");
+ os << (getModMem() ? "L1" : "L2");
else
os << "L3";
os << "[r" << (int)dst << std::showpos << (int32_t)getImm32() << std::noshowpos << "]";
@@ -49,7 +49,7 @@ namespace randomx {
if(dst == RegisterNeedsDisplacement) {
os << ", " << (int32_t)getImm32();
}
- os << ", LSH " << (int)(mod % 4) << std::endl;
+ os << ", LSH " << (int)getModMem() << std::endl;
}
void Instruction::h_IADD_M(std::ostream& os) const {
@@ -65,7 +65,6 @@ namespace randomx {
}
}
- //1 uOP
void Instruction::h_ISUB_R(std::ostream& os) const {
if (src != dst) {
os << "r" << (int)dst << ", r" << (int)src << std::endl;
@@ -197,57 +196,57 @@ namespace randomx {
}
void Instruction::h_FSWAP_R(std::ostream& os) const {
- const char reg = (dst >= 4) ? 'e' : 'f';
- auto dstIndex = dst % 4;
+ const char reg = (dst >= RegisterCountFlt) ? 'e' : 'f';
+ auto dstIndex = dst % RegisterCountFlt;
os << reg << dstIndex << std::endl;
}
void Instruction::h_FADD_R(std::ostream& os) const {
- auto dstIndex = dst % 4;
- auto srcIndex = src % 4;
+ auto dstIndex = dst % RegisterCountFlt;
+ auto srcIndex = src % RegisterCountFlt;
os << "f" << dstIndex << ", a" << srcIndex << std::endl;
}
void Instruction::h_FADD_M(std::ostream& os) const {
- auto dstIndex = dst % 4;
+ auto dstIndex = dst % RegisterCountFlt;
os << "f" << dstIndex << ", ";
genAddressReg(os);
os << std::endl;
}
void Instruction::h_FSUB_R(std::ostream& os) const {
- auto dstIndex = dst % 4;
- auto srcIndex = src % 4;
+ auto dstIndex = dst % RegisterCountFlt;
+ auto srcIndex = src % RegisterCountFlt;
os << "f" << dstIndex << ", a" << srcIndex << std::endl;
}
void Instruction::h_FSUB_M(std::ostream& os) const {
- auto dstIndex = dst % 4;
+ auto dstIndex = dst % RegisterCountFlt;
os << "f" << dstIndex << ", ";
genAddressReg(os);
os << std::endl;
}
void Instruction::h_FSCAL_R(std::ostream& os) const {
- auto dstIndex = dst % 4;
+ auto dstIndex = dst % RegisterCountFlt;
os << "f" << dstIndex << std::endl;
}
void Instruction::h_FMUL_R(std::ostream& os) const {
- auto dstIndex = dst % 4;
- auto srcIndex = src % 4;
+ auto dstIndex = dst % RegisterCountFlt;
+ auto srcIndex = src % RegisterCountFlt;
os << "e" << dstIndex << ", a" << srcIndex << std::endl;
}
void Instruction::h_FDIV_M(std::ostream& os) const {
- auto dstIndex = dst % 4;
+ auto dstIndex = dst % RegisterCountFlt;
os << "e" << dstIndex << ", ";
genAddressReg(os);
os << std::endl;
}
void Instruction::h_FSQRT_R(std::ostream& os) const {
- auto dstIndex = dst % 4;
+ auto dstIndex = dst % RegisterCountFlt;
os << "e" << dstIndex << std::endl;
}
@@ -280,7 +279,7 @@ namespace randomx {
}
void Instruction::h_COND_R(std::ostream& os) const {
- os << "r" << (int)dst << ", " << condition((mod >> 2) & 7) << "(r" << (int)src << ", " << (int32_t)getImm32() << "), LSH " << (int)(mod >> 5) << std::endl;
+ os << "r" << (int)dst << ", " << condition(getModCond()) << "(r" << (int)src << ", " << (int32_t)getImm32() << "), LSH " << (int)(getModShift()) << std::endl;
}
void Instruction::h_ISTORE(std::ostream& os) const {
@@ -297,7 +296,6 @@ namespace randomx {
#define INST_HANDLE(x) REPN(&Instruction::h_##x, WT(x))
const char* Instruction::names[256] = {
- //Integer
INST_NAME(IADD_RS)
INST_NAME(IADD_M)
INST_NAME(ISUB_R)
@@ -314,33 +312,22 @@ namespace randomx {
INST_NAME(IXOR_M)
INST_NAME(IROR_R)
INST_NAME(ISWAP_R)
-
- //Common floating point
INST_NAME(FSWAP_R)
-
- //Floating point group F
INST_NAME(FADD_R)
INST_NAME(FADD_M)
INST_NAME(FSUB_R)
INST_NAME(FSUB_M)
INST_NAME(FSCAL_R)
-
- //Floating point group E
INST_NAME(FMUL_R)
INST_NAME(FDIV_M)
INST_NAME(FSQRT_R)
-
- //Control
INST_NAME(COND_R)
INST_NAME(CFROUND)
-
INST_NAME(ISTORE)
-
INST_NAME(NOP)
};
InstructionFormatter Instruction::engine[256] = {
- //Integer
INST_HANDLE(IADD_RS)
INST_HANDLE(IADD_M)
INST_HANDLE(ISUB_R)
@@ -358,22 +345,15 @@ namespace randomx {
INST_HANDLE(IROR_R)
INST_HANDLE(IROL_R)
INST_HANDLE(ISWAP_R)
-
- //Common floating point
INST_HANDLE(FSWAP_R)
-
- //Floating point group F
INST_HANDLE(FADD_R)
INST_HANDLE(FADD_M)
INST_HANDLE(FSUB_R)
INST_HANDLE(FSUB_M)
INST_HANDLE(FSCAL_R)
-
- //Floating point group E
INST_HANDLE(FMUL_R)
INST_HANDLE(FDIV_M)
INST_HANDLE(FSQRT_R)
-
INST_HANDLE(COND_R)
INST_HANDLE(CFROUND)
INST_HANDLE(ISTORE)
diff --git a/src/instruction.hpp b/src/instruction.hpp
index 0dc382f..f6dbc3b 100644
--- a/src/instruction.hpp
+++ b/src/instruction.hpp
@@ -103,36 +103,36 @@ namespace randomx {
void genAddressReg(std::ostream& os) const;
void genAddressImm(std::ostream& os) const;
void genAddressRegDst(std::ostream&) const;
- void h_IADD_RS(std::ostream&) const;
- void h_IADD_M(std::ostream&) const;
- void h_ISUB_R(std::ostream&) const;
- void h_ISUB_M(std::ostream&) const;
- void h_IMUL_R(std::ostream&) const;
- void h_IMUL_M(std::ostream&) const;
- void h_IMULH_R(std::ostream&) const;
- void h_IMULH_M(std::ostream&) const;
- void h_ISMULH_R(std::ostream&) const;
- void h_ISMULH_M(std::ostream&) const;
- void h_IMUL_RCP(std::ostream&) const;
- void h_INEG_R(std::ostream&) const;
- void h_IXOR_R(std::ostream&) const;
- void h_IXOR_M(std::ostream&) const;
- void h_IROR_R(std::ostream&) const;
- void h_IROL_R(std::ostream&) const;
- void h_ISWAP_R(std::ostream&) const;
- void h_FSWAP_R(std::ostream&) const;
- void h_FADD_R(std::ostream&) const;
- void h_FADD_M(std::ostream&) const;
- void h_FSUB_R(std::ostream&) const;
- void h_FSUB_M(std::ostream&) const;
- void h_FSCAL_R(std::ostream&) const;
- void h_FMUL_R(std::ostream&) const;
- void h_FDIV_M(std::ostream&) const;
- void h_FSQRT_R(std::ostream&) const;
- void h_COND_R(std::ostream&) const;
- void h_CFROUND(std::ostream&) const;
- void h_ISTORE(std::ostream&) const;
- void h_NOP(std::ostream&) const;
+ void h_IADD_RS(std::ostream&) const;
+ void h_IADD_M(std::ostream&) const;
+ void h_ISUB_R(std::ostream&) const;
+ void h_ISUB_M(std::ostream&) const;
+ void h_IMUL_R(std::ostream&) const;
+ void h_IMUL_M(std::ostream&) const;
+ void h_IMULH_R(std::ostream&) const;
+ void h_IMULH_M(std::ostream&) const;
+ void h_ISMULH_R(std::ostream&) const;
+ void h_ISMULH_M(std::ostream&) const;
+ void h_IMUL_RCP(std::ostream&) const;
+ void h_INEG_R(std::ostream&) const;
+ void h_IXOR_R(std::ostream&) const;
+ void h_IXOR_M(std::ostream&) const;
+ void h_IROR_R(std::ostream&) const;
+ void h_IROL_R(std::ostream&) const;
+ void h_ISWAP_R(std::ostream&) const;
+ void h_FSWAP_R(std::ostream&) const;
+ void h_FADD_R(std::ostream&) const;
+ void h_FADD_M(std::ostream&) const;
+ void h_FSUB_R(std::ostream&) const;
+ void h_FSUB_M(std::ostream&) const;
+ void h_FSCAL_R(std::ostream&) const;
+ void h_FMUL_R(std::ostream&) const;
+ void h_FDIV_M(std::ostream&) const;
+ void h_FSQRT_R(std::ostream&) const;
+ void h_COND_R(std::ostream&) const;
+ void h_CFROUND(std::ostream&) const;
+ void h_ISTORE(std::ostream&) const;
+ void h_NOP(std::ostream&) const;
};
static_assert(sizeof(Instruction) == 8, "Invalid size of struct randomx::Instruction");
diff --git a/src/intrin_portable.h b/src/intrin_portable.h
index 32aba08..a28ab66 100644
--- a/src/intrin_portable.h
+++ b/src/intrin_portable.h
@@ -318,18 +318,6 @@ constexpr uint64_t ieee_get_exponent_mask() {
return (uint64_t)(E + 1023U) << 52;
}
-template<int E>
-__m128d ieee_set_exponent(__m128d x) {
- static_assert(E > -1023, "Invalid exponent value");
- constexpr uint64_t mantissaMask64 = (1ULL << 52) - 1;
- const __m128d mantissaMask = _mm_castsi128_pd(_mm_set_epi64x(mantissaMask64, mantissaMask64));
- constexpr uint64_t exponent64 = (uint64_t)(E + 1023U) << 52;
- const __m128d exponentMask = _mm_castsi128_pd(_mm_set_epi64x(exponent64, exponent64));
- x = _mm_and_pd(x, mantissaMask);
- x = _mm_or_pd(x, exponentMask);
- return x;
-}
-
double loadDoublePortable(const void* addr);
uint64_t mulh(uint64_t, uint64_t);
int64_t smulh(int64_t, int64_t);
diff --git a/src/jit_compiler_x86.cpp b/src/jit_compiler_x86.cpp
index 2480aa2..7ada8e7 100644
--- a/src/jit_compiler_x86.cpp
+++ b/src/jit_compiler_x86.cpp
@@ -20,8 +20,6 @@ along with RandomX. If not, see <http://www.gnu.org/licenses/>.
#include
#include "jit_compiler_x86.hpp"
-#define RANDOMX_JUMP
-
#if !defined(_M_X64) && !defined(__x86_64__)
namespace randomx {
@@ -113,7 +111,6 @@ namespace randomx {
const uint8_t* codeLoopLoad = (uint8_t*)&randomx_program_loop_load;
const uint8_t* codeProgamStart = (uint8_t*)&randomx_program_start;
const uint8_t* codeReadDataset = (uint8_t*)&randomx_program_read_dataset;
- const uint8_t* codeReadDatasetLight = (uint8_t*)&randomx_program_read_dataset_light;
const uint8_t* codeReadDatasetLightSshInit = (uint8_t*)&randomx_program_read_dataset_sshash_init;
const uint8_t* codeReadDatasetLightSshFin = (uint8_t*)&randomx_program_read_dataset_sshash_fin;
const uint8_t* codeDatasetInit = (uint8_t*)&randomx_dataset_init;
@@ -128,8 +125,7 @@ namespace randomx {
const int32_t prologueSize = codeLoopBegin - codePrologue;
const int32_t loopLoadSize = codeProgamStart - codeLoopLoad;
- const int32_t readDatasetSize = codeReadDatasetLight - codeReadDataset;
- const int32_t readDatasetLightSize = codeReadDatasetLightSshInit - codeReadDatasetLight;
+ const int32_t readDatasetSize = codeReadDatasetLightSshInit - codeReadDataset;
const int32_t readDatasetLightInitSize = codeReadDatasetLightSshFin - codeReadDatasetLightSshInit;
const int32_t readDatasetLightFinSize = codeLoopStore - codeReadDatasetLightSshFin;
const int32_t loopStoreSize = codeLoopEnd - codeLoopStore;
@@ -299,7 +295,7 @@ namespace randomx {
}
void JitCompilerX86::generateProgramPrologue(Program& prog, ProgramConfiguration& pcfg) {
-#ifdef RANDOMX_JUMP
+#if RANDOMX_JUMP
instructionOffsets.clear();
for (unsigned i = 0; i < 8; ++i) {
registerUsage[i] = -1;
@@ -336,7 +332,7 @@ namespace randomx {
}
void JitCompilerX86::generateCode(Instruction& instr, int i) {
-#ifdef RANDOMX_JUMP
+#if RANDOMX_JUMP
instructionOffsets.push_back(codePos);
#endif
auto generator = engine[instr.opcode];
@@ -467,15 +463,6 @@ namespace randomx {
void JitCompilerX86::h_IADD_RS(Instruction& instr, int i) {
registerUsage[instr.dst] = i;
- /*if (instr.src != instr.dst) {
- emit(REX_ADD_RR);
- emitByte(0xc0 + 8 * instr.dst + instr.src);
- }
- else {
- emit(REX_81);
- emitByte(0xc0 + instr.dst);
- emit32(instr.getImm32());
- }*/
emit(REX_LEA);
if (instr.dst == RegisterNeedsDisplacement)
emitByte(0xac);
@@ -505,14 +492,6 @@ namespace randomx {
emitByte((scale << 6) | (index << 3) | base);
}
- void JitCompilerX86::h_IADD_RC(Instruction& instr, int i) {
- registerUsage[instr.dst] = i;
- emit(REX_LEA);
- emitByte(0x84 + 8 * instr.dst);
- genSIB(0, instr.src, instr.dst);
- emit32(instr.getImm32());
- }
-
void JitCompilerX86::h_ISUB_R(Instruction& instr, int i) {
registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
@@ -541,14 +520,6 @@ namespace randomx {
}
}
- void JitCompilerX86::h_IMUL_9C(Instruction& instr, int i) {
- registerUsage[instr.dst] = i;
- emit(REX_LEA);
- emitByte(0x84 + 8 * instr.dst);
- genSIB(3, instr.dst, instr.dst);
- emit32(instr.getImm32());
- }
-
void JitCompilerX86::h_IMUL_R(Instruction& instr, int i) {
registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
@@ -645,10 +616,6 @@ namespace randomx {
}
}
- void JitCompilerX86::h_ISDIV_C(Instruction& instr, int i) {
-
- }
-
void JitCompilerX86::h_INEG_R(Instruction& instr, int i) {
registerUsage[instr.dst] = i;
emit(REX_NEG);
@@ -729,17 +696,14 @@ namespace randomx {
}
void JitCompilerX86::h_FADD_R(Instruction& instr, int i) {
- instr.dst %= 4;
- instr.src %= 4;
+ instr.dst %= RegisterCountFlt;
+ instr.src %= RegisterCountFlt;
emit(REX_ADDPD);
emitByte(0xc0 + instr.src + 8 * instr.dst);
- //emit(REX_PADD);
- //emitByte(PADD_OPCODES[instr.mod % 4]);
- //emitByte(0xf8 + instr.dst);
}
void JitCompilerX86::h_FADD_M(Instruction& instr, int i) {
- instr.dst %= 4;
+ instr.dst %= RegisterCountFlt;
genAddressReg(instr);
emit(REX_CVTDQ2PD_XMM12);
emit(REX_ADDPD);
@@ -747,17 +711,14 @@ namespace randomx {
}
void JitCompilerX86::h_FSUB_R(Instruction& instr, int i) {
- instr.dst %= 4;
- instr.src %= 4;
+ instr.dst %= RegisterCountFlt;
+ instr.src %= RegisterCountFlt;
emit(REX_SUBPD);
emitByte(0xc0 + instr.src + 8 * instr.dst);
- //emit(REX_PADD);
- //emitByte(PADD_OPCODES[instr.mod % 4]);
- //emitByte(0xf8 + instr.dst);
}
void JitCompilerX86::h_FSUB_M(Instruction& instr, int i) {
- instr.dst %= 4;
+ instr.dst %= RegisterCountFlt;
genAddressReg(instr);
emit(REX_CVTDQ2PD_XMM12);
emit(REX_SUBPD);
@@ -765,40 +726,20 @@ namespace randomx {
}
void JitCompilerX86::h_FSCAL_R(Instruction& instr, int i) {
- instr.dst %= 4;
+ instr.dst %= RegisterCountFlt;
emit(REX_XORPS);
emitByte(0xc7 + 8 * instr.dst);
}
void JitCompilerX86::h_FMUL_R(Instruction& instr, int i) {
- instr.dst %= 4;
- instr.src %= 4;
+ instr.dst %= RegisterCountFlt;
+ instr.src %= RegisterCountFlt;
emit(REX_MULPD);
emitByte(0xe0 + instr.src + 8 * instr.dst);
}
- void JitCompilerX86::h_FMUL_M(Instruction& instr, int i) {
- instr.dst %= 4;
- genAddressReg(instr);
- emit(REX_CVTDQ2PD_XMM12);
- emit(REX_ANDPS_XMM12);
- emit(REX_MULPD);
- emitByte(0xe4 + 8 * instr.dst);
- emit(REX_MAXPD);
- emitByte(0xe5 + 8 * instr.dst);
- }
-
- void JitCompilerX86::h_FDIV_R(Instruction& instr, int i) {
- instr.dst %= 4;
- instr.src %= 4;
- emit(REX_DIVPD);
- emitByte(0xe0 + instr.src + 8 * instr.dst);
- emit(REX_MAXPD);
- emitByte(0xe5 + 8 * instr.dst);
- }
-
void JitCompilerX86::h_FDIV_M(Instruction& instr, int i) {
- instr.dst %= 4;
+ instr.dst %= RegisterCountFlt;
genAddressReg(instr);
emit(REX_CVTDQ2PD_XMM12);
emit(REX_ANDPS_XMM12);
@@ -807,7 +748,7 @@ namespace randomx {
}
void JitCompilerX86::h_FSQRT_R(Instruction& instr, int i) {
- instr.dst %= 4;
+ instr.dst %= RegisterCountFlt;
emit(SQRTPD);
emitByte(0xe4 + 9 * instr.dst);
}
@@ -883,7 +824,7 @@ namespace randomx {
void JitCompilerX86::handleCondition(Instruction& instr, int i) {
const int shift = instr.getModShift();
- const int conditionMask = ((1 << RANDOMX_CONDITION_BITS) - 1) << shift;
+ const int conditionMask = ((1 << RANDOMX_JUMP_BITS) - 1) << shift;
int reg = getConditionRegister();
int target = registerUsage[reg] + 1;
emit(REX_ADD_I);
@@ -900,7 +841,7 @@ namespace randomx {
}
void JitCompilerX86::h_COND_R(Instruction& instr, int i) {
-#ifdef RANDOMX_JUMP
+#if RANDOMX_JUMP
handleCondition(instr, i);
#endif
emit(XOR_ECX_ECX);
@@ -914,40 +855,15 @@ namespace randomx {
emitByte(0xc1 + 8 * instr.dst);
}
- void JitCompilerX86::h_COND_M(Instruction& instr, int i) {
-#ifdef RANDOMX_JUMP
- handleCondition(instr, i);
-#endif
- emit(XOR_ECX_ECX);
- genAddressReg(instr);
- emit(REX_CMP_M32I);
- emit32(instr.getImm32());
- emitByte(0x0f);
- emitByte(condition(instr));
- emitByte(0xc1);
- emit(REX_ADD_RM);
- emitByte(0xc1 + 8 * instr.dst);
- }
-
void JitCompilerX86::h_ISTORE(Instruction& instr, int i) {
genAddressRegDst(instr);
- //if (instr.getModCond())
emit(REX_MOV_MR);
- //else
- // emit(MOVNTI);
- emitByte(0x04 + 8 * instr.src);
- emitByte(0x06);
- }
-
- void JitCompilerX86::h_FSTORE(Instruction& instr, int i) {
- genAddressRegDst(instr, true);
- emit(MOVAPD);
emitByte(0x04 + 8 * instr.src);
emitByte(0x06);
}
void JitCompilerX86::h_NOP(Instruction& instr, int i) {
- emitByte(0x90);
+ emit(NOP1);
}
#include "instruction_weights.hpp"
diff --git a/src/jit_compiler_x86.hpp b/src/jit_compiler_x86.hpp
index 8bccb1f..964dd93 100644
--- a/src/jit_compiler_x86.hpp
+++ b/src/jit_compiler_x86.hpp
@@ -110,43 +110,36 @@ namespace randomx {
codePos += count;
}
- void h_IADD_RS(Instruction&, int);
- void h_IADD_M(Instruction&, int);
- void h_IADD_RC(Instruction&, int);
- void h_ISUB_R(Instruction&, int);
- void h_ISUB_M(Instruction&, int);
- void h_IMUL_9C(Instruction&, int);
- void h_IMUL_R(Instruction&, int);
- void h_IMUL_M(Instruction&, int);
- void h_IMULH_R(Instruction&, int);
- void h_IMULH_M(Instruction&, int);
- void h_ISMULH_R(Instruction&, int);
- void h_ISMULH_M(Instruction&, int);
- void h_IMUL_RCP(Instruction&, int);
- void h_ISDIV_C(Instruction&, int);
- void h_INEG_R(Instruction&, int);
- void h_IXOR_R(Instruction&, int);
- void h_IXOR_M(Instruction&, int);
- void h_IROR_R(Instruction&, int);
- void h_IROL_R(Instruction&, int);
- void h_ISWAP_R(Instruction&, int);
- void h_FSWAP_R(Instruction&, int);
- void h_FADD_R(Instruction&, int);
- void h_FADD_M(Instruction&, int);
- void h_FSUB_R(Instruction&, int);
- void h_FSUB_M(Instruction&, int);
- void h_FSCAL_R(Instruction&, int);
- void h_FMUL_R(Instruction&, int);
- void h_FMUL_M(Instruction&, int);
- void h_FDIV_R(Instruction&, int);
- void h_FDIV_M(Instruction&, int);
- void h_FSQRT_R(Instruction&, int);
- void h_COND_R(Instruction&, int);
- void h_COND_M(Instruction&, int);
- void h_CFROUND(Instruction&, int);
- void h_ISTORE(Instruction&, int);
- void h_FSTORE(Instruction&, int);
- void h_NOP(Instruction&, int);
+ void h_IADD_RS(Instruction&, int);
+ void h_IADD_M(Instruction&, int);
+ void h_ISUB_R(Instruction&, int);
+ void h_ISUB_M(Instruction&, int);
+ void h_IMUL_R(Instruction&, int);
+ void h_IMUL_M(Instruction&, int);
+ void h_IMULH_R(Instruction&, int);
+ void h_IMULH_M(Instruction&, int);
+ void h_ISMULH_R(Instruction&, int);
+ void h_ISMULH_M(Instruction&, int);
+ void h_IMUL_RCP(Instruction&, int);
+ void h_INEG_R(Instruction&, int);
+ void h_IXOR_R(Instruction&, int);
+ void h_IXOR_M(Instruction&, int);
+ void h_IROR_R(Instruction&, int);
+ void h_IROL_R(Instruction&, int);
+ void h_ISWAP_R(Instruction&, int);
+ void h_FSWAP_R(Instruction&, int);
+ void h_FADD_R(Instruction&, int);
+ void h_FADD_M(Instruction&, int);
+ void h_FSUB_R(Instruction&, int);
+ void h_FSUB_M(Instruction&, int);
+ void h_FSCAL_R(Instruction&, int);
+ void h_FMUL_R(Instruction&, int);
+ void h_FDIV_M(Instruction&, int);
+ void h_FSQRT_R(Instruction&, int);
+ void h_COND_R(Instruction&, int);
+ void h_CFROUND(Instruction&, int);
+ void h_ISTORE(Instruction&, int);
+ void h_NOP(Instruction&, int);
};
}
\ No newline at end of file
diff --git a/src/jit_compiler_x86_static.S b/src/jit_compiler_x86_static.S
index 3b8e82e..04dbaa9 100644
--- a/src/jit_compiler_x86_static.S
+++ b/src/jit_compiler_x86_static.S
@@ -31,7 +31,6 @@
.global DECL(randomx_program_loop_load)
.global DECL(randomx_program_start)
.global DECL(randomx_program_read_dataset)
-.global DECL(randomx_program_read_dataset_light)
.global DECL(randomx_program_read_dataset_sshash_init)
.global DECL(randomx_program_read_dataset_sshash_fin)
.global DECL(randomx_program_loop_store)
@@ -66,9 +65,6 @@ DECL(randomx_program_start):
DECL(randomx_program_read_dataset):
#include "asm/program_read_dataset.inc"
-DECL(randomx_program_read_dataset_light):
- #include "asm/program_read_dataset_light.inc"
-
DECL(randomx_program_read_dataset_sshash_init):
#include "asm/program_read_dataset_sshash_init.inc"
diff --git a/src/jit_compiler_x86_static.asm b/src/jit_compiler_x86_static.asm
index 3153a8f..92d2ebd 100644
--- a/src/jit_compiler_x86_static.asm
+++ b/src/jit_compiler_x86_static.asm
@@ -24,7 +24,6 @@ PUBLIC randomx_program_loop_begin
PUBLIC randomx_program_loop_load
PUBLIC randomx_program_start
PUBLIC randomx_program_read_dataset
-PUBLIC randomx_program_read_dataset_light
PUBLIC randomx_program_read_dataset_sshash_init
PUBLIC randomx_program_read_dataset_sshash_fin
PUBLIC randomx_dataset_init
@@ -62,10 +61,6 @@ randomx_program_read_dataset PROC
include asm/program_read_dataset.inc
randomx_program_read_dataset ENDP
-randomx_program_read_dataset_light PROC
- include asm/program_read_dataset_light.inc
-randomx_program_read_dataset_light ENDP
-
randomx_program_read_dataset_sshash_init PROC
include asm/program_read_dataset_sshash_init.inc
randomx_program_read_dataset_sshash_init ENDP
diff --git a/src/jit_compiler_x86_static.hpp b/src/jit_compiler_x86_static.hpp
index a3ce44f..09b4703 100644
--- a/src/jit_compiler_x86_static.hpp
+++ b/src/jit_compiler_x86_static.hpp
@@ -25,7 +25,6 @@ extern "C" {
void randomx_program_loop_load();
void randomx_program_start();
void randomx_program_read_dataset();
- void randomx_program_read_dataset_light();
void randomx_program_read_dataset_sshash_init();
void randomx_program_read_dataset_sshash_fin();
void randomx_program_loop_store();
diff --git a/src/virtual_machine.cpp b/src/virtual_machine.cpp
index e97fad7..3707ba7 100644
--- a/src/virtual_machine.cpp
+++ b/src/virtual_machine.cpp
@@ -76,22 +76,6 @@ void randomx_vm::initialize() {
store64(&config.eMask[1], (program.getEntropy(15) & mask22bit) | maskExp240);
}
-//TODO
-std::ostream& operator<<(std::ostream& os, const randomx::RegisterFile& rf) {
- for (int i = 0; i < randomx::RegistersCount; ++i)
- os << std::hex << "r" << i << " = " << rf.r[i] << std::endl << std::dec;
- for (int i = 0; i < 4; ++i)
- os << std::hex << "f" << i << " = " << *(uint64_t*)&rf.f[i].hi << " (" << rf.f[i].hi << ")" << std::endl
- << " = " << *(uint64_t*)&rf.f[i].lo << " (" << rf.f[i].lo << ")" << std::endl << std::dec;
- for (int i = 0; i < 4; ++i)
- os << std::hex << "e" << i << " = " << *(uint64_t*)&rf.e[i].hi << " (" << rf.e[i].hi << ")" << std::endl
- << " = " << *(uint64_t*)&rf.e[i].lo << " (" << rf.e[i].lo << ")" << std::endl << std::dec;
- for (int i = 0; i < 4; ++i)
- os << std::hex << "a" << i << " = " << *(uint64_t*)&rf.a[i].hi << " (" << rf.a[i].hi << ")" << std::endl
- << " = " << *(uint64_t*)&rf.a[i].lo << " (" << rf.a[i].lo << ")" << std::endl << std::dec;
- return os;
-}
-
namespace randomx {
alignas(16) volatile static __m128i aesDummy;
diff --git a/src/vm_interpreted.cpp b/src/vm_interpreted.cpp
index a5bba0f..2f69855 100644
--- a/src/vm_interpreted.cpp
+++ b/src/vm_interpreted.cpp
@@ -17,10 +17,6 @@ You should have received a copy of the GNU General Public License
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
*/
-//#define TRACE
-//#define FPUCHECK
-#define RANDOMX_JUMP
-
#include
#include
#include
@@ -33,12 +29,6 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
#include "intrin_portable.h"
#include "reciprocal.h"
-#ifdef FPUCHECK
-constexpr bool fpuCheck = true;
-#else
-constexpr bool fpuCheck = false;
-#endif
-
namespace randomx {
static int_reg_t Zero = 0;
@@ -53,49 +43,16 @@ namespace randomx {
void InterpretedVm::run(void* seed) {
VmBase::generateProgram(seed);
randomx_vm::initialize();
- for (unsigned i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) {
- program(i).src %= RegistersCount;
- program(i).dst %= RegistersCount;
- }
execute();
}
template
- void InterpretedVm::executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
- for (int ic = 0; ic < RANDOMX_PROGRAM_SIZE; ++ic) {
- executeBytecode(ic, r, f, e, a);
+ void InterpretedVm::executeBytecode(int_reg_t(&r)[RegistersCount], __m128d (&f)[RegisterCountFlt], __m128d (&e)[RegisterCountFlt], __m128d (&a)[RegisterCountFlt]) {
+ for (int pc = 0; pc < RANDOMX_PROGRAM_SIZE; ++pc) {
+ executeBytecode(pc, r, f, e, a);
}
}
- static void print(int_reg_t r) {
- std::cout << std::hex << std::setw(16) << std::setfill('0') << r << std::endl;
- }
-
- static void print(__m128d f) {
- uint64_t lo = *(((uint64_t*)&f) + 0);
- uint64_t hi = *(((uint64_t*)&f) + 1);
- std::cout << std::hex << std::setw(16) << std::setfill('0') << hi << '-' << std::hex << std::setw(16) << std::setfill('0') << lo << std::endl;
- }
-
- static void printState(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
- for (int i = 0; i < 8; ++i) {
- std::cout << "r" << i << " = "; print(r[i]);
- }
- for (int i = 0; i < 4; ++i) {
- std::cout << "f" << i << " = "; print(f[i]);
- }
- for (int i = 0; i < 4; ++i) {
- std::cout << "e" << i << " = "; print(e[i]);
- }
- for (int i = 0; i < 4; ++i) {
- std::cout << "a" << i << " = "; print(a[i]);
- }
- }
-
- static bool isDenormal(double x) {
- return std::fpclassify(x) == FP_SUBNORMAL;
- }
-
template
FORCE_INLINE void* InterpretedVm::getScratchpadAddress(InstructionByteCode& ibc) {
uint32_t addr = (*ibc.isrc + ibc.imm) & ibc.memMask;
@@ -113,9 +70,8 @@ namespace randomx {
}
template
- void InterpretedVm::executeBytecode(int& ic, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
- auto& ibc = byteCode[ic];
- if (trace && ibc.type != InstructionType::NOP) std::cout << std::dec << std::setw(3) << ic << " " << program(ic);
+ void InterpretedVm::executeBytecode(int& pc, int_reg_t(&r)[RegistersCount], __m128d (&f)[RegisterCountFlt], __m128d (&e)[RegisterCountFlt], __m128d (&a)[RegisterCountFlt]) {
+ auto& ibc = byteCode[pc];
switch (ibc.type)
{
case InstructionType::IADD_RS: {
@@ -225,11 +181,11 @@ namespace randomx {
} break;
case InstructionType::COND_R: {
-#ifdef RANDOMX_JUMP
+#if RANDOMX_JUMP
*ibc.creg += (1 << ibc.shift);
- const uint64_t conditionMask = ((1ULL << RANDOMX_CONDITION_BITS) - 1) << ibc.shift;
+ const uint64_t conditionMask = ((1ULL << RANDOMX_JUMP_BITS) - 1) << ibc.shift;
if ((*ibc.creg & conditionMask) == 0) {
- ic = ibc.target;
+ pc = ibc.target;
break;
}
#endif
@@ -251,50 +207,23 @@ namespace randomx {
default:
UNREACHABLE;
}
- if (trace && ibc.type != InstructionType::NOP) {
- if(ibc.type < 20 || ibc.type == 31 || ibc.type == 32)
- print(*ibc.idst);
- else //if(ibc.type >= 20 && ibc.type <= 30)
- print(0);
- }
-#ifdef FPUCHECK
- if (ibc.type >= 26 && ibc.type <= 30) {
- double lo = *(((double*)ibc.fdst) + 0);
- double hi = *(((double*)ibc.fdst) + 1);
- if (lo <= 0 || hi <= 0) {
- std::stringstream ss;
- ss << "Underflow in operation " << ibc.type;
- printState(r, f, e, a);
- throw std::runtime_error(ss.str());
- }
- }
-#endif
}
template
void InterpretedVm::execute() {
- int_reg_t r[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
- __m128d f[4];
- __m128d e[4];
- __m128d a[4];
+ int_reg_t r[RegistersCount] = { 0 };
+ __m128d f[RegisterCountFlt];
+ __m128d e[RegisterCountFlt];
+ __m128d a[RegisterCountFlt];
- a[0] = _mm_load_pd(&reg.a[0].lo);
- a[1] = _mm_load_pd(&reg.a[1].lo);
- a[2] = _mm_load_pd(&reg.a[2].lo);
- a[3] = _mm_load_pd(&reg.a[3].lo);
+ for(unsigned i = 0; i < RegisterCountFlt; ++i)
+ a[i] = _mm_load_pd(&reg.a[i].lo);
precompileProgram(r, f, e, a);
uint32_t spAddr0 = mem.mx;
uint32_t spAddr1 = mem.ma;
- if (trace) {
- std::cout << "execute (reg: r" << config.readReg0 << ", r" << config.readReg1 << ", r" << config.readReg2 << ", r" << config.readReg3 << ")" << std::endl;
- std::cout << "spAddr " << std::hex << std::setw(8) << std::setfill('0') << spAddr1 << " / " << std::setw(8) << std::setfill('0') << spAddr0 << std::endl;
- std::cout << "ma/mx " << std::hex << std::setw(8) << std::setfill('0') << mem.ma << std::setw(8) << std::setfill('0') << mem.mx << std::endl;
- printState(r, f, e, a);
- }
-
for(unsigned ic = 0; ic < RANDOMX_PROGRAM_ITERATIONS; ++ic) {
uint64_t spMix = r[config.readReg0] ^ r[config.readReg1];
spAddr0 ^= spMix;
@@ -302,31 +231,14 @@ namespace randomx {
spAddr1 ^= spMix >> 32;
spAddr1 &= ScratchpadL3Mask64;
- r[0] ^= load64(scratchpad + spAddr0 + 0);
- r[1] ^= load64(scratchpad + spAddr0 + 8);
- r[2] ^= load64(scratchpad + spAddr0 + 16);
- r[3] ^= load64(scratchpad + spAddr0 + 24);
- r[4] ^= load64(scratchpad + spAddr0 + 32);
- r[5] ^= load64(scratchpad + spAddr0 + 40);
- r[6] ^= load64(scratchpad + spAddr0 + 48);
- r[7] ^= load64(scratchpad + spAddr0 + 56);
+ for (unsigned i = 0; i < RegistersCount; ++i)
+ r[i] ^= load64(scratchpad + spAddr0 + 8 * i);
- f[0] = load_cvt_i32x2(scratchpad + spAddr1 + 0);
- f[1] = load_cvt_i32x2(scratchpad + spAddr1 + 8);
- f[2] = load_cvt_i32x2(scratchpad + spAddr1 + 16);
- f[3] = load_cvt_i32x2(scratchpad + spAddr1 + 24);
- e[0] = maskRegisterExponentMantissa(load_cvt_i32x2(scratchpad + spAddr1 + 32));
- e[1] = maskRegisterExponentMantissa(load_cvt_i32x2(scratchpad + spAddr1 + 40));
- e[2] = maskRegisterExponentMantissa(load_cvt_i32x2(scratchpad + spAddr1 + 48));
- e[3] = maskRegisterExponentMantissa(load_cvt_i32x2(scratchpad + spAddr1 + 56));
+ for (unsigned i = 0; i < RegisterCountFlt; ++i)
+ f[i] = load_cvt_i32x2(scratchpad + spAddr1 + 8 * i);
- if (trace) {
- std::cout << "iteration " << std::dec << ic << std::endl;
- std::cout << "spAddr " << std::hex << std::setw(8) << std::setfill('0') << spAddr1 << " / " << std::setw(8) << std::setfill('0') << spAddr0 << std::endl;
- std::cout << "ma/mx " << std::hex << std::setw(8) << std::setfill('0') << mem.ma << std::setw(8) << std::setfill('0') << mem.mx << std::endl;
- printState(r, f, e, a);
- std::cout << "-----------------------------------" << std::endl;
- }
+ for (unsigned i = 0; i < RegisterCountFlt; ++i)
+ e[i] = maskRegisterExponentMantissa(load_cvt_i32x2(scratchpad + spAddr1 + 8 * (RegisterCountFlt + i)));
executeBytecode(r, f, e, a);
@@ -335,72 +247,33 @@ namespace randomx {
datasetRead(datasetOffset + mem.ma, r);
std::swap(mem.mx, mem.ma);
- if (trace) {
- std::cout << "iteration " << std::dec << ic << std::endl;
- std::cout << "spAddr " << std::hex << std::setw(8) << std::setfill('0') << spAddr1 << " / " << std::setw(8) << std::setfill('0') << spAddr0 << std::endl;
- std::cout << "ma/mx " << std::hex << std::setw(8) << std::setfill('0') << mem.ma << std::setw(8) << std::setfill('0') << mem.mx << std::endl;
- printState(r, f, e, a);
- std::cout << "===================================" << std::endl;
- }
+ for (unsigned i = 0; i < RegistersCount; ++i)
+ store64(scratchpad + spAddr1 + 8 * i, r[i]);
- store64(scratchpad + spAddr1 + 0, r[0]);
- store64(scratchpad + spAddr1 + 8, r[1]);
- store64(scratchpad + spAddr1 + 16, r[2]);
- store64(scratchpad + spAddr1 + 24, r[3]);
- store64(scratchpad + spAddr1 + 32, r[4]);
- store64(scratchpad + spAddr1 + 40, r[5]);
- store64(scratchpad + spAddr1 + 48, r[6]);
- store64(scratchpad + spAddr1 + 56, r[7]);
+ for (unsigned i = 0; i < RegisterCountFlt; ++i)
+ f[i] = _mm_xor_pd(f[i], e[i]);
- f[0] = _mm_xor_pd(f[0], e[0]);
- f[1] = _mm_xor_pd(f[1], e[1]);
- f[2] = _mm_xor_pd(f[2], e[2]);
- f[3] = _mm_xor_pd(f[3], e[3]);
-
-#ifdef FPUCHECK
- for(int i = 0; i < 4; ++i) {
- double lo = *(((double*)&f[i]) + 0);
- double hi = *(((double*)&f[i]) + 1);
- if (isDenormal(lo) || isDenormal(hi)) {
- std::stringstream ss;
- ss << "Denormal f" << i;
- throw std::runtime_error(ss.str());
- }
- }
-#endif
-
- _mm_store_pd((double*)(scratchpad + spAddr0 + 0), f[0]);
- _mm_store_pd((double*)(scratchpad + spAddr0 + 16), f[1]);
- _mm_store_pd((double*)(scratchpad + spAddr0 + 32), f[2]);
- _mm_store_pd((double*)(scratchpad + spAddr0 + 48), f[3]);
+ for (unsigned i = 0; i < RegisterCountFlt; ++i)
+ _mm_store_pd((double*)(scratchpad + spAddr0 + 16 * i), f[i]);
spAddr0 = 0;
spAddr1 = 0;
}
- store64(&reg.r[0], r[0]);
- store64(&reg.r[1], r[1]);
- store64(&reg.r[2], r[2]);
- store64(&reg.r[3], r[3]);
- store64(&reg.r[4], r[4]);
- store64(&reg.r[5], r[5]);
- store64(&reg.r[6], r[6]);
- store64(&reg.r[7], r[7]);
+ for (unsigned i = 0; i < RegistersCount; ++i)
+ store64(&reg.r[i], r[i]);
- _mm_store_pd(&reg.f[0].lo, f[0]);
- _mm_store_pd(&reg.f[1].lo, f[1]);
- _mm_store_pd(&reg.f[2].lo, f[2]);
- _mm_store_pd(&reg.f[3].lo, f[3]);
- _mm_store_pd(&reg.e[0].lo, e[0]);
- _mm_store_pd(&reg.e[1].lo, e[1]);
- _mm_store_pd(&reg.e[2].lo, e[2]);
- _mm_store_pd(&reg.e[3].lo, e[3]);
+ for (unsigned i = 0; i < RegisterCountFlt; ++i)
+ _mm_store_pd(&reg.f[i].lo, f[i]);
+
+ for (unsigned i = 0; i < RegisterCountFlt; ++i)
+ _mm_store_pd(&reg.e[i].lo, e[i]);
}
- static int getConditionRegister(int(&registerUsage)[8]) {
+ static int getConditionRegister(int(&registerUsage)[RegistersCount]) {
int min = INT_MAX;
int minIndex;
- for (unsigned i = 0; i < 8; ++i) {
+ for (unsigned i = 0; i < RegistersCount; ++i) {
if (registerUsage[i] < min) {
min = registerUsage[i];
minIndex = i;
@@ -410,7 +283,7 @@ namespace randomx {
}
template
- void InterpretedVm::datasetRead(uint32_t address, int_reg_t(&r)[8]) {
+ void InterpretedVm::datasetRead(uint32_t address, int_reg_t(&r)[RegistersCount]) {
uint64_t* datasetLine = (uint64_t*)(mem.memory + address);
for (int i = 0; i < RegistersCount; ++i)
r[i] ^= datasetLine[i];
@@ -419,9 +292,9 @@ namespace randomx {
#include "instruction_weights.hpp"
template
- void InterpretedVm::precompileProgram(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
- int registerUsage[8];
- for (unsigned i = 0; i < 8; ++i) {
+ void InterpretedVm::precompileProgram(int_reg_t(&r)[RegistersCount], __m128d (&f)[RegisterCountFlt], __m128d (&e)[RegisterCountFlt], __m128d (&a)[RegisterCountFlt]) {
+ int registerUsage[RegistersCount];
+ for (unsigned i = 0; i < RegistersCount; ++i) {
registerUsage[i] = -1;
}
for (unsigned i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) {
@@ -443,7 +316,7 @@ namespace randomx {
ibc.shift = instr.getModMem();
ibc.imm = signExtend2sCompl(instr.getImm32());
}
- registerUsage[instr.dst] = i;
+ registerUsage[dst] = i;
} break;
CASE_REP(IADD_M) {
@@ -452,7 +325,7 @@ namespace randomx {
ibc.type = InstructionType::IADD_M;
ibc.idst = &r[dst];
ibc.imm = signExtend2sCompl(instr.getImm32());
- if (instr.src != instr.dst) {
+ if (src != dst) {
ibc.isrc = &r[src];
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
}
@@ -460,7 +333,7 @@ namespace randomx {
ibc.isrc = &Zero;
ibc.memMask = ScratchpadL3Mask;
}
- registerUsage[instr.dst] = i;
+ registerUsage[dst] = i;
} break;
CASE_REP(ISUB_R) {
@@ -475,7 +348,7 @@ namespace randomx {
ibc.imm = signExtend2sCompl(instr.getImm32());
ibc.isrc = &ibc.imm;
}
- registerUsage[instr.dst] = i;
+ registerUsage[dst] = i;
} break;
CASE_REP(ISUB_M) {
@@ -484,7 +357,7 @@ namespace randomx {
ibc.type = InstructionType::ISUB_M;
ibc.idst = &r[dst];
ibc.imm = signExtend2sCompl(instr.getImm32());
- if (instr.src != instr.dst) {
+ if (src != dst) {
ibc.isrc = &r[src];
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
}
@@ -492,7 +365,7 @@ namespace randomx {
ibc.isrc = &Zero;
ibc.memMask = ScratchpadL3Mask;
}
- registerUsage[instr.dst] = i;
+ registerUsage[dst] = i;
} break;
CASE_REP(IMUL_R) {
@@ -507,7 +380,7 @@ namespace randomx {
ibc.imm = signExtend2sCompl(instr.getImm32());
ibc.isrc = &ibc.imm;
}
- registerUsage[instr.dst] = i;
+ registerUsage[dst] = i;
} break;
CASE_REP(IMUL_M) {
@@ -516,7 +389,7 @@ namespace randomx {
ibc.type = InstructionType::IMUL_M;
ibc.idst = &r[dst];
ibc.imm = signExtend2sCompl(instr.getImm32());
- if (instr.src != instr.dst) {
+ if (src != dst) {
ibc.isrc = &r[src];
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
}
@@ -524,7 +397,7 @@ namespace randomx {
ibc.isrc = &Zero;
ibc.memMask = ScratchpadL3Mask;
}
- registerUsage[instr.dst] = i;
+ registerUsage[dst] = i;
} break;
CASE_REP(IMULH_R) {
@@ -533,7 +406,7 @@ namespace randomx {
ibc.type = InstructionType::IMULH_R;
ibc.idst = &r[dst];
ibc.isrc = &r[src];
- registerUsage[instr.dst] = i;
+ registerUsage[dst] = i;
} break;
CASE_REP(IMULH_M) {
@@ -542,7 +415,7 @@ namespace randomx {
ibc.type = InstructionType::IMULH_M;
ibc.idst = &r[dst];
ibc.imm = signExtend2sCompl(instr.getImm32());
- if (instr.src != instr.dst) {
+ if (src != dst) {
ibc.isrc = &r[src];
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
}
@@ -550,7 +423,7 @@ namespace randomx {
ibc.isrc = &Zero;
ibc.memMask = ScratchpadL3Mask;
}
- registerUsage[instr.dst] = i;
+ registerUsage[dst] = i;
} break;
CASE_REP(ISMULH_R) {
@@ -559,7 +432,7 @@ namespace randomx {
ibc.type = InstructionType::ISMULH_R;
ibc.idst = &r[dst];
ibc.isrc = &r[src];
- registerUsage[instr.dst] = i;
+ registerUsage[dst] = i;
} break;
CASE_REP(ISMULH_M) {
@@ -568,7 +441,7 @@ namespace randomx {
ibc.type = InstructionType::ISMULH_M;
ibc.idst = &r[dst];
ibc.imm = signExtend2sCompl(instr.getImm32());
- if (instr.src != instr.dst) {
+ if (src != dst) {
ibc.isrc = &r[src];
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
}
@@ -576,7 +449,7 @@ namespace randomx {
ibc.isrc = &Zero;
ibc.memMask = ScratchpadL3Mask;
}
- registerUsage[instr.dst] = i;
+ registerUsage[dst] = i;
} break;
CASE_REP(IMUL_RCP) {
@@ -587,7 +460,7 @@ namespace randomx {
ibc.idst = &r[dst];
ibc.imm = randomx_reciprocal(divisor);
ibc.isrc = &ibc.imm;
- registerUsage[instr.dst] = i;
+ registerUsage[dst] = i;
}
else {
ibc.type = InstructionType::NOP;
@@ -598,7 +471,7 @@ namespace randomx {
auto dst = instr.dst % RegistersCount;
ibc.type = InstructionType::INEG_R;
ibc.idst = &r[dst];
- registerUsage[instr.dst] = i;
+ registerUsage[dst] = i;
} break;
CASE_REP(IXOR_R) {
@@ -613,7 +486,7 @@ namespace randomx {
ibc.imm = signExtend2sCompl(instr.getImm32());
ibc.isrc = &ibc.imm;
}
- registerUsage[instr.dst] = i;
+ registerUsage[dst] = i;
} break;
CASE_REP(IXOR_M) {
@@ -622,7 +495,7 @@ namespace randomx {
ibc.type = InstructionType::IXOR_M;
ibc.idst = &r[dst];
ibc.imm = signExtend2sCompl(instr.getImm32());
- if (instr.src != instr.dst) {
+ if (src != dst) {
ibc.isrc = &r[src];
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
}
@@ -630,7 +503,7 @@ namespace randomx {
ibc.isrc = &Zero;
ibc.memMask = ScratchpadL3Mask;
}
- registerUsage[instr.dst] = i;
+ registerUsage[dst] = i;
} break;
CASE_REP(IROR_R) {
@@ -645,7 +518,7 @@ namespace randomx {
ibc.imm = instr.getImm32();
ibc.isrc = &ibc.imm;
}
- registerUsage[instr.dst] = i;
+ registerUsage[dst] = i;
} break;
CASE_REP(IROL_R) {
@@ -660,7 +533,7 @@ namespace randomx {
ibc.imm = instr.getImm32();
ibc.isrc = &ibc.imm;
}
- registerUsage[instr.dst] = i;
+ registerUsage[dst] = i;
} break;
CASE_REP(ISWAP_R) {
@@ -670,8 +543,8 @@ namespace randomx {
ibc.idst = &r[dst];
ibc.isrc = &r[src];
ibc.type = InstructionType::ISWAP_R;
- registerUsage[instr.dst] = i;
- registerUsage[instr.src] = i;
+ registerUsage[dst] = i;
+ registerUsage[src] = i;
}
else {
ibc.type = InstructionType::NOP;
@@ -681,23 +554,23 @@ namespace randomx {
CASE_REP(FSWAP_R) {
auto dst = instr.dst % RegistersCount;
ibc.type = InstructionType::FSWAP_R;
- if (dst < 4)
+ if (dst < RegisterCountFlt)
ibc.fdst = &f[dst];
else
- ibc.fdst = &e[dst - 4];
+ ibc.fdst = &e[dst - RegisterCountFlt];
} break;
CASE_REP(FADD_R) {
- auto dst = instr.dst % 4;
- auto src = instr.src % 4;
+ auto dst = instr.dst % RegisterCountFlt;
+ auto src = instr.src % RegisterCountFlt;
ibc.type = InstructionType::FADD_R;
ibc.fdst = &f[dst];
ibc.fsrc = &a[src];
} break;
CASE_REP(FADD_M) {
- auto dst = instr.dst % 4;
- auto src = instr.src % 8;
+ auto dst = instr.dst % RegisterCountFlt;
+ auto src = instr.src % RegistersCount;
ibc.type = InstructionType::FADD_M;
ibc.fdst = &f[dst];
ibc.isrc = &r[src];
@@ -706,16 +579,16 @@ namespace randomx {
} break;
CASE_REP(FSUB_R) {
- auto dst = instr.dst % 4;
- auto src = instr.src % 4;
+ auto dst = instr.dst % RegisterCountFlt;
+ auto src = instr.src % RegisterCountFlt;
ibc.type = InstructionType::FSUB_R;
ibc.fdst = &f[dst];
ibc.fsrc = &a[src];
} break;
CASE_REP(FSUB_M) {
- auto dst = instr.dst % 4;
- auto src = instr.src % 8;
+ auto dst = instr.dst % RegisterCountFlt;
+ auto src = instr.src % RegistersCount;
ibc.type = InstructionType::FSUB_M;
ibc.fdst = &f[dst];
ibc.isrc = &r[src];
@@ -724,22 +597,22 @@ namespace randomx {
} break;
CASE_REP(FSCAL_R) {
- auto dst = instr.dst % 4;
+ auto dst = instr.dst % RegisterCountFlt;
ibc.fdst = &f[dst];
ibc.type = InstructionType::FSCAL_R;
} break;
CASE_REP(FMUL_R) {
- auto dst = instr.dst % 4;
- auto src = instr.src % 4;
+ auto dst = instr.dst % RegisterCountFlt;
+ auto src = instr.src % RegisterCountFlt;
ibc.type = InstructionType::FMUL_R;
ibc.fdst = &e[dst];
ibc.fsrc = &a[src];
} break;
CASE_REP(FDIV_M) {
- auto dst = instr.dst % 4;
- auto src = instr.src % 8;
+ auto dst = instr.dst % RegisterCountFlt;
+ auto src = instr.src % RegistersCount;
ibc.type = InstructionType::FDIV_M;
ibc.fdst = &e[dst];
ibc.isrc = &r[src];
@@ -748,7 +621,7 @@ namespace randomx {
} break;
CASE_REP(FSQRT_R) {
- auto dst = instr.dst % 4;
+ auto dst = instr.dst % RegisterCountFlt;
ibc.type = InstructionType::FSQRT_R;
ibc.fdst = &e[dst];
} break;
@@ -766,13 +639,13 @@ namespace randomx {
ibc.target = registerUsage[reg];
ibc.shift = instr.getModShift();
ibc.creg = &r[reg];
- for (unsigned j = 0; j < 8; ++j) { //mark all registers as used
+ for (unsigned j = 0; j < RegistersCount; ++j) { //mark all registers as used
registerUsage[j] = i;
}
} break;
CASE_REP(CFROUND) {
- auto src = instr.src % 8;
+ auto src = instr.src % RegistersCount;
ibc.isrc = &r[src];
ibc.type = InstructionType::CFROUND;
ibc.imm = instr.getImm32() & 63;
diff --git a/src/vm_interpreted.hpp b/src/vm_interpreted.hpp
index 8a15785..e3a3eb4 100644
--- a/src/vm_interpreted.hpp
+++ b/src/vm_interpreted.hpp
@@ -71,12 +71,12 @@ namespace randomx {
void run(void* seed) override;
void setDataset(randomx_dataset* dataset) override;
protected:
- virtual void datasetRead(uint32_t blockNumber, int_reg_t(&r)[8]);
+ virtual void datasetRead(uint32_t blockNumber, int_reg_t(&r)[RegistersCount]);
private:
void execute();
- void precompileProgram(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
- void executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
- void executeBytecode(int& i, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
+ void precompileProgram(int_reg_t(&r)[RegistersCount], __m128d (&f)[RegisterCountFlt], __m128d (&e)[RegisterCountFlt], __m128d (&a)[RegisterCountFlt]);
+ void executeBytecode(int_reg_t(&r)[RegistersCount], __m128d (&f)[RegisterCountFlt], __m128d (&e)[RegisterCountFlt], __m128d (&a)[RegisterCountFlt]);
+ void executeBytecode(int& i, int_reg_t(&r)[RegistersCount], __m128d (&f)[RegisterCountFlt], __m128d (&e)[RegisterCountFlt], __m128d (&a)[RegisterCountFlt]);
void* getScratchpadAddress(InstructionByteCode& ibc);
__m128d maskRegisterExponentMantissa(__m128d);
diff --git a/vcxproj/benchmark.vcxproj b/vcxproj/benchmark.vcxproj
index eba548f..27031e3 100644
--- a/vcxproj/benchmark.vcxproj
+++ b/vcxproj/benchmark.vcxproj
@@ -106,7 +106,7 @@
MaxSpeed
true
true
- true
+ false
true
diff --git a/vcxproj/randomx.vcxproj b/vcxproj/randomx.vcxproj
index 0ad01ab..218975a 100644
--- a/vcxproj/randomx.vcxproj
+++ b/vcxproj/randomx.vcxproj
@@ -26,20 +26,20 @@
- Application
+ StaticLibrary
true
v141
MultiByte
- Application
+ StaticLibrary
false
v141
true
MultiByte
- Application
+ StaticLibrary
true
v141
MultiByte