mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2024-08-15 00:23:14 +00:00
Code cleanup & refactoring
This commit is contained in:
parent
22a3aa8d79
commit
7f6bdd9a52
22 changed files with 261 additions and 535 deletions
4
.gitignore
vendored
4
.gitignore
vendored
|
@ -3,4 +3,6 @@ obj/
|
|||
*.user
|
||||
*.suo
|
||||
.vs
|
||||
x64
|
||||
x64/
|
||||
Release/
|
||||
Debug/
|
6
makefile
6
makefile
|
@ -3,7 +3,7 @@
|
|||
AR=gcc-ar
|
||||
PLATFORM=$(shell uname -m)
|
||||
CXXFLAGS=-std=c++11
|
||||
CCFLAGS=
|
||||
CCFLAGS=-std=c99
|
||||
ARFLAGS=rcs
|
||||
BINDIR=bin
|
||||
SRCDIR=src
|
||||
|
@ -80,7 +80,8 @@ $(OBJDIR)/dataset.o: $(SRCDIR)/dataset.cpp $(SRCDIR)/common.hpp $(SRCDIR)/blake2
|
|||
$(SRCDIR)/configuration.h $(SRCDIR)/randomx.h $(SRCDIR)/dataset.hpp \
|
||||
$(SRCDIR)/superscalar_program.hpp $(SRCDIR)/instruction.hpp $(SRCDIR)/jit_compiler_x86.hpp \
|
||||
$(SRCDIR)/allocator.hpp $(SRCDIR)/virtual_memory.hpp $(SRCDIR)/superscalar.hpp \
|
||||
$(SRCDIR)/blake2_generator.hpp $(SRCDIR)/reciprocal.h $(SRCDIR)/argon2.h $(SRCDIR)/argon2_core.h
|
||||
$(SRCDIR)/blake2_generator.hpp $(SRCDIR)/reciprocal.h $(SRCDIR)/argon2.h $(SRCDIR)/argon2_core.h \
|
||||
$(SRCDIR)/intrin_portable.h
|
||||
$(OBJDIR)/jit_compiler_x86.o: $(SRCDIR)/jit_compiler_x86.cpp $(SRCDIR)/jit_compiler_x86.hpp \
|
||||
$(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h \
|
||||
$(SRCDIR)/jit_compiler_x86_static.hpp $(SRCDIR)/superscalar.hpp \
|
||||
|
@ -90,7 +91,6 @@ $(OBJDIR)/jit_compiler_x86.o: $(SRCDIR)/jit_compiler_x86.cpp $(SRCDIR)/jit_compi
|
|||
$(OBJDIR)/jit_compiler_x86_static.o: $(SRCDIR)/jit_compiler_x86_static.S \
|
||||
$(SRCDIR)/asm/program_prologue_linux.inc $(SRCDIR)/asm/program_xmm_constants.inc \
|
||||
$(SRCDIR)/asm/program_loop_load.inc $(SRCDIR)/asm/program_read_dataset.inc \
|
||||
$(SRCDIR)/asm/program_read_dataset_light.inc \
|
||||
$(SRCDIR)/asm/program_read_dataset_sshash_init.inc \
|
||||
$(SRCDIR)/asm/program_read_dataset_sshash_fin.inc \
|
||||
$(SRCDIR)/asm/program_loop_store.inc $(SRCDIR)/asm/program_epilogue_linux.inc \
|
||||
|
|
|
@ -1,5 +0,0 @@
|
|||
xor rbp, rax ;# modify "mx"
|
||||
ror rbp, 32 ;# swap "ma" and "mx"
|
||||
mov ecx, ebp ;# ecx = ma
|
||||
and ecx, 2147483584 ;# align "ma" to the start of a cache line
|
||||
shr ecx, 6 ;# ecx = Dataset block number
|
|
@ -27,12 +27,12 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
|||
|
||||
namespace randomx {
|
||||
|
||||
static const char* regR[8] = { "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" };
|
||||
static const char* regR32[8] = { "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d" };
|
||||
static const char* regFE[8] = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" };
|
||||
static const char* regF[4] = { "xmm0", "xmm1", "xmm2", "xmm3" };
|
||||
static const char* regE[4] = { "xmm4", "xmm5", "xmm6", "xmm7" };
|
||||
static const char* regA[4] = { "xmm8", "xmm9", "xmm10", "xmm11" };
|
||||
static const char* regR[] = { "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" };
|
||||
static const char* regR32[] = { "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d" };
|
||||
static const char* regFE[] = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" };
|
||||
static const char* regF[] = { "xmm0", "xmm1", "xmm2", "xmm3" };
|
||||
static const char* regE[] = { "xmm4", "xmm5", "xmm6", "xmm7" };
|
||||
static const char* regA[] = { "xmm8", "xmm9", "xmm10", "xmm11" };
|
||||
|
||||
static const char* tempRegx = "xmm12";
|
||||
static const char* mantissaMask = "xmm13";
|
||||
|
@ -49,7 +49,9 @@ namespace randomx {
|
|||
}
|
||||
asmCode.str(std::string()); //clear
|
||||
for (unsigned i = 0; i < prog.getSize(); ++i) {
|
||||
#if RANDOMX_JUMP
|
||||
asmCode << "randomx_isn_" << i << ":" << std::endl;
|
||||
#endif
|
||||
Instruction& instr = prog(i);
|
||||
instr.src %= RegistersCount;
|
||||
instr.dst %= RegistersCount;
|
||||
|
@ -469,14 +471,14 @@ namespace randomx {
|
|||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_FADD_R(Instruction& instr, int i) {
|
||||
instr.dst %= 4;
|
||||
instr.src %= 4;
|
||||
instr.dst %= RegisterCountFlt;
|
||||
instr.src %= RegisterCountFlt;
|
||||
asmCode << "\taddpd " << regF[instr.dst] << ", " << regA[instr.src] << std::endl;
|
||||
traceflt(instr);
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_FADD_M(Instruction& instr, int i) {
|
||||
instr.dst %= 4;
|
||||
instr.dst %= RegisterCountFlt;
|
||||
genAddressReg(instr);
|
||||
asmCode << "\tcvtdq2pd " << tempRegx << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl;
|
||||
asmCode << "\taddpd " << regF[instr.dst] << ", " << tempRegx << std::endl;
|
||||
|
@ -484,14 +486,14 @@ namespace randomx {
|
|||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_FSUB_R(Instruction& instr, int i) {
|
||||
instr.dst %= 4;
|
||||
instr.src %= 4;
|
||||
instr.dst %= RegisterCountFlt;
|
||||
instr.src %= RegisterCountFlt;
|
||||
asmCode << "\tsubpd " << regF[instr.dst] << ", " << regA[instr.src] << std::endl;
|
||||
traceflt(instr);
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_FSUB_M(Instruction& instr, int i) {
|
||||
instr.dst %= 4;
|
||||
instr.dst %= RegisterCountFlt;
|
||||
genAddressReg(instr);
|
||||
asmCode << "\tcvtdq2pd " << tempRegx << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl;
|
||||
asmCode << "\tsubpd " << regF[instr.dst] << ", " << tempRegx << std::endl;
|
||||
|
@ -499,20 +501,20 @@ namespace randomx {
|
|||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_FSCAL_R(Instruction& instr, int i) {
|
||||
instr.dst %= 4;
|
||||
instr.dst %= RegisterCountFlt;
|
||||
asmCode << "\txorps " << regF[instr.dst] << ", " << scaleMask << std::endl;
|
||||
traceflt(instr);
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_FMUL_R(Instruction& instr, int i) {
|
||||
instr.dst %= 4;
|
||||
instr.src %= 4;
|
||||
instr.dst %= RegisterCountFlt;
|
||||
instr.src %= RegisterCountFlt;
|
||||
asmCode << "\tmulpd " << regE[instr.dst] << ", " << regA[instr.src] << std::endl;
|
||||
traceflt(instr);
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_FDIV_M(Instruction& instr, int i) {
|
||||
instr.dst %= 4;
|
||||
instr.dst %= RegisterCountFlt;
|
||||
genAddressReg(instr);
|
||||
asmCode << "\tcvtdq2pd " << tempRegx << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl;
|
||||
asmCode << "\tandps " << tempRegx << ", " << mantissaMask << std::endl;
|
||||
|
@ -522,7 +524,7 @@ namespace randomx {
|
|||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_FSQRT_R(Instruction& instr, int i) {
|
||||
instr.dst %= 4;
|
||||
instr.dst %= RegisterCountFlt;
|
||||
asmCode << "\tsqrtpd " << regE[instr.dst] << ", " << regE[instr.dst] << std::endl;
|
||||
traceflt(instr);
|
||||
}
|
||||
|
@ -566,7 +568,7 @@ namespace randomx {
|
|||
|
||||
void AssemblyGeneratorX86::handleCondition(Instruction& instr, int i) {
|
||||
const int shift = instr.getModShift();
|
||||
const int conditionMask = ((1 << RANDOMX_CONDITION_BITS) - 1) << shift;
|
||||
const int conditionMask = ((1 << RANDOMX_JUMP_BITS) - 1) << shift;
|
||||
int reg = getConditionRegister();
|
||||
int target = registerUsage[reg] + 1;
|
||||
registerUsage[reg] = i;
|
||||
|
@ -579,7 +581,9 @@ namespace randomx {
|
|||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_COND_R(Instruction& instr, int i) {
|
||||
#if RANDOMX_JUMP
|
||||
handleCondition(instr, i);
|
||||
#endif
|
||||
asmCode << "\txor ecx, ecx" << std::endl;
|
||||
asmCode << "\tcmp " << regR32[instr.src] << ", " << (int32_t)instr.getImm32() << std::endl;
|
||||
asmCode << "\tset" << condition(instr) << " cl" << std::endl;
|
||||
|
@ -602,7 +606,6 @@ namespace randomx {
|
|||
#define INST_HANDLE(x) REPN(&AssemblyGeneratorX86::h_##x, WT(x))
|
||||
|
||||
InstructionGenerator AssemblyGeneratorX86::engine[256] = {
|
||||
//Integer
|
||||
INST_HANDLE(IADD_RS)
|
||||
INST_HANDLE(IADD_M)
|
||||
INST_HANDLE(ISUB_R)
|
||||
|
@ -620,27 +623,18 @@ namespace randomx {
|
|||
INST_HANDLE(IROR_R)
|
||||
INST_HANDLE(IROL_R)
|
||||
INST_HANDLE(ISWAP_R)
|
||||
|
||||
//Common floating point
|
||||
INST_HANDLE(FSWAP_R)
|
||||
|
||||
//Floating point group F
|
||||
INST_HANDLE(FADD_R)
|
||||
INST_HANDLE(FADD_M)
|
||||
INST_HANDLE(FSUB_R)
|
||||
INST_HANDLE(FSUB_M)
|
||||
INST_HANDLE(FSCAL_R)
|
||||
|
||||
//Floating point group E
|
||||
INST_HANDLE(FMUL_R)
|
||||
INST_HANDLE(FDIV_M)
|
||||
INST_HANDLE(FSQRT_R)
|
||||
|
||||
//Control
|
||||
INST_HANDLE(COND_R)
|
||||
INST_HANDLE(CFROUND)
|
||||
INST_HANDLE(ISTORE)
|
||||
|
||||
INST_HANDLE(NOP)
|
||||
};
|
||||
}
|
|
@ -19,6 +19,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "common.hpp"
|
||||
#include <sstream>
|
||||
|
||||
namespace randomx {
|
||||
|
@ -48,40 +49,40 @@ namespace randomx {
|
|||
void traceint(Instruction&);
|
||||
void traceflt(Instruction&);
|
||||
void tracenop(Instruction&);
|
||||
void h_IADD_RS(Instruction&, int);
|
||||
void h_IADD_M(Instruction&, int);
|
||||
void h_ISUB_R(Instruction&, int);
|
||||
void h_ISUB_M(Instruction&, int);
|
||||
void h_IMUL_R(Instruction&, int);
|
||||
void h_IMUL_M(Instruction&, int);
|
||||
void h_IMULH_R(Instruction&, int);
|
||||
void h_IMULH_M(Instruction&, int);
|
||||
void h_ISMULH_R(Instruction&, int);
|
||||
void h_ISMULH_M(Instruction&, int);
|
||||
void h_IMUL_RCP(Instruction&, int);
|
||||
void h_ISDIV_C(Instruction&, int);
|
||||
void h_INEG_R(Instruction&, int);
|
||||
void h_IXOR_R(Instruction&, int);
|
||||
void h_IXOR_M(Instruction&, int);
|
||||
void h_IROR_R(Instruction&, int);
|
||||
void h_IROL_R(Instruction&, int);
|
||||
void h_ISWAP_R(Instruction&, int);
|
||||
void h_FSWAP_R(Instruction&, int);
|
||||
void h_FADD_R(Instruction&, int);
|
||||
void h_FADD_M(Instruction&, int);
|
||||
void h_FSUB_R(Instruction&, int);
|
||||
void h_FSUB_M(Instruction&, int);
|
||||
void h_FSCAL_R(Instruction&, int);
|
||||
void h_FMUL_R(Instruction&, int);
|
||||
void h_FDIV_M(Instruction&, int);
|
||||
void h_FSQRT_R(Instruction&, int);
|
||||
void h_COND_R(Instruction&, int);
|
||||
void h_CFROUND(Instruction&, int);
|
||||
void h_ISTORE(Instruction&, int);
|
||||
void h_NOP(Instruction&, int);
|
||||
void h_IADD_RS(Instruction&, int);
|
||||
void h_IADD_M(Instruction&, int);
|
||||
void h_ISUB_R(Instruction&, int);
|
||||
void h_ISUB_M(Instruction&, int);
|
||||
void h_IMUL_R(Instruction&, int);
|
||||
void h_IMUL_M(Instruction&, int);
|
||||
void h_IMULH_R(Instruction&, int);
|
||||
void h_IMULH_M(Instruction&, int);
|
||||
void h_ISMULH_R(Instruction&, int);
|
||||
void h_ISMULH_M(Instruction&, int);
|
||||
void h_IMUL_RCP(Instruction&, int);
|
||||
void h_ISDIV_C(Instruction&, int);
|
||||
void h_INEG_R(Instruction&, int);
|
||||
void h_IXOR_R(Instruction&, int);
|
||||
void h_IXOR_M(Instruction&, int);
|
||||
void h_IROR_R(Instruction&, int);
|
||||
void h_IROL_R(Instruction&, int);
|
||||
void h_ISWAP_R(Instruction&, int);
|
||||
void h_FSWAP_R(Instruction&, int);
|
||||
void h_FADD_R(Instruction&, int);
|
||||
void h_FADD_M(Instruction&, int);
|
||||
void h_FSUB_R(Instruction&, int);
|
||||
void h_FSUB_M(Instruction&, int);
|
||||
void h_FSCAL_R(Instruction&, int);
|
||||
void h_FMUL_R(Instruction&, int);
|
||||
void h_FDIV_M(Instruction&, int);
|
||||
void h_FSQRT_R(Instruction&, int);
|
||||
void h_COND_R(Instruction&, int);
|
||||
void h_CFROUND(Instruction&, int);
|
||||
void h_ISTORE(Instruction&, int);
|
||||
void h_NOP(Instruction&, int);
|
||||
|
||||
static InstructionGenerator engine[256];
|
||||
std::stringstream asmCode;
|
||||
int registerUsage[8];
|
||||
int registerUsage[RegistersCount];
|
||||
};
|
||||
}
|
|
@ -51,8 +51,6 @@ namespace randomx {
|
|||
|
||||
static_assert(wtSum == 256, "Sum of instruction frequencies must be 256.");
|
||||
|
||||
using addr_t = uint32_t;
|
||||
|
||||
constexpr int ArgonBlockSize = 1024;
|
||||
constexpr int ArgonSaltSize = sizeof(RANDOMX_ARGON_SALT) - 1;
|
||||
constexpr int CacheLineSize = RANDOMX_DATASET_ITEM_SIZE;
|
||||
|
@ -78,6 +76,10 @@ namespace randomx {
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#define RANDOMX_JUMP (RANDOMX_JUMP_BITS > 0)
|
||||
|
||||
using addr_t = uint32_t;
|
||||
|
||||
using int_reg_t = uint64_t;
|
||||
|
||||
struct fpu_reg_t {
|
||||
|
@ -95,6 +97,7 @@ namespace randomx {
|
|||
constexpr int ScratchpadL3Mask = (ScratchpadL3 - 1) * 8;
|
||||
constexpr int ScratchpadL3Mask64 = (ScratchpadL3 / 8 - 1) * 64;
|
||||
constexpr int RegistersCount = 8;
|
||||
constexpr int RegisterCountFlt = RegistersCount / 2;
|
||||
constexpr int RegisterNeedsDisplacement = 5; //x86 r13 register
|
||||
constexpr int RegisterNeedsSib = 4; //x86 r12 register
|
||||
|
||||
|
@ -118,5 +121,3 @@ namespace randomx {
|
|||
typedef void(*CacheDeallocFunc)(randomx_cache*);
|
||||
typedef void(*CacheInitializeFunc)(randomx_cache*, const void*, size_t);
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, const randomx::RegisterFile& rf);
|
||||
|
|
|
@ -34,7 +34,10 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
|||
//Number of random Cache accesses per Dataset item. Minimum is 2.
|
||||
#define RANDOMX_CACHE_ACCESSES 8
|
||||
|
||||
//Target latency for SuperscalarHash (in cycles of the reference CPU).
|
||||
#define RANDOMX_SUPERSCALAR_LATENCY 170
|
||||
|
||||
//The maximum size of a SuperscalarHash program (number of instructions).
|
||||
#define RANDOMX_SUPERSCALAR_MAX_SIZE 512
|
||||
|
||||
//Dataset base size in bytes. Must be a power of 2.
|
||||
|
@ -61,8 +64,8 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
|||
//Scratchpad L1 size in bytes. Must be a power of two and less than or equal to RANDOMX_SCRATCHPAD_L2.
|
||||
#define RANDOMX_SCRATCHPAD_L1 (16 * 1024)
|
||||
|
||||
//How many register bits must be zero for a jump condition to be triggered
|
||||
#define RANDOMX_CONDITION_BITS 7
|
||||
//How many register bits must be zero for a jump condition to be triggered. If set to 0, jumps are disabled.
|
||||
#define RANDOMX_JUMP_BITS 7
|
||||
|
||||
/*
|
||||
Instruction frequencies (per 256 opcodes)
|
||||
|
|
|
@ -39,6 +39,8 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
|||
#include "blake2/endian.h"
|
||||
#include "argon2.h"
|
||||
#include "argon2_core.h"
|
||||
#include "jit_compiler_x86.hpp"
|
||||
#include "intrin_portable.h"
|
||||
|
||||
static_assert(RANDOMX_ARGON_MEMORY % (RANDOMX_ARGON_LANES * ARGON2_SYNC_POINTS) == 0, "RANDOMX_ARGON_MEMORY - invalid value");
|
||||
static_assert(ARGON2_BLOCK_SIZE == randomx::ArgonBlockSize, "Unpexpected value of ARGON2_BLOCK_SIZE");
|
||||
|
@ -146,6 +148,7 @@ namespace randomx {
|
|||
rl[7] = rl[0] ^ superscalarAdd7;
|
||||
for (unsigned i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) {
|
||||
mixBlock = getMixBlock(registerValue, cache->memory);
|
||||
PREFETCHNTA(mixBlock);
|
||||
SuperscalarProgram& prog = cache->programs[i];
|
||||
|
||||
executeSuperscalar(rl, prog, &cache->reciprocalCache);
|
||||
|
|
|
@ -24,7 +24,6 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
|||
#include <type_traits>
|
||||
#include "common.hpp"
|
||||
#include "superscalar_program.hpp"
|
||||
#include "jit_compiler_x86.hpp"
|
||||
#include "allocator.hpp"
|
||||
|
||||
/* Global scope for C binding */
|
||||
|
@ -33,6 +32,10 @@ struct randomx_dataset {
|
|||
randomx::DatasetDeallocFunc dealloc;
|
||||
};
|
||||
|
||||
namespace randomx {
|
||||
class JitCompilerX86;
|
||||
}
|
||||
|
||||
/* Global scope for C binding */
|
||||
struct randomx_cache {
|
||||
uint8_t* memory = nullptr;
|
||||
|
|
|
@ -29,12 +29,12 @@ namespace randomx {
|
|||
}
|
||||
|
||||
void Instruction::genAddressReg(std::ostream& os) const {
|
||||
os << ((mod % 4) ? "L1" : "L2") << "[r" << (int)src << std::showpos << (int32_t)getImm32() << std::noshowpos << "]";
|
||||
os << (getModMem() ? "L1" : "L2") << "[r" << (int)src << std::showpos << (int32_t)getImm32() << std::noshowpos << "]";
|
||||
}
|
||||
|
||||
void Instruction::genAddressRegDst(std::ostream& os) const {
|
||||
if (getModCond())
|
||||
os << ((mod % 4) ? "L1" : "L2");
|
||||
os << (getModMem() ? "L1" : "L2");
|
||||
else
|
||||
os << "L3";
|
||||
os << "[r" << (int)dst << std::showpos << (int32_t)getImm32() << std::noshowpos << "]";
|
||||
|
@ -49,7 +49,7 @@ namespace randomx {
|
|||
if(dst == RegisterNeedsDisplacement) {
|
||||
os << ", " << (int32_t)getImm32();
|
||||
}
|
||||
os << ", LSH " << (int)(mod % 4) << std::endl;
|
||||
os << ", LSH " << (int)getModMem() << std::endl;
|
||||
}
|
||||
|
||||
void Instruction::h_IADD_M(std::ostream& os) const {
|
||||
|
@ -65,7 +65,6 @@ namespace randomx {
|
|||
}
|
||||
}
|
||||
|
||||
//1 uOP
|
||||
void Instruction::h_ISUB_R(std::ostream& os) const {
|
||||
if (src != dst) {
|
||||
os << "r" << (int)dst << ", r" << (int)src << std::endl;
|
||||
|
@ -197,57 +196,57 @@ namespace randomx {
|
|||
}
|
||||
|
||||
void Instruction::h_FSWAP_R(std::ostream& os) const {
|
||||
const char reg = (dst >= 4) ? 'e' : 'f';
|
||||
auto dstIndex = dst % 4;
|
||||
const char reg = (dst >= RegisterCountFlt) ? 'e' : 'f';
|
||||
auto dstIndex = dst % RegisterCountFlt;
|
||||
os << reg << dstIndex << std::endl;
|
||||
}
|
||||
|
||||
void Instruction::h_FADD_R(std::ostream& os) const {
|
||||
auto dstIndex = dst % 4;
|
||||
auto srcIndex = src % 4;
|
||||
auto dstIndex = dst % RegisterCountFlt;
|
||||
auto srcIndex = src % RegisterCountFlt;
|
||||
os << "f" << dstIndex << ", a" << srcIndex << std::endl;
|
||||
}
|
||||
|
||||
void Instruction::h_FADD_M(std::ostream& os) const {
|
||||
auto dstIndex = dst % 4;
|
||||
auto dstIndex = dst % RegisterCountFlt;
|
||||
os << "f" << dstIndex << ", ";
|
||||
genAddressReg(os);
|
||||
os << std::endl;
|
||||
}
|
||||
|
||||
void Instruction::h_FSUB_R(std::ostream& os) const {
|
||||
auto dstIndex = dst % 4;
|
||||
auto srcIndex = src % 4;
|
||||
auto dstIndex = dst % RegisterCountFlt;
|
||||
auto srcIndex = src % RegisterCountFlt;
|
||||
os << "f" << dstIndex << ", a" << srcIndex << std::endl;
|
||||
}
|
||||
|
||||
void Instruction::h_FSUB_M(std::ostream& os) const {
|
||||
auto dstIndex = dst % 4;
|
||||
auto dstIndex = dst % RegisterCountFlt;
|
||||
os << "f" << dstIndex << ", ";
|
||||
genAddressReg(os);
|
||||
os << std::endl;
|
||||
}
|
||||
|
||||
void Instruction::h_FSCAL_R(std::ostream& os) const {
|
||||
auto dstIndex = dst % 4;
|
||||
auto dstIndex = dst % RegisterCountFlt;
|
||||
os << "f" << dstIndex << std::endl;
|
||||
}
|
||||
|
||||
void Instruction::h_FMUL_R(std::ostream& os) const {
|
||||
auto dstIndex = dst % 4;
|
||||
auto srcIndex = src % 4;
|
||||
auto dstIndex = dst % RegisterCountFlt;
|
||||
auto srcIndex = src % RegisterCountFlt;
|
||||
os << "e" << dstIndex << ", a" << srcIndex << std::endl;
|
||||
}
|
||||
|
||||
void Instruction::h_FDIV_M(std::ostream& os) const {
|
||||
auto dstIndex = dst % 4;
|
||||
auto dstIndex = dst % RegisterCountFlt;
|
||||
os << "e" << dstIndex << ", ";
|
||||
genAddressReg(os);
|
||||
os << std::endl;
|
||||
}
|
||||
|
||||
void Instruction::h_FSQRT_R(std::ostream& os) const {
|
||||
auto dstIndex = dst % 4;
|
||||
auto dstIndex = dst % RegisterCountFlt;
|
||||
os << "e" << dstIndex << std::endl;
|
||||
}
|
||||
|
||||
|
@ -280,7 +279,7 @@ namespace randomx {
|
|||
}
|
||||
|
||||
void Instruction::h_COND_R(std::ostream& os) const {
|
||||
os << "r" << (int)dst << ", " << condition((mod >> 2) & 7) << "(r" << (int)src << ", " << (int32_t)getImm32() << "), LSH " << (int)(mod >> 5) << std::endl;
|
||||
os << "r" << (int)dst << ", " << condition(getModCond()) << "(r" << (int)src << ", " << (int32_t)getImm32() << "), LSH " << (int)(getModShift()) << std::endl;
|
||||
}
|
||||
|
||||
void Instruction::h_ISTORE(std::ostream& os) const {
|
||||
|
@ -297,7 +296,6 @@ namespace randomx {
|
|||
#define INST_HANDLE(x) REPN(&Instruction::h_##x, WT(x))
|
||||
|
||||
const char* Instruction::names[256] = {
|
||||
//Integer
|
||||
INST_NAME(IADD_RS)
|
||||
INST_NAME(IADD_M)
|
||||
INST_NAME(ISUB_R)
|
||||
|
@ -314,33 +312,22 @@ namespace randomx {
|
|||
INST_NAME(IXOR_M)
|
||||
INST_NAME(IROR_R)
|
||||
INST_NAME(ISWAP_R)
|
||||
|
||||
//Common floating point
|
||||
INST_NAME(FSWAP_R)
|
||||
|
||||
//Floating point group F
|
||||
INST_NAME(FADD_R)
|
||||
INST_NAME(FADD_M)
|
||||
INST_NAME(FSUB_R)
|
||||
INST_NAME(FSUB_M)
|
||||
INST_NAME(FSCAL_R)
|
||||
|
||||
//Floating point group E
|
||||
INST_NAME(FMUL_R)
|
||||
INST_NAME(FDIV_M)
|
||||
INST_NAME(FSQRT_R)
|
||||
|
||||
//Control
|
||||
INST_NAME(COND_R)
|
||||
INST_NAME(CFROUND)
|
||||
|
||||
INST_NAME(ISTORE)
|
||||
|
||||
INST_NAME(NOP)
|
||||
};
|
||||
|
||||
InstructionFormatter Instruction::engine[256] = {
|
||||
//Integer
|
||||
INST_HANDLE(IADD_RS)
|
||||
INST_HANDLE(IADD_M)
|
||||
INST_HANDLE(ISUB_R)
|
||||
|
@ -358,22 +345,15 @@ namespace randomx {
|
|||
INST_HANDLE(IROR_R)
|
||||
INST_HANDLE(IROL_R)
|
||||
INST_HANDLE(ISWAP_R)
|
||||
|
||||
//Common floating point
|
||||
INST_HANDLE(FSWAP_R)
|
||||
|
||||
//Floating point group F
|
||||
INST_HANDLE(FADD_R)
|
||||
INST_HANDLE(FADD_M)
|
||||
INST_HANDLE(FSUB_R)
|
||||
INST_HANDLE(FSUB_M)
|
||||
INST_HANDLE(FSCAL_R)
|
||||
|
||||
//Floating point group E
|
||||
INST_HANDLE(FMUL_R)
|
||||
INST_HANDLE(FDIV_M)
|
||||
INST_HANDLE(FSQRT_R)
|
||||
|
||||
INST_HANDLE(COND_R)
|
||||
INST_HANDLE(CFROUND)
|
||||
INST_HANDLE(ISTORE)
|
||||
|
|
|
@ -103,36 +103,36 @@ namespace randomx {
|
|||
void genAddressReg(std::ostream& os) const;
|
||||
void genAddressImm(std::ostream& os) const;
|
||||
void genAddressRegDst(std::ostream&) const;
|
||||
void h_IADD_RS(std::ostream&) const;
|
||||
void h_IADD_M(std::ostream&) const;
|
||||
void h_ISUB_R(std::ostream&) const;
|
||||
void h_ISUB_M(std::ostream&) const;
|
||||
void h_IMUL_R(std::ostream&) const;
|
||||
void h_IMUL_M(std::ostream&) const;
|
||||
void h_IMULH_R(std::ostream&) const;
|
||||
void h_IMULH_M(std::ostream&) const;
|
||||
void h_ISMULH_R(std::ostream&) const;
|
||||
void h_ISMULH_M(std::ostream&) const;
|
||||
void h_IMUL_RCP(std::ostream&) const;
|
||||
void h_INEG_R(std::ostream&) const;
|
||||
void h_IXOR_R(std::ostream&) const;
|
||||
void h_IXOR_M(std::ostream&) const;
|
||||
void h_IROR_R(std::ostream&) const;
|
||||
void h_IROL_R(std::ostream&) const;
|
||||
void h_ISWAP_R(std::ostream&) const;
|
||||
void h_FSWAP_R(std::ostream&) const;
|
||||
void h_FADD_R(std::ostream&) const;
|
||||
void h_FADD_M(std::ostream&) const;
|
||||
void h_FSUB_R(std::ostream&) const;
|
||||
void h_FSUB_M(std::ostream&) const;
|
||||
void h_FSCAL_R(std::ostream&) const;
|
||||
void h_FMUL_R(std::ostream&) const;
|
||||
void h_FDIV_M(std::ostream&) const;
|
||||
void h_FSQRT_R(std::ostream&) const;
|
||||
void h_COND_R(std::ostream&) const;
|
||||
void h_CFROUND(std::ostream&) const;
|
||||
void h_ISTORE(std::ostream&) const;
|
||||
void h_NOP(std::ostream&) const;
|
||||
void h_IADD_RS(std::ostream&) const;
|
||||
void h_IADD_M(std::ostream&) const;
|
||||
void h_ISUB_R(std::ostream&) const;
|
||||
void h_ISUB_M(std::ostream&) const;
|
||||
void h_IMUL_R(std::ostream&) const;
|
||||
void h_IMUL_M(std::ostream&) const;
|
||||
void h_IMULH_R(std::ostream&) const;
|
||||
void h_IMULH_M(std::ostream&) const;
|
||||
void h_ISMULH_R(std::ostream&) const;
|
||||
void h_ISMULH_M(std::ostream&) const;
|
||||
void h_IMUL_RCP(std::ostream&) const;
|
||||
void h_INEG_R(std::ostream&) const;
|
||||
void h_IXOR_R(std::ostream&) const;
|
||||
void h_IXOR_M(std::ostream&) const;
|
||||
void h_IROR_R(std::ostream&) const;
|
||||
void h_IROL_R(std::ostream&) const;
|
||||
void h_ISWAP_R(std::ostream&) const;
|
||||
void h_FSWAP_R(std::ostream&) const;
|
||||
void h_FADD_R(std::ostream&) const;
|
||||
void h_FADD_M(std::ostream&) const;
|
||||
void h_FSUB_R(std::ostream&) const;
|
||||
void h_FSUB_M(std::ostream&) const;
|
||||
void h_FSCAL_R(std::ostream&) const;
|
||||
void h_FMUL_R(std::ostream&) const;
|
||||
void h_FDIV_M(std::ostream&) const;
|
||||
void h_FSQRT_R(std::ostream&) const;
|
||||
void h_COND_R(std::ostream&) const;
|
||||
void h_CFROUND(std::ostream&) const;
|
||||
void h_ISTORE(std::ostream&) const;
|
||||
void h_NOP(std::ostream&) const;
|
||||
};
|
||||
|
||||
static_assert(sizeof(Instruction) == 8, "Invalid size of struct randomx::Instruction");
|
||||
|
|
|
@ -318,18 +318,6 @@ constexpr uint64_t ieee_get_exponent_mask() {
|
|||
return (uint64_t)(E + 1023U) << 52;
|
||||
}
|
||||
|
||||
template<int E>
|
||||
__m128d ieee_set_exponent(__m128d x) {
|
||||
static_assert(E > -1023, "Invalid exponent value");
|
||||
constexpr uint64_t mantissaMask64 = (1ULL << 52) - 1;
|
||||
const __m128d mantissaMask = _mm_castsi128_pd(_mm_set_epi64x(mantissaMask64, mantissaMask64));
|
||||
constexpr uint64_t exponent64 = (uint64_t)(E + 1023U) << 52;
|
||||
const __m128d exponentMask = _mm_castsi128_pd(_mm_set_epi64x(exponent64, exponent64));
|
||||
x = _mm_and_pd(x, mantissaMask);
|
||||
x = _mm_or_pd(x, exponentMask);
|
||||
return x;
|
||||
}
|
||||
|
||||
double loadDoublePortable(const void* addr);
|
||||
uint64_t mulh(uint64_t, uint64_t);
|
||||
int64_t smulh(int64_t, int64_t);
|
||||
|
|
|
@ -20,8 +20,6 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
|||
#include <stdexcept>
|
||||
#include "jit_compiler_x86.hpp"
|
||||
|
||||
#define RANDOMX_JUMP
|
||||
|
||||
#if !defined(_M_X64) && !defined(__x86_64__)
|
||||
namespace randomx {
|
||||
|
||||
|
@ -113,7 +111,6 @@ namespace randomx {
|
|||
const uint8_t* codeLoopLoad = (uint8_t*)&randomx_program_loop_load;
|
||||
const uint8_t* codeProgamStart = (uint8_t*)&randomx_program_start;
|
||||
const uint8_t* codeReadDataset = (uint8_t*)&randomx_program_read_dataset;
|
||||
const uint8_t* codeReadDatasetLight = (uint8_t*)&randomx_program_read_dataset_light;
|
||||
const uint8_t* codeReadDatasetLightSshInit = (uint8_t*)&randomx_program_read_dataset_sshash_init;
|
||||
const uint8_t* codeReadDatasetLightSshFin = (uint8_t*)&randomx_program_read_dataset_sshash_fin;
|
||||
const uint8_t* codeDatasetInit = (uint8_t*)&randomx_dataset_init;
|
||||
|
@ -128,8 +125,7 @@ namespace randomx {
|
|||
|
||||
const int32_t prologueSize = codeLoopBegin - codePrologue;
|
||||
const int32_t loopLoadSize = codeProgamStart - codeLoopLoad;
|
||||
const int32_t readDatasetSize = codeReadDatasetLight - codeReadDataset;
|
||||
const int32_t readDatasetLightSize = codeReadDatasetLightSshInit - codeReadDatasetLight;
|
||||
const int32_t readDatasetSize = codeReadDatasetLightSshInit - codeReadDataset;
|
||||
const int32_t readDatasetLightInitSize = codeReadDatasetLightSshFin - codeReadDatasetLightSshInit;
|
||||
const int32_t readDatasetLightFinSize = codeLoopStore - codeReadDatasetLightSshFin;
|
||||
const int32_t loopStoreSize = codeLoopEnd - codeLoopStore;
|
||||
|
@ -299,7 +295,7 @@ namespace randomx {
|
|||
}
|
||||
|
||||
void JitCompilerX86::generateProgramPrologue(Program& prog, ProgramConfiguration& pcfg) {
|
||||
#ifdef RANDOMX_JUMP
|
||||
#if RANDOMX_JUMP
|
||||
instructionOffsets.clear();
|
||||
for (unsigned i = 0; i < 8; ++i) {
|
||||
registerUsage[i] = -1;
|
||||
|
@ -336,7 +332,7 @@ namespace randomx {
|
|||
}
|
||||
|
||||
void JitCompilerX86::generateCode(Instruction& instr, int i) {
|
||||
#ifdef RANDOMX_JUMP
|
||||
#if RANDOMX_JUMP
|
||||
instructionOffsets.push_back(codePos);
|
||||
#endif
|
||||
auto generator = engine[instr.opcode];
|
||||
|
@ -467,15 +463,6 @@ namespace randomx {
|
|||
|
||||
void JitCompilerX86::h_IADD_RS(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
/*if (instr.src != instr.dst) {
|
||||
emit(REX_ADD_RR);
|
||||
emitByte(0xc0 + 8 * instr.dst + instr.src);
|
||||
}
|
||||
else {
|
||||
emit(REX_81);
|
||||
emitByte(0xc0 + instr.dst);
|
||||
emit32(instr.getImm32());
|
||||
}*/
|
||||
emit(REX_LEA);
|
||||
if (instr.dst == RegisterNeedsDisplacement)
|
||||
emitByte(0xac);
|
||||
|
@ -505,14 +492,6 @@ namespace randomx {
|
|||
emitByte((scale << 6) | (index << 3) | base);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_IADD_RC(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
emit(REX_LEA);
|
||||
emitByte(0x84 + 8 * instr.dst);
|
||||
genSIB(0, instr.src, instr.dst);
|
||||
emit32(instr.getImm32());
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_ISUB_R(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
|
@ -541,14 +520,6 @@ namespace randomx {
|
|||
}
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_IMUL_9C(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
emit(REX_LEA);
|
||||
emitByte(0x84 + 8 * instr.dst);
|
||||
genSIB(3, instr.dst, instr.dst);
|
||||
emit32(instr.getImm32());
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_IMUL_R(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
|
@ -645,10 +616,6 @@ namespace randomx {
|
|||
}
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_ISDIV_C(Instruction& instr, int i) {
|
||||
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_INEG_R(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
emit(REX_NEG);
|
||||
|
@ -729,17 +696,14 @@ namespace randomx {
|
|||
}
|
||||
|
||||
void JitCompilerX86::h_FADD_R(Instruction& instr, int i) {
|
||||
instr.dst %= 4;
|
||||
instr.src %= 4;
|
||||
instr.dst %= RegisterCountFlt;
|
||||
instr.src %= RegisterCountFlt;
|
||||
emit(REX_ADDPD);
|
||||
emitByte(0xc0 + instr.src + 8 * instr.dst);
|
||||
//emit(REX_PADD);
|
||||
//emitByte(PADD_OPCODES[instr.mod % 4]);
|
||||
//emitByte(0xf8 + instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_FADD_M(Instruction& instr, int i) {
|
||||
instr.dst %= 4;
|
||||
instr.dst %= RegisterCountFlt;
|
||||
genAddressReg(instr);
|
||||
emit(REX_CVTDQ2PD_XMM12);
|
||||
emit(REX_ADDPD);
|
||||
|
@ -747,17 +711,14 @@ namespace randomx {
|
|||
}
|
||||
|
||||
void JitCompilerX86::h_FSUB_R(Instruction& instr, int i) {
|
||||
instr.dst %= 4;
|
||||
instr.src %= 4;
|
||||
instr.dst %= RegisterCountFlt;
|
||||
instr.src %= RegisterCountFlt;
|
||||
emit(REX_SUBPD);
|
||||
emitByte(0xc0 + instr.src + 8 * instr.dst);
|
||||
//emit(REX_PADD);
|
||||
//emitByte(PADD_OPCODES[instr.mod % 4]);
|
||||
//emitByte(0xf8 + instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_FSUB_M(Instruction& instr, int i) {
|
||||
instr.dst %= 4;
|
||||
instr.dst %= RegisterCountFlt;
|
||||
genAddressReg(instr);
|
||||
emit(REX_CVTDQ2PD_XMM12);
|
||||
emit(REX_SUBPD);
|
||||
|
@ -765,40 +726,20 @@ namespace randomx {
|
|||
}
|
||||
|
||||
void JitCompilerX86::h_FSCAL_R(Instruction& instr, int i) {
|
||||
instr.dst %= 4;
|
||||
instr.dst %= RegisterCountFlt;
|
||||
emit(REX_XORPS);
|
||||
emitByte(0xc7 + 8 * instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_FMUL_R(Instruction& instr, int i) {
|
||||
instr.dst %= 4;
|
||||
instr.src %= 4;
|
||||
instr.dst %= RegisterCountFlt;
|
||||
instr.src %= RegisterCountFlt;
|
||||
emit(REX_MULPD);
|
||||
emitByte(0xe0 + instr.src + 8 * instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_FMUL_M(Instruction& instr, int i) {
|
||||
instr.dst %= 4;
|
||||
genAddressReg(instr);
|
||||
emit(REX_CVTDQ2PD_XMM12);
|
||||
emit(REX_ANDPS_XMM12);
|
||||
emit(REX_MULPD);
|
||||
emitByte(0xe4 + 8 * instr.dst);
|
||||
emit(REX_MAXPD);
|
||||
emitByte(0xe5 + 8 * instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_FDIV_R(Instruction& instr, int i) {
|
||||
instr.dst %= 4;
|
||||
instr.src %= 4;
|
||||
emit(REX_DIVPD);
|
||||
emitByte(0xe0 + instr.src + 8 * instr.dst);
|
||||
emit(REX_MAXPD);
|
||||
emitByte(0xe5 + 8 * instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_FDIV_M(Instruction& instr, int i) {
|
||||
instr.dst %= 4;
|
||||
instr.dst %= RegisterCountFlt;
|
||||
genAddressReg(instr);
|
||||
emit(REX_CVTDQ2PD_XMM12);
|
||||
emit(REX_ANDPS_XMM12);
|
||||
|
@ -807,7 +748,7 @@ namespace randomx {
|
|||
}
|
||||
|
||||
void JitCompilerX86::h_FSQRT_R(Instruction& instr, int i) {
|
||||
instr.dst %= 4;
|
||||
instr.dst %= RegisterCountFlt;
|
||||
emit(SQRTPD);
|
||||
emitByte(0xe4 + 9 * instr.dst);
|
||||
}
|
||||
|
@ -883,7 +824,7 @@ namespace randomx {
|
|||
|
||||
void JitCompilerX86::handleCondition(Instruction& instr, int i) {
|
||||
const int shift = instr.getModShift();
|
||||
const int conditionMask = ((1 << RANDOMX_CONDITION_BITS) - 1) << shift;
|
||||
const int conditionMask = ((1 << RANDOMX_JUMP_BITS) - 1) << shift;
|
||||
int reg = getConditionRegister();
|
||||
int target = registerUsage[reg] + 1;
|
||||
emit(REX_ADD_I);
|
||||
|
@ -900,7 +841,7 @@ namespace randomx {
|
|||
}
|
||||
|
||||
void JitCompilerX86::h_COND_R(Instruction& instr, int i) {
|
||||
#ifdef RANDOMX_JUMP
|
||||
#if RANDOMX_JUMP
|
||||
handleCondition(instr, i);
|
||||
#endif
|
||||
emit(XOR_ECX_ECX);
|
||||
|
@ -914,40 +855,15 @@ namespace randomx {
|
|||
emitByte(0xc1 + 8 * instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_COND_M(Instruction& instr, int i) {
|
||||
#ifdef RANDOMX_JUMP
|
||||
handleCondition(instr, i);
|
||||
#endif
|
||||
emit(XOR_ECX_ECX);
|
||||
genAddressReg(instr);
|
||||
emit(REX_CMP_M32I);
|
||||
emit32(instr.getImm32());
|
||||
emitByte(0x0f);
|
||||
emitByte(condition(instr));
|
||||
emitByte(0xc1);
|
||||
emit(REX_ADD_RM);
|
||||
emitByte(0xc1 + 8 * instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_ISTORE(Instruction& instr, int i) {
|
||||
genAddressRegDst(instr);
|
||||
//if (instr.getModCond())
|
||||
emit(REX_MOV_MR);
|
||||
//else
|
||||
// emit(MOVNTI);
|
||||
emitByte(0x04 + 8 * instr.src);
|
||||
emitByte(0x06);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_FSTORE(Instruction& instr, int i) {
|
||||
genAddressRegDst(instr, true);
|
||||
emit(MOVAPD);
|
||||
emitByte(0x04 + 8 * instr.src);
|
||||
emitByte(0x06);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_NOP(Instruction& instr, int i) {
|
||||
emitByte(0x90);
|
||||
emit(NOP1);
|
||||
}
|
||||
|
||||
#include "instruction_weights.hpp"
|
||||
|
|
|
@ -110,43 +110,36 @@ namespace randomx {
|
|||
codePos += count;
|
||||
}
|
||||
|
||||
void h_IADD_RS(Instruction&, int);
|
||||
void h_IADD_M(Instruction&, int);
|
||||
void h_IADD_RC(Instruction&, int);
|
||||
void h_ISUB_R(Instruction&, int);
|
||||
void h_ISUB_M(Instruction&, int);
|
||||
void h_IMUL_9C(Instruction&, int);
|
||||
void h_IMUL_R(Instruction&, int);
|
||||
void h_IMUL_M(Instruction&, int);
|
||||
void h_IMULH_R(Instruction&, int);
|
||||
void h_IMULH_M(Instruction&, int);
|
||||
void h_ISMULH_R(Instruction&, int);
|
||||
void h_ISMULH_M(Instruction&, int);
|
||||
void h_IMUL_RCP(Instruction&, int);
|
||||
void h_ISDIV_C(Instruction&, int);
|
||||
void h_INEG_R(Instruction&, int);
|
||||
void h_IXOR_R(Instruction&, int);
|
||||
void h_IXOR_M(Instruction&, int);
|
||||
void h_IROR_R(Instruction&, int);
|
||||
void h_IROL_R(Instruction&, int);
|
||||
void h_ISWAP_R(Instruction&, int);
|
||||
void h_FSWAP_R(Instruction&, int);
|
||||
void h_FADD_R(Instruction&, int);
|
||||
void h_FADD_M(Instruction&, int);
|
||||
void h_FSUB_R(Instruction&, int);
|
||||
void h_FSUB_M(Instruction&, int);
|
||||
void h_FSCAL_R(Instruction&, int);
|
||||
void h_FMUL_R(Instruction&, int);
|
||||
void h_FMUL_M(Instruction&, int);
|
||||
void h_FDIV_R(Instruction&, int);
|
||||
void h_FDIV_M(Instruction&, int);
|
||||
void h_FSQRT_R(Instruction&, int);
|
||||
void h_COND_R(Instruction&, int);
|
||||
void h_COND_M(Instruction&, int);
|
||||
void h_CFROUND(Instruction&, int);
|
||||
void h_ISTORE(Instruction&, int);
|
||||
void h_FSTORE(Instruction&, int);
|
||||
void h_NOP(Instruction&, int);
|
||||
void h_IADD_RS(Instruction&, int);
|
||||
void h_IADD_M(Instruction&, int);
|
||||
void h_ISUB_R(Instruction&, int);
|
||||
void h_ISUB_M(Instruction&, int);
|
||||
void h_IMUL_R(Instruction&, int);
|
||||
void h_IMUL_M(Instruction&, int);
|
||||
void h_IMULH_R(Instruction&, int);
|
||||
void h_IMULH_M(Instruction&, int);
|
||||
void h_ISMULH_R(Instruction&, int);
|
||||
void h_ISMULH_M(Instruction&, int);
|
||||
void h_IMUL_RCP(Instruction&, int);
|
||||
void h_INEG_R(Instruction&, int);
|
||||
void h_IXOR_R(Instruction&, int);
|
||||
void h_IXOR_M(Instruction&, int);
|
||||
void h_IROR_R(Instruction&, int);
|
||||
void h_IROL_R(Instruction&, int);
|
||||
void h_ISWAP_R(Instruction&, int);
|
||||
void h_FSWAP_R(Instruction&, int);
|
||||
void h_FADD_R(Instruction&, int);
|
||||
void h_FADD_M(Instruction&, int);
|
||||
void h_FSUB_R(Instruction&, int);
|
||||
void h_FSUB_M(Instruction&, int);
|
||||
void h_FSCAL_R(Instruction&, int);
|
||||
void h_FMUL_R(Instruction&, int);
|
||||
void h_FDIV_M(Instruction&, int);
|
||||
void h_FSQRT_R(Instruction&, int);
|
||||
void h_COND_R(Instruction&, int);
|
||||
void h_CFROUND(Instruction&, int);
|
||||
void h_ISTORE(Instruction&, int);
|
||||
void h_NOP(Instruction&, int);
|
||||
};
|
||||
|
||||
}
|
|
@ -31,7 +31,6 @@
|
|||
.global DECL(randomx_program_loop_load)
|
||||
.global DECL(randomx_program_start)
|
||||
.global DECL(randomx_program_read_dataset)
|
||||
.global DECL(randomx_program_read_dataset_light)
|
||||
.global DECL(randomx_program_read_dataset_sshash_init)
|
||||
.global DECL(randomx_program_read_dataset_sshash_fin)
|
||||
.global DECL(randomx_program_loop_store)
|
||||
|
@ -66,9 +65,6 @@ DECL(randomx_program_start):
|
|||
DECL(randomx_program_read_dataset):
|
||||
#include "asm/program_read_dataset.inc"
|
||||
|
||||
DECL(randomx_program_read_dataset_light):
|
||||
#include "asm/program_read_dataset_light.inc"
|
||||
|
||||
DECL(randomx_program_read_dataset_sshash_init):
|
||||
#include "asm/program_read_dataset_sshash_init.inc"
|
||||
|
||||
|
|
|
@ -24,7 +24,6 @@ PUBLIC randomx_program_loop_begin
|
|||
PUBLIC randomx_program_loop_load
|
||||
PUBLIC randomx_program_start
|
||||
PUBLIC randomx_program_read_dataset
|
||||
PUBLIC randomx_program_read_dataset_light
|
||||
PUBLIC randomx_program_read_dataset_sshash_init
|
||||
PUBLIC randomx_program_read_dataset_sshash_fin
|
||||
PUBLIC randomx_dataset_init
|
||||
|
@ -62,10 +61,6 @@ randomx_program_read_dataset PROC
|
|||
include asm/program_read_dataset.inc
|
||||
randomx_program_read_dataset ENDP
|
||||
|
||||
randomx_program_read_dataset_light PROC
|
||||
include asm/program_read_dataset_light.inc
|
||||
randomx_program_read_dataset_light ENDP
|
||||
|
||||
randomx_program_read_dataset_sshash_init PROC
|
||||
include asm/program_read_dataset_sshash_init.inc
|
||||
randomx_program_read_dataset_sshash_init ENDP
|
||||
|
|
|
@ -25,7 +25,6 @@ extern "C" {
|
|||
void randomx_program_loop_load();
|
||||
void randomx_program_start();
|
||||
void randomx_program_read_dataset();
|
||||
void randomx_program_read_dataset_light();
|
||||
void randomx_program_read_dataset_sshash_init();
|
||||
void randomx_program_read_dataset_sshash_fin();
|
||||
void randomx_program_loop_store();
|
||||
|
|
|
@ -76,22 +76,6 @@ void randomx_vm::initialize() {
|
|||
store64(&config.eMask[1], (program.getEntropy(15) & mask22bit) | maskExp240);
|
||||
}
|
||||
|
||||
//TODO
|
||||
std::ostream& operator<<(std::ostream& os, const randomx::RegisterFile& rf) {
|
||||
for (int i = 0; i < randomx::RegistersCount; ++i)
|
||||
os << std::hex << "r" << i << " = " << rf.r[i] << std::endl << std::dec;
|
||||
for (int i = 0; i < 4; ++i)
|
||||
os << std::hex << "f" << i << " = " << *(uint64_t*)&rf.f[i].hi << " (" << rf.f[i].hi << ")" << std::endl
|
||||
<< " = " << *(uint64_t*)&rf.f[i].lo << " (" << rf.f[i].lo << ")" << std::endl << std::dec;
|
||||
for (int i = 0; i < 4; ++i)
|
||||
os << std::hex << "e" << i << " = " << *(uint64_t*)&rf.e[i].hi << " (" << rf.e[i].hi << ")" << std::endl
|
||||
<< " = " << *(uint64_t*)&rf.e[i].lo << " (" << rf.e[i].lo << ")" << std::endl << std::dec;
|
||||
for (int i = 0; i < 4; ++i)
|
||||
os << std::hex << "a" << i << " = " << *(uint64_t*)&rf.a[i].hi << " (" << rf.a[i].hi << ")" << std::endl
|
||||
<< " = " << *(uint64_t*)&rf.a[i].lo << " (" << rf.a[i].lo << ")" << std::endl << std::dec;
|
||||
return os;
|
||||
}
|
||||
|
||||
namespace randomx {
|
||||
|
||||
alignas(16) volatile static __m128i aesDummy;
|
||||
|
|
|
@ -17,10 +17,6 @@ You should have received a copy of the GNU General Public License
|
|||
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
//#define TRACE
|
||||
//#define FPUCHECK
|
||||
#define RANDOMX_JUMP
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <stdexcept>
|
||||
|
@ -33,12 +29,6 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
|||
#include "intrin_portable.h"
|
||||
#include "reciprocal.h"
|
||||
|
||||
#ifdef FPUCHECK
|
||||
constexpr bool fpuCheck = true;
|
||||
#else
|
||||
constexpr bool fpuCheck = false;
|
||||
#endif
|
||||
|
||||
namespace randomx {
|
||||
|
||||
static int_reg_t Zero = 0;
|
||||
|
@ -53,49 +43,16 @@ namespace randomx {
|
|||
void InterpretedVm<Allocator, softAes>::run(void* seed) {
|
||||
VmBase<Allocator, softAes>::generateProgram(seed);
|
||||
randomx_vm::initialize();
|
||||
for (unsigned i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) {
|
||||
program(i).src %= RegistersCount;
|
||||
program(i).dst %= RegistersCount;
|
||||
}
|
||||
execute();
|
||||
}
|
||||
|
||||
template<class Allocator, bool softAes>
|
||||
void InterpretedVm<Allocator, softAes>::executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
|
||||
for (int ic = 0; ic < RANDOMX_PROGRAM_SIZE; ++ic) {
|
||||
executeBytecode(ic, r, f, e, a);
|
||||
void InterpretedVm<Allocator, softAes>::executeBytecode(int_reg_t(&r)[RegistersCount], __m128d (&f)[RegisterCountFlt], __m128d (&e)[RegisterCountFlt], __m128d (&a)[RegisterCountFlt]) {
|
||||
for (int pc = 0; pc < RANDOMX_PROGRAM_SIZE; ++pc) {
|
||||
executeBytecode(pc, r, f, e, a);
|
||||
}
|
||||
}
|
||||
|
||||
static void print(int_reg_t r) {
|
||||
std::cout << std::hex << std::setw(16) << std::setfill('0') << r << std::endl;
|
||||
}
|
||||
|
||||
static void print(__m128d f) {
|
||||
uint64_t lo = *(((uint64_t*)&f) + 0);
|
||||
uint64_t hi = *(((uint64_t*)&f) + 1);
|
||||
std::cout << std::hex << std::setw(16) << std::setfill('0') << hi << '-' << std::hex << std::setw(16) << std::setfill('0') << lo << std::endl;
|
||||
}
|
||||
|
||||
static void printState(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
|
||||
for (int i = 0; i < 8; ++i) {
|
||||
std::cout << "r" << i << " = "; print(r[i]);
|
||||
}
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
std::cout << "f" << i << " = "; print(f[i]);
|
||||
}
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
std::cout << "e" << i << " = "; print(e[i]);
|
||||
}
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
std::cout << "a" << i << " = "; print(a[i]);
|
||||
}
|
||||
}
|
||||
|
||||
static bool isDenormal(double x) {
|
||||
return std::fpclassify(x) == FP_SUBNORMAL;
|
||||
}
|
||||
|
||||
template<class Allocator, bool softAes>
|
||||
FORCE_INLINE void* InterpretedVm<Allocator, softAes>::getScratchpadAddress(InstructionByteCode& ibc) {
|
||||
uint32_t addr = (*ibc.isrc + ibc.imm) & ibc.memMask;
|
||||
|
@ -113,9 +70,8 @@ namespace randomx {
|
|||
}
|
||||
|
||||
template<class Allocator, bool softAes>
|
||||
void InterpretedVm<Allocator, softAes>::executeBytecode(int& ic, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
|
||||
auto& ibc = byteCode[ic];
|
||||
if (trace && ibc.type != InstructionType::NOP) std::cout << std::dec << std::setw(3) << ic << " " << program(ic);
|
||||
void InterpretedVm<Allocator, softAes>::executeBytecode(int& pc, int_reg_t(&r)[RegistersCount], __m128d (&f)[RegisterCountFlt], __m128d (&e)[RegisterCountFlt], __m128d (&a)[RegisterCountFlt]) {
|
||||
auto& ibc = byteCode[pc];
|
||||
switch (ibc.type)
|
||||
{
|
||||
case InstructionType::IADD_RS: {
|
||||
|
@ -225,11 +181,11 @@ namespace randomx {
|
|||
} break;
|
||||
|
||||
case InstructionType::COND_R: {
|
||||
#ifdef RANDOMX_JUMP
|
||||
#if RANDOMX_JUMP
|
||||
*ibc.creg += (1 << ibc.shift);
|
||||
const uint64_t conditionMask = ((1ULL << RANDOMX_CONDITION_BITS) - 1) << ibc.shift;
|
||||
const uint64_t conditionMask = ((1ULL << RANDOMX_JUMP_BITS) - 1) << ibc.shift;
|
||||
if ((*ibc.creg & conditionMask) == 0) {
|
||||
ic = ibc.target;
|
||||
pc = ibc.target;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
|
@ -251,50 +207,23 @@ namespace randomx {
|
|||
default:
|
||||
UNREACHABLE;
|
||||
}
|
||||
if (trace && ibc.type != InstructionType::NOP) {
|
||||
if(ibc.type < 20 || ibc.type == 31 || ibc.type == 32)
|
||||
print(*ibc.idst);
|
||||
else //if(ibc.type >= 20 && ibc.type <= 30)
|
||||
print(0);
|
||||
}
|
||||
#ifdef FPUCHECK
|
||||
if (ibc.type >= 26 && ibc.type <= 30) {
|
||||
double lo = *(((double*)ibc.fdst) + 0);
|
||||
double hi = *(((double*)ibc.fdst) + 1);
|
||||
if (lo <= 0 || hi <= 0) {
|
||||
std::stringstream ss;
|
||||
ss << "Underflow in operation " << ibc.type;
|
||||
printState(r, f, e, a);
|
||||
throw std::runtime_error(ss.str());
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
template<class Allocator, bool softAes>
|
||||
void InterpretedVm<Allocator, softAes>::execute() {
|
||||
int_reg_t r[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
|
||||
__m128d f[4];
|
||||
__m128d e[4];
|
||||
__m128d a[4];
|
||||
int_reg_t r[RegistersCount] = { 0 };
|
||||
__m128d f[RegisterCountFlt];
|
||||
__m128d e[RegisterCountFlt];
|
||||
__m128d a[RegisterCountFlt];
|
||||
|
||||
a[0] = _mm_load_pd(&reg.a[0].lo);
|
||||
a[1] = _mm_load_pd(&reg.a[1].lo);
|
||||
a[2] = _mm_load_pd(&reg.a[2].lo);
|
||||
a[3] = _mm_load_pd(&reg.a[3].lo);
|
||||
for(unsigned i = 0; i < RegisterCountFlt; ++i)
|
||||
a[i] = _mm_load_pd(&reg.a[i].lo);
|
||||
|
||||
precompileProgram(r, f, e, a);
|
||||
|
||||
uint32_t spAddr0 = mem.mx;
|
||||
uint32_t spAddr1 = mem.ma;
|
||||
|
||||
if (trace) {
|
||||
std::cout << "execute (reg: r" << config.readReg0 << ", r" << config.readReg1 << ", r" << config.readReg2 << ", r" << config.readReg3 << ")" << std::endl;
|
||||
std::cout << "spAddr " << std::hex << std::setw(8) << std::setfill('0') << spAddr1 << " / " << std::setw(8) << std::setfill('0') << spAddr0 << std::endl;
|
||||
std::cout << "ma/mx " << std::hex << std::setw(8) << std::setfill('0') << mem.ma << std::setw(8) << std::setfill('0') << mem.mx << std::endl;
|
||||
printState(r, f, e, a);
|
||||
}
|
||||
|
||||
for(unsigned ic = 0; ic < RANDOMX_PROGRAM_ITERATIONS; ++ic) {
|
||||
uint64_t spMix = r[config.readReg0] ^ r[config.readReg1];
|
||||
spAddr0 ^= spMix;
|
||||
|
@ -302,31 +231,14 @@ namespace randomx {
|
|||
spAddr1 ^= spMix >> 32;
|
||||
spAddr1 &= ScratchpadL3Mask64;
|
||||
|
||||
r[0] ^= load64(scratchpad + spAddr0 + 0);
|
||||
r[1] ^= load64(scratchpad + spAddr0 + 8);
|
||||
r[2] ^= load64(scratchpad + spAddr0 + 16);
|
||||
r[3] ^= load64(scratchpad + spAddr0 + 24);
|
||||
r[4] ^= load64(scratchpad + spAddr0 + 32);
|
||||
r[5] ^= load64(scratchpad + spAddr0 + 40);
|
||||
r[6] ^= load64(scratchpad + spAddr0 + 48);
|
||||
r[7] ^= load64(scratchpad + spAddr0 + 56);
|
||||
for (unsigned i = 0; i < RegistersCount; ++i)
|
||||
r[i] ^= load64(scratchpad + spAddr0 + 8 * i);
|
||||
|
||||
f[0] = load_cvt_i32x2(scratchpad + spAddr1 + 0);
|
||||
f[1] = load_cvt_i32x2(scratchpad + spAddr1 + 8);
|
||||
f[2] = load_cvt_i32x2(scratchpad + spAddr1 + 16);
|
||||
f[3] = load_cvt_i32x2(scratchpad + spAddr1 + 24);
|
||||
e[0] = maskRegisterExponentMantissa(load_cvt_i32x2(scratchpad + spAddr1 + 32));
|
||||
e[1] = maskRegisterExponentMantissa(load_cvt_i32x2(scratchpad + spAddr1 + 40));
|
||||
e[2] = maskRegisterExponentMantissa(load_cvt_i32x2(scratchpad + spAddr1 + 48));
|
||||
e[3] = maskRegisterExponentMantissa(load_cvt_i32x2(scratchpad + spAddr1 + 56));
|
||||
for (unsigned i = 0; i < RegisterCountFlt; ++i)
|
||||
f[i] = load_cvt_i32x2(scratchpad + spAddr1 + 8 * i);
|
||||
|
||||
if (trace) {
|
||||
std::cout << "iteration " << std::dec << ic << std::endl;
|
||||
std::cout << "spAddr " << std::hex << std::setw(8) << std::setfill('0') << spAddr1 << " / " << std::setw(8) << std::setfill('0') << spAddr0 << std::endl;
|
||||
std::cout << "ma/mx " << std::hex << std::setw(8) << std::setfill('0') << mem.ma << std::setw(8) << std::setfill('0') << mem.mx << std::endl;
|
||||
printState(r, f, e, a);
|
||||
std::cout << "-----------------------------------" << std::endl;
|
||||
}
|
||||
for (unsigned i = 0; i < RegisterCountFlt; ++i)
|
||||
e[i] = maskRegisterExponentMantissa(load_cvt_i32x2(scratchpad + spAddr1 + 8 * (RegisterCountFlt + i)));
|
||||
|
||||
executeBytecode(r, f, e, a);
|
||||
|
||||
|
@ -335,72 +247,33 @@ namespace randomx {
|
|||
datasetRead(datasetOffset + mem.ma, r);
|
||||
std::swap(mem.mx, mem.ma);
|
||||
|
||||
if (trace) {
|
||||
std::cout << "iteration " << std::dec << ic << std::endl;
|
||||
std::cout << "spAddr " << std::hex << std::setw(8) << std::setfill('0') << spAddr1 << " / " << std::setw(8) << std::setfill('0') << spAddr0 << std::endl;
|
||||
std::cout << "ma/mx " << std::hex << std::setw(8) << std::setfill('0') << mem.ma << std::setw(8) << std::setfill('0') << mem.mx << std::endl;
|
||||
printState(r, f, e, a);
|
||||
std::cout << "===================================" << std::endl;
|
||||
}
|
||||
for (unsigned i = 0; i < RegistersCount; ++i)
|
||||
store64(scratchpad + spAddr1 + 8 * i, r[i]);
|
||||
|
||||
store64(scratchpad + spAddr1 + 0, r[0]);
|
||||
store64(scratchpad + spAddr1 + 8, r[1]);
|
||||
store64(scratchpad + spAddr1 + 16, r[2]);
|
||||
store64(scratchpad + spAddr1 + 24, r[3]);
|
||||
store64(scratchpad + spAddr1 + 32, r[4]);
|
||||
store64(scratchpad + spAddr1 + 40, r[5]);
|
||||
store64(scratchpad + spAddr1 + 48, r[6]);
|
||||
store64(scratchpad + spAddr1 + 56, r[7]);
|
||||
for (unsigned i = 0; i < RegisterCountFlt; ++i)
|
||||
f[i] = _mm_xor_pd(f[i], e[i]);
|
||||
|
||||
f[0] = _mm_xor_pd(f[0], e[0]);
|
||||
f[1] = _mm_xor_pd(f[1], e[1]);
|
||||
f[2] = _mm_xor_pd(f[2], e[2]);
|
||||
f[3] = _mm_xor_pd(f[3], e[3]);
|
||||
|
||||
#ifdef FPUCHECK
|
||||
for(int i = 0; i < 4; ++i) {
|
||||
double lo = *(((double*)&f[i]) + 0);
|
||||
double hi = *(((double*)&f[i]) + 1);
|
||||
if (isDenormal(lo) || isDenormal(hi)) {
|
||||
std::stringstream ss;
|
||||
ss << "Denormal f" << i;
|
||||
throw std::runtime_error(ss.str());
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
_mm_store_pd((double*)(scratchpad + spAddr0 + 0), f[0]);
|
||||
_mm_store_pd((double*)(scratchpad + spAddr0 + 16), f[1]);
|
||||
_mm_store_pd((double*)(scratchpad + spAddr0 + 32), f[2]);
|
||||
_mm_store_pd((double*)(scratchpad + spAddr0 + 48), f[3]);
|
||||
for (unsigned i = 0; i < RegisterCountFlt; ++i)
|
||||
_mm_store_pd((double*)(scratchpad + spAddr0 + 16 * i), f[i]);
|
||||
|
||||
spAddr0 = 0;
|
||||
spAddr1 = 0;
|
||||
}
|
||||
|
||||
store64(&reg.r[0], r[0]);
|
||||
store64(&reg.r[1], r[1]);
|
||||
store64(&reg.r[2], r[2]);
|
||||
store64(&reg.r[3], r[3]);
|
||||
store64(&reg.r[4], r[4]);
|
||||
store64(&reg.r[5], r[5]);
|
||||
store64(&reg.r[6], r[6]);
|
||||
store64(&reg.r[7], r[7]);
|
||||
for (unsigned i = 0; i < RegistersCount; ++i)
|
||||
store64(&reg.r[i], r[i]);
|
||||
|
||||
_mm_store_pd(&reg.f[0].lo, f[0]);
|
||||
_mm_store_pd(&reg.f[1].lo, f[1]);
|
||||
_mm_store_pd(&reg.f[2].lo, f[2]);
|
||||
_mm_store_pd(&reg.f[3].lo, f[3]);
|
||||
_mm_store_pd(&reg.e[0].lo, e[0]);
|
||||
_mm_store_pd(&reg.e[1].lo, e[1]);
|
||||
_mm_store_pd(&reg.e[2].lo, e[2]);
|
||||
_mm_store_pd(&reg.e[3].lo, e[3]);
|
||||
for (unsigned i = 0; i < RegisterCountFlt; ++i)
|
||||
_mm_store_pd(&reg.f[i].lo, f[i]);
|
||||
|
||||
for (unsigned i = 0; i < RegisterCountFlt; ++i)
|
||||
_mm_store_pd(&reg.e[i].lo, e[i]);
|
||||
}
|
||||
|
||||
static int getConditionRegister(int(&registerUsage)[8]) {
|
||||
static int getConditionRegister(int(&registerUsage)[RegistersCount]) {
|
||||
int min = INT_MAX;
|
||||
int minIndex;
|
||||
for (unsigned i = 0; i < 8; ++i) {
|
||||
for (unsigned i = 0; i < RegistersCount; ++i) {
|
||||
if (registerUsage[i] < min) {
|
||||
min = registerUsage[i];
|
||||
minIndex = i;
|
||||
|
@ -410,7 +283,7 @@ namespace randomx {
|
|||
}
|
||||
|
||||
template<class Allocator, bool softAes>
|
||||
void InterpretedVm<Allocator, softAes>::datasetRead(uint32_t address, int_reg_t(&r)[8]) {
|
||||
void InterpretedVm<Allocator, softAes>::datasetRead(uint32_t address, int_reg_t(&r)[RegistersCount]) {
|
||||
uint64_t* datasetLine = (uint64_t*)(mem.memory + address);
|
||||
for (int i = 0; i < RegistersCount; ++i)
|
||||
r[i] ^= datasetLine[i];
|
||||
|
@ -419,9 +292,9 @@ namespace randomx {
|
|||
#include "instruction_weights.hpp"
|
||||
|
||||
template<class Allocator, bool softAes>
|
||||
void InterpretedVm<Allocator, softAes>::precompileProgram(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
|
||||
int registerUsage[8];
|
||||
for (unsigned i = 0; i < 8; ++i) {
|
||||
void InterpretedVm<Allocator, softAes>::precompileProgram(int_reg_t(&r)[RegistersCount], __m128d (&f)[RegisterCountFlt], __m128d (&e)[RegisterCountFlt], __m128d (&a)[RegisterCountFlt]) {
|
||||
int registerUsage[RegistersCount];
|
||||
for (unsigned i = 0; i < RegistersCount; ++i) {
|
||||
registerUsage[i] = -1;
|
||||
}
|
||||
for (unsigned i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) {
|
||||
|
@ -443,7 +316,7 @@ namespace randomx {
|
|||
ibc.shift = instr.getModMem();
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
}
|
||||
registerUsage[instr.dst] = i;
|
||||
registerUsage[dst] = i;
|
||||
} break;
|
||||
|
||||
CASE_REP(IADD_M) {
|
||||
|
@ -452,7 +325,7 @@ namespace randomx {
|
|||
ibc.type = InstructionType::IADD_M;
|
||||
ibc.idst = &r[dst];
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
if (instr.src != instr.dst) {
|
||||
if (src != dst) {
|
||||
ibc.isrc = &r[src];
|
||||
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
|
||||
}
|
||||
|
@ -460,7 +333,7 @@ namespace randomx {
|
|||
ibc.isrc = &Zero;
|
||||
ibc.memMask = ScratchpadL3Mask;
|
||||
}
|
||||
registerUsage[instr.dst] = i;
|
||||
registerUsage[dst] = i;
|
||||
} break;
|
||||
|
||||
CASE_REP(ISUB_R) {
|
||||
|
@ -475,7 +348,7 @@ namespace randomx {
|
|||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
ibc.isrc = &ibc.imm;
|
||||
}
|
||||
registerUsage[instr.dst] = i;
|
||||
registerUsage[dst] = i;
|
||||
} break;
|
||||
|
||||
CASE_REP(ISUB_M) {
|
||||
|
@ -484,7 +357,7 @@ namespace randomx {
|
|||
ibc.type = InstructionType::ISUB_M;
|
||||
ibc.idst = &r[dst];
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
if (instr.src != instr.dst) {
|
||||
if (src != dst) {
|
||||
ibc.isrc = &r[src];
|
||||
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
|
||||
}
|
||||
|
@ -492,7 +365,7 @@ namespace randomx {
|
|||
ibc.isrc = &Zero;
|
||||
ibc.memMask = ScratchpadL3Mask;
|
||||
}
|
||||
registerUsage[instr.dst] = i;
|
||||
registerUsage[dst] = i;
|
||||
} break;
|
||||
|
||||
CASE_REP(IMUL_R) {
|
||||
|
@ -507,7 +380,7 @@ namespace randomx {
|
|||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
ibc.isrc = &ibc.imm;
|
||||
}
|
||||
registerUsage[instr.dst] = i;
|
||||
registerUsage[dst] = i;
|
||||
} break;
|
||||
|
||||
CASE_REP(IMUL_M) {
|
||||
|
@ -516,7 +389,7 @@ namespace randomx {
|
|||
ibc.type = InstructionType::IMUL_M;
|
||||
ibc.idst = &r[dst];
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
if (instr.src != instr.dst) {
|
||||
if (src != dst) {
|
||||
ibc.isrc = &r[src];
|
||||
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
|
||||
}
|
||||
|
@ -524,7 +397,7 @@ namespace randomx {
|
|||
ibc.isrc = &Zero;
|
||||
ibc.memMask = ScratchpadL3Mask;
|
||||
}
|
||||
registerUsage[instr.dst] = i;
|
||||
registerUsage[dst] = i;
|
||||
} break;
|
||||
|
||||
CASE_REP(IMULH_R) {
|
||||
|
@ -533,7 +406,7 @@ namespace randomx {
|
|||
ibc.type = InstructionType::IMULH_R;
|
||||
ibc.idst = &r[dst];
|
||||
ibc.isrc = &r[src];
|
||||
registerUsage[instr.dst] = i;
|
||||
registerUsage[dst] = i;
|
||||
} break;
|
||||
|
||||
CASE_REP(IMULH_M) {
|
||||
|
@ -542,7 +415,7 @@ namespace randomx {
|
|||
ibc.type = InstructionType::IMULH_M;
|
||||
ibc.idst = &r[dst];
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
if (instr.src != instr.dst) {
|
||||
if (src != dst) {
|
||||
ibc.isrc = &r[src];
|
||||
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
|
||||
}
|
||||
|
@ -550,7 +423,7 @@ namespace randomx {
|
|||
ibc.isrc = &Zero;
|
||||
ibc.memMask = ScratchpadL3Mask;
|
||||
}
|
||||
registerUsage[instr.dst] = i;
|
||||
registerUsage[dst] = i;
|
||||
} break;
|
||||
|
||||
CASE_REP(ISMULH_R) {
|
||||
|
@ -559,7 +432,7 @@ namespace randomx {
|
|||
ibc.type = InstructionType::ISMULH_R;
|
||||
ibc.idst = &r[dst];
|
||||
ibc.isrc = &r[src];
|
||||
registerUsage[instr.dst] = i;
|
||||
registerUsage[dst] = i;
|
||||
} break;
|
||||
|
||||
CASE_REP(ISMULH_M) {
|
||||
|
@ -568,7 +441,7 @@ namespace randomx {
|
|||
ibc.type = InstructionType::ISMULH_M;
|
||||
ibc.idst = &r[dst];
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
if (instr.src != instr.dst) {
|
||||
if (src != dst) {
|
||||
ibc.isrc = &r[src];
|
||||
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
|
||||
}
|
||||
|
@ -576,7 +449,7 @@ namespace randomx {
|
|||
ibc.isrc = &Zero;
|
||||
ibc.memMask = ScratchpadL3Mask;
|
||||
}
|
||||
registerUsage[instr.dst] = i;
|
||||
registerUsage[dst] = i;
|
||||
} break;
|
||||
|
||||
CASE_REP(IMUL_RCP) {
|
||||
|
@ -587,7 +460,7 @@ namespace randomx {
|
|||
ibc.idst = &r[dst];
|
||||
ibc.imm = randomx_reciprocal(divisor);
|
||||
ibc.isrc = &ibc.imm;
|
||||
registerUsage[instr.dst] = i;
|
||||
registerUsage[dst] = i;
|
||||
}
|
||||
else {
|
||||
ibc.type = InstructionType::NOP;
|
||||
|
@ -598,7 +471,7 @@ namespace randomx {
|
|||
auto dst = instr.dst % RegistersCount;
|
||||
ibc.type = InstructionType::INEG_R;
|
||||
ibc.idst = &r[dst];
|
||||
registerUsage[instr.dst] = i;
|
||||
registerUsage[dst] = i;
|
||||
} break;
|
||||
|
||||
CASE_REP(IXOR_R) {
|
||||
|
@ -613,7 +486,7 @@ namespace randomx {
|
|||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
ibc.isrc = &ibc.imm;
|
||||
}
|
||||
registerUsage[instr.dst] = i;
|
||||
registerUsage[dst] = i;
|
||||
} break;
|
||||
|
||||
CASE_REP(IXOR_M) {
|
||||
|
@ -622,7 +495,7 @@ namespace randomx {
|
|||
ibc.type = InstructionType::IXOR_M;
|
||||
ibc.idst = &r[dst];
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
if (instr.src != instr.dst) {
|
||||
if (src != dst) {
|
||||
ibc.isrc = &r[src];
|
||||
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
|
||||
}
|
||||
|
@ -630,7 +503,7 @@ namespace randomx {
|
|||
ibc.isrc = &Zero;
|
||||
ibc.memMask = ScratchpadL3Mask;
|
||||
}
|
||||
registerUsage[instr.dst] = i;
|
||||
registerUsage[dst] = i;
|
||||
} break;
|
||||
|
||||
CASE_REP(IROR_R) {
|
||||
|
@ -645,7 +518,7 @@ namespace randomx {
|
|||
ibc.imm = instr.getImm32();
|
||||
ibc.isrc = &ibc.imm;
|
||||
}
|
||||
registerUsage[instr.dst] = i;
|
||||
registerUsage[dst] = i;
|
||||
} break;
|
||||
|
||||
CASE_REP(IROL_R) {
|
||||
|
@ -660,7 +533,7 @@ namespace randomx {
|
|||
ibc.imm = instr.getImm32();
|
||||
ibc.isrc = &ibc.imm;
|
||||
}
|
||||
registerUsage[instr.dst] = i;
|
||||
registerUsage[dst] = i;
|
||||
} break;
|
||||
|
||||
CASE_REP(ISWAP_R) {
|
||||
|
@ -670,8 +543,8 @@ namespace randomx {
|
|||
ibc.idst = &r[dst];
|
||||
ibc.isrc = &r[src];
|
||||
ibc.type = InstructionType::ISWAP_R;
|
||||
registerUsage[instr.dst] = i;
|
||||
registerUsage[instr.src] = i;
|
||||
registerUsage[dst] = i;
|
||||
registerUsage[src] = i;
|
||||
}
|
||||
else {
|
||||
ibc.type = InstructionType::NOP;
|
||||
|
@ -681,23 +554,23 @@ namespace randomx {
|
|||
CASE_REP(FSWAP_R) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
ibc.type = InstructionType::FSWAP_R;
|
||||
if (dst < 4)
|
||||
if (dst < RegisterCountFlt)
|
||||
ibc.fdst = &f[dst];
|
||||
else
|
||||
ibc.fdst = &e[dst - 4];
|
||||
ibc.fdst = &e[dst - RegisterCountFlt];
|
||||
} break;
|
||||
|
||||
CASE_REP(FADD_R) {
|
||||
auto dst = instr.dst % 4;
|
||||
auto src = instr.src % 4;
|
||||
auto dst = instr.dst % RegisterCountFlt;
|
||||
auto src = instr.src % RegisterCountFlt;
|
||||
ibc.type = InstructionType::FADD_R;
|
||||
ibc.fdst = &f[dst];
|
||||
ibc.fsrc = &a[src];
|
||||
} break;
|
||||
|
||||
CASE_REP(FADD_M) {
|
||||
auto dst = instr.dst % 4;
|
||||
auto src = instr.src % 8;
|
||||
auto dst = instr.dst % RegisterCountFlt;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::FADD_M;
|
||||
ibc.fdst = &f[dst];
|
||||
ibc.isrc = &r[src];
|
||||
|
@ -706,16 +579,16 @@ namespace randomx {
|
|||
} break;
|
||||
|
||||
CASE_REP(FSUB_R) {
|
||||
auto dst = instr.dst % 4;
|
||||
auto src = instr.src % 4;
|
||||
auto dst = instr.dst % RegisterCountFlt;
|
||||
auto src = instr.src % RegisterCountFlt;
|
||||
ibc.type = InstructionType::FSUB_R;
|
||||
ibc.fdst = &f[dst];
|
||||
ibc.fsrc = &a[src];
|
||||
} break;
|
||||
|
||||
CASE_REP(FSUB_M) {
|
||||
auto dst = instr.dst % 4;
|
||||
auto src = instr.src % 8;
|
||||
auto dst = instr.dst % RegisterCountFlt;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::FSUB_M;
|
||||
ibc.fdst = &f[dst];
|
||||
ibc.isrc = &r[src];
|
||||
|
@ -724,22 +597,22 @@ namespace randomx {
|
|||
} break;
|
||||
|
||||
CASE_REP(FSCAL_R) {
|
||||
auto dst = instr.dst % 4;
|
||||
auto dst = instr.dst % RegisterCountFlt;
|
||||
ibc.fdst = &f[dst];
|
||||
ibc.type = InstructionType::FSCAL_R;
|
||||
} break;
|
||||
|
||||
CASE_REP(FMUL_R) {
|
||||
auto dst = instr.dst % 4;
|
||||
auto src = instr.src % 4;
|
||||
auto dst = instr.dst % RegisterCountFlt;
|
||||
auto src = instr.src % RegisterCountFlt;
|
||||
ibc.type = InstructionType::FMUL_R;
|
||||
ibc.fdst = &e[dst];
|
||||
ibc.fsrc = &a[src];
|
||||
} break;
|
||||
|
||||
CASE_REP(FDIV_M) {
|
||||
auto dst = instr.dst % 4;
|
||||
auto src = instr.src % 8;
|
||||
auto dst = instr.dst % RegisterCountFlt;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::FDIV_M;
|
||||
ibc.fdst = &e[dst];
|
||||
ibc.isrc = &r[src];
|
||||
|
@ -748,7 +621,7 @@ namespace randomx {
|
|||
} break;
|
||||
|
||||
CASE_REP(FSQRT_R) {
|
||||
auto dst = instr.dst % 4;
|
||||
auto dst = instr.dst % RegisterCountFlt;
|
||||
ibc.type = InstructionType::FSQRT_R;
|
||||
ibc.fdst = &e[dst];
|
||||
} break;
|
||||
|
@ -766,13 +639,13 @@ namespace randomx {
|
|||
ibc.target = registerUsage[reg];
|
||||
ibc.shift = instr.getModShift();
|
||||
ibc.creg = &r[reg];
|
||||
for (unsigned j = 0; j < 8; ++j) { //mark all registers as used
|
||||
for (unsigned j = 0; j < RegistersCount; ++j) { //mark all registers as used
|
||||
registerUsage[j] = i;
|
||||
}
|
||||
} break;
|
||||
|
||||
CASE_REP(CFROUND) {
|
||||
auto src = instr.src % 8;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.isrc = &r[src];
|
||||
ibc.type = InstructionType::CFROUND;
|
||||
ibc.imm = instr.getImm32() & 63;
|
||||
|
|
|
@ -71,12 +71,12 @@ namespace randomx {
|
|||
void run(void* seed) override;
|
||||
void setDataset(randomx_dataset* dataset) override;
|
||||
protected:
|
||||
virtual void datasetRead(uint32_t blockNumber, int_reg_t(&r)[8]);
|
||||
virtual void datasetRead(uint32_t blockNumber, int_reg_t(&r)[RegistersCount]);
|
||||
private:
|
||||
void execute();
|
||||
void precompileProgram(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
|
||||
void executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
|
||||
void executeBytecode(int& i, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
|
||||
void precompileProgram(int_reg_t(&r)[RegistersCount], __m128d (&f)[RegisterCountFlt], __m128d (&e)[RegisterCountFlt], __m128d (&a)[RegisterCountFlt]);
|
||||
void executeBytecode(int_reg_t(&r)[RegistersCount], __m128d (&f)[RegisterCountFlt], __m128d (&e)[RegisterCountFlt], __m128d (&a)[RegisterCountFlt]);
|
||||
void executeBytecode(int& i, int_reg_t(&r)[RegistersCount], __m128d (&f)[RegisterCountFlt], __m128d (&e)[RegisterCountFlt], __m128d (&a)[RegisterCountFlt]);
|
||||
void* getScratchpadAddress(InstructionByteCode& ibc);
|
||||
__m128d maskRegisterExponentMantissa(__m128d);
|
||||
|
||||
|
|
|
@ -106,7 +106,7 @@
|
|||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
<SDLCheck>false</SDLCheck>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
|
|
|
@ -26,20 +26,20 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<ConfigurationType>StaticLibrary</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<ConfigurationType>StaticLibrary</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<ConfigurationType>StaticLibrary</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
|
|
Loading…
Reference in a new issue