Added explicit STORE instructions

JIT compiler
This commit is contained in:
tevador 2019-01-27 10:52:30 +01:00
parent d2cb086221
commit 005c67f64c
27 changed files with 1751 additions and 1518 deletions

View file

@ -75,6 +75,11 @@ namespace RandomX {
asmCode << "\tand " << reg << ", " << ((instr.alt % 4) ? ScratchpadL1Mask : ScratchpadL2Mask) << std::endl; asmCode << "\tand " << reg << ", " << ((instr.alt % 4) ? ScratchpadL1Mask : ScratchpadL2Mask) << std::endl;
} }
// Emits the two instructions that compute a scratchpad store offset in eax:
// the 32-bit view of the destination register is copied into eax and then
// masked with the L1 mask (when instr.alt % 4 is non-zero) or the L2 mask.
// maskAlign additionally clears the low bits of the mask so the resulting
// offset is a multiple of maskAlign (8 by default, 16 for FSTORE).
// Note: the default argument lives on this definition, so it only applies to
// calls that appear after this point in the translation unit.
void AssemblyGeneratorX86::genAddressRegDst(Instruction& instr, int maskAlign = 8) {
	const int levelMask = (instr.alt % 4) ? ScratchpadL1Mask : ScratchpadL2Mask;
	const int alignedMask = levelMask & (-maskAlign);
	asmCode << "\tmov eax, " << regR32[instr.dst] << std::endl;
	asmCode << "\tand eax, " << alignedMask << std::endl;
}
int32_t AssemblyGeneratorX86::genAddressImm(Instruction& instr) { int32_t AssemblyGeneratorX86::genAddressImm(Instruction& instr) {
return instr.imm32 & ((instr.alt % 4) ? ScratchpadL1Mask : ScratchpadL2Mask); return instr.imm32 & ((instr.alt % 4) ? ScratchpadL1Mask : ScratchpadL2Mask);
} }
@ -425,7 +430,7 @@ namespace RandomX {
//6 uOPs //6 uOPs
void AssemblyGeneratorX86::h_CFROUND(Instruction& instr, int i) { void AssemblyGeneratorX86::h_CFROUND(Instruction& instr, int i) {
asmCode << "\tmov rax, " << regR[instr.dst] << std::endl; asmCode << "\tmov rax, " << regR[instr.src] << std::endl;
int rotate = (13 - (instr.alt & 63)) & 63; int rotate = (13 - (instr.alt & 63)) & 63;
if (rotate != 0) if (rotate != 0)
asmCode << "\trol rax, " << rotate << std::endl; asmCode << "\trol rax, " << rotate << std::endl;
@ -474,6 +479,18 @@ namespace RandomX {
asmCode << "\tadd " << regR[instr.dst] << ", rcx" << std::endl; asmCode << "\tadd " << regR[instr.dst] << ", rcx" << std::endl;
} }
//3 uOPs
// ISTORE: writes the 64-bit source integer register to the scratchpad at the
// offset derived from the destination register (offset is left in eax/rax by
// genAddressRegDst; rsi is used as the base of the address).
void AssemblyGeneratorX86::h_ISTORE(Instruction& instr, int i) {
	genAddressRegDst(instr);
	const auto& sourceReg = regR[instr.src];
	asmCode << "\tmov qword ptr [rsi+rax], ";
	asmCode << sourceReg << std::endl;
}
//3 uOPs
// FSTORE: writes a 128-bit floating point register to the scratchpad with
// movapd. The offset mask is requested with 16-byte granularity because
// movapd requires a 16-byte aligned memory operand.
void AssemblyGeneratorX86::h_FSTORE(Instruction& instr, int i) {
	constexpr int xmmAlignment = 16;
	genAddressRegDst(instr, xmmAlignment);
	const auto& sourceReg = regFE[instr.src];
	asmCode << "\tmovapd xmmword ptr [rsi+rax], ";
	asmCode << sourceReg << std::endl;
}
#include "instructionWeights.hpp" #include "instructionWeights.hpp"
#define INST_HANDLE(x) REPN(&AssemblyGeneratorX86::h_##x, WT(x)) #define INST_HANDLE(x) REPN(&AssemblyGeneratorX86::h_##x, WT(x))
@ -520,5 +537,8 @@ namespace RandomX {
INST_HANDLE(COND_R) INST_HANDLE(COND_R)
INST_HANDLE(COND_M) INST_HANDLE(COND_M)
INST_HANDLE(CFROUND) INST_HANDLE(CFROUND)
INST_HANDLE(ISTORE)
INST_HANDLE(FSTORE)
}; };
} }

View file

@ -38,16 +38,8 @@ namespace RandomX {
static InstructionGenerator engine[256]; static InstructionGenerator engine[256];
std::stringstream asmCode; std::stringstream asmCode;
void gena(Instruction&, int);
void genar(Instruction&, int);
void genaf(Instruction&, int);
void genbiashift(Instruction&, const char*);
void genbia(Instruction&);
void genbia32(Instruction&);
void genbf(Instruction&, const char*);
void gencr(Instruction&, bool);
void gencf(Instruction&, bool);
void genAddressReg(Instruction&, const char*); void genAddressReg(Instruction&, const char*);
void genAddressRegDst(Instruction&, int);
int32_t genAddressImm(Instruction&); int32_t genAddressImm(Instruction&);
void generateCode(Instruction&, int); void generateCode(Instruction&, int);
@ -85,5 +77,7 @@ namespace RandomX {
void h_COND_R(Instruction&, int); void h_COND_R(Instruction&, int);
void h_COND_M(Instruction&, int); void h_COND_M(Instruction&, int);
void h_CFROUND(Instruction&, int); void h_CFROUND(Instruction&, int);
void h_ISTORE(Instruction&, int);
void h_FSTORE(Instruction&, int);
}; };
} }

View file

@ -71,14 +71,14 @@ namespace RandomX {
reg.a[i].hi.u64 = getSmallPositiveFloatBits(reg.f[i].hi.u64); reg.a[i].hi.u64 = getSmallPositiveFloatBits(reg.f[i].hi.u64);
} }
compiler.generateProgram(gen); compiler.generateProgram(gen);
mem.ma = (gen() ^ *(((uint32_t*)seed) + 4)) & ~7; mem.ma = (gen() ^ *(((uint32_t*)seed) + 4)) & -64;
mem.mx = *(((uint32_t*)seed) + 5); mem.mx = *(((uint32_t*)seed) + 5);
} }
void CompiledVirtualMachine::execute() { void CompiledVirtualMachine::execute() {
executeProgram(reg, mem, scratchpad, InstructionCount); //executeProgram(reg, mem, scratchpad, InstructionCount);
totalSize += compiler.getCodeSize(); totalSize += compiler.getCodeSize();
//compiler.getProgramFunc()(reg, mem, scratchpad); compiler.getProgramFunc()(reg, mem, scratchpad, InstructionCount);
#ifdef TRACEVM #ifdef TRACEVM
for (int32_t i = InstructionCount - 1; i >= 0; --i) { for (int32_t i = InstructionCount - 1; i >= 0; --i) {
std::cout << std::hex << tracepad[i].u64 << std::endl; std::cout << std::hex << tracepad[i].u64 << std::endl;

View file

@ -32,6 +32,10 @@ namespace RandomX {
os << ((alt % 4) ? "L1" : "L2") << "[r" << (int)src << "]"; os << ((alt % 4) ? "L1" : "L2") << "[r" << (int)src << "]";
} }
// Prints the scratchpad operand addressed through the destination register,
// e.g. "L1[r3]": "L1" when alt % 4 is non-zero, otherwise "L2".
void Instruction::genAddressRegDst(std::ostream& os) const {
	const char* level = (alt % 4) ? "L1" : "L2";
	os << level << "[r" << static_cast<int>(dst) << "]";
}
void Instruction::genAddressImm(std::ostream& os) const { void Instruction::genAddressImm(std::ostream& os) const {
os << ((alt % 4) ? "L1" : "L2") << "[" << (imm32 & ((alt % 4) ? ScratchpadL1Mask : ScratchpadL2Mask)) << "]"; os << ((alt % 4) ? "L1" : "L2") << "[" << (imm32 & ((alt % 4) ? ScratchpadL1Mask : ScratchpadL2Mask)) << "]";
} }
@ -276,7 +280,7 @@ namespace RandomX {
} }
void Instruction::h_CFROUND(std::ostream& os) const { void Instruction::h_CFROUND(std::ostream& os) const {
os << "r" << (int)dst << ", " << (alt & 63) << std::endl; os << "r" << (int)src << ", " << (alt & 63) << std::endl;
} }
static inline const char* condition(int index) { static inline const char* condition(int index) {
@ -311,6 +315,18 @@ namespace RandomX {
os << ", " << imm32 << ")" << std::endl; os << ", " << imm32 << ")" << std::endl;
} }
// Prints the operands of ISTORE: the destination-addressed scratchpad
// location followed by the source integer register, e.g. "L1[r2], r5".
void Instruction::h_ISTORE(std::ostream& os) const {
	genAddressRegDst(os);
	const int sourceIndex = static_cast<int>(src);
	os << ", r" << sourceIndex << std::endl;
}
// Prints the operands of FSTORE: the destination-addressed scratchpad
// location followed by the source floating point register. Source indices
// 0-3 are shown as f0-f3 and indices 4 and above as e0-e3 (index modulo 4).
void Instruction::h_FSTORE(std::ostream& os) const {
	genAddressRegDst(os);
	const auto lane = src % 4;
	const char group = (src < 4) ? 'f' : 'e';
	os << ", " << group << lane << std::endl;
}
#include "instructionWeights.hpp" #include "instructionWeights.hpp"
#define INST_NAME(x) REPN(#x, WT(x)) #define INST_NAME(x) REPN(#x, WT(x))
#define INST_HANDLE(x) REPN(&Instruction::h_##x, WT(x)) #define INST_HANDLE(x) REPN(&Instruction::h_##x, WT(x))
@ -358,6 +374,9 @@ namespace RandomX {
INST_NAME(COND_R) INST_NAME(COND_R)
INST_NAME(COND_M) INST_NAME(COND_M)
INST_NAME(CFROUND) INST_NAME(CFROUND)
INST_NAME(ISTORE)
INST_NAME(FSTORE)
}; };
InstructionVisualizer Instruction::engine[256] = { InstructionVisualizer Instruction::engine[256] = {
@ -403,6 +422,9 @@ namespace RandomX {
INST_HANDLE(COND_R) INST_HANDLE(COND_R)
INST_HANDLE(COND_M) INST_HANDLE(COND_M)
INST_HANDLE(CFROUND) INST_HANDLE(CFROUND)
INST_HANDLE(ISTORE)
INST_HANDLE(FSTORE)
}; };
} }

View file

@ -49,6 +49,7 @@ namespace RandomX {
void genAddressReg(std::ostream& os) const; void genAddressReg(std::ostream& os) const;
void genAddressImm(std::ostream& os) const; void genAddressImm(std::ostream& os) const;
void genAddressRegDst(std::ostream&) const;
void h_IADD_R(std::ostream&) const; void h_IADD_R(std::ostream&) const;
void h_IADD_M(std::ostream&) const; void h_IADD_M(std::ostream&) const;
@ -83,6 +84,8 @@ namespace RandomX {
void h_COND_R(std::ostream&) const; void h_COND_R(std::ostream&) const;
void h_COND_M(std::ostream&) const; void h_COND_M(std::ostream&) const;
void h_CFROUND(std::ostream&) const; void h_CFROUND(std::ostream&) const;
void h_ISTORE(std::ostream&) const;
void h_FSTORE(std::ostream&) const;
}; };
static_assert(sizeof(Instruction) == 8, "Invalid alignment of struct Instruction"); static_assert(sizeof(Instruction) == 8, "Invalid alignment of struct Instruction");

View file

@ -27,11 +27,16 @@
#define DECL(x) x #define DECL(x) x
#endif #endif
.global DECL(randomx_program_prologue) .global DECL(randomx_program_prologue)
.global DECL(randomx_program_begin) .global DECL(randomx_loop_begin)
.global DECL(randomx_program_load_int)
.global DECL(randomx_program_load_flt)
.global DECL(randomx_program_start)
.global DECL(randomx_program_read_dataset)
.global DECL(randomx_program_store_int)
.global DECL(randomx_program_store_flt)
.global DECL(randomx_program_loop_end)
.global DECL(randomx_program_epilogue) .global DECL(randomx_program_epilogue)
.global DECL(randomx_program_read)
.global DECL(randomx_program_end) .global DECL(randomx_program_end)
.global DECL(randomx_program_transform)
#define db .byte #define db .byte
@ -40,21 +45,37 @@ DECL(randomx_program_prologue):
#include "asm/program_prologue_linux.inc" #include "asm/program_prologue_linux.inc"
.align 64 .align 64
DECL(randomx_program_begin): #include "asm/program_xmm_constants.inc"
.align 64
DECL(randomx_loop_begin):
nop
DECL(randomx_program_load_int):
#include "asm/program_load_int.inc"
DECL(randomx_program_load_flt):
#include "asm/program_load_flt.inc"
DECL(randomx_program_start):
nop
DECL(randomx_program_read_dataset):
#include "asm/program_read_dataset.inc"
DECL(randomx_program_store_int):
#include "asm/program_store_int.inc"
DECL(randomx_program_store_flt):
#include "asm/program_store_flt.inc"
DECL(randomx_program_loop_end):
nop nop
.align 64 .align 64
DECL(randomx_program_epilogue): DECL(randomx_program_epilogue):
#include "asm/program_epilogue_linux.inc" #include "asm/program_epilogue_linux.inc"
.align 64
DECL(randomx_program_read):
#include "asm/program_read.inc"
.align 64 .align 64
DECL(randomx_program_end): DECL(randomx_program_end):
nop nop
.align 8
DECL(randomx_program_transform):
#include "asm/program_transform_address.inc"

View file

@ -20,12 +20,16 @@ IFDEF RAX
_RANDOMX_JITX86_STATIC SEGMENT PAGE READ EXECUTE _RANDOMX_JITX86_STATIC SEGMENT PAGE READ EXECUTE
PUBLIC randomx_program_prologue PUBLIC randomx_program_prologue
PUBLIC randomx_program_begin PUBLIC randomx_loop_begin
PUBLIC randomx_program_load_int
PUBLIC randomx_program_load_flt
PUBLIC randomx_program_start
PUBLIC randomx_program_read_dataset
PUBLIC randomx_program_store_int
PUBLIC randomx_program_store_flt
PUBLIC randomx_program_loop_end
PUBLIC randomx_program_epilogue PUBLIC randomx_program_epilogue
PUBLIC randomx_program_read
PUBLIC randomx_program_end PUBLIC randomx_program_end
PUBLIC randomx_program_transform
ALIGN 64 ALIGN 64
randomx_program_prologue PROC randomx_program_prologue PROC
@ -33,30 +37,51 @@ randomx_program_prologue PROC
randomx_program_prologue ENDP randomx_program_prologue ENDP
ALIGN 64 ALIGN 64
randomx_program_begin PROC include asm/program_xmm_constants.inc
ALIGN 64
randomx_loop_begin PROC
nop nop
randomx_program_begin ENDP randomx_loop_begin ENDP
randomx_program_load_int PROC
include asm/program_load_int.inc
randomx_program_load_int ENDP
randomx_program_load_flt PROC
include asm/program_load_flt.inc
randomx_program_load_flt ENDP
randomx_program_start PROC
nop
randomx_program_start ENDP
randomx_program_read_dataset PROC
include asm/program_read_dataset.inc
randomx_program_read_dataset ENDP
randomx_program_store_int PROC
include asm/program_store_int.inc
randomx_program_store_int ENDP
randomx_program_store_flt PROC
include asm/program_store_flt.inc
randomx_program_store_flt ENDP
randomx_program_loop_end PROC
nop
randomx_program_loop_end ENDP
ALIGN 64 ALIGN 64
randomx_program_epilogue PROC randomx_program_epilogue PROC
include asm/program_epilogue_win64.inc include asm/program_epilogue_win64.inc
randomx_program_epilogue ENDP randomx_program_epilogue ENDP
ALIGN 64
randomx_program_read PROC
include asm/program_read.inc
randomx_program_read ENDP
ALIGN 64 ALIGN 64
randomx_program_end PROC randomx_program_end PROC
nop nop
randomx_program_end ENDP randomx_program_end ENDP
ALIGN 8
randomx_program_transform PROC
include asm/program_transform_address.inc
randomx_program_transform ENDP
_RANDOMX_JITX86_STATIC ENDS _RANDOMX_JITX86_STATIC ENDS
ENDIF ENDIF

View file

@ -19,9 +19,14 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
extern "C" { extern "C" {
void randomx_program_prologue(); void randomx_program_prologue();
void randomx_program_begin(); void randomx_loop_begin();
void randomx_program_load_int();
void randomx_program_load_flt();
void randomx_program_start();
void randomx_program_read_dataset();
void randomx_program_store_int();
void randomx_program_store_flt();
void randomx_program_loop_end();
void randomx_program_epilogue(); void randomx_program_epilogue();
void randomx_program_transform();
void randomx_program_read();
void randomx_program_end(); void randomx_program_end();
} }

File diff suppressed because it is too large Load diff

View file

@ -30,16 +30,10 @@ namespace RandomX {
class JitCompilerX86; class JitCompilerX86;
typedef void(JitCompilerX86::*InstructionGeneratorX86)(Instruction&, int); typedef void(JitCompilerX86::*InstructionGeneratorX86)(Instruction&);
constexpr uint32_t CodeSize = 64 * 1024; constexpr uint32_t CodeSize = 64 * 1024;
struct CallOffset {
CallOffset(int32_t p, int32_t i) : pos(p), index(i) {}
int32_t pos;
int32_t index;
};
class JitCompilerX86 { class JitCompilerX86 {
public: public:
JitCompilerX86(); JitCompilerX86();
@ -55,66 +49,82 @@ namespace RandomX {
static InstructionGeneratorX86 engine[256]; static InstructionGeneratorX86 engine[256];
uint8_t* code; uint8_t* code;
int32_t codePos; int32_t codePos;
std::vector<int32_t> instructionOffsets;
std::vector<CallOffset> callOffsets;
void gena(Instruction&); void genAddressReg(Instruction&, bool);
void genar(Instruction&); void genAddressRegDst(Instruction&, bool);
void genaf(Instruction&); void genAddressImm(Instruction&);
void genbiashift(Instruction&, uint16_t, uint16_t); void genSIB(int scale, int index, int base);
void genbia(Instruction&, uint16_t, uint16_t);
void genbia32(Instruction&, uint16_t, uint8_t); void generateCode(Instruction&);
void genbf(Instruction&, uint8_t);
void scratchpadStoreR(Instruction&, uint32_t, bool);
void scratchpadStoreF(Instruction&, int, uint32_t, bool);
void gencr(Instruction&, bool);
void gencf(Instruction&);
void generateCode(Instruction&, int);
void fixCallOffsets();
void emitByte(uint8_t val) { void emitByte(uint8_t val) {
code[codePos] = val; code[codePos] = val;
codePos++; codePos++;
} }
template<typename T> void emit32(uint32_t val) {
void emit(T val) { code[codePos + 0] = val;
*reinterpret_cast<T*>(code + codePos) = val; code[codePos + 1] = val >> 8;
codePos += sizeof(T); code[codePos + 2] = val >> 16;
code[codePos + 3] = val >> 24;
codePos += 4;
} }
void h_ADD_64(Instruction&, int); void emit64(uint64_t val) {
void h_ADD_32(Instruction&, int); code[codePos + 0] = val;
void h_SUB_64(Instruction&, int); code[codePos + 1] = val >> 8;
void h_SUB_32(Instruction&, int); code[codePos + 2] = val >> 16;
void h_MUL_64(Instruction&, int); code[codePos + 3] = val >> 24;
void h_MULH_64(Instruction&, int); code[codePos + 4] = val >> 32;
void h_MUL_32(Instruction&, int); code[codePos + 5] = val >> 40;
void h_IMUL_32(Instruction&, int); code[codePos + 6] = val >> 48;
void h_IMULH_64(Instruction&, int); code[codePos + 7] = val >> 56;
void h_DIV_64(Instruction&, int); codePos += 8;
void h_IDIV_64(Instruction&, int); }
void h_AND_64(Instruction&, int);
void h_AND_32(Instruction&, int); template<size_t N>
void h_OR_64(Instruction&, int); void emit(const uint8_t (&src)[N]) {
void h_OR_32(Instruction&, int); for (int i = 0; i < N; ++i) {
void h_XOR_64(Instruction&, int); code[codePos + i] = src[i];
void h_XOR_32(Instruction&, int); }
void h_SHL_64(Instruction&, int); codePos += N;
void h_SHR_64(Instruction&, int); }
void h_SAR_64(Instruction&, int);
void h_ROL_64(Instruction&, int); void h_IADD_R(Instruction&);
void h_ROR_64(Instruction&, int); void h_IADD_M(Instruction&);
void h_FPADD(Instruction&, int); void h_IADD_RC(Instruction&);
void h_FPSUB(Instruction&, int); void h_ISUB_R(Instruction&);
void h_FPMUL(Instruction&, int); void h_ISUB_M(Instruction&);
void h_FPDIV(Instruction&, int); void h_IMUL_9C(Instruction&);
void h_FPSQRT(Instruction&, int); void h_IMUL_R(Instruction&);
void h_FPROUND(Instruction&, int); void h_IMUL_M(Instruction&);
void h_JUMP(Instruction&, int); void h_IMULH_R(Instruction&);
void h_CALL(Instruction&, int); void h_IMULH_M(Instruction&);
void h_RET(Instruction&, int); void h_ISMULH_R(Instruction&);
void h_NOP(Instruction&, int); void h_ISMULH_M(Instruction&);
void h_IDIV_C(Instruction&);
void h_ISDIV_C(Instruction&);
void h_INEG_R(Instruction&);
void h_IXOR_R(Instruction&);
void h_IXOR_M(Instruction&);
void h_IROR_R(Instruction&);
void h_IROL_R(Instruction&);
void h_FPSWAP_R(Instruction&);
void h_FPADD_R(Instruction&);
void h_FPADD_M(Instruction&);
void h_FPSUB_R(Instruction&);
void h_FPSUB_M(Instruction&);
void h_FPNEG_R(Instruction&);
void h_FPMUL_R(Instruction&);
void h_FPMUL_M(Instruction&);
void h_FPDIV_R(Instruction&);
void h_FPDIV_M(Instruction&);
void h_FPSQRT_R(Instruction&);
void h_COND_R(Instruction&);
void h_COND_M(Instruction&);
void h_CFROUND(Instruction&);
void h_ISTORE(Instruction&);
void h_FSTORE(Instruction&);
}; };
} }

View file

@ -1,9 +1,5 @@
;# unroll VM stack
mov rsp, rdi
;# save VM register values ;# save VM register values
pop rcx pop rcx
pop rcx
mov qword ptr [rcx+0], r8 mov qword ptr [rcx+0], r8
mov qword ptr [rcx+8], r9 mov qword ptr [rcx+8], r9
mov qword ptr [rcx+16], r10 mov qword ptr [rcx+16], r10
@ -12,12 +8,12 @@
mov qword ptr [rcx+40], r13 mov qword ptr [rcx+40], r13
mov qword ptr [rcx+48], r14 mov qword ptr [rcx+48], r14
mov qword ptr [rcx+56], r15 mov qword ptr [rcx+56], r15
movapd xmmword ptr [rcx+64], xmm8 movdqa xmmword ptr [rcx+64], xmm0
movapd xmmword ptr [rcx+80], xmm9 movdqa xmmword ptr [rcx+80], xmm1
movapd xmmword ptr [rcx+96], xmm2 movdqa xmmword ptr [rcx+96], xmm2
movapd xmmword ptr [rcx+112], xmm3 movdqa xmmword ptr [rcx+112], xmm3
lea rcx, [rcx+64] lea rcx, [rcx+64]
movapd xmmword ptr [rcx+64], xmm4 movdqa xmmword ptr [rcx+64], xmm4
movapd xmmword ptr [rcx+80], xmm5 movdqa xmmword ptr [rcx+80], xmm5
movapd xmmword ptr [rcx+96], xmm6 movdqa xmmword ptr [rcx+96], xmm6
movapd xmmword ptr [rcx+112], xmm7 movdqa xmmword ptr [rcx+112], xmm7

View file

@ -1,6 +1,12 @@
include program_epilogue_store.inc include program_epilogue_store.inc
;# restore callee-saved registers - Microsoft x64 calling convention ;# restore callee-saved registers - Microsoft x64 calling convention
movdqu xmm15, xmmword ptr [rsp]
movdqu xmm14, xmmword ptr [rsp+16]
movdqu xmm13, xmmword ptr [rsp+32]
movdqu xmm12, xmmword ptr [rsp+48]
movdqu xmm11, xmmword ptr [rsp+64]
add rsp, 80
movdqu xmm10, xmmword ptr [rsp] movdqu xmm10, xmmword ptr [rsp]
movdqu xmm9, xmmword ptr [rsp+16] movdqu xmm9, xmmword ptr [rsp+16]
movdqu xmm8, xmmword ptr [rsp+32] movdqu xmm8, xmmword ptr [rsp+32]
@ -17,4 +23,4 @@
pop rbx pop rbx
;# program finished ;# program finished
ret 0 ret

View file

@ -0,0 +1,14 @@
;# Floating point load: reads one 64-byte scratchpad line as 16 packed 32-bit
;# integers and converts them to doubles in xmm0-xmm7 (two doubles per register).
and eax, 262080 ;# 262080 = 0x3FFC0: 64-byte aligned offset below 256 KiB
lea rcx, [rsi+rax] ;# rcx = line address (rsi is set to the scratchpad pointer by the prologue)
cvtdq2pd xmm0, qword ptr [rcx+0] ;# cvtdq2pd: two signed 32-bit integers -> two doubles
cvtdq2pd xmm1, qword ptr [rcx+8]
cvtdq2pd xmm2, qword ptr [rcx+16]
cvtdq2pd xmm3, qword ptr [rcx+24]
cvtdq2pd xmm4, qword ptr [rcx+32]
cvtdq2pd xmm5, qword ptr [rcx+40]
cvtdq2pd xmm6, qword ptr [rcx+48]
cvtdq2pd xmm7, qword ptr [rcx+56]
;# xmm14 is loaded from absMask in program_prologue_load.inc, so the ANDs below
;# clear the sign bit of both lanes and leave xmm4-xmm7 non-negative
andps xmm4, xmm14
andps xmm5, xmm14
andps xmm6, xmm14
andps xmm7, xmm14

View file

@ -0,0 +1,10 @@
;# Integer load: XORs one 64-byte scratchpad line (eight qwords) into r8-r15.
and eax, 262080 ;# 262080 = 0x3FFC0: 64-byte aligned offset below 256 KiB
lea rcx, [rsi+rax] ;# rcx = line address (rsi is set to the scratchpad pointer by the prologue)
xor r8, qword ptr [rcx+0]
xor r9, qword ptr [rcx+8]
xor r10, qword ptr [rcx+16]
xor r11, qword ptr [rcx+24]
xor r12, qword ptr [rcx+32]
xor r13, qword ptr [rcx+40]
xor r14, qword ptr [rcx+48]
xor r15, qword ptr [rcx+56]

View file

@ -7,13 +7,14 @@
push r15 push r15
;# function arguments ;# function arguments
mov rbx, rcx ;# loop counter
push rdi ;# RegisterFile& registerFile push rdi ;# RegisterFile& registerFile
mov rbp, qword ptr [rsi] ;# "mx", "ma"
mov rax, qword ptr [rsi+8] ;# uint8_t* dataset
push rax
mov rsi, rdx ;# convertible_t* scratchpad
mov rcx, rdi mov rcx, rdi
mov rbp, qword ptr [rsi] ;# "mx", "ma"
mov eax, ebp ;# "mx"
mov rdi, qword ptr [rsi+8] ;# uint8_t* dataset
mov rsi, rdx ;# convertible_t* scratchpad
#include "program_prologue_load.inc" #include "program_prologue_load.inc"
jmp randomx_program_begin jmp DECL(randomx_loop_begin)

View file

@ -1,27 +1,20 @@
mov rdi, rsp ;# beginning of VM stack ;# zero integer registers
mov ebx, 262145 ;# number of VM instructions to execute + 1 xor r8, r8
xor r9, r9
xor r10, r10
xor r11, r11
xor r12, r12
xor r13, r13
xor r14, r14
xor r15, r15
xorps xmm10, xmm10 ;# load constant registers
cmpeqpd xmm10, xmm10 lea rcx, [rcx+120]
psrlq xmm10, 1 ;# mask for absolute value = 0x7fffffffffffffff7fffffffffffffff movapd xmm8, xmmword ptr [rcx+72]
movapd xmm9, xmmword ptr [rcx+88]
movapd xmm10, xmmword ptr [rcx+104]
movapd xmm11, xmmword ptr [rcx+120]
movapd xmm13, xmmword ptr [minDbl]
movapd xmm14, xmmword ptr [absMask]
movapd xmm15, xmmword ptr [signMask]
;# load integer registers
mov r8, qword ptr [rcx+0]
mov r9, qword ptr [rcx+8]
mov r10, qword ptr [rcx+16]
mov r11, qword ptr [rcx+24]
mov r12, qword ptr [rcx+32]
mov r13, qword ptr [rcx+40]
mov r14, qword ptr [rcx+48]
mov r15, qword ptr [rcx+56]
;# load floating point registers
movapd xmm8, xmmword ptr [rcx+64]
movapd xmm9, xmmword ptr [rcx+80]
movapd xmm2, xmmword ptr [rcx+96]
movapd xmm3, xmmword ptr [rcx+112]
lea rcx, [rcx+64]
movapd xmm4, xmmword ptr [rcx+64]
movapd xmm5, xmmword ptr [rcx+80]
movapd xmm6, xmmword ptr [rcx+96]
movapd xmm7, xmmword ptr [rcx+112]

View file

@ -13,14 +13,21 @@
movdqu xmmword ptr [rsp+32], xmm8 movdqu xmmword ptr [rsp+32], xmm8
movdqu xmmword ptr [rsp+16], xmm9 movdqu xmmword ptr [rsp+16], xmm9
movdqu xmmword ptr [rsp+0], xmm10 movdqu xmmword ptr [rsp+0], xmm10
sub rsp, 80
movdqu xmmword ptr [rsp+64], xmm11
movdqu xmmword ptr [rsp+48], xmm12
movdqu xmmword ptr [rsp+32], xmm13
movdqu xmmword ptr [rsp+16], xmm14
movdqu xmmword ptr [rsp+0], xmm15
;# function arguments ; function arguments
push rcx ;# RegisterFile& registerFile push rcx ; RegisterFile& registerFile
mov rbp, qword ptr [rdx] ;# "mx", "ma" mov rbp, qword ptr [rdx] ; "mx", "ma"
mov rax, qword ptr [rdx+8] ;# uint8_t* dataset mov eax, ebp ; "mx"
push rax mov rdi, qword ptr [rdx+8] ; uint8_t* dataset
mov rsi, r8 ;# convertible_t* scratchpad mov rsi, r8 ; convertible_t* scratchpad
mov rbx, r9 ; loop counter
include program_prologue_load.inc include program_prologue_load.inc
jmp randomx_program_begin jmp randomx_loop_begin

View file

@ -1,20 +0,0 @@
db 0, 0, 0, 0 ;# TransformAddress placeholder
mov rcx, qword ptr [rdi] ;# load the dataset address
xor rbp, rax ;# modify "mx"
;# prefetch cacheline "mx"
and rbp, -64 ;# align "mx" to the start of a cache line
mov edx, ebp ;# edx = mx
prefetchnta byte ptr [rcx+rdx]
;# read cacheline "ma"
ror rbp, 32 ;# swap "ma" and "mx"
mov edx, ebp ;# edx = ma
lea rcx, [rcx+rdx] ;# dataset cache line
xor r8, qword ptr [rcx+0]
xor r9, qword ptr [rcx+8]
xor r10, qword ptr [rcx+16]
xor r11, qword ptr [rcx+24]
xor r12, qword ptr [rcx+32]
xor r13, qword ptr [rcx+40]
xor r14, qword ptr [rcx+48]
xor r15, qword ptr [rcx+56]
ret

View file

@ -0,0 +1,16 @@
;# Dataset read: rbp packs the two 32-bit memory registers "mx" and "ma".
;# The code updates "mx" from rax, prefetches the dataset line at the new "mx",
;# swaps the two halves and XORs the 64-byte dataset line at "ma" into r8-r15.
;# rdi is set to the dataset pointer by the prologue.
xor rbp, rax ;# modify "mx"
and rbp, -64 ;# align "mx" to the start of a cache line
mov edx, ebp ;# edx = mx
prefetchnta byte ptr [rdi+rdx]
ror rbp, 32 ;# swap "ma" and "mx"
mov edx, ebp ;# edx = ma
lea rcx, [rdi+rdx] ;# dataset cache line
;# NOTE(review): only the low half is masked above; the half used here as "ma"
;# appears to rely on having been aligned earlier (as "mx" or at initialization)
xor r8, qword ptr [rcx+0]
xor r9, qword ptr [rcx+8]
xor r10, qword ptr [rcx+16]
xor r11, qword ptr [rcx+24]
xor r12, qword ptr [rcx+32]
xor r13, qword ptr [rcx+40]
xor r14, qword ptr [rcx+48]
xor r15, qword ptr [rcx+56]

View file

@ -0,0 +1,11 @@
;# Floating point store: multiplies xmm0-xmm3 by xmm4-xmm7 (pairwise, packed
;# doubles) and writes the four products to one 64-byte scratchpad line.
and eax, 262080 ;# 262080 = 0x3FFC0: 64-byte aligned offset below 256 KiB
lea rcx, [rsi+rax] ;# rcx = line address (rsi is set to the scratchpad pointer by the prologue)
mulpd xmm0, xmm4
mulpd xmm1, xmm5
mulpd xmm2, xmm6
mulpd xmm3, xmm7
;# movapd needs a 16-byte aligned address; the offset is 64-byte aligned,
;# the scratchpad base is presumably aligned as well (not visible here)
movapd xmmword ptr [rcx+0], xmm0
movapd xmmword ptr [rcx+16], xmm1
movapd xmmword ptr [rcx+32], xmm2
movapd xmmword ptr [rcx+48], xmm3

View file

@ -0,0 +1,10 @@
;# Integer store: writes r8-r15 (eight qwords) to one 64-byte scratchpad line.
and eax, 262080 ;# 262080 = 0x3FFC0: 64-byte aligned offset below 256 KiB
lea rcx, [rsi+rax] ;# rcx = line address (rsi is set to the scratchpad pointer by the prologue)
mov qword ptr [rcx+0], r8
mov qword ptr [rcx+8], r9
mov qword ptr [rcx+16], r10
mov qword ptr [rcx+24], r11
mov qword ptr [rcx+32], r12
mov qword ptr [rcx+40], r13
mov qword ptr [rcx+48], r14
mov qword ptr [rcx+56], r15

View file

@ -0,0 +1,6 @@
;# 128-bit constants, each holding the same 64-bit pattern in both lanes
;# (bytes are little-endian). They are read with movapd in
;# program_prologue_load.inc, so every label must stay 16-byte aligned.
;# minDbl: 0x0010000000000000 per lane = smallest positive normal double
minDbl:
db 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 16, 0
;# absMask: 0x7FFFFFFFFFFFFFFF per lane = every bit except the sign bit
absMask:
db 255, 255, 255, 255, 255, 255, 255, 127, 255, 255, 255, 255, 255, 255, 255, 127
;# signMask: 0x8000000000000000 per lane = only the sign bit
signMask:
db 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0, 0, 128

View file

@ -81,6 +81,8 @@ namespace RandomX {
constexpr uint32_t ScratchpadL3 = ScratchpadSize / sizeof(convertible_t); constexpr uint32_t ScratchpadL3 = ScratchpadSize / sizeof(convertible_t);
constexpr int ScratchpadL1Mask = (ScratchpadL1 - 1) * 8; constexpr int ScratchpadL1Mask = (ScratchpadL1 - 1) * 8;
constexpr int ScratchpadL2Mask = (ScratchpadL2 - 1) * 8; constexpr int ScratchpadL2Mask = (ScratchpadL2 - 1) * 8;
constexpr int ScratchpadL1Mask16 = (ScratchpadL1 / 2 - 1) * 16;
constexpr int ScratchpadL2Mask16 = (ScratchpadL2 / 2 - 1) * 16;
constexpr uint32_t TransformationCount = 90; constexpr uint32_t TransformationCount = 90;
constexpr int RegistersCount = 8; constexpr int RegistersCount = 8;
@ -129,7 +131,7 @@ namespace RandomX {
typedef void(*DatasetReadFunc)(addr_t, MemoryRegisters&, RegisterFile&); typedef void(*DatasetReadFunc)(addr_t, MemoryRegisters&, RegisterFile&);
typedef void(*ProgramFunc)(RegisterFile&, MemoryRegisters&, convertible_t*); typedef void(*ProgramFunc)(RegisterFile&, MemoryRegisters&, convertible_t*, uint64_t);
extern "C" { extern "C" {
void executeProgram(RegisterFile&, MemoryRegisters&, convertible_t*, uint64_t); void executeProgram(RegisterFile&, MemoryRegisters&, convertible_t*, uint64_t);

View file

@ -21,14 +21,6 @@ _RANDOMX_EXECUTE_PROGRAM SEGMENT PAGE READ EXECUTE
PUBLIC executeProgram PUBLIC executeProgram
ALIGN 16
minDbl:
db 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 16, 0
absMask:
db 255, 255, 255, 255, 255, 255, 255, 127, 255, 255, 255, 255, 255, 255, 255, 127
signMask:
db 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0, 0, 128
executeProgram PROC executeProgram PROC
; REGISTER ALLOCATION: ; REGISTER ALLOCATION:
; rax -> temporary ; rax -> temporary
@ -114,6 +106,17 @@ executeProgram PROC
movapd xmm14, xmmword ptr [absMask] movapd xmm14, xmmword ptr [absMask]
movapd xmm15, xmmword ptr [signMask] movapd xmm15, xmmword ptr [signMask]
jmp program_begin
ALIGN 64
minDbl:
db 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 16, 0
absMask:
db 255, 255, 255, 255, 255, 255, 255, 127, 255, 255, 255, 255, 255, 255, 255, 127
signMask:
db 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0, 0, 128
ALIGN 64
program_begin: program_begin:
xor eax, r8d ;# read address register 1 xor eax, r8d ;# read address register 1
and eax, 262080 and eax, 262080

View file

@ -22,21 +22,21 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
//Integer //Integer
#define WT_IADD_R 10 #define WT_IADD_R 10
#define WT_IADD_M 3 #define WT_IADD_M 3
#define WT_IADD_RC 12 #define WT_IADD_RC 10
#define WT_ISUB_R 10 #define WT_ISUB_R 10
#define WT_ISUB_M 3 #define WT_ISUB_M 3
#define WT_IMUL_9C 12 #define WT_IMUL_9C 10
#define WT_IMUL_R 24 #define WT_IMUL_R 20
#define WT_IMUL_M 8 #define WT_IMUL_M 6
#define WT_IMULH_R 6 #define WT_IMULH_R 6
#define WT_IMULH_M 2 #define WT_IMULH_M 2
#define WT_ISMULH_R 6 #define WT_ISMULH_R 6
#define WT_ISMULH_M 2 #define WT_ISMULH_M 2
#define WT_IDIV_C 4 #define WT_IDIV_C 4
#define WT_ISDIV_C 2 #define WT_ISDIV_C 4
#define WT_INEG_R 4 #define WT_INEG_R 2
#define WT_IXOR_R 15 #define WT_IXOR_R 12
#define WT_IXOR_M 5 #define WT_IXOR_M 4
#define WT_IROR_R 10 #define WT_IROR_R 10
#define WT_IROL_R 10 #define WT_IROL_R 10
@ -58,10 +58,14 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
#define WT_FPSQRT_R 6 #define WT_FPSQRT_R 6
//Control //Control
#define WT_COND_R 15 #define WT_COND_R 12
#define WT_COND_M 5 #define WT_COND_M 4
#define WT_CFROUND 1 #define WT_CFROUND 1
//Store
#define WT_ISTORE 12
#define WT_FSTORE 6
#define WT_NOP 0 #define WT_NOP 0
constexpr int wtSum = WT_IADD_R + WT_IADD_M + WT_IADD_RC + WT_ISUB_R + \ constexpr int wtSum = WT_IADD_R + WT_IADD_M + WT_IADD_RC + WT_ISUB_R + \
@ -70,7 +74,7 @@ WT_IMULH_M + WT_ISMULH_R + WT_ISMULH_M + WT_IDIV_C + WT_ISDIV_C + \
WT_INEG_R + WT_IXOR_R + WT_IXOR_M + WT_IROR_R + WT_IROL_R + \ WT_INEG_R + WT_IXOR_R + WT_IXOR_M + WT_IROR_R + WT_IROL_R + \
WT_FPSWAP_R + WT_FPADD_R + WT_FPADD_M + WT_FPSUB_R + WT_FPSUB_M + \ WT_FPSWAP_R + WT_FPADD_R + WT_FPADD_M + WT_FPSUB_R + WT_FPSUB_M + \
WT_FPNEG_R + WT_FPMUL_R + WT_FPMUL_M + WT_FPDIV_R + WT_FPDIV_M + \ WT_FPNEG_R + WT_FPMUL_R + WT_FPMUL_M + WT_FPDIV_R + WT_FPDIV_M + \
WT_FPSQRT_R + WT_COND_R + WT_COND_M + WT_CFROUND + WT_NOP; WT_FPSQRT_R + WT_COND_R + WT_COND_M + WT_CFROUND + WT_ISTORE + WT_FSTORE + WT_NOP;
static_assert(wtSum == 256, static_assert(wtSum == 256,
"Sum of instruction weights must be 256"); "Sum of instruction weights must be 256");
@ -116,3 +120,40 @@ static_assert(wtSum == 256,
#define REPN(x,N) REPNX(x,N) #define REPN(x,N) REPNX(x,N)
#define NUM(x) x #define NUM(x) x
#define WT(x) NUM(WT_##x) #define WT(x) NUM(WT_##x)
// Case-label generator for switch-based instruction dispatch.
// REPCASEk(x) expands to k consecutive "case __COUNTER__:" labels (k = 0..32);
// the argument x is not used in the expansion and only identifies the caller.
// CASE_REP(x) expands to WT_x labels, i.e. as many labels as the instruction
// weight of x, so a weight of 0 produces no label at all.
// __COUNTER__ is a compiler extension that yields the next integer on every
// expansion, so the label values depend on the order of CASE_REP uses and on
// any other use of __COUNTER__ earlier in the same translation unit.
// Weights above 32 are not supported by this ladder.
#define REPCASE0(x)
#define REPCASE1(x) case __COUNTER__:
#define REPCASE2(x) REPCASE1(x) case __COUNTER__:
#define REPCASE3(x) REPCASE2(x) case __COUNTER__:
#define REPCASE4(x) REPCASE3(x) case __COUNTER__:
#define REPCASE5(x) REPCASE4(x) case __COUNTER__:
#define REPCASE6(x) REPCASE5(x) case __COUNTER__:
#define REPCASE7(x) REPCASE6(x) case __COUNTER__:
#define REPCASE8(x) REPCASE7(x) case __COUNTER__:
#define REPCASE9(x) REPCASE8(x) case __COUNTER__:
#define REPCASE10(x) REPCASE9(x) case __COUNTER__:
#define REPCASE11(x) REPCASE10(x) case __COUNTER__:
#define REPCASE12(x) REPCASE11(x) case __COUNTER__:
#define REPCASE13(x) REPCASE12(x) case __COUNTER__:
#define REPCASE14(x) REPCASE13(x) case __COUNTER__:
#define REPCASE15(x) REPCASE14(x) case __COUNTER__:
#define REPCASE16(x) REPCASE15(x) case __COUNTER__:
#define REPCASE17(x) REPCASE16(x) case __COUNTER__:
#define REPCASE18(x) REPCASE17(x) case __COUNTER__:
#define REPCASE19(x) REPCASE18(x) case __COUNTER__:
#define REPCASE20(x) REPCASE19(x) case __COUNTER__:
#define REPCASE21(x) REPCASE20(x) case __COUNTER__:
#define REPCASE22(x) REPCASE21(x) case __COUNTER__:
#define REPCASE23(x) REPCASE22(x) case __COUNTER__:
#define REPCASE24(x) REPCASE23(x) case __COUNTER__:
#define REPCASE25(x) REPCASE24(x) case __COUNTER__:
#define REPCASE26(x) REPCASE25(x) case __COUNTER__:
#define REPCASE27(x) REPCASE26(x) case __COUNTER__:
#define REPCASE28(x) REPCASE27(x) case __COUNTER__:
#define REPCASE29(x) REPCASE28(x) case __COUNTER__:
#define REPCASE30(x) REPCASE29(x) case __COUNTER__:
#define REPCASE31(x) REPCASE30(x) case __COUNTER__:
#define REPCASE32(x) REPCASE31(x) case __COUNTER__:
// Two-level indirection so that N (WT(x)) is fully macro-expanded to a number
// before it is pasted onto REPCASE.
#define REPCASENX(x,N) REPCASE##N(x)
#define REPCASEN(x,N) REPCASENX(x,N)
#define CASE_REP(x) REPCASEN(x, WT(x))

View file

@ -174,7 +174,7 @@ void mine(RandomX::VirtualMachine* vm, std::atomic<int>& atomicNonce, AtomicHash
for (int chain = 0; chain < 16; ++chain) { for (int chain = 0; chain < 16; ++chain) {
vm->initializeProgram(hash); vm->initializeProgram(hash);
int segment = hash[3] & 3; int segment = hash[3] & 3;
vm->setScratchpad(scratchpad);// +segment * RandomX::ScratchpadSize / 4); vm->setScratchpad(scratchpad + segment * RandomX::ScratchpadSize / 4);
vm->execute(); vm->execute();
vm->getResult(nullptr, 0, hash); vm->getResult(nullptr, 0, hash);
} }

File diff suppressed because it is too large Load diff