mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2024-08-15 00:23:14 +00:00
Added explicit STORE instructions
JIT compiler
This commit is contained in:
parent
d2cb086221
commit
005c67f64c
27 changed files with 1751 additions and 1518 deletions
|
@ -75,6 +75,11 @@ namespace RandomX {
|
|||
asmCode << "\tand " << reg << ", " << ((instr.alt % 4) ? ScratchpadL1Mask : ScratchpadL2Mask) << std::endl;
|
||||
}
|
||||
|
||||
// Emits assembly that derives a scratchpad address from the instruction's
// destination register and leaves it in eax.
//   instr     - instruction whose `dst` selects the 32-bit source register and
//               whose `alt` selects the scratchpad level mask.
//   maskAlign - the level mask is ANDed with -maskAlign, which clears the low
//               bits so the address is a multiple of maskAlign (for
//               power-of-two values). Defaults to 8.
void AssemblyGeneratorX86::genAddressRegDst(Instruction& instr, int maskAlign = 8) {
	// A non-zero (alt % 4) picks the L1 mask, zero picks the L2 mask.
	const int levelMask = (instr.alt % 4) ? ScratchpadL1Mask : ScratchpadL2Mask;
	const int alignedMask = levelMask & (-maskAlign);

	asmCode << "\tmov eax" << ", " << regR32[instr.dst] << std::endl;
	asmCode << "\tand eax" << ", " << alignedMask << std::endl;
}
|
||||
|
||||
// Returns the instruction's 32-bit immediate masked to a scratchpad address.
// A non-zero (alt % 4) applies the L1 mask; zero applies the L2 mask.
int32_t AssemblyGeneratorX86::genAddressImm(Instruction& instr) {
	const bool useL1 = (instr.alt % 4) != 0;
	if (useL1) {
		return instr.imm32 & ScratchpadL1Mask;
	}
	return instr.imm32 & ScratchpadL2Mask;
}
|
||||
|
@ -425,7 +430,7 @@ namespace RandomX {
|
|||
|
||||
//6 uOPs
|
||||
void AssemblyGeneratorX86::h_CFROUND(Instruction& instr, int i) {
|
||||
asmCode << "\tmov rax, " << regR[instr.dst] << std::endl;
|
||||
asmCode << "\tmov rax, " << regR[instr.src] << std::endl;
|
||||
int rotate = (13 - (instr.alt & 63)) & 63;
|
||||
if (rotate != 0)
|
||||
asmCode << "\trol rax, " << rotate << std::endl;
|
||||
|
@ -474,6 +479,18 @@ namespace RandomX {
|
|||
asmCode << "\tadd " << regR[instr.dst] << ", rcx" << std::endl;
|
||||
}
|
||||
|
||||
//3 uOPs
|
||||
// ISTORE: emits a 64-bit store of the integer source register to the
// scratchpad address derived from the destination register.
// The second parameter (instruction index) is not used by this handler.
void AssemblyGeneratorX86::h_ISTORE(Instruction& instr, int i) {
	// Leaves the masked address in eax (default 8-byte mask alignment).
	genAddressRegDst(instr);

	// The emitted store addresses memory as [rsi+rax].
	asmCode << "\tmov qword ptr [rsi+rax], ";
	asmCode << regR[instr.src] << std::endl;
}
|
||||
|
||||
//3 uOPs
|
||||
// FSTORE: emits a 128-bit movapd store of the floating point source register
// to the scratchpad address derived from the destination register.
// The second parameter (instruction index) is not used by this handler.
void AssemblyGeneratorX86::h_FSTORE(Instruction& instr, int i) {
	// Request a mask aligned to 16 so the address in eax is a multiple of 16
	// before the xmmword store is emitted.
	genAddressRegDst(instr, 16);

	asmCode << "\tmovapd xmmword ptr [rsi+rax], ";
	asmCode << regFE[instr.src] << std::endl;
}
|
||||
|
||||
#include "instructionWeights.hpp"
|
||||
#define INST_HANDLE(x) REPN(&AssemblyGeneratorX86::h_##x, WT(x))
|
||||
|
||||
|
@ -520,5 +537,8 @@ namespace RandomX {
|
|||
INST_HANDLE(COND_R)
|
||||
INST_HANDLE(COND_M)
|
||||
INST_HANDLE(CFROUND)
|
||||
|
||||
INST_HANDLE(ISTORE)
|
||||
INST_HANDLE(FSTORE)
|
||||
};
|
||||
}
|
|
@ -38,16 +38,8 @@ namespace RandomX {
|
|||
static InstructionGenerator engine[256];
|
||||
std::stringstream asmCode;
|
||||
|
||||
void gena(Instruction&, int);
|
||||
void genar(Instruction&, int);
|
||||
void genaf(Instruction&, int);
|
||||
void genbiashift(Instruction&, const char*);
|
||||
void genbia(Instruction&);
|
||||
void genbia32(Instruction&);
|
||||
void genbf(Instruction&, const char*);
|
||||
void gencr(Instruction&, bool);
|
||||
void gencf(Instruction&, bool);
|
||||
void genAddressReg(Instruction&, const char*);
|
||||
void genAddressRegDst(Instruction&, int);
|
||||
int32_t genAddressImm(Instruction&);
|
||||
|
||||
void generateCode(Instruction&, int);
|
||||
|
@ -85,5 +77,7 @@ namespace RandomX {
|
|||
void h_COND_R(Instruction&, int);
|
||||
void h_COND_M(Instruction&, int);
|
||||
void h_CFROUND(Instruction&, int);
|
||||
void h_ISTORE(Instruction&, int);
|
||||
void h_FSTORE(Instruction&, int);
|
||||
};
|
||||
}
|
|
@ -71,14 +71,14 @@ namespace RandomX {
|
|||
reg.a[i].hi.u64 = getSmallPositiveFloatBits(reg.f[i].hi.u64);
|
||||
}
|
||||
compiler.generateProgram(gen);
|
||||
mem.ma = (gen() ^ *(((uint32_t*)seed) + 4)) & ~7;
|
||||
mem.ma = (gen() ^ *(((uint32_t*)seed) + 4)) & -64;
|
||||
mem.mx = *(((uint32_t*)seed) + 5);
|
||||
}
|
||||
|
||||
void CompiledVirtualMachine::execute() {
|
||||
executeProgram(reg, mem, scratchpad, InstructionCount);
|
||||
//executeProgram(reg, mem, scratchpad, InstructionCount);
|
||||
totalSize += compiler.getCodeSize();
|
||||
//compiler.getProgramFunc()(reg, mem, scratchpad);
|
||||
compiler.getProgramFunc()(reg, mem, scratchpad, InstructionCount);
|
||||
#ifdef TRACEVM
|
||||
for (int32_t i = InstructionCount - 1; i >= 0; --i) {
|
||||
std::cout << std::hex << tracepad[i].u64 << std::endl;
|
||||
|
|
|
@ -32,6 +32,10 @@ namespace RandomX {
|
|||
os << ((alt % 4) ? "L1" : "L2") << "[r" << (int)src << "]";
|
||||
}
|
||||
|
||||
// Prints the destination-register memory operand, e.g. "L1[r3]".
// A non-zero (alt % 4) is shown as "L1", zero as "L2".
void Instruction::genAddressRegDst(std::ostream& os) const {
	const char* level = (alt % 4) ? "L1" : "L2";
	const int dstIndex = static_cast<int>(dst);
	os << level << "[r" << dstIndex << "]";
}
|
||||
|
||||
// Prints the immediate memory operand, e.g. "L1[1024]": the level label
// followed by the 32-bit immediate masked with that level's scratchpad mask.
void Instruction::genAddressImm(std::ostream& os) const {
	const bool useL1 = (alt % 4) != 0;
	os << (useL1 ? "L1" : "L2");
	os << "[" << (imm32 & (useL1 ? ScratchpadL1Mask : ScratchpadL2Mask)) << "]";
}
|
||||
|
@ -276,7 +280,7 @@ namespace RandomX {
|
|||
}
|
||||
|
||||
// Prints the operands of CFROUND: the integer source register followed by the
// rotation amount taken from the low 6 bits of `alt`.
// The stale line that printed `dst` was dropped so the operand is printed once,
// matching the assembly generator, which reads the src register for CFROUND.
void Instruction::h_CFROUND(std::ostream& os) const {
	os << "r" << (int)src << ", " << (alt & 63) << std::endl;
}
|
||||
|
||||
static inline const char* condition(int index) {
|
||||
|
@ -311,6 +315,18 @@ namespace RandomX {
|
|||
os << ", " << imm32 << ")" << std::endl;
|
||||
}
|
||||
|
||||
// Prints the operands of ISTORE: the destination memory operand followed by
// the integer source register, e.g. "L1[r2], r5".
void Instruction::h_ISTORE(std::ostream& os) const {
	genAddressRegDst(os);
	const int srcIndex = static_cast<int>(src);
	os << ", r" << srcIndex << std::endl;
}
|
||||
|
||||
void Instruction::h_FSTORE(std::ostream& os) const {
|
||||
const char reg = (src >= 4) ? 'e' : 'f';
|
||||
genAddressRegDst(os);
|
||||
auto srcIndex = src % 4;
|
||||
os << ", " << reg << srcIndex << std::endl;
|
||||
}
|
||||
|
||||
#include "instructionWeights.hpp"
|
||||
#define INST_NAME(x) REPN(#x, WT(x))
|
||||
#define INST_HANDLE(x) REPN(&Instruction::h_##x, WT(x))
|
||||
|
@ -358,6 +374,9 @@ namespace RandomX {
|
|||
INST_NAME(COND_R)
|
||||
INST_NAME(COND_M)
|
||||
INST_NAME(CFROUND)
|
||||
|
||||
INST_NAME(ISTORE)
|
||||
INST_NAME(FSTORE)
|
||||
};
|
||||
|
||||
InstructionVisualizer Instruction::engine[256] = {
|
||||
|
@ -403,6 +422,9 @@ namespace RandomX {
|
|||
INST_HANDLE(COND_R)
|
||||
INST_HANDLE(COND_M)
|
||||
INST_HANDLE(CFROUND)
|
||||
|
||||
INST_HANDLE(ISTORE)
|
||||
INST_HANDLE(FSTORE)
|
||||
};
|
||||
|
||||
}
|
|
@ -49,6 +49,7 @@ namespace RandomX {
|
|||
|
||||
void genAddressReg(std::ostream& os) const;
|
||||
void genAddressImm(std::ostream& os) const;
|
||||
void genAddressRegDst(std::ostream&) const;
|
||||
|
||||
void h_IADD_R(std::ostream&) const;
|
||||
void h_IADD_M(std::ostream&) const;
|
||||
|
@ -83,6 +84,8 @@ namespace RandomX {
|
|||
void h_COND_R(std::ostream&) const;
|
||||
void h_COND_M(std::ostream&) const;
|
||||
void h_CFROUND(std::ostream&) const;
|
||||
void h_ISTORE(std::ostream&) const;
|
||||
void h_FSTORE(std::ostream&) const;
|
||||
};
|
||||
|
||||
static_assert(sizeof(Instruction) == 8, "Invalid alignment of struct Instruction");
|
||||
|
|
|
@ -27,11 +27,16 @@
|
|||
#define DECL(x) x
|
||||
#endif
|
||||
.global DECL(randomx_program_prologue)
|
||||
.global DECL(randomx_program_begin)
|
||||
.global DECL(randomx_loop_begin)
|
||||
.global DECL(randomx_program_load_int)
|
||||
.global DECL(randomx_program_load_flt)
|
||||
.global DECL(randomx_program_start)
|
||||
.global DECL(randomx_program_read_dataset)
|
||||
.global DECL(randomx_program_store_int)
|
||||
.global DECL(randomx_program_store_flt)
|
||||
.global DECL(randomx_program_loop_end)
|
||||
.global DECL(randomx_program_epilogue)
|
||||
.global DECL(randomx_program_read)
|
||||
.global DECL(randomx_program_end)
|
||||
.global DECL(randomx_program_transform)
|
||||
|
||||
#define db .byte
|
||||
|
||||
|
@ -40,21 +45,37 @@ DECL(randomx_program_prologue):
|
|||
#include "asm/program_prologue_linux.inc"
|
||||
|
||||
.align 64
|
||||
DECL(randomx_program_begin):
|
||||
#include "asm/program_xmm_constants.inc"
|
||||
|
||||
.align 64
|
||||
DECL(randomx_loop_begin):
|
||||
nop
|
||||
|
||||
DECL(randomx_program_load_int):
|
||||
#include "asm/program_load_int.inc"
|
||||
|
||||
DECL(randomx_program_load_flt):
|
||||
#include "asm/program_load_flt.inc"
|
||||
|
||||
DECL(randomx_program_start):
|
||||
nop
|
||||
|
||||
DECL(randomx_program_read_dataset):
|
||||
#include "asm/program_read_dataset.inc"
|
||||
|
||||
DECL(randomx_program_store_int):
|
||||
#include "asm/program_store_int.inc"
|
||||
|
||||
DECL(randomx_program_store_flt):
|
||||
#include "asm/program_store_flt.inc"
|
||||
|
||||
DECL(randomx_program_loop_end):
|
||||
nop
|
||||
|
||||
.align 64
|
||||
DECL(randomx_program_epilogue):
|
||||
#include "asm/program_epilogue_linux.inc"
|
||||
|
||||
.align 64
|
||||
DECL(randomx_program_read):
|
||||
#include "asm/program_read.inc"
|
||||
|
||||
.align 64
|
||||
DECL(randomx_program_end):
|
||||
nop
|
||||
|
||||
.align 8
|
||||
DECL(randomx_program_transform):
|
||||
#include "asm/program_transform_address.inc"
|
||||
|
|
|
@ -20,12 +20,16 @@ IFDEF RAX
|
|||
_RANDOMX_JITX86_STATIC SEGMENT PAGE READ EXECUTE
|
||||
|
||||
PUBLIC randomx_program_prologue
|
||||
PUBLIC randomx_program_begin
|
||||
PUBLIC randomx_loop_begin
|
||||
PUBLIC randomx_program_load_int
|
||||
PUBLIC randomx_program_load_flt
|
||||
PUBLIC randomx_program_start
|
||||
PUBLIC randomx_program_read_dataset
|
||||
PUBLIC randomx_program_store_int
|
||||
PUBLIC randomx_program_store_flt
|
||||
PUBLIC randomx_program_loop_end
|
||||
PUBLIC randomx_program_epilogue
|
||||
PUBLIC randomx_program_read
|
||||
PUBLIC randomx_program_end
|
||||
PUBLIC randomx_program_transform
|
||||
|
||||
|
||||
ALIGN 64
|
||||
randomx_program_prologue PROC
|
||||
|
@ -33,30 +37,51 @@ randomx_program_prologue PROC
|
|||
randomx_program_prologue ENDP
|
||||
|
||||
ALIGN 64
|
||||
randomx_program_begin PROC
|
||||
include asm/program_xmm_constants.inc
|
||||
|
||||
ALIGN 64
|
||||
randomx_loop_begin PROC
|
||||
nop
|
||||
randomx_program_begin ENDP
|
||||
randomx_loop_begin ENDP
|
||||
|
||||
randomx_program_load_int PROC
|
||||
include asm/program_load_int.inc
|
||||
randomx_program_load_int ENDP
|
||||
|
||||
randomx_program_load_flt PROC
|
||||
include asm/program_load_flt.inc
|
||||
randomx_program_load_flt ENDP
|
||||
|
||||
randomx_program_start PROC
|
||||
nop
|
||||
randomx_program_start ENDP
|
||||
|
||||
randomx_program_read_dataset PROC
|
||||
include asm/program_read_dataset.inc
|
||||
randomx_program_read_dataset ENDP
|
||||
|
||||
randomx_program_store_int PROC
|
||||
include asm/program_store_int.inc
|
||||
randomx_program_store_int ENDP
|
||||
|
||||
randomx_program_store_flt PROC
|
||||
include asm/program_store_flt.inc
|
||||
randomx_program_store_flt ENDP
|
||||
|
||||
randomx_program_loop_end PROC
|
||||
nop
|
||||
randomx_program_loop_end ENDP
|
||||
|
||||
ALIGN 64
|
||||
randomx_program_epilogue PROC
|
||||
include asm/program_epilogue_win64.inc
|
||||
randomx_program_epilogue ENDP
|
||||
|
||||
ALIGN 64
|
||||
randomx_program_read PROC
|
||||
include asm/program_read.inc
|
||||
randomx_program_read ENDP
|
||||
|
||||
ALIGN 64
|
||||
randomx_program_end PROC
|
||||
nop
|
||||
randomx_program_end ENDP
|
||||
|
||||
ALIGN 8
|
||||
randomx_program_transform PROC
|
||||
include asm/program_transform_address.inc
|
||||
randomx_program_transform ENDP
|
||||
|
||||
_RANDOMX_JITX86_STATIC ENDS
|
||||
|
||||
ENDIF
|
||||
|
|
|
@ -18,10 +18,15 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
|||
*/
|
||||
|
||||
extern "C" {
|
||||
void randomx_program_prologue();
|
||||
void randomx_program_begin();
|
||||
void randomx_program_epilogue();
|
||||
void randomx_program_transform();
|
||||
void randomx_program_read();
|
||||
void randomx_program_end();
|
||||
void randomx_program_prologue();
|
||||
void randomx_loop_begin();
|
||||
void randomx_program_load_int();
|
||||
void randomx_program_load_flt();
|
||||
void randomx_program_start();
|
||||
void randomx_program_read_dataset();
|
||||
void randomx_program_store_int();
|
||||
void randomx_program_store_flt();
|
||||
void randomx_program_loop_end();
|
||||
void randomx_program_epilogue();
|
||||
void randomx_program_end();
|
||||
}
|
File diff suppressed because it is too large
Load diff
|
@ -30,16 +30,10 @@ namespace RandomX {
|
|||
|
||||
class JitCompilerX86;
|
||||
|
||||
typedef void(JitCompilerX86::*InstructionGeneratorX86)(Instruction&, int);
|
||||
typedef void(JitCompilerX86::*InstructionGeneratorX86)(Instruction&);
|
||||
|
||||
constexpr uint32_t CodeSize = 64 * 1024;
|
||||
|
||||
struct CallOffset {
|
||||
CallOffset(int32_t p, int32_t i) : pos(p), index(i) {}
|
||||
int32_t pos;
|
||||
int32_t index;
|
||||
};
|
||||
|
||||
class JitCompilerX86 {
|
||||
public:
|
||||
JitCompilerX86();
|
||||
|
@ -55,66 +49,82 @@ namespace RandomX {
|
|||
static InstructionGeneratorX86 engine[256];
|
||||
uint8_t* code;
|
||||
int32_t codePos;
|
||||
std::vector<int32_t> instructionOffsets;
|
||||
std::vector<CallOffset> callOffsets;
|
||||
|
||||
void gena(Instruction&);
|
||||
void genar(Instruction&);
|
||||
void genaf(Instruction&);
|
||||
void genbiashift(Instruction&, uint16_t, uint16_t);
|
||||
void genbia(Instruction&, uint16_t, uint16_t);
|
||||
void genbia32(Instruction&, uint16_t, uint8_t);
|
||||
void genbf(Instruction&, uint8_t);
|
||||
void scratchpadStoreR(Instruction&, uint32_t, bool);
|
||||
void scratchpadStoreF(Instruction&, int, uint32_t, bool);
|
||||
void gencr(Instruction&, bool);
|
||||
void gencf(Instruction&);
|
||||
void generateCode(Instruction&, int);
|
||||
void fixCallOffsets();
|
||||
void genAddressReg(Instruction&, bool);
|
||||
void genAddressRegDst(Instruction&, bool);
|
||||
void genAddressImm(Instruction&);
|
||||
void genSIB(int scale, int index, int base);
|
||||
|
||||
void generateCode(Instruction&);
|
||||
|
||||
// Appends a single byte to the code buffer and advances the write position.
void emitByte(uint8_t val) {
	code[codePos++] = val;
}
|
||||
|
||||
template<typename T>
|
||||
void emit(T val) {
|
||||
*reinterpret_cast<T*>(code + codePos) = val;
|
||||
codePos += sizeof(T);
|
||||
// Appends a 32-bit value to the code buffer, least significant byte first,
// and advances the write position by 4.
void emit32(uint32_t val) {
	for (int byteIndex = 0; byteIndex < 4; ++byteIndex) {
		// Only the low 8 bits of each shifted value are stored.
		code[codePos + byteIndex] = static_cast<uint8_t>(val >> (8 * byteIndex));
	}
	codePos += 4;
}
|
||||
|
||||
void h_ADD_64(Instruction&, int);
|
||||
void h_ADD_32(Instruction&, int);
|
||||
void h_SUB_64(Instruction&, int);
|
||||
void h_SUB_32(Instruction&, int);
|
||||
void h_MUL_64(Instruction&, int);
|
||||
void h_MULH_64(Instruction&, int);
|
||||
void h_MUL_32(Instruction&, int);
|
||||
void h_IMUL_32(Instruction&, int);
|
||||
void h_IMULH_64(Instruction&, int);
|
||||
void h_DIV_64(Instruction&, int);
|
||||
void h_IDIV_64(Instruction&, int);
|
||||
void h_AND_64(Instruction&, int);
|
||||
void h_AND_32(Instruction&, int);
|
||||
void h_OR_64(Instruction&, int);
|
||||
void h_OR_32(Instruction&, int);
|
||||
void h_XOR_64(Instruction&, int);
|
||||
void h_XOR_32(Instruction&, int);
|
||||
void h_SHL_64(Instruction&, int);
|
||||
void h_SHR_64(Instruction&, int);
|
||||
void h_SAR_64(Instruction&, int);
|
||||
void h_ROL_64(Instruction&, int);
|
||||
void h_ROR_64(Instruction&, int);
|
||||
void h_FPADD(Instruction&, int);
|
||||
void h_FPSUB(Instruction&, int);
|
||||
void h_FPMUL(Instruction&, int);
|
||||
void h_FPDIV(Instruction&, int);
|
||||
void h_FPSQRT(Instruction&, int);
|
||||
void h_FPROUND(Instruction&, int);
|
||||
void h_JUMP(Instruction&, int);
|
||||
void h_CALL(Instruction&, int);
|
||||
void h_RET(Instruction&, int);
|
||||
void h_NOP(Instruction&, int);
|
||||
// Appends a 64-bit value to the code buffer, least significant byte first,
// and advances the write position by 8.
void emit64(uint64_t val) {
	for (int byteIndex = 0; byteIndex < 8; ++byteIndex) {
		// Only the low 8 bits of each shifted value are stored.
		code[codePos + byteIndex] = static_cast<uint8_t>(val >> (8 * byteIndex));
	}
	codePos += 8;
}
|
||||
|
||||
template<size_t N>
|
||||
void emit(const uint8_t (&src)[N]) {
|
||||
for (int i = 0; i < N; ++i) {
|
||||
code[codePos + i] = src[i];
|
||||
}
|
||||
codePos += N;
|
||||
}
|
||||
|
||||
void h_IADD_R(Instruction&);
|
||||
void h_IADD_M(Instruction&);
|
||||
void h_IADD_RC(Instruction&);
|
||||
void h_ISUB_R(Instruction&);
|
||||
void h_ISUB_M(Instruction&);
|
||||
void h_IMUL_9C(Instruction&);
|
||||
void h_IMUL_R(Instruction&);
|
||||
void h_IMUL_M(Instruction&);
|
||||
void h_IMULH_R(Instruction&);
|
||||
void h_IMULH_M(Instruction&);
|
||||
void h_ISMULH_R(Instruction&);
|
||||
void h_ISMULH_M(Instruction&);
|
||||
void h_IDIV_C(Instruction&);
|
||||
void h_ISDIV_C(Instruction&);
|
||||
void h_INEG_R(Instruction&);
|
||||
void h_IXOR_R(Instruction&);
|
||||
void h_IXOR_M(Instruction&);
|
||||
void h_IROR_R(Instruction&);
|
||||
void h_IROL_R(Instruction&);
|
||||
void h_FPSWAP_R(Instruction&);
|
||||
void h_FPADD_R(Instruction&);
|
||||
void h_FPADD_M(Instruction&);
|
||||
void h_FPSUB_R(Instruction&);
|
||||
void h_FPSUB_M(Instruction&);
|
||||
void h_FPNEG_R(Instruction&);
|
||||
void h_FPMUL_R(Instruction&);
|
||||
void h_FPMUL_M(Instruction&);
|
||||
void h_FPDIV_R(Instruction&);
|
||||
void h_FPDIV_M(Instruction&);
|
||||
void h_FPSQRT_R(Instruction&);
|
||||
void h_COND_R(Instruction&);
|
||||
void h_COND_M(Instruction&);
|
||||
void h_CFROUND(Instruction&);
|
||||
void h_ISTORE(Instruction&);
|
||||
void h_FSTORE(Instruction&);
|
||||
};
|
||||
|
||||
}
|
|
@ -1,9 +1,5 @@
|
|||
;# unroll VM stack
|
||||
mov rsp, rdi
|
||||
|
||||
;# save VM register values
|
||||
pop rcx
|
||||
pop rcx
|
||||
mov qword ptr [rcx+0], r8
|
||||
mov qword ptr [rcx+8], r9
|
||||
mov qword ptr [rcx+16], r10
|
||||
|
@ -12,12 +8,12 @@
|
|||
mov qword ptr [rcx+40], r13
|
||||
mov qword ptr [rcx+48], r14
|
||||
mov qword ptr [rcx+56], r15
|
||||
movapd xmmword ptr [rcx+64], xmm8
|
||||
movapd xmmword ptr [rcx+80], xmm9
|
||||
movapd xmmword ptr [rcx+96], xmm2
|
||||
movapd xmmword ptr [rcx+112], xmm3
|
||||
movdqa xmmword ptr [rcx+64], xmm0
|
||||
movdqa xmmword ptr [rcx+80], xmm1
|
||||
movdqa xmmword ptr [rcx+96], xmm2
|
||||
movdqa xmmword ptr [rcx+112], xmm3
|
||||
lea rcx, [rcx+64]
|
||||
movapd xmmword ptr [rcx+64], xmm4
|
||||
movapd xmmword ptr [rcx+80], xmm5
|
||||
movapd xmmword ptr [rcx+96], xmm6
|
||||
movapd xmmword ptr [rcx+112], xmm7
|
||||
movdqa xmmword ptr [rcx+64], xmm4
|
||||
movdqa xmmword ptr [rcx+80], xmm5
|
||||
movdqa xmmword ptr [rcx+96], xmm6
|
||||
movdqa xmmword ptr [rcx+112], xmm7
|
|
@ -1,6 +1,12 @@
|
|||
include program_epilogue_store.inc
|
||||
|
||||
;# restore callee-saved registers - Microsoft x64 calling convention
|
||||
movdqu xmm15, xmmword ptr [rsp]
|
||||
movdqu xmm14, xmmword ptr [rsp+16]
|
||||
movdqu xmm13, xmmword ptr [rsp+32]
|
||||
movdqu xmm12, xmmword ptr [rsp+48]
|
||||
movdqu xmm11, xmmword ptr [rsp+64]
|
||||
add rsp, 80
|
||||
movdqu xmm10, xmmword ptr [rsp]
|
||||
movdqu xmm9, xmmword ptr [rsp+16]
|
||||
movdqu xmm8, xmmword ptr [rsp+32]
|
||||
|
@ -17,4 +23,4 @@
|
|||
pop rbx
|
||||
|
||||
;# program finished
|
||||
ret 0
|
||||
ret
|
||||
|
|
14
src/asm/program_load_flt.inc
Normal file
14
src/asm/program_load_flt.inc
Normal file
|
@ -0,0 +1,14 @@
|
|||
and eax, 262080
|
||||
lea rcx, [rsi+rax]
|
||||
cvtdq2pd xmm0, qword ptr [rcx+0]
|
||||
cvtdq2pd xmm1, qword ptr [rcx+8]
|
||||
cvtdq2pd xmm2, qword ptr [rcx+16]
|
||||
cvtdq2pd xmm3, qword ptr [rcx+24]
|
||||
cvtdq2pd xmm4, qword ptr [rcx+32]
|
||||
cvtdq2pd xmm5, qword ptr [rcx+40]
|
||||
cvtdq2pd xmm6, qword ptr [rcx+48]
|
||||
cvtdq2pd xmm7, qword ptr [rcx+56]
|
||||
andps xmm4, xmm14
|
||||
andps xmm5, xmm14
|
||||
andps xmm6, xmm14
|
||||
andps xmm7, xmm14
|
10
src/asm/program_load_int.inc
Normal file
10
src/asm/program_load_int.inc
Normal file
|
@ -0,0 +1,10 @@
|
|||
and eax, 262080
|
||||
lea rcx, [rsi+rax]
|
||||
xor r8, qword ptr [rcx+0]
|
||||
xor r9, qword ptr [rcx+8]
|
||||
xor r10, qword ptr [rcx+16]
|
||||
xor r11, qword ptr [rcx+24]
|
||||
xor r12, qword ptr [rcx+32]
|
||||
xor r13, qword ptr [rcx+40]
|
||||
xor r14, qword ptr [rcx+48]
|
||||
xor r15, qword ptr [rcx+56]
|
|
@ -7,13 +7,14 @@
|
|||
push r15
|
||||
|
||||
;# function arguments
|
||||
mov rbx, rcx ;# loop counter
|
||||
push rdi ;# RegisterFile& registerFile
|
||||
mov rbp, qword ptr [rsi] ;# "mx", "ma"
|
||||
mov rax, qword ptr [rsi+8] ;# uint8_t* dataset
|
||||
push rax
|
||||
mov rsi, rdx ;# convertible_t* scratchpad
|
||||
mov rcx, rdi
|
||||
mov rbp, qword ptr [rsi] ;# "mx", "ma"
|
||||
mov eax, ebp ;# "mx"
|
||||
mov rdi, qword ptr [rsi+8] ;# uint8_t* dataset
|
||||
mov rsi, rdx ;# convertible_t* scratchpad
|
||||
|
||||
#include "program_prologue_load.inc"
|
||||
|
||||
jmp randomx_program_begin
|
||||
jmp DECL(randomx_loop_begin)
|
|
@ -1,27 +1,20 @@
|
|||
mov rdi, rsp ;# beginning of VM stack
|
||||
mov ebx, 262145 ;# number of VM instructions to execute + 1
|
||||
;# zero integer registers
|
||||
xor r8, r8
|
||||
xor r9, r9
|
||||
xor r10, r10
|
||||
xor r11, r11
|
||||
xor r12, r12
|
||||
xor r13, r13
|
||||
xor r14, r14
|
||||
xor r15, r15
|
||||
|
||||
xorps xmm10, xmm10
|
||||
cmpeqpd xmm10, xmm10
|
||||
psrlq xmm10, 1 ;# mask for absolute value = 0x7fffffffffffffff7fffffffffffffff
|
||||
;# load constant registers
|
||||
lea rcx, [rcx+120]
|
||||
movapd xmm8, xmmword ptr [rcx+72]
|
||||
movapd xmm9, xmmword ptr [rcx+88]
|
||||
movapd xmm10, xmmword ptr [rcx+104]
|
||||
movapd xmm11, xmmword ptr [rcx+120]
|
||||
movapd xmm13, xmmword ptr [minDbl]
|
||||
movapd xmm14, xmmword ptr [absMask]
|
||||
movapd xmm15, xmmword ptr [signMask]
|
||||
|
||||
;# load integer registers
|
||||
mov r8, qword ptr [rcx+0]
|
||||
mov r9, qword ptr [rcx+8]
|
||||
mov r10, qword ptr [rcx+16]
|
||||
mov r11, qword ptr [rcx+24]
|
||||
mov r12, qword ptr [rcx+32]
|
||||
mov r13, qword ptr [rcx+40]
|
||||
mov r14, qword ptr [rcx+48]
|
||||
mov r15, qword ptr [rcx+56]
|
||||
|
||||
;# load floating point registers
|
||||
movapd xmm8, xmmword ptr [rcx+64]
|
||||
movapd xmm9, xmmword ptr [rcx+80]
|
||||
movapd xmm2, xmmword ptr [rcx+96]
|
||||
movapd xmm3, xmmword ptr [rcx+112]
|
||||
lea rcx, [rcx+64]
|
||||
movapd xmm4, xmmword ptr [rcx+64]
|
||||
movapd xmm5, xmmword ptr [rcx+80]
|
||||
movapd xmm6, xmmword ptr [rcx+96]
|
||||
movapd xmm7, xmmword ptr [rcx+112]
|
||||
|
|
|
@ -13,14 +13,21 @@
|
|||
movdqu xmmword ptr [rsp+32], xmm8
|
||||
movdqu xmmword ptr [rsp+16], xmm9
|
||||
movdqu xmmword ptr [rsp+0], xmm10
|
||||
sub rsp, 80
|
||||
movdqu xmmword ptr [rsp+64], xmm11
|
||||
movdqu xmmword ptr [rsp+48], xmm12
|
||||
movdqu xmmword ptr [rsp+32], xmm13
|
||||
movdqu xmmword ptr [rsp+16], xmm14
|
||||
movdqu xmmword ptr [rsp+0], xmm15
|
||||
|
||||
;# function arguments
|
||||
push rcx ;# RegisterFile& registerFile
|
||||
mov rbp, qword ptr [rdx] ;# "mx", "ma"
|
||||
mov rax, qword ptr [rdx+8] ;# uint8_t* dataset
|
||||
push rax
|
||||
mov rsi, r8 ;# convertible_t* scratchpad
|
||||
; function arguments
|
||||
push rcx ; RegisterFile& registerFile
|
||||
mov rbp, qword ptr [rdx] ; "mx", "ma"
|
||||
mov eax, ebp ; "mx"
|
||||
mov rdi, qword ptr [rdx+8] ; uint8_t* dataset
|
||||
mov rsi, r8 ; convertible_t* scratchpad
|
||||
mov rbx, r9 ; loop counter
|
||||
|
||||
include program_prologue_load.inc
|
||||
|
||||
jmp randomx_program_begin
|
||||
jmp randomx_loop_begin
|
|
@ -1,20 +0,0 @@
|
|||
db 0, 0, 0, 0 ;# TransformAddress placeholder
|
||||
mov rcx, qword ptr [rdi] ;# load the dataset address
|
||||
xor rbp, rax ;# modify "mx"
|
||||
;# prefetch cacheline "mx"
|
||||
and rbp, -64 ;# align "mx" to the start of a cache line
|
||||
mov edx, ebp ;# edx = mx
|
||||
prefetchnta byte ptr [rcx+rdx]
|
||||
;# read cacheline "ma"
|
||||
ror rbp, 32 ;# swap "ma" and "mx"
|
||||
mov edx, ebp ;# edx = ma
|
||||
lea rcx, [rcx+rdx] ;# dataset cache line
|
||||
xor r8, qword ptr [rcx+0]
|
||||
xor r9, qword ptr [rcx+8]
|
||||
xor r10, qword ptr [rcx+16]
|
||||
xor r11, qword ptr [rcx+24]
|
||||
xor r12, qword ptr [rcx+32]
|
||||
xor r13, qword ptr [rcx+40]
|
||||
xor r14, qword ptr [rcx+48]
|
||||
xor r15, qword ptr [rcx+56]
|
||||
ret
|
16
src/asm/program_read_dataset.inc
Normal file
16
src/asm/program_read_dataset.inc
Normal file
|
@ -0,0 +1,16 @@
|
|||
xor rbp, rax ;# modify "mx"
|
||||
and rbp, -64 ;# align "mx" to the start of a cache line
|
||||
mov edx, ebp ;# edx = mx
|
||||
prefetchnta byte ptr [rdi+rdx]
|
||||
ror rbp, 32 ;# swap "ma" and "mx"
|
||||
mov edx, ebp ;# edx = ma
|
||||
lea rcx, [rdi+rdx] ;# dataset cache line
|
||||
xor r8, qword ptr [rcx+0]
|
||||
xor r9, qword ptr [rcx+8]
|
||||
xor r10, qword ptr [rcx+16]
|
||||
xor r11, qword ptr [rcx+24]
|
||||
xor r12, qword ptr [rcx+32]
|
||||
xor r13, qword ptr [rcx+40]
|
||||
xor r14, qword ptr [rcx+48]
|
||||
xor r15, qword ptr [rcx+56]
|
||||
|
11
src/asm/program_store_flt.inc
Normal file
11
src/asm/program_store_flt.inc
Normal file
|
@ -0,0 +1,11 @@
|
|||
and eax, 262080
|
||||
lea rcx, [rsi+rax]
|
||||
mulpd xmm0, xmm4
|
||||
mulpd xmm1, xmm5
|
||||
mulpd xmm2, xmm6
|
||||
mulpd xmm3, xmm7
|
||||
movapd xmmword ptr [rcx+0], xmm0
|
||||
movapd xmmword ptr [rcx+16], xmm1
|
||||
movapd xmmword ptr [rcx+32], xmm2
|
||||
movapd xmmword ptr [rcx+48], xmm3
|
||||
|
10
src/asm/program_store_int.inc
Normal file
10
src/asm/program_store_int.inc
Normal file
|
@ -0,0 +1,10 @@
|
|||
and eax, 262080
|
||||
lea rcx, [rsi+rax]
|
||||
mov qword ptr [rcx+0], r8
|
||||
mov qword ptr [rcx+8], r9
|
||||
mov qword ptr [rcx+16], r10
|
||||
mov qword ptr [rcx+24], r11
|
||||
mov qword ptr [rcx+32], r12
|
||||
mov qword ptr [rcx+40], r13
|
||||
mov qword ptr [rcx+48], r14
|
||||
mov qword ptr [rcx+56], r15
|
6
src/asm/program_xmm_constants.inc
Normal file
6
src/asm/program_xmm_constants.inc
Normal file
|
@ -0,0 +1,6 @@
|
|||
minDbl:
|
||||
db 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 16, 0
|
||||
absMask:
|
||||
db 255, 255, 255, 255, 255, 255, 255, 127, 255, 255, 255, 255, 255, 255, 255, 127
|
||||
signMask:
|
||||
db 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0, 0, 128
|
|
@ -81,6 +81,8 @@ namespace RandomX {
|
|||
constexpr uint32_t ScratchpadL3 = ScratchpadSize / sizeof(convertible_t);
|
||||
constexpr int ScratchpadL1Mask = (ScratchpadL1 - 1) * 8;
|
||||
constexpr int ScratchpadL2Mask = (ScratchpadL2 - 1) * 8;
|
||||
constexpr int ScratchpadL1Mask16 = (ScratchpadL1 / 2 - 1) * 16;
|
||||
constexpr int ScratchpadL2Mask16 = (ScratchpadL2 / 2 - 1) * 16;
|
||||
constexpr uint32_t TransformationCount = 90;
|
||||
constexpr int RegistersCount = 8;
|
||||
|
||||
|
@ -129,7 +131,7 @@ namespace RandomX {
|
|||
|
||||
typedef void(*DatasetReadFunc)(addr_t, MemoryRegisters&, RegisterFile&);
|
||||
|
||||
typedef void(*ProgramFunc)(RegisterFile&, MemoryRegisters&, convertible_t*);
|
||||
typedef void(*ProgramFunc)(RegisterFile&, MemoryRegisters&, convertible_t*, uint64_t);
|
||||
|
||||
extern "C" {
|
||||
void executeProgram(RegisterFile&, MemoryRegisters&, convertible_t*, uint64_t);
|
||||
|
|
|
@ -21,14 +21,6 @@ _RANDOMX_EXECUTE_PROGRAM SEGMENT PAGE READ EXECUTE
|
|||
|
||||
PUBLIC executeProgram
|
||||
|
||||
ALIGN 16
|
||||
minDbl:
|
||||
db 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 16, 0
|
||||
absMask:
|
||||
db 255, 255, 255, 255, 255, 255, 255, 127, 255, 255, 255, 255, 255, 255, 255, 127
|
||||
signMask:
|
||||
db 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0, 0, 128
|
||||
|
||||
executeProgram PROC
|
||||
; REGISTER ALLOCATION:
|
||||
; rax -> temporary
|
||||
|
@ -114,6 +106,17 @@ executeProgram PROC
|
|||
movapd xmm14, xmmword ptr [absMask]
|
||||
movapd xmm15, xmmword ptr [signMask]
|
||||
|
||||
jmp program_begin
|
||||
|
||||
ALIGN 64
|
||||
minDbl:
|
||||
db 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 16, 0
|
||||
absMask:
|
||||
db 255, 255, 255, 255, 255, 255, 255, 127, 255, 255, 255, 255, 255, 255, 255, 127
|
||||
signMask:
|
||||
db 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0, 0, 128
|
||||
|
||||
ALIGN 64
|
||||
program_begin:
|
||||
xor eax, r8d ;# read address register 1
|
||||
and eax, 262080
|
||||
|
@ -144,7 +147,7 @@ program_begin:
|
|||
|
||||
;# 256 instructions
|
||||
include program.inc
|
||||
|
||||
|
||||
mov eax, r8d ;# read address register 1
|
||||
xor eax, r9d ;# read address register 2
|
||||
xor rbp, rax ;# modify "mx"
|
||||
|
|
|
@ -22,21 +22,21 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
|||
//Integer
|
||||
#define WT_IADD_R 10
|
||||
#define WT_IADD_M 3
|
||||
#define WT_IADD_RC 12
|
||||
#define WT_IADD_RC 10
|
||||
#define WT_ISUB_R 10
|
||||
#define WT_ISUB_M 3
|
||||
#define WT_IMUL_9C 12
|
||||
#define WT_IMUL_R 24
|
||||
#define WT_IMUL_M 8
|
||||
#define WT_IMUL_9C 10
|
||||
#define WT_IMUL_R 20
|
||||
#define WT_IMUL_M 6
|
||||
#define WT_IMULH_R 6
|
||||
#define WT_IMULH_M 2
|
||||
#define WT_ISMULH_R 6
|
||||
#define WT_ISMULH_M 2
|
||||
#define WT_IDIV_C 4
|
||||
#define WT_ISDIV_C 2
|
||||
#define WT_INEG_R 4
|
||||
#define WT_IXOR_R 15
|
||||
#define WT_IXOR_M 5
|
||||
#define WT_ISDIV_C 4
|
||||
#define WT_INEG_R 2
|
||||
#define WT_IXOR_R 12
|
||||
#define WT_IXOR_M 4
|
||||
#define WT_IROR_R 10
|
||||
#define WT_IROL_R 10
|
||||
|
||||
|
@ -58,10 +58,14 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
|||
#define WT_FPSQRT_R 6
|
||||
|
||||
//Control
|
||||
#define WT_COND_R 15
|
||||
#define WT_COND_M 5
|
||||
#define WT_COND_R 12
|
||||
#define WT_COND_M 4
|
||||
#define WT_CFROUND 1
|
||||
|
||||
//Store
|
||||
#define WT_ISTORE 12
|
||||
#define WT_FSTORE 6
|
||||
|
||||
#define WT_NOP 0
|
||||
|
||||
constexpr int wtSum = WT_IADD_R + WT_IADD_M + WT_IADD_RC + WT_ISUB_R + \
|
||||
|
@ -70,7 +74,7 @@ WT_IMULH_M + WT_ISMULH_R + WT_ISMULH_M + WT_IDIV_C + WT_ISDIV_C + \
|
|||
WT_INEG_R + WT_IXOR_R + WT_IXOR_M + WT_IROR_R + WT_IROL_R + \
|
||||
WT_FPSWAP_R + WT_FPADD_R + WT_FPADD_M + WT_FPSUB_R + WT_FPSUB_M + \
|
||||
WT_FPNEG_R + WT_FPMUL_R + WT_FPMUL_M + WT_FPDIV_R + WT_FPDIV_M + \
|
||||
WT_FPSQRT_R + WT_COND_R + WT_COND_M + WT_CFROUND + WT_NOP;
|
||||
WT_FPSQRT_R + WT_COND_R + WT_COND_M + WT_CFROUND + WT_ISTORE + WT_FSTORE + WT_NOP;
|
||||
|
||||
static_assert(wtSum == 256,
|
||||
"Sum of instruction weights must be 256");
|
||||
|
@ -116,3 +120,40 @@ static_assert(wtSum == 256,
|
|||
#define REPN(x,N) REPNX(x,N)
|
||||
#define NUM(x) x
|
||||
#define WT(x) NUM(WT_##x)
|
||||
|
||||
#define REPCASE0(x)
|
||||
#define REPCASE1(x) case __COUNTER__:
|
||||
#define REPCASE2(x) REPCASE1(x) case __COUNTER__:
|
||||
#define REPCASE3(x) REPCASE2(x) case __COUNTER__:
|
||||
#define REPCASE4(x) REPCASE3(x) case __COUNTER__:
|
||||
#define REPCASE5(x) REPCASE4(x) case __COUNTER__:
|
||||
#define REPCASE6(x) REPCASE5(x) case __COUNTER__:
|
||||
#define REPCASE7(x) REPCASE6(x) case __COUNTER__:
|
||||
#define REPCASE8(x) REPCASE7(x) case __COUNTER__:
|
||||
#define REPCASE9(x) REPCASE8(x) case __COUNTER__:
|
||||
#define REPCASE10(x) REPCASE9(x) case __COUNTER__:
|
||||
#define REPCASE11(x) REPCASE10(x) case __COUNTER__:
|
||||
#define REPCASE12(x) REPCASE11(x) case __COUNTER__:
|
||||
#define REPCASE13(x) REPCASE12(x) case __COUNTER__:
|
||||
#define REPCASE14(x) REPCASE13(x) case __COUNTER__:
|
||||
#define REPCASE15(x) REPCASE14(x) case __COUNTER__:
|
||||
#define REPCASE16(x) REPCASE15(x) case __COUNTER__:
|
||||
#define REPCASE17(x) REPCASE16(x) case __COUNTER__:
|
||||
#define REPCASE18(x) REPCASE17(x) case __COUNTER__:
|
||||
#define REPCASE19(x) REPCASE18(x) case __COUNTER__:
|
||||
#define REPCASE20(x) REPCASE19(x) case __COUNTER__:
|
||||
#define REPCASE21(x) REPCASE20(x) case __COUNTER__:
|
||||
#define REPCASE22(x) REPCASE21(x) case __COUNTER__:
|
||||
#define REPCASE23(x) REPCASE22(x) case __COUNTER__:
|
||||
#define REPCASE24(x) REPCASE23(x) case __COUNTER__:
|
||||
#define REPCASE25(x) REPCASE24(x) case __COUNTER__:
|
||||
#define REPCASE26(x) REPCASE25(x) case __COUNTER__:
|
||||
#define REPCASE27(x) REPCASE26(x) case __COUNTER__:
|
||||
#define REPCASE28(x) REPCASE27(x) case __COUNTER__:
|
||||
#define REPCASE29(x) REPCASE28(x) case __COUNTER__:
|
||||
#define REPCASE30(x) REPCASE29(x) case __COUNTER__:
|
||||
#define REPCASE31(x) REPCASE30(x) case __COUNTER__:
|
||||
#define REPCASE32(x) REPCASE31(x) case __COUNTER__:
|
||||
#define REPCASENX(x,N) REPCASE##N(x)
|
||||
#define REPCASEN(x,N) REPCASENX(x,N)
|
||||
#define CASE_REP(x) REPCASEN(x, WT(x))
|
|
@ -174,7 +174,7 @@ void mine(RandomX::VirtualMachine* vm, std::atomic<int>& atomicNonce, AtomicHash
|
|||
for (int chain = 0; chain < 16; ++chain) {
|
||||
vm->initializeProgram(hash);
|
||||
int segment = hash[3] & 3;
|
||||
vm->setScratchpad(scratchpad);// +segment * RandomX::ScratchpadSize / 4);
|
||||
vm->setScratchpad(scratchpad + segment * RandomX::ScratchpadSize / 4);
|
||||
vm->execute();
|
||||
vm->getResult(nullptr, 0, hash);
|
||||
}
|
||||
|
|
1460
src/program.inc
1460
src/program.inc
File diff suppressed because it is too large
Load diff
Loading…
Reference in a new issue