Added explicit STORE instructions

JIT compiler
This commit is contained in:
tevador 2019-01-27 10:52:30 +01:00
parent d2cb086221
commit 005c67f64c
27 changed files with 1751 additions and 1518 deletions

View file

@ -75,6 +75,11 @@ namespace RandomX {
asmCode << "\tand " << reg << ", " << ((instr.alt % 4) ? ScratchpadL1Mask : ScratchpadL2Mask) << std::endl;
}
// Emits assembly that turns the destination register of `instr` into a
// scratchpad offset held in eax:
//   mov eax, <32-bit dst register>
//   and eax, <mask>
// The mask is ScratchpadL1Mask when (instr.alt % 4) is non-zero, otherwise
// ScratchpadL2Mask, ANDed with -maskAlign. For a power-of-two maskAlign this
// additionally clears the low bits so the offset is a multiple of maskAlign.
void AssemblyGeneratorX86::genAddressRegDst(Instruction& instr, int maskAlign = 8) {
	const int levelMask = (instr.alt % 4) ? ScratchpadL1Mask : ScratchpadL2Mask;
	const int addressMask = levelMask & (-maskAlign);
	asmCode << "\tmov eax, " << regR32[instr.dst] << std::endl;
	asmCode << "\tand eax, " << addressMask << std::endl;
}
// Returns the immediate of `instr` masked down to a scratchpad offset.
// ScratchpadL1Mask is applied when (instr.alt % 4) is non-zero,
// ScratchpadL2Mask otherwise.
int32_t AssemblyGeneratorX86::genAddressImm(Instruction& instr) {
	const bool selectL1 = (instr.alt % 4) != 0;
	const int mask = selectL1 ? ScratchpadL1Mask : ScratchpadL2Mask;
	return instr.imm32 & mask;
}
@ -425,7 +430,7 @@ namespace RandomX {
//6 uOPs
void AssemblyGeneratorX86::h_CFROUND(Instruction& instr, int i) {
asmCode << "\tmov rax, " << regR[instr.dst] << std::endl;
asmCode << "\tmov rax, " << regR[instr.src] << std::endl;
int rotate = (13 - (instr.alt & 63)) & 63;
if (rotate != 0)
asmCode << "\trol rax, " << rotate << std::endl;
@ -474,6 +479,18 @@ namespace RandomX {
asmCode << "\tadd " << regR[instr.dst] << ", rcx" << std::endl;
}
//3 uOPs
// ISTORE: computes the store offset from the destination register (left in
// eax by genAddressRegDst with its default alignment) and emits a 64-bit
// store of the source integer register to [rsi+rax].
void AssemblyGeneratorX86::h_ISTORE(Instruction& instr, int i) {
	genAddressRegDst(instr);
	asmCode << "\tmov qword ptr [rsi+rax], ";
	asmCode << regR[instr.src];
	asmCode << std::endl;
}
//3 uOPs
// FSTORE: computes a 16-byte aligned store offset from the destination
// register (left in eax by genAddressRegDst) and emits an aligned 128-bit
// store (movapd) of the source register from regFE to [rsi+rax].
void AssemblyGeneratorX86::h_FSTORE(Instruction& instr, int i) {
	genAddressRegDst(instr, 16);
	asmCode << "\tmovapd xmmword ptr [rsi+rax], ";
	asmCode << regFE[instr.src];
	asmCode << std::endl;
}
#include "instructionWeights.hpp"
#define INST_HANDLE(x) REPN(&AssemblyGeneratorX86::h_##x, WT(x))
@ -520,5 +537,8 @@ namespace RandomX {
INST_HANDLE(COND_R)
INST_HANDLE(COND_M)
INST_HANDLE(CFROUND)
INST_HANDLE(ISTORE)
INST_HANDLE(FSTORE)
};
}

View file

@ -38,16 +38,8 @@ namespace RandomX {
static InstructionGenerator engine[256];
std::stringstream asmCode;
void gena(Instruction&, int);
void genar(Instruction&, int);
void genaf(Instruction&, int);
void genbiashift(Instruction&, const char*);
void genbia(Instruction&);
void genbia32(Instruction&);
void genbf(Instruction&, const char*);
void gencr(Instruction&, bool);
void gencf(Instruction&, bool);
void genAddressReg(Instruction&, const char*);
void genAddressRegDst(Instruction&, int);
int32_t genAddressImm(Instruction&);
void generateCode(Instruction&, int);
@ -85,5 +77,7 @@ namespace RandomX {
void h_COND_R(Instruction&, int);
void h_COND_M(Instruction&, int);
void h_CFROUND(Instruction&, int);
void h_ISTORE(Instruction&, int);
void h_FSTORE(Instruction&, int);
};
}

View file

@ -71,14 +71,14 @@ namespace RandomX {
reg.a[i].hi.u64 = getSmallPositiveFloatBits(reg.f[i].hi.u64);
}
compiler.generateProgram(gen);
mem.ma = (gen() ^ *(((uint32_t*)seed) + 4)) & ~7;
mem.ma = (gen() ^ *(((uint32_t*)seed) + 4)) & -64;
mem.mx = *(((uint32_t*)seed) + 5);
}
void CompiledVirtualMachine::execute() {
executeProgram(reg, mem, scratchpad, InstructionCount);
//executeProgram(reg, mem, scratchpad, InstructionCount);
totalSize += compiler.getCodeSize();
//compiler.getProgramFunc()(reg, mem, scratchpad);
compiler.getProgramFunc()(reg, mem, scratchpad, InstructionCount);
#ifdef TRACEVM
for (int32_t i = InstructionCount - 1; i >= 0; --i) {
std::cout << std::hex << tracepad[i].u64 << std::endl;

View file

@ -32,6 +32,10 @@ namespace RandomX {
os << ((alt % 4) ? "L1" : "L2") << "[r" << (int)src << "]";
}
// Prints the destination-register address operand, e.g. "L1[r3]".
// "L1" is printed when (alt % 4) is non-zero, "L2" otherwise.
void Instruction::genAddressRegDst(std::ostream& os) const {
	const char* level = (alt % 4) ? "L1" : "L2";
	os << level << "[r" << static_cast<int>(dst) << "]";
}
// Prints the immediate address operand, e.g. "L2[1234]": the level name
// followed by imm32 masked with the matching scratchpad mask.
// L1 is selected when (alt % 4) is non-zero, L2 otherwise.
void Instruction::genAddressImm(std::ostream& os) const {
	const bool inL1 = (alt % 4) != 0;
	const int mask = inL1 ? ScratchpadL1Mask : ScratchpadL2Mask;
	os << (inL1 ? "L1" : "L2") << "[" << (imm32 & mask) << "]";
}
@ -276,7 +280,7 @@ namespace RandomX {
}
void Instruction::h_CFROUND(std::ostream& os) const {
os << "r" << (int)dst << ", " << (alt & 63) << std::endl;
os << "r" << (int)src << ", " << (alt & 63) << std::endl;
}
static inline const char* condition(int index) {
@ -311,6 +315,18 @@ namespace RandomX {
os << ", " << imm32 << ")" << std::endl;
}
// Prints the operands of ISTORE: the destination address operand followed by
// the source integer register, e.g. "L1[r2], r5", and ends the line.
void Instruction::h_ISTORE(std::ostream& os) const {
	genAddressRegDst(os);
	os << ", r" << static_cast<int>(src);
	os << std::endl;
}
// Prints the operands of FSTORE: the destination address operand followed by
// the source register, e.g. "L2[r1], e3", and ends the line.
// Source indices 0-3 print as f0-f3; indices 4 and above print as 'e' with
// the index taken modulo 4.
void Instruction::h_FSTORE(std::ostream& os) const {
	genAddressRegDst(os);
	const char group = (src < 4) ? 'f' : 'e';
	const auto index = src % 4;
	os << ", " << group << index << std::endl;
}
#include "instructionWeights.hpp"
#define INST_NAME(x) REPN(#x, WT(x))
#define INST_HANDLE(x) REPN(&Instruction::h_##x, WT(x))
@ -358,6 +374,9 @@ namespace RandomX {
INST_NAME(COND_R)
INST_NAME(COND_M)
INST_NAME(CFROUND)
INST_NAME(ISTORE)
INST_NAME(FSTORE)
};
InstructionVisualizer Instruction::engine[256] = {
@ -403,6 +422,9 @@ namespace RandomX {
INST_HANDLE(COND_R)
INST_HANDLE(COND_M)
INST_HANDLE(CFROUND)
INST_HANDLE(ISTORE)
INST_HANDLE(FSTORE)
};
}

View file

@ -49,6 +49,7 @@ namespace RandomX {
void genAddressReg(std::ostream& os) const;
void genAddressImm(std::ostream& os) const;
void genAddressRegDst(std::ostream&) const;
void h_IADD_R(std::ostream&) const;
void h_IADD_M(std::ostream&) const;
@ -83,6 +84,8 @@ namespace RandomX {
void h_COND_R(std::ostream&) const;
void h_COND_M(std::ostream&) const;
void h_CFROUND(std::ostream&) const;
void h_ISTORE(std::ostream&) const;
void h_FSTORE(std::ostream&) const;
};
static_assert(sizeof(Instruction) == 8, "Invalid alignment of struct Instruction");

View file

@ -27,11 +27,16 @@
#define DECL(x) x
#endif
.global DECL(randomx_program_prologue)
.global DECL(randomx_program_begin)
.global DECL(randomx_loop_begin)
.global DECL(randomx_program_load_int)
.global DECL(randomx_program_load_flt)
.global DECL(randomx_program_start)
.global DECL(randomx_program_read_dataset)
.global DECL(randomx_program_store_int)
.global DECL(randomx_program_store_flt)
.global DECL(randomx_program_loop_end)
.global DECL(randomx_program_epilogue)
.global DECL(randomx_program_read)
.global DECL(randomx_program_end)
.global DECL(randomx_program_transform)
#define db .byte
@ -40,21 +45,37 @@ DECL(randomx_program_prologue):
#include "asm/program_prologue_linux.inc"
.align 64
DECL(randomx_program_begin):
#include "asm/program_xmm_constants.inc"
.align 64
DECL(randomx_loop_begin):
nop
DECL(randomx_program_load_int):
#include "asm/program_load_int.inc"
DECL(randomx_program_load_flt):
#include "asm/program_load_flt.inc"
DECL(randomx_program_start):
nop
DECL(randomx_program_read_dataset):
#include "asm/program_read_dataset.inc"
DECL(randomx_program_store_int):
#include "asm/program_store_int.inc"
DECL(randomx_program_store_flt):
#include "asm/program_store_flt.inc"
DECL(randomx_program_loop_end):
nop
.align 64
DECL(randomx_program_epilogue):
#include "asm/program_epilogue_linux.inc"
.align 64
DECL(randomx_program_read):
#include "asm/program_read.inc"
.align 64
DECL(randomx_program_end):
nop
.align 8
DECL(randomx_program_transform):
#include "asm/program_transform_address.inc"

View file

@ -20,12 +20,16 @@ IFDEF RAX
_RANDOMX_JITX86_STATIC SEGMENT PAGE READ EXECUTE
PUBLIC randomx_program_prologue
PUBLIC randomx_program_begin
PUBLIC randomx_loop_begin
PUBLIC randomx_program_load_int
PUBLIC randomx_program_load_flt
PUBLIC randomx_program_start
PUBLIC randomx_program_read_dataset
PUBLIC randomx_program_store_int
PUBLIC randomx_program_store_flt
PUBLIC randomx_program_loop_end
PUBLIC randomx_program_epilogue
PUBLIC randomx_program_read
PUBLIC randomx_program_end
PUBLIC randomx_program_transform
ALIGN 64
randomx_program_prologue PROC
@ -33,30 +37,51 @@ randomx_program_prologue PROC
randomx_program_prologue ENDP
ALIGN 64
randomx_program_begin PROC
include asm/program_xmm_constants.inc
ALIGN 64
randomx_loop_begin PROC
nop
randomx_program_begin ENDP
randomx_loop_begin ENDP
randomx_program_load_int PROC
include asm/program_load_int.inc
randomx_program_load_int ENDP
randomx_program_load_flt PROC
include asm/program_load_flt.inc
randomx_program_load_flt ENDP
randomx_program_start PROC
nop
randomx_program_start ENDP
randomx_program_read_dataset PROC
include asm/program_read_dataset.inc
randomx_program_read_dataset ENDP
randomx_program_store_int PROC
include asm/program_store_int.inc
randomx_program_store_int ENDP
randomx_program_store_flt PROC
include asm/program_store_flt.inc
randomx_program_store_flt ENDP
randomx_program_loop_end PROC
nop
randomx_program_loop_end ENDP
ALIGN 64
randomx_program_epilogue PROC
include asm/program_epilogue_win64.inc
randomx_program_epilogue ENDP
ALIGN 64
randomx_program_read PROC
include asm/program_read.inc
randomx_program_read ENDP
ALIGN 64
randomx_program_end PROC
nop
randomx_program_end ENDP
ALIGN 8
randomx_program_transform PROC
include asm/program_transform_address.inc
randomx_program_transform ENDP
_RANDOMX_JITX86_STATIC ENDS
ENDIF

View file

@ -18,10 +18,15 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
*/
extern "C" {
void randomx_program_prologue();
void randomx_program_begin();
void randomx_program_epilogue();
void randomx_program_transform();
void randomx_program_read();
void randomx_program_end();
void randomx_program_prologue();
void randomx_loop_begin();
void randomx_program_load_int();
void randomx_program_load_flt();
void randomx_program_start();
void randomx_program_read_dataset();
void randomx_program_store_int();
void randomx_program_store_flt();
void randomx_program_loop_end();
void randomx_program_epilogue();
void randomx_program_end();
}

File diff suppressed because it is too large Load diff

View file

@ -30,16 +30,10 @@ namespace RandomX {
class JitCompilerX86;
typedef void(JitCompilerX86::*InstructionGeneratorX86)(Instruction&, int);
typedef void(JitCompilerX86::*InstructionGeneratorX86)(Instruction&);
constexpr uint32_t CodeSize = 64 * 1024;
struct CallOffset {
CallOffset(int32_t p, int32_t i) : pos(p), index(i) {}
int32_t pos;
int32_t index;
};
class JitCompilerX86 {
public:
JitCompilerX86();
@ -55,66 +49,82 @@ namespace RandomX {
static InstructionGeneratorX86 engine[256];
uint8_t* code;
int32_t codePos;
std::vector<int32_t> instructionOffsets;
std::vector<CallOffset> callOffsets;
void gena(Instruction&);
void genar(Instruction&);
void genaf(Instruction&);
void genbiashift(Instruction&, uint16_t, uint16_t);
void genbia(Instruction&, uint16_t, uint16_t);
void genbia32(Instruction&, uint16_t, uint8_t);
void genbf(Instruction&, uint8_t);
void scratchpadStoreR(Instruction&, uint32_t, bool);
void scratchpadStoreF(Instruction&, int, uint32_t, bool);
void gencr(Instruction&, bool);
void gencf(Instruction&);
void generateCode(Instruction&, int);
void fixCallOffsets();
void genAddressReg(Instruction&, bool);
void genAddressRegDst(Instruction&, bool);
void genAddressImm(Instruction&);
void genSIB(int scale, int index, int base);
void generateCode(Instruction&);
// Appends a single byte to the code buffer and advances codePos by one.
void emitByte(uint8_t val) {
	code[codePos++] = val;
}
template<typename T>
void emit(T val) {
*reinterpret_cast<T*>(code + codePos) = val;
codePos += sizeof(T);
void emit32(uint32_t val) {
code[codePos + 0] = val;
code[codePos + 1] = val >> 8;
code[codePos + 2] = val >> 16;
code[codePos + 3] = val >> 24;
codePos += 4;
}
void h_ADD_64(Instruction&, int);
void h_ADD_32(Instruction&, int);
void h_SUB_64(Instruction&, int);
void h_SUB_32(Instruction&, int);
void h_MUL_64(Instruction&, int);
void h_MULH_64(Instruction&, int);
void h_MUL_32(Instruction&, int);
void h_IMUL_32(Instruction&, int);
void h_IMULH_64(Instruction&, int);
void h_DIV_64(Instruction&, int);
void h_IDIV_64(Instruction&, int);
void h_AND_64(Instruction&, int);
void h_AND_32(Instruction&, int);
void h_OR_64(Instruction&, int);
void h_OR_32(Instruction&, int);
void h_XOR_64(Instruction&, int);
void h_XOR_32(Instruction&, int);
void h_SHL_64(Instruction&, int);
void h_SHR_64(Instruction&, int);
void h_SAR_64(Instruction&, int);
void h_ROL_64(Instruction&, int);
void h_ROR_64(Instruction&, int);
void h_FPADD(Instruction&, int);
void h_FPSUB(Instruction&, int);
void h_FPMUL(Instruction&, int);
void h_FPDIV(Instruction&, int);
void h_FPSQRT(Instruction&, int);
void h_FPROUND(Instruction&, int);
void h_JUMP(Instruction&, int);
void h_CALL(Instruction&, int);
void h_RET(Instruction&, int);
void h_NOP(Instruction&, int);
// Appends a 64-bit value to the code buffer in little-endian byte order
// (least significant byte first) and advances codePos by 8.
void emit64(uint64_t val) {
	for (int byteIndex = 0; byteIndex < 8; ++byteIndex) {
		code[codePos + byteIndex] = static_cast<uint8_t>(val >> (8 * byteIndex));
	}
	codePos += 8;
}
// Appends the N bytes of a fixed-size byte array to the code buffer and
// advances codePos by N. The array size is deduced from the argument.
// No bounds check against the size of the code buffer is performed here.
template<size_t N>
void emit(const uint8_t (&src)[N]) {
	uint8_t* dst = code + codePos;
	// The counter is size_t to match N; an int counter would make the loop
	// condition a signed/unsigned comparison.
	for (size_t i = 0; i < N; ++i) {
		dst[i] = src[i];
	}
	codePos += N;
}
void h_IADD_R(Instruction&);
void h_IADD_M(Instruction&);
void h_IADD_RC(Instruction&);
void h_ISUB_R(Instruction&);
void h_ISUB_M(Instruction&);
void h_IMUL_9C(Instruction&);
void h_IMUL_R(Instruction&);
void h_IMUL_M(Instruction&);
void h_IMULH_R(Instruction&);
void h_IMULH_M(Instruction&);
void h_ISMULH_R(Instruction&);
void h_ISMULH_M(Instruction&);
void h_IDIV_C(Instruction&);
void h_ISDIV_C(Instruction&);
void h_INEG_R(Instruction&);
void h_IXOR_R(Instruction&);
void h_IXOR_M(Instruction&);
void h_IROR_R(Instruction&);
void h_IROL_R(Instruction&);
void h_FPSWAP_R(Instruction&);
void h_FPADD_R(Instruction&);
void h_FPADD_M(Instruction&);
void h_FPSUB_R(Instruction&);
void h_FPSUB_M(Instruction&);
void h_FPNEG_R(Instruction&);
void h_FPMUL_R(Instruction&);
void h_FPMUL_M(Instruction&);
void h_FPDIV_R(Instruction&);
void h_FPDIV_M(Instruction&);
void h_FPSQRT_R(Instruction&);
void h_COND_R(Instruction&);
void h_COND_M(Instruction&);
void h_CFROUND(Instruction&);
void h_ISTORE(Instruction&);
void h_FSTORE(Instruction&);
};
}

View file

@ -1,9 +1,5 @@
;# unroll VM stack
mov rsp, rdi
;# save VM register values
pop rcx
pop rcx
mov qword ptr [rcx+0], r8
mov qword ptr [rcx+8], r9
mov qword ptr [rcx+16], r10
@ -12,12 +8,12 @@
mov qword ptr [rcx+40], r13
mov qword ptr [rcx+48], r14
mov qword ptr [rcx+56], r15
movapd xmmword ptr [rcx+64], xmm8
movapd xmmword ptr [rcx+80], xmm9
movapd xmmword ptr [rcx+96], xmm2
movapd xmmword ptr [rcx+112], xmm3
movdqa xmmword ptr [rcx+64], xmm0
movdqa xmmword ptr [rcx+80], xmm1
movdqa xmmword ptr [rcx+96], xmm2
movdqa xmmword ptr [rcx+112], xmm3
lea rcx, [rcx+64]
movapd xmmword ptr [rcx+64], xmm4
movapd xmmword ptr [rcx+80], xmm5
movapd xmmword ptr [rcx+96], xmm6
movapd xmmword ptr [rcx+112], xmm7
movdqa xmmword ptr [rcx+64], xmm4
movdqa xmmword ptr [rcx+80], xmm5
movdqa xmmword ptr [rcx+96], xmm6
movdqa xmmword ptr [rcx+112], xmm7

View file

@ -1,6 +1,12 @@
include program_epilogue_store.inc
;# restore callee-saved registers - Microsoft x64 calling convention
movdqu xmm15, xmmword ptr [rsp]
movdqu xmm14, xmmword ptr [rsp+16]
movdqu xmm13, xmmword ptr [rsp+32]
movdqu xmm12, xmmword ptr [rsp+48]
movdqu xmm11, xmmword ptr [rsp+64]
add rsp, 80
movdqu xmm10, xmmword ptr [rsp]
movdqu xmm9, xmmword ptr [rsp+16]
movdqu xmm8, xmmword ptr [rsp+32]
@ -17,4 +23,4 @@
pop rbx
;# program finished
ret 0
ret

View file

@ -0,0 +1,14 @@
;# Load xmm0-xmm7 from one 64-byte line addressed by eax.
;# 262080 = 0x3FFC0: keeps bits 6..17 of eax, i.e. a 64-byte aligned offset below 256 KiB
and eax, 262080
;# rcx = rsi + offset (the program prologue loads the scratchpad pointer into rsi)
lea rcx, [rsi+rax]
;# each cvtdq2pd reads 8 bytes (two packed signed 32-bit integers) and converts them to two doubles
cvtdq2pd xmm0, qword ptr [rcx+0]
cvtdq2pd xmm1, qword ptr [rcx+8]
cvtdq2pd xmm2, qword ptr [rcx+16]
cvtdq2pd xmm3, qword ptr [rcx+24]
cvtdq2pd xmm4, qword ptr [rcx+32]
cvtdq2pd xmm5, qword ptr [rcx+40]
cvtdq2pd xmm6, qword ptr [rcx+48]
cvtdq2pd xmm7, qword ptr [rcx+56]
;# bitwise AND of xmm4-xmm7 with xmm14
;# NOTE(review): xmm14 presumably holds absMask (sign bits cleared), set up outside this fragment -- confirm
andps xmm4, xmm14
andps xmm5, xmm14
andps xmm6, xmm14
andps xmm7, xmm14

View file

@ -0,0 +1,10 @@
;# XOR one 64-byte line addressed by eax into the integer registers r8-r15.
;# 262080 = 0x3FFC0: keeps bits 6..17 of eax, i.e. a 64-byte aligned offset below 256 KiB
and eax, 262080
;# rcx = rsi + offset (the program prologue loads the scratchpad pointer into rsi)
lea rcx, [rsi+rax]
;# one qword of the line per register
xor r8, qword ptr [rcx+0]
xor r9, qword ptr [rcx+8]
xor r10, qword ptr [rcx+16]
xor r11, qword ptr [rcx+24]
xor r12, qword ptr [rcx+32]
xor r13, qword ptr [rcx+40]
xor r14, qword ptr [rcx+48]
xor r15, qword ptr [rcx+56]

View file

@ -7,13 +7,14 @@
push r15
;# function arguments
mov rbx, rcx ;# loop counter
push rdi ;# RegisterFile& registerFile
mov rbp, qword ptr [rsi] ;# "mx", "ma"
mov rax, qword ptr [rsi+8] ;# uint8_t* dataset
push rax
mov rsi, rdx ;# convertible_t* scratchpad
mov rcx, rdi
mov rbp, qword ptr [rsi] ;# "mx", "ma"
mov eax, ebp ;# "mx"
mov rdi, qword ptr [rsi+8] ;# uint8_t* dataset
mov rsi, rdx ;# convertible_t* scratchpad
#include "program_prologue_load.inc"
jmp randomx_program_begin
jmp DECL(randomx_loop_begin)

View file

@ -1,27 +1,20 @@
mov rdi, rsp ;# beginning of VM stack
mov ebx, 262145 ;# number of VM instructions to execute + 1
;# zero integer registers
xor r8, r8
xor r9, r9
xor r10, r10
xor r11, r11
xor r12, r12
xor r13, r13
xor r14, r14
xor r15, r15
xorps xmm10, xmm10
cmpeqpd xmm10, xmm10
psrlq xmm10, 1 ;# mask for absolute value = 0x7fffffffffffffff7fffffffffffffff
;# load constant registers
lea rcx, [rcx+120]
movapd xmm8, xmmword ptr [rcx+72]
movapd xmm9, xmmword ptr [rcx+88]
movapd xmm10, xmmword ptr [rcx+104]
movapd xmm11, xmmword ptr [rcx+120]
movapd xmm13, xmmword ptr [minDbl]
movapd xmm14, xmmword ptr [absMask]
movapd xmm15, xmmword ptr [signMask]
;# load integer registers
mov r8, qword ptr [rcx+0]
mov r9, qword ptr [rcx+8]
mov r10, qword ptr [rcx+16]
mov r11, qword ptr [rcx+24]
mov r12, qword ptr [rcx+32]
mov r13, qword ptr [rcx+40]
mov r14, qword ptr [rcx+48]
mov r15, qword ptr [rcx+56]
;# load floating point registers
movapd xmm8, xmmword ptr [rcx+64]
movapd xmm9, xmmword ptr [rcx+80]
movapd xmm2, xmmword ptr [rcx+96]
movapd xmm3, xmmword ptr [rcx+112]
lea rcx, [rcx+64]
movapd xmm4, xmmword ptr [rcx+64]
movapd xmm5, xmmword ptr [rcx+80]
movapd xmm6, xmmword ptr [rcx+96]
movapd xmm7, xmmword ptr [rcx+112]

View file

@ -13,14 +13,21 @@
movdqu xmmword ptr [rsp+32], xmm8
movdqu xmmword ptr [rsp+16], xmm9
movdqu xmmword ptr [rsp+0], xmm10
sub rsp, 80
movdqu xmmword ptr [rsp+64], xmm11
movdqu xmmword ptr [rsp+48], xmm12
movdqu xmmword ptr [rsp+32], xmm13
movdqu xmmword ptr [rsp+16], xmm14
movdqu xmmword ptr [rsp+0], xmm15
;# function arguments
push rcx ;# RegisterFile& registerFile
mov rbp, qword ptr [rdx] ;# "mx", "ma"
mov rax, qword ptr [rdx+8] ;# uint8_t* dataset
push rax
mov rsi, r8 ;# convertible_t* scratchpad
; function arguments
push rcx ; RegisterFile& registerFile
mov rbp, qword ptr [rdx] ; "mx", "ma"
mov eax, ebp ; "mx"
mov rdi, qword ptr [rdx+8] ; uint8_t* dataset
mov rsi, r8 ; convertible_t* scratchpad
mov rbx, r9 ; loop counter
include program_prologue_load.inc
jmp randomx_program_begin
jmp randomx_loop_begin

View file

@ -1,20 +0,0 @@
db 0, 0, 0, 0 ;# TransformAddress placeholder
mov rcx, qword ptr [rdi] ;# load the dataset address
xor rbp, rax ;# modify "mx"
;# prefetch cacheline "mx"
and rbp, -64 ;# align "mx" to the start of a cache line
mov edx, ebp ;# edx = mx
prefetchnta byte ptr [rcx+rdx]
;# read cacheline "ma"
ror rbp, 32 ;# swap "ma" and "mx"
mov edx, ebp ;# edx = ma
lea rcx, [rcx+rdx] ;# dataset cache line
xor r8, qword ptr [rcx+0]
xor r9, qword ptr [rcx+8]
xor r10, qword ptr [rcx+16]
xor r11, qword ptr [rcx+24]
xor r12, qword ptr [rcx+32]
xor r13, qword ptr [rcx+40]
xor r14, qword ptr [rcx+48]
xor r15, qword ptr [rcx+56]
ret

View file

@ -0,0 +1,16 @@
;# Dataset access: prefetch the line at "mx", then XOR the line at "ma" into r8-r15.
;# rbp holds "mx" and "ma" as two 32-bit halves; rdi is used as the dataset base
;# (the program prologue loads the dataset pointer into rdi).
xor rbp, rax ;# modify "mx"
and rbp, -64 ;# align "mx" to the start of a cache line
mov edx, ebp ;# edx = mx
prefetchnta byte ptr [rdi+rdx]
ror rbp, 32 ;# swap "ma" and "mx"
mov edx, ebp ;# edx = ma
lea rcx, [rdi+rdx] ;# dataset cache line
;# XOR the eight qwords of the dataset line into the integer registers
xor r8, qword ptr [rcx+0]
xor r9, qword ptr [rcx+8]
xor r10, qword ptr [rcx+16]
xor r11, qword ptr [rcx+24]
xor r12, qword ptr [rcx+32]
xor r13, qword ptr [rcx+40]
xor r14, qword ptr [rcx+48]
xor r15, qword ptr [rcx+56]

View file

@ -0,0 +1,11 @@
;# Multiply xmm0-xmm3 by xmm4-xmm7 and store the four results to one 64-byte line addressed by eax.
;# 262080 = 0x3FFC0: keeps bits 6..17 of eax, i.e. a 64-byte aligned offset below 256 KiB
and eax, 262080
;# rcx = rsi + offset (the program prologue loads the scratchpad pointer into rsi)
lea rcx, [rsi+rax]
;# packed double multiply: xmm0 *= xmm4, xmm1 *= xmm5, xmm2 *= xmm6, xmm3 *= xmm7
mulpd xmm0, xmm4
mulpd xmm1, xmm5
mulpd xmm2, xmm6
mulpd xmm3, xmm7
;# movapd requires a 16-byte aligned address; the offset is a multiple of 64
;# NOTE(review): this relies on the base in rsi being at least 16-byte aligned -- confirm
movapd xmmword ptr [rcx+0], xmm0
movapd xmmword ptr [rcx+16], xmm1
movapd xmmword ptr [rcx+32], xmm2
movapd xmmword ptr [rcx+48], xmm3

View file

@ -0,0 +1,10 @@
;# Store the integer registers r8-r15 to one 64-byte line addressed by eax.
;# 262080 = 0x3FFC0: keeps bits 6..17 of eax, i.e. a 64-byte aligned offset below 256 KiB
and eax, 262080
;# rcx = rsi + offset (the program prologue loads the scratchpad pointer into rsi)
lea rcx, [rsi+rax]
;# one qword of the line per register
mov qword ptr [rcx+0], r8
mov qword ptr [rcx+8], r9
mov qword ptr [rcx+16], r10
mov qword ptr [rcx+24], r11
mov qword ptr [rcx+32], r12
mov qword ptr [rcx+40], r13
mov qword ptr [rcx+48], r14
mov qword ptr [rcx+56], r15

View file

@ -0,0 +1,6 @@
;# 16-byte constants, two identical 64-bit lanes each (bytes listed in little-endian order).
;# minDbl: 0x0010000000000000 per lane = smallest positive normal double (2^-1022)
minDbl:
db 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 16, 0
;# absMask: 0x7FFFFFFFFFFFFFFF per lane; every bit set except the sign bit
absMask:
db 255, 255, 255, 255, 255, 255, 255, 127, 255, 255, 255, 255, 255, 255, 255, 127
;# signMask: 0x8000000000000000 per lane; only the sign bit set
signMask:
db 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0, 0, 128

View file

@ -81,6 +81,8 @@ namespace RandomX {
constexpr uint32_t ScratchpadL3 = ScratchpadSize / sizeof(convertible_t);
constexpr int ScratchpadL1Mask = (ScratchpadL1 - 1) * 8;
constexpr int ScratchpadL2Mask = (ScratchpadL2 - 1) * 8;
constexpr int ScratchpadL1Mask16 = (ScratchpadL1 / 2 - 1) * 16;
constexpr int ScratchpadL2Mask16 = (ScratchpadL2 / 2 - 1) * 16;
constexpr uint32_t TransformationCount = 90;
constexpr int RegistersCount = 8;
@ -129,7 +131,7 @@ namespace RandomX {
typedef void(*DatasetReadFunc)(addr_t, MemoryRegisters&, RegisterFile&);
typedef void(*ProgramFunc)(RegisterFile&, MemoryRegisters&, convertible_t*);
typedef void(*ProgramFunc)(RegisterFile&, MemoryRegisters&, convertible_t*, uint64_t);
extern "C" {
void executeProgram(RegisterFile&, MemoryRegisters&, convertible_t*, uint64_t);

View file

@ -21,14 +21,6 @@ _RANDOMX_EXECUTE_PROGRAM SEGMENT PAGE READ EXECUTE
PUBLIC executeProgram
ALIGN 16
minDbl:
db 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 16, 0
absMask:
db 255, 255, 255, 255, 255, 255, 255, 127, 255, 255, 255, 255, 255, 255, 255, 127
signMask:
db 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0, 0, 128
executeProgram PROC
; REGISTER ALLOCATION:
; rax -> temporary
@ -114,6 +106,17 @@ executeProgram PROC
movapd xmm14, xmmword ptr [absMask]
movapd xmm15, xmmword ptr [signMask]
jmp program_begin
ALIGN 64
minDbl:
db 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 16, 0
absMask:
db 255, 255, 255, 255, 255, 255, 255, 127, 255, 255, 255, 255, 255, 255, 255, 127
signMask:
db 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0, 0, 128
ALIGN 64
program_begin:
xor eax, r8d ;# read address register 1
and eax, 262080

View file

@ -22,21 +22,21 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
//Integer
#define WT_IADD_R 10
#define WT_IADD_M 3
#define WT_IADD_RC 12
#define WT_IADD_RC 10
#define WT_ISUB_R 10
#define WT_ISUB_M 3
#define WT_IMUL_9C 12
#define WT_IMUL_R 24
#define WT_IMUL_M 8
#define WT_IMUL_9C 10
#define WT_IMUL_R 20
#define WT_IMUL_M 6
#define WT_IMULH_R 6
#define WT_IMULH_M 2
#define WT_ISMULH_R 6
#define WT_ISMULH_M 2
#define WT_IDIV_C 4
#define WT_ISDIV_C 2
#define WT_INEG_R 4
#define WT_IXOR_R 15
#define WT_IXOR_M 5
#define WT_ISDIV_C 4
#define WT_INEG_R 2
#define WT_IXOR_R 12
#define WT_IXOR_M 4
#define WT_IROR_R 10
#define WT_IROL_R 10
@ -58,10 +58,14 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
#define WT_FPSQRT_R 6
//Control
#define WT_COND_R 15
#define WT_COND_M 5
#define WT_COND_R 12
#define WT_COND_M 4
#define WT_CFROUND 1
//Store
#define WT_ISTORE 12
#define WT_FSTORE 6
#define WT_NOP 0
constexpr int wtSum = WT_IADD_R + WT_IADD_M + WT_IADD_RC + WT_ISUB_R + \
@ -70,7 +74,7 @@ WT_IMULH_M + WT_ISMULH_R + WT_ISMULH_M + WT_IDIV_C + WT_ISDIV_C + \
WT_INEG_R + WT_IXOR_R + WT_IXOR_M + WT_IROR_R + WT_IROL_R + \
WT_FPSWAP_R + WT_FPADD_R + WT_FPADD_M + WT_FPSUB_R + WT_FPSUB_M + \
WT_FPNEG_R + WT_FPMUL_R + WT_FPMUL_M + WT_FPDIV_R + WT_FPDIV_M + \
WT_FPSQRT_R + WT_COND_R + WT_COND_M + WT_CFROUND + WT_NOP;
WT_FPSQRT_R + WT_COND_R + WT_COND_M + WT_CFROUND + WT_ISTORE + WT_FSTORE + WT_NOP;
static_assert(wtSum == 256,
"Sum of instruction weights must be 256");
@ -116,3 +120,40 @@ static_assert(wtSum == 256,
#define REPN(x,N) REPNX(x,N)
#define NUM(x) x
#define WT(x) NUM(WT_##x)
// REPCASE<N>(x) expands to N consecutive "case __COUNTER__:" labels
// (REPCASE0 expands to nothing). The argument x is only forwarded and does
// not appear in the expansion. Each expansion of __COUNTER__ yields the next
// integer, so successive uses produce successive case values.
// NOTE: __COUNTER__ is a compiler extension, not standard C++, and its value
// is shared by every use in the translation unit.
#define REPCASE0(x)
#define REPCASE1(x) case __COUNTER__:
#define REPCASE2(x) REPCASE1(x) case __COUNTER__:
#define REPCASE3(x) REPCASE2(x) case __COUNTER__:
#define REPCASE4(x) REPCASE3(x) case __COUNTER__:
#define REPCASE5(x) REPCASE4(x) case __COUNTER__:
#define REPCASE6(x) REPCASE5(x) case __COUNTER__:
#define REPCASE7(x) REPCASE6(x) case __COUNTER__:
#define REPCASE8(x) REPCASE7(x) case __COUNTER__:
#define REPCASE9(x) REPCASE8(x) case __COUNTER__:
#define REPCASE10(x) REPCASE9(x) case __COUNTER__:
#define REPCASE11(x) REPCASE10(x) case __COUNTER__:
#define REPCASE12(x) REPCASE11(x) case __COUNTER__:
#define REPCASE13(x) REPCASE12(x) case __COUNTER__:
#define REPCASE14(x) REPCASE13(x) case __COUNTER__:
#define REPCASE15(x) REPCASE14(x) case __COUNTER__:
#define REPCASE16(x) REPCASE15(x) case __COUNTER__:
#define REPCASE17(x) REPCASE16(x) case __COUNTER__:
#define REPCASE18(x) REPCASE17(x) case __COUNTER__:
#define REPCASE19(x) REPCASE18(x) case __COUNTER__:
#define REPCASE20(x) REPCASE19(x) case __COUNTER__:
#define REPCASE21(x) REPCASE20(x) case __COUNTER__:
#define REPCASE22(x) REPCASE21(x) case __COUNTER__:
#define REPCASE23(x) REPCASE22(x) case __COUNTER__:
#define REPCASE24(x) REPCASE23(x) case __COUNTER__:
#define REPCASE25(x) REPCASE24(x) case __COUNTER__:
#define REPCASE26(x) REPCASE25(x) case __COUNTER__:
#define REPCASE27(x) REPCASE26(x) case __COUNTER__:
#define REPCASE28(x) REPCASE27(x) case __COUNTER__:
#define REPCASE29(x) REPCASE28(x) case __COUNTER__:
#define REPCASE30(x) REPCASE29(x) case __COUNTER__:
#define REPCASE31(x) REPCASE30(x) case __COUNTER__:
#define REPCASE32(x) REPCASE31(x) case __COUNTER__:
// Two-level indirection so that N is macro-expanded before being pasted onto REPCASE.
#define REPCASENX(x,N) REPCASE##N(x)
#define REPCASEN(x,N) REPCASENX(x,N)
// CASE_REP(NAME) emits WT(NAME) case labels; WT(NAME) must expand to a literal in 0..32.
#define CASE_REP(x) REPCASEN(x, WT(x))

View file

@ -174,7 +174,7 @@ void mine(RandomX::VirtualMachine* vm, std::atomic<int>& atomicNonce, AtomicHash
for (int chain = 0; chain < 16; ++chain) {
vm->initializeProgram(hash);
int segment = hash[3] & 3;
vm->setScratchpad(scratchpad);// +segment * RandomX::ScratchpadSize / 4);
vm->setScratchpad(scratchpad + segment * RandomX::ScratchpadSize / 4);
vm->execute();
vm->getResult(nullptr, 0, hash);
}

File diff suppressed because it is too large Load diff