mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2024-08-15 00:23:14 +00:00
SuperscalarHash JIT compiler
(unfinished)
This commit is contained in:
parent
690707ef49
commit
77dbe14658
18 changed files with 453 additions and 135 deletions
|
@ -97,14 +97,12 @@ namespace RandomX {
|
|||
}
|
||||
|
||||
//1 uOP
|
||||
void AssemblyGeneratorX86::h_IADD_R(Instruction& instr, int i) {
|
||||
void AssemblyGeneratorX86::h_IADD_RS(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
asmCode << "\tadd " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
|
||||
}
|
||||
else {
|
||||
asmCode << "\tadd " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl;
|
||||
}
|
||||
if(instr.dst == 5)
|
||||
asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.mod % 4)) << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl;
|
||||
else
|
||||
asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.mod % 4)) << "]" << std::endl;
|
||||
traceint(instr);
|
||||
}
|
||||
|
||||
|
@ -517,7 +515,7 @@ namespace RandomX {
|
|||
|
||||
InstructionGenerator AssemblyGeneratorX86::engine[256] = {
|
||||
//Integer
|
||||
INST_HANDLE(IADD_R)
|
||||
INST_HANDLE(IADD_RS)
|
||||
INST_HANDLE(IADD_M)
|
||||
INST_HANDLE(IADD_RC)
|
||||
INST_HANDLE(ISUB_R)
|
||||
|
|
|
@ -68,7 +68,7 @@ namespace RandomX {
|
|||
void traceflt(Instruction&);
|
||||
void tracenop(Instruction&);
|
||||
|
||||
void h_IADD_R(Instruction&, int);
|
||||
void h_IADD_RS(Instruction&, int);
|
||||
void h_IADD_M(Instruction&, int);
|
||||
void h_IADD_RC(Instruction&, int);
|
||||
void h_ISUB_R(Instruction&, int);
|
||||
|
|
|
@ -40,9 +40,9 @@ namespace RandomX {
|
|||
os << "L3" << "[" << (getImm32() & ScratchpadL3Mask) << "]";
|
||||
}
|
||||
|
||||
void Instruction::h_IADD_R(std::ostream& os) const {
|
||||
void Instruction::h_IADD_RS(std::ostream& os) const {
|
||||
if (src != dst) {
|
||||
os << "r" << (int)dst << ", r" << (int)src << std::endl;
|
||||
os << "r" << (int)dst << ", r" << (int)src << ", LSH " << (int)(mod % 4) << std::endl;
|
||||
}
|
||||
else {
|
||||
os << "r" << (int)dst << ", " << (int32_t)getImm32() << std::endl;
|
||||
|
@ -302,13 +302,13 @@ namespace RandomX {
|
|||
}
|
||||
|
||||
void Instruction::h_COND_R(std::ostream& os) const {
|
||||
os << "r" << (int)dst << ", " << condition((mod >> 2) & 7) << "(r" << (int)src << ", " << (int32_t)getImm32() << "), " << (int)(mod >> 5) << std::endl;
|
||||
os << "r" << (int)dst << ", " << condition((mod >> 2) & 7) << "(r" << (int)src << ", " << (int32_t)getImm32() << "), LSH " << (int)(mod >> 5) << std::endl;
|
||||
}
|
||||
|
||||
void Instruction::h_COND_M(std::ostream& os) const {
|
||||
os << "r" << (int)dst << ", " << condition((mod >> 2) & 7) << "(";
|
||||
genAddressReg(os);
|
||||
os << ", " << (int32_t)getImm32() << "), " << (int)(mod >> 5) << std::endl;
|
||||
os << ", " << (int32_t)getImm32() << "), LSH " << (int)(mod >> 5) << std::endl;
|
||||
}
|
||||
|
||||
void Instruction::h_ISTORE(std::ostream& os) const {
|
||||
|
@ -333,7 +333,7 @@ namespace RandomX {
|
|||
|
||||
const char* Instruction::names[256] = {
|
||||
//Integer
|
||||
INST_NAME(IADD_R)
|
||||
INST_NAME(IADD_RS)
|
||||
INST_NAME(IADD_M)
|
||||
INST_NAME(IADD_RC)
|
||||
INST_NAME(ISUB_R)
|
||||
|
@ -379,7 +379,7 @@ namespace RandomX {
|
|||
|
||||
InstructionVisualizer Instruction::engine[256] = {
|
||||
//Integer
|
||||
INST_HANDLE(IADD_R)
|
||||
INST_HANDLE(IADD_RS)
|
||||
INST_HANDLE(IADD_M)
|
||||
INST_HANDLE(IADD_RC)
|
||||
INST_HANDLE(ISUB_R)
|
||||
|
|
|
@ -98,7 +98,7 @@ namespace RandomX {
|
|||
void genAddressImm(std::ostream& os) const;
|
||||
void genAddressRegDst(std::ostream&) const;
|
||||
|
||||
void h_IADD_R(std::ostream&) const;
|
||||
void h_IADD_RS(std::ostream&) const;
|
||||
void h_IADD_M(std::ostream&) const;
|
||||
void h_IADD_RC(std::ostream&) const;
|
||||
void h_ISUB_R(std::ostream&) const;
|
||||
|
|
|
@ -442,7 +442,7 @@ namespace RandomX {
|
|||
auto& instr = program(i);
|
||||
auto& ibc = byteCode[i];
|
||||
switch (instr.opcode) {
|
||||
CASE_REP(IADD_R) {
|
||||
CASE_REP(IADD_RS) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::IADD_R;
|
||||
|
|
|
@ -26,9 +26,14 @@ PUBLIC randomx_program_start
|
|||
PUBLIC randomx_program_read_dataset
|
||||
PUBLIC randomx_program_read_dataset_light
|
||||
PUBLIC randomx_program_read_dataset_light_sub
|
||||
PUBLIC randomx_dataset_init
|
||||
PUBLIC randomx_program_loop_store
|
||||
PUBLIC randomx_program_loop_end
|
||||
PUBLIC randomx_program_epilogue
|
||||
PUBLIC randomx_sshash_load
|
||||
PUBLIC randomx_sshash_prefetch
|
||||
PUBLIC randomx_sshash_end
|
||||
PUBLIC randomx_sshash_init
|
||||
PUBLIC randomx_program_end
|
||||
|
||||
ALIGN 64
|
||||
|
@ -75,11 +80,93 @@ randomx_program_read_dataset_light_sub PROC
|
|||
include asm/squareHash.inc
|
||||
randomx_program_read_dataset_light_sub ENDP
|
||||
|
||||
ALIGN 64
|
||||
;# randomx_dataset_init: fills consecutive 64-byte dataset blocks.
;# Inputs as read below: rcx = cache, rdx = dataset, r8 = first block index,
;# r9 = max. block index. The loop body runs at least once and repeats while
;# the block index is below the max. block index (unsigned compare).
;# For every block it calls the code located 32768 bytes after the start of
;# this procedure and then stores r8-r15 as the 64-byte block.
randomx_dataset_init PROC
|
||||
;# preserve the registers used here; they are restored in reverse order before ret
push rbx
|
||||
push rbp
|
||||
push rdi
|
||||
push rsi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
mov rdi, rcx ;# cache
|
||||
mov rsi, rdx ;# dataset
|
||||
mov rbp, r8 ;# block index
|
||||
push r9 ;# max. block index
|
||||
init_block_loop:
|
||||
;# prefetch the destination block with intent to write
prefetchw byte ptr [rsi]
|
||||
;# rbx carries the current block index into the called code
;# (randomx_sshash_init starts with lea r8, [rbx+1])
mov rbx, rbp
|
||||
;# Hand-encoded near call: opcode byte followed by a 32-bit relative displacement.
;# 'distance' is the offset of the instruction after the call from the start of
;# this procedure, so the call target is randomx_dataset_init + 32768.
;# NOTE(review): presumably only meaningful once this routine has been copied to
;# the start of the JIT code buffer, where 32768 matches superScalarHashOffset - confirm.
db 232 ;# 0xE8 = call
|
||||
dd 32768 - distance
|
||||
distance equ $ - offset randomx_dataset_init
|
||||
;# store the eight result registers r8-r15 as one 64-byte block
mov qword ptr [rsi+0], r8
|
||||
mov qword ptr [rsi+8], r9
|
||||
mov qword ptr [rsi+16], r10
|
||||
mov qword ptr [rsi+24], r11
|
||||
mov qword ptr [rsi+32], r12
|
||||
mov qword ptr [rsi+40], r13
|
||||
mov qword ptr [rsi+48], r14
|
||||
mov qword ptr [rsi+56], r15
|
||||
;# advance to the next block index and the next 64-byte output slot
add rbp, 1
|
||||
add rsi, 64
|
||||
;# [rsp] holds the max. block index pushed from r9 above
cmp rbp, qword ptr [rsp]
|
||||
jb init_block_loop
|
||||
;# drop the saved max. block index from the stack
pop r9
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rsi
|
||||
pop rdi
|
||||
pop rbp
|
||||
pop rbx
|
||||
ret
|
||||
randomx_dataset_init ENDP
|
||||
|
||||
ALIGN 64
|
||||
randomx_program_epilogue PROC
|
||||
include asm/program_epilogue_win64.inc
|
||||
randomx_program_epilogue ENDP
|
||||
|
||||
ALIGN 64
|
||||
randomx_sshash_load PROC
|
||||
include asm/program_sshash_load.inc
|
||||
randomx_sshash_load ENDP
|
||||
|
||||
randomx_sshash_prefetch PROC
|
||||
include asm/program_sshash_prefetch.inc
|
||||
randomx_sshash_prefetch ENDP
|
||||
|
||||
randomx_sshash_end PROC
|
||||
nop
|
||||
randomx_sshash_end ENDP
|
||||
|
||||
ALIGN 64
|
||||
;# randomx_sshash_init: initializes r8-r15 from the value in rbx.
;# r8 = (rbx + 1) * [r0_mul]; r9-r15 = [rN_add] XOR r8 for N = 1..7.
;# NOTE(review): r0_mul and r1_add..r7_add are not defined in this view; presumably
;# they come from asm/program_sshash_constants.inc - confirm.
randomx_sshash_init PROC
|
||||
lea r8, [rbx+1]
|
||||
;# same include as the body of randomx_sshash_prefetch
include asm/program_sshash_prefetch.inc
|
||||
imul r8, qword ptr [r0_mul]
|
||||
mov r9, qword ptr [r1_add]
|
||||
xor r9, r8
|
||||
mov r10, qword ptr [r2_add]
|
||||
xor r10, r8
|
||||
mov r11, qword ptr [r3_add]
|
||||
xor r11, r8
|
||||
mov r12, qword ptr [r4_add]
|
||||
xor r12, r8
|
||||
mov r13, qword ptr [r5_add]
|
||||
xor r13, r8
|
||||
mov r14, qword ptr [r6_add]
|
||||
xor r14, r8
|
||||
mov r15, qword ptr [r7_add]
|
||||
xor r15, r8
|
||||
;# jump over the constants that are placed between this procedure and randomx_program_end
jmp randomx_program_end
|
||||
randomx_sshash_init ENDP
|
||||
|
||||
ALIGN 64
|
||||
include asm/program_sshash_constants.inc
|
||||
|
||||
ALIGN 64
|
||||
randomx_program_end PROC
|
||||
nop
|
||||
|
|
|
@ -27,6 +27,11 @@ extern "C" {
|
|||
void randomx_program_loop_store();
|
||||
void randomx_program_loop_end();
|
||||
void randomx_program_read_dataset_light_sub();
|
||||
void randomx_dataset_init();
|
||||
void randomx_program_epilogue();
|
||||
void randomx_sshash_load();
|
||||
void randomx_sshash_prefetch();
|
||||
void randomx_sshash_end();
|
||||
void randomx_sshash_init();
|
||||
void randomx_program_end();
|
||||
}
|
|
@ -88,29 +88,40 @@ namespace RandomX {
|
|||
|
||||
#include "JitCompilerX86-static.hpp"
|
||||
|
||||
#define NOP_TEST true
|
||||
|
||||
const uint8_t* codePrologue = (uint8_t*)&randomx_program_prologue;
|
||||
const uint8_t* codeLoopBegin = (uint8_t*)&randomx_program_loop_begin;
|
||||
const uint8_t* codeLoopLoad = (uint8_t*)&randomx_program_loop_load;
|
||||
const uint8_t* codeProgamStart = (uint8_t*)&randomx_program_start;
|
||||
const uint8_t* codeReadDataset = (uint8_t*)&randomx_program_read_dataset;
|
||||
const uint8_t* codeReadDatasetLight = (uint8_t*)&randomx_program_read_dataset_light;
|
||||
const uint8_t* codeDatasetInit = (uint8_t*)&randomx_dataset_init;
|
||||
const uint8_t* codeLoopStore = (uint8_t*)&randomx_program_loop_store;
|
||||
const uint8_t* codeLoopEnd = (uint8_t*)&randomx_program_loop_end;
|
||||
const uint8_t* codeReadDatasetLightSub = (uint8_t*)&randomx_program_read_dataset_light_sub;
|
||||
const uint8_t* codeEpilogue = (uint8_t*)&randomx_program_epilogue;
|
||||
const uint8_t* codeProgramEnd = (uint8_t*)&randomx_program_end;
|
||||
const uint8_t* codeShhLoad = (uint8_t*)&randomx_sshash_load;
|
||||
const uint8_t* codeShhPrefetch = (uint8_t*)&randomx_sshash_prefetch;
|
||||
const uint8_t* codeShhEnd = (uint8_t*)&randomx_sshash_end;
|
||||
const uint8_t* codeShhInit = (uint8_t*)&randomx_sshash_init;
|
||||
|
||||
const int32_t prologueSize = codeLoopBegin - codePrologue;
|
||||
const int32_t epilogueSize = codeProgramEnd - codeEpilogue;
|
||||
|
||||
const int32_t loopLoadSize = codeProgamStart - codeLoopLoad;
|
||||
const int32_t readDatasetSize = codeReadDatasetLight - codeReadDataset;
|
||||
const int32_t readDatasetLightSize = codeLoopStore - codeReadDatasetLight;
|
||||
const int32_t loopStoreSize = codeLoopEnd - codeLoopStore;
|
||||
const int32_t readDatasetLightSubSize = codeEpilogue - codeReadDatasetLightSub;
|
||||
const int32_t readDatasetLightSubSize = codeDatasetInit - codeReadDatasetLightSub;
|
||||
const int32_t datasetInitSize = codeEpilogue - codeDatasetInit;
|
||||
const int32_t epilogueSize = codeShhLoad - codeEpilogue;
|
||||
const int32_t codeSshLoadSize = codeShhPrefetch - codeShhLoad;
|
||||
const int32_t codeSshPrefetchSize = codeShhEnd - codeShhPrefetch;
|
||||
const int32_t codeSshInitSize = codeProgramEnd - codeShhInit;
|
||||
|
||||
const int32_t epilogueOffset = CodeSize - epilogueSize;
|
||||
const int32_t readDatasetLightSubOffset = epilogueOffset - readDatasetLightSubSize;
|
||||
constexpr int32_t superScalarHashOffset = 32768;
|
||||
|
||||
static const uint8_t REX_ADD_RR[] = { 0x4d, 0x03 };
|
||||
static const uint8_t REX_ADD_RM[] = { 0x4c, 0x03 };
|
||||
|
@ -166,7 +177,7 @@ namespace RandomX {
|
|||
static const uint8_t SQRTPD[] = { 0x66, 0x0f, 0x51 };
|
||||
static const uint8_t AND_OR_MOV_LDMXCSR[] = { 0x25, 0x00, 0x60, 0x00, 0x00, 0x0D, 0xC0, 0x9F, 0x00, 0x00, 0x89, 0x44, 0x24, 0xF8, 0x0F, 0xAE, 0x54, 0x24, 0xF8 };
|
||||
static const uint8_t ROL_RAX[] = { 0x48, 0xc1, 0xc0 };
|
||||
static const uint8_t XOR_ECX_ECX[] = { 0x33, 0xC9 };
|
||||
static const uint8_t XOR_RCX_RCX[] = { 0x48, 0x33, 0xC9 };
|
||||
static const uint8_t REX_CMP_R32I[] = { 0x41, 0x81 };
|
||||
static const uint8_t REX_CMP_M32I[] = { 0x81, 0x3c, 0x06 };
|
||||
static const uint8_t MOVAPD[] = { 0x66, 0x0f, 0x29 };
|
||||
|
@ -184,6 +195,18 @@ namespace RandomX {
|
|||
static const uint8_t REX_ADD_I[] = { 0x49, 0x81 };
|
||||
static const uint8_t REX_TEST[] = { 0x49, 0xF7 };
|
||||
static const uint8_t JZ[] = { 0x0f, 0x84 };
|
||||
static const uint8_t RET = 0xc3;
|
||||
|
||||
static const uint8_t NOP1[] = { 0x90 };
|
||||
static const uint8_t NOP2[] = { 0x66, 0x90 };
|
||||
static const uint8_t NOP3[] = { 0x0F, 0x1F, 0x00 };
|
||||
static const uint8_t NOP4[] = { 0x0F, 0x1F, 0x40, 0x00 };
|
||||
static const uint8_t NOP5[] = { 0x0F, 0x1F, 0x44, 0x00, 0x00 };
|
||||
static const uint8_t NOP6[] = { 0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00 };
|
||||
static const uint8_t NOP7[] = { 0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00 };
|
||||
static const uint8_t NOP8[] = { 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 };
|
||||
|
||||
static const uint8_t* NOPX[] = { NOP1, NOP2, NOP3, NOP4, NOP5, NOP6, NOP7, NOP8 };
|
||||
|
||||
size_t JitCompilerX86::getCodeSize() {
|
||||
return codePos - prologueSize;
|
||||
|
@ -196,6 +219,10 @@ namespace RandomX {
|
|||
memcpy(code + readDatasetLightSubOffset, codeReadDatasetLightSub, readDatasetLightSubSize);
|
||||
}
|
||||
|
||||
// Destructor: hands the code buffer (CodeSize bytes) back via freePagedMemory.
JitCompilerX86::~JitCompilerX86() {
|
||||
freePagedMemory(code, CodeSize);
|
||||
}
|
||||
|
||||
void JitCompilerX86::generateProgram(Program& prog) {
|
||||
generateProgramPrologue(prog);
|
||||
memcpy(code + codePos, codeReadDataset, readDatasetSize);
|
||||
|
@ -216,6 +243,42 @@ namespace RandomX {
|
|||
generateProgramEpilogue(prog);
|
||||
}
|
||||
|
||||
// Emits machine code for N light programs into the code buffer, starting at
// superScalarHashOffset. Layout: the init stub (codeShhInit), then for each
// program its generated instructions followed by the load fragment (codeShhLoad);
// between programs a register move, the prefetch fragment and NOP padding to a
// 16-byte boundary; finally a single RET.
// Note: the src/dst fields of the passed programs are modified in place.
template<size_t N>
|
||||
void JitCompilerX86::generateSuperScalarHash(LightProgram(&programs)[N]) {
|
||||
// copy the init stub to its fixed offset and continue emitting right after it
memcpy(code + superScalarHashOffset, codeShhInit, codeSshInitSize);
|
||||
codePos = superScalarHashOffset + codeSshInitSize;
|
||||
for (unsigned j = 0; j < N; ++j) {
|
||||
LightProgram& prog = programs[j];
|
||||
for (unsigned i = 0; i < prog.getSize(); ++i) {
|
||||
Instruction& instr = prog(i);
|
||||
// reduce register indices into the valid range before code generation
instr.src %= RegistersCount;
|
||||
instr.dst %= RegistersCount;
|
||||
generateCode(instr, i);
|
||||
}
|
||||
emit(codeShhLoad, codeSshLoadSize);
|
||||
// everything below is emitted only between programs, not after the last one
if (j < N - 1) {
|
||||
// ModRM byte 0xd8 = mod 11, reg field 3; the low bits select the program's address register.
// NOTE(review): together with REX_MOV_RR64 this presumably encodes
// "mov rbx, <address register>" - confirm against the REX_MOV_RR64 definition.
emit(REX_MOV_RR64);
|
||||
emitByte(0xd8 + prog.getAddressRegister());
|
||||
emit(codeShhPrefetch, codeSshPrefetchSize);
|
||||
// pad with NOPs until codePos is a multiple of 16; each NOP is at most 8 bytes long
int align = (codePos % 16);
|
||||
while (align != 0) {
|
||||
int nopSize = 16 - align;
|
||||
if (nopSize > 8) nopSize = 8;
|
||||
// NOPX[k] is the (k+1)-byte NOP encoding
emit(NOPX[nopSize - 1], nopSize);
|
||||
align = (codePos % 16);
|
||||
}
|
||||
}
|
||||
}
|
||||
emitByte(RET);
|
||||
}
|
||||
|
||||
template
|
||||
void JitCompilerX86::generateSuperScalarHash(LightProgram(&programs)[RANDOMX_CACHE_ACCESSES]);
|
||||
|
||||
// Copies the precompiled dataset-init routine (datasetInitSize bytes from
// codeDatasetInit) to the very beginning of the code buffer.
void JitCompilerX86::generateDatasetInitCode() {
|
||||
memcpy(code, codeDatasetInit, datasetInitSize);
|
||||
}
|
||||
|
||||
void JitCompilerX86::generateProgramPrologue(Program& prog) {
|
||||
#ifdef RANDOMX_JUMP
|
||||
instructionOffsets.clear();
|
||||
|
@ -253,7 +316,6 @@ namespace RandomX {
|
|||
emit32(prologueSize - codePos - 4);
|
||||
emitByte(JMP);
|
||||
emit32(epilogueOffset - codePos - 4);
|
||||
emitByte(0x90);
|
||||
}
|
||||
|
||||
void JitCompilerX86::generateCode(Instruction& instr, int i) {
|
||||
|
@ -287,9 +349,9 @@ namespace RandomX {
|
|||
emit32(instr.getImm32() & ScratchpadL3Mask);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_IADD_R(Instruction& instr, int i) {
|
||||
void JitCompilerX86::h_IADD_RS(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
/*if (instr.src != instr.dst) {
|
||||
emit(REX_ADD_RR);
|
||||
emitByte(0xc0 + 8 * instr.dst + instr.src);
|
||||
}
|
||||
|
@ -297,7 +359,19 @@ namespace RandomX {
|
|||
emit(REX_81);
|
||||
emitByte(0xc0 + instr.dst);
|
||||
emit32(instr.getImm32());
|
||||
}*/
|
||||
if (false && NOP_TEST) {
|
||||
emit(NOP4);
|
||||
return;
|
||||
}
|
||||
emit(REX_LEA);
|
||||
if (instr.dst == 5) //rbp,r13 cannot be the base register without offset
|
||||
emitByte(0xac);
|
||||
else
|
||||
emitByte(0x04 + 8 * instr.dst);
|
||||
genSIB(instr.mod % 4, instr.src, instr.dst);
|
||||
if (instr.dst == 5)
|
||||
emit32(instr.getImm32());
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_IADD_M(Instruction& instr, int i) {
|
||||
|
@ -330,10 +404,18 @@ namespace RandomX {
|
|||
void JitCompilerX86::h_ISUB_R(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
if (false && NOP_TEST) {
|
||||
emit(NOP3);
|
||||
return;
|
||||
}
|
||||
emit(REX_SUB_RR);
|
||||
emitByte(0xc0 + 8 * instr.dst + instr.src);
|
||||
}
|
||||
else {
|
||||
if (false && NOP_TEST) {
|
||||
emit(NOP7);
|
||||
return;
|
||||
}
|
||||
emit(REX_81);
|
||||
emitByte(0xe8 + instr.dst);
|
||||
emit32(instr.getImm32());
|
||||
|
@ -366,10 +448,18 @@ namespace RandomX {
|
|||
void JitCompilerX86::h_IMUL_R(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
if (false && NOP_TEST) {
|
||||
emit(NOP4);
|
||||
return;
|
||||
}
|
||||
emit(REX_IMUL_RR);
|
||||
emitByte(0xc0 + 8 * instr.dst + instr.src);
|
||||
}
|
||||
else {
|
||||
if (false && NOP_TEST) {
|
||||
emit(NOP7);
|
||||
return;
|
||||
}
|
||||
emit(REX_IMUL_RRI);
|
||||
emitByte(0xc0 + 9 * instr.dst);
|
||||
emit32(instr.getImm32());
|
||||
|
@ -393,6 +483,12 @@ namespace RandomX {
|
|||
|
||||
void JitCompilerX86::h_IMULH_R(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (false && NOP_TEST) {
|
||||
emit(NOP3);
|
||||
emit(NOP3);
|
||||
emit(NOP3);
|
||||
return;
|
||||
}
|
||||
emit(REX_MOV_RR64);
|
||||
emitByte(0xc0 + instr.dst);
|
||||
emit(REX_MUL_R);
|
||||
|
@ -422,6 +518,12 @@ namespace RandomX {
|
|||
|
||||
void JitCompilerX86::h_ISMULH_R(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (false && NOP_TEST) {
|
||||
emit(NOP3);
|
||||
emit(NOP3);
|
||||
emit(NOP3);
|
||||
return;
|
||||
}
|
||||
emit(REX_MOV_RR64);
|
||||
emitByte(0xc0 + instr.dst);
|
||||
emit(REX_MUL_R);
|
||||
|
@ -451,6 +553,13 @@ namespace RandomX {
|
|||
|
||||
void JitCompilerX86::h_IMUL_RCP(Instruction& instr, int i) {
|
||||
if (instr.getImm32() != 0) {
|
||||
if (false && NOP_TEST) {
|
||||
emitByte(0x66);
|
||||
emitByte(0x66);
|
||||
emit(NOP8);
|
||||
emit(NOP4);
|
||||
return;
|
||||
}
|
||||
registerUsage[instr.dst] = i;
|
||||
emit(MOV_RAX_I);
|
||||
emit64(reciprocal(instr.getImm32()));
|
||||
|
@ -472,10 +581,18 @@ namespace RandomX {
|
|||
void JitCompilerX86::h_IXOR_R(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
if (false && NOP_TEST) {
|
||||
emit(NOP3);
|
||||
return;
|
||||
}
|
||||
emit(REX_XOR_RR);
|
||||
emitByte(0xc0 + 8 * instr.dst + instr.src);
|
||||
}
|
||||
else {
|
||||
if (false && NOP_TEST) {
|
||||
emit(NOP7);
|
||||
return;
|
||||
}
|
||||
emit(REX_XOR_RI);
|
||||
emitByte(0xf0 + instr.dst);
|
||||
emit32(instr.getImm32());
|
||||
|
@ -500,12 +617,21 @@ namespace RandomX {
|
|||
void JitCompilerX86::h_IROR_R(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
if (false && NOP_TEST) {
|
||||
emit(NOP3);
|
||||
emit(NOP3);
|
||||
return;
|
||||
}
|
||||
emit(REX_MOV_RR);
|
||||
emitByte(0xc8 + instr.src);
|
||||
emit(REX_ROT_CL);
|
||||
emitByte(0xc8 + instr.dst);
|
||||
}
|
||||
else {
|
||||
if (NOP_TEST) {
|
||||
emit(NOP4);
|
||||
return;
|
||||
}
|
||||
emit(REX_ROT_I8);
|
||||
emitByte(0xc8 + instr.dst);
|
||||
emitByte(instr.getImm32() & 63);
|
||||
|
@ -700,6 +826,12 @@ namespace RandomX {
|
|||
const int conditionMask = ((1 << RANDOMX_CONDITION_BITS) - 1) << shift;
|
||||
int reg = getConditionRegister();
|
||||
int target = registerUsage[reg] + 1;
|
||||
if (false && NOP_TEST) {
|
||||
emit(NOP7);
|
||||
emit(NOP7);
|
||||
emit(NOP6);
|
||||
}
|
||||
else {
|
||||
emit(REX_ADD_I);
|
||||
emitByte(0xc0 + reg);
|
||||
emit32(1 << shift);
|
||||
|
@ -708,6 +840,7 @@ namespace RandomX {
|
|||
emit32(conditionMask);
|
||||
emit(JZ);
|
||||
emit32(instructionOffsets[target] - (codePos + 4));
|
||||
}
|
||||
for (unsigned j = 0; j < 8; ++j) { //mark all registers as used
|
||||
registerUsage[j] = i;
|
||||
}
|
||||
|
@ -717,7 +850,14 @@ namespace RandomX {
|
|||
#ifdef RANDOMX_JUMP
|
||||
handleCondition(instr, i);
|
||||
#endif
|
||||
emit(XOR_ECX_ECX);
|
||||
if (false && NOP_TEST) {
|
||||
emit(NOP3);
|
||||
emit(NOP7);
|
||||
emit(NOP3);
|
||||
emit(NOP3);
|
||||
return;
|
||||
}
|
||||
emit(XOR_RCX_RCX);
|
||||
emit(REX_CMP_R32I);
|
||||
emitByte(0xf8 + instr.src);
|
||||
emit32(instr.getImm32());
|
||||
|
@ -732,7 +872,7 @@ namespace RandomX {
|
|||
#ifdef RANDOMX_JUMP
|
||||
handleCondition(instr, i);
|
||||
#endif
|
||||
emit(XOR_ECX_ECX);
|
||||
emit(XOR_RCX_RCX);
|
||||
genAddressReg(instr);
|
||||
emit(REX_CMP_M32I);
|
||||
emit32(instr.getImm32());
|
||||
|
@ -765,7 +905,7 @@ namespace RandomX {
|
|||
#define INST_HANDLE(x) REPN(&JitCompilerX86::h_##x, WT(x))
|
||||
|
||||
InstructionGeneratorX86 JitCompilerX86::engine[256] = {
|
||||
INST_HANDLE(IADD_R)
|
||||
INST_HANDLE(IADD_RS)
|
||||
INST_HANDLE(IADD_M)
|
||||
INST_HANDLE(IADD_RC)
|
||||
INST_HANDLE(ISUB_R)
|
||||
|
|
|
@ -27,6 +27,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
|||
namespace RandomX {
|
||||
|
||||
class Program;
|
||||
class LightProgram;
|
||||
class JitCompilerX86;
|
||||
|
||||
typedef void(JitCompilerX86::*InstructionGeneratorX86)(Instruction&, int);
|
||||
|
@ -36,11 +37,18 @@ namespace RandomX {
|
|||
class JitCompilerX86 {
|
||||
public:
|
||||
JitCompilerX86();
|
||||
~JitCompilerX86();
|
||||
void generateProgram(Program&);
|
||||
void generateProgramLight(Program&);
|
||||
template<size_t N>
|
||||
void generateSuperScalarHash(LightProgram (&programs)[N]);
|
||||
ProgramFunc getProgramFunc() {
|
||||
return (ProgramFunc)code;
|
||||
}
|
||||
// Writes the dataset-init code into the buffer (on every call) and returns the
// buffer start as a DatasetInitFunc. This is the same address getProgramFunc()
// returns, so the beginning of the buffer is overwritten by this call.
DatasetInitFunc getDatasetInitFunc() {
|
||||
generateDatasetInitCode();
|
||||
return (DatasetInitFunc)code;
|
||||
}
|
||||
uint8_t* getCode() {
|
||||
return code;
|
||||
}
|
||||
|
@ -62,6 +70,8 @@ namespace RandomX {
|
|||
}
|
||||
}
|
||||
|
||||
void generateDatasetInitCode();
|
||||
|
||||
void generateProgramPrologue(Program&);
|
||||
void generateProgramEpilogue(Program&);
|
||||
int getConditionRegister();
|
||||
|
@ -100,13 +110,15 @@ namespace RandomX {
|
|||
|
||||
template<size_t N>
|
||||
void emit(const uint8_t (&src)[N]) {
|
||||
for (unsigned i = 0; i < N; ++i) {
|
||||
code[codePos + i] = src[i];
|
||||
}
|
||||
codePos += N;
|
||||
emit(src, N);
|
||||
}
|
||||
|
||||
void h_IADD_R(Instruction&, int);
|
||||
// Appends 'count' bytes from 'src' at the current write position and advances
// codePos by 'count'. No bounds check against the buffer size is performed here.
void emit(const uint8_t* src, size_t count) {
|
||||
memcpy(code + codePos, src, count);
|
||||
codePos += count;
|
||||
}
|
||||
|
||||
void h_IADD_RS(Instruction&, int);
|
||||
void h_IADD_M(Instruction&, int);
|
||||
void h_IADD_RC(Instruction&, int);
|
||||
void h_ISUB_R(Instruction&, int);
|
||||
|
|
|
@ -26,6 +26,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
|||
#include <algorithm>
|
||||
#include <stdexcept>
|
||||
#include <iomanip>
|
||||
#include "LightProgramGenerator.hpp"
|
||||
|
||||
namespace RandomX {
|
||||
// Intel Ivy Bridge reference
|
||||
|
@ -47,8 +48,8 @@ namespace RandomX {
|
|||
}
|
||||
|
||||
namespace LightInstructionOpcode {
|
||||
constexpr int IADD_R = 0;
|
||||
constexpr int IADD_RC = RANDOMX_FREQ_IADD_R + RANDOMX_FREQ_IADD_M;
|
||||
constexpr int IADD_RS = 0;
|
||||
constexpr int IADD_RC = RANDOMX_FREQ_IADD_RS + RANDOMX_FREQ_IADD_M;
|
||||
constexpr int ISUB_R = IADD_RC + RANDOMX_FREQ_IADD_RC;
|
||||
constexpr int IMUL_9C = ISUB_R + RANDOMX_FREQ_ISUB_R + RANDOMX_FREQ_ISUB_M;
|
||||
constexpr int IMUL_R = IMUL_9C + RANDOMX_FREQ_IMUL_9C;
|
||||
|
@ -65,20 +66,18 @@ namespace RandomX {
|
|||
}
|
||||
|
||||
const int lightInstructionOpcode[] = {
|
||||
LightInstructionOpcode::IADD_R,
|
||||
LightInstructionOpcode::IADD_R,
|
||||
LightInstructionOpcode::IADD_RC,
|
||||
LightInstructionOpcode::ISUB_R,
|
||||
LightInstructionOpcode::IMUL_9C,
|
||||
LightInstructionOpcode::IMUL_R,
|
||||
LightInstructionOpcode::IMUL_R,
|
||||
LightInstructionOpcode::IADD_RS,
|
||||
LightInstructionOpcode::ISUB_R, //ISUB_R
|
||||
LightInstructionOpcode::ISUB_R, //ISUB_R
|
||||
LightInstructionOpcode::IMUL_R, //IMUL_R
|
||||
LightInstructionOpcode::IMUL_R, //IMUL_C
|
||||
LightInstructionOpcode::IMULH_R,
|
||||
LightInstructionOpcode::ISMULH_R,
|
||||
LightInstructionOpcode::IMUL_RCP,
|
||||
LightInstructionOpcode::IXOR_R,
|
||||
LightInstructionOpcode::IXOR_R,
|
||||
LightInstructionOpcode::IROR_R,
|
||||
LightInstructionOpcode::IROR_R,
|
||||
LightInstructionOpcode::IXOR_R, //IXOR_R
|
||||
LightInstructionOpcode::IXOR_R, //IXOR_C
|
||||
LightInstructionOpcode::IROR_R, //IROR_R
|
||||
LightInstructionOpcode::IROR_R, //IROR_C
|
||||
LightInstructionOpcode::COND_R
|
||||
};
|
||||
|
||||
|
@ -93,37 +92,30 @@ namespace RandomX {
|
|||
constexpr type P015 = 6;
|
||||
}
|
||||
|
||||
class Blake2Generator {
|
||||
public:
|
||||
Blake2Generator(const void* seed, int nonce) : dataIndex(sizeof(data)) {
|
||||
Blake2Generator::Blake2Generator(const void* seed, int nonce) : dataIndex(sizeof(data)) {
|
||||
memset(data, 0, sizeof(data));
|
||||
memcpy(data, seed, SeedSize);
|
||||
store32(&data[60], nonce);
|
||||
}
|
||||
|
||||
uint8_t getByte() {
|
||||
uint8_t Blake2Generator::getByte() {
|
||||
checkData(1);
|
||||
return data[dataIndex++];
|
||||
}
|
||||
|
||||
uint32_t getInt32() {
|
||||
uint32_t Blake2Generator::getInt32() {
|
||||
checkData(4);
|
||||
auto ret = load32(&data[dataIndex]);
|
||||
dataIndex += 4;
|
||||
return ret;
|
||||
}
|
||||
|
||||
private:
|
||||
uint8_t data[64];
|
||||
size_t dataIndex;
|
||||
|
||||
void checkData(const size_t bytesNeeded) {
|
||||
void Blake2Generator::checkData(const size_t bytesNeeded) {
|
||||
if (dataIndex + bytesNeeded > sizeof(data)) {
|
||||
blake2b(data, sizeof(data), data, sizeof(data), nullptr, 0);
|
||||
dataIndex = 0;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class RegisterInfo {
|
||||
public:
|
||||
|
@ -201,7 +193,7 @@ namespace RandomX {
|
|||
static const MacroOp Xor_ri;
|
||||
static const MacroOp Ror_rcl;
|
||||
static const MacroOp Ror_ri;
|
||||
static const MacroOp TestJmp_fused;
|
||||
static const MacroOp TestJz_fused;
|
||||
static const MacroOp Xor_self;
|
||||
static const MacroOp Cmp_ri;
|
||||
static const MacroOp Setcc_r;
|
||||
|
@ -235,13 +227,13 @@ namespace RandomX {
|
|||
const MacroOp MacroOp::Xor_self = MacroOp("xor rcx,rcx", 3);
|
||||
const MacroOp MacroOp::Cmp_ri = MacroOp("cmp r,i", 7, 1, ExecutionPort::P015);
|
||||
const MacroOp MacroOp::Setcc_r = MacroOp("setcc cl", 3, 1, ExecutionPort::P05);
|
||||
const MacroOp MacroOp::TestJmp_fused = MacroOp("testjz r,i", 13, 0, ExecutionPort::P5);
|
||||
const MacroOp MacroOp::TestJz_fused = MacroOp("testjz r,i", 13, 0, ExecutionPort::P5);
|
||||
|
||||
const MacroOp IMULH_R_ops_array[] = { MacroOp::Mov_rr, MacroOp::Mul_r, MacroOp::Mov_rr };
|
||||
const MacroOp ISMULH_R_ops_array[] = { MacroOp::Mov_rr, MacroOp::Imul_r, MacroOp::Mov_rr };
|
||||
const MacroOp IMUL_RCP_ops_array[] = { MacroOp::Mov_ri64, MacroOp(MacroOp::Imul_rr, true) };
|
||||
const MacroOp IROR_R_ops_array[] = { MacroOp::Mov_rr, MacroOp::Ror_rcl };
|
||||
const MacroOp COND_R_ops_array[] = { MacroOp::Add_ri, MacroOp(MacroOp::TestJmp_fused, true), MacroOp::Xor_self, MacroOp::Cmp_ri, MacroOp(MacroOp::Setcc_r, true), MacroOp(MacroOp::Add_rr, true) };
|
||||
const MacroOp COND_R_ops_array[] = { MacroOp::Add_ri, MacroOp(MacroOp::TestJz_fused, true), MacroOp::Xor_self, MacroOp::Cmp_ri, MacroOp(MacroOp::Setcc_r, true), MacroOp(MacroOp::Add_rr, true) };
|
||||
|
||||
|
||||
class LightInstructionInfo {
|
||||
|
@ -349,7 +341,7 @@ namespace RandomX {
|
|||
|
||||
class DecoderBuffer {
|
||||
public:
|
||||
static DecoderBuffer Default;
|
||||
static const DecoderBuffer Default;
|
||||
template <size_t N>
|
||||
DecoderBuffer(const char* name, int index, const int(&arr)[N])
|
||||
: name_(name), index_(index), counts_(arr), opsCount_(N) {}
|
||||
|
@ -365,17 +357,17 @@ namespace RandomX {
|
|||
const char* getName() const {
|
||||
return name_;
|
||||
}
|
||||
const DecoderBuffer& fetchNext(int prevType, Blake2Generator& gen) {
|
||||
const DecoderBuffer* fetchNext(int prevType, Blake2Generator& gen) const {
|
||||
if (prevType == LightInstructionType::IMULH_R || prevType == LightInstructionType::ISMULH_R)
|
||||
return decodeBuffer3310; //2-1-1 decode
|
||||
return &decodeBuffer3310; //2-1-1 decode
|
||||
if (index_ == 0) {
|
||||
return decodeBuffer4444; //IMUL_RCP end
|
||||
}
|
||||
if (index_ == 2) {
|
||||
return decodeBuffer133; //COND_R middle
|
||||
return &decodeBuffer4444; //IMUL_RCP end
|
||||
}
|
||||
/*if (index_ == 2) {
|
||||
return &decodeBuffer133; //COND_R middle
|
||||
}*/
|
||||
if (index_ == 7) {
|
||||
return decodeBuffer7333; //COND_R end
|
||||
return &decodeBuffer7333; //COND_R end
|
||||
}
|
||||
return fetchNextDefault(gen);
|
||||
}
|
||||
|
@ -393,12 +385,12 @@ namespace RandomX {
|
|||
static const DecoderBuffer decodeBuffer3373;
|
||||
static const DecoderBuffer decodeBuffer133;
|
||||
static const DecoderBuffer* decodeBuffers[7];
|
||||
const DecoderBuffer& fetchNextDefault(Blake2Generator& gen) {
|
||||
const DecoderBuffer* fetchNextDefault(Blake2Generator& gen) const {
|
||||
int select;
|
||||
do {
|
||||
select = gen.getByte() & 7;
|
||||
} while (select == 7);
|
||||
return *decodeBuffers[select];
|
||||
return decodeBuffers[select];
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -420,7 +412,7 @@ namespace RandomX {
|
|||
&DecoderBuffer::decodeBuffer3373,
|
||||
};
|
||||
|
||||
DecoderBuffer DecoderBuffer::Default = DecoderBuffer();
|
||||
const DecoderBuffer DecoderBuffer::Default = DecoderBuffer();
|
||||
|
||||
const LightInstructionInfo* slot_3[] = { &LightInstructionInfo::ISUB_R, &LightInstructionInfo::IXOR_R };
|
||||
const LightInstructionInfo* slot_3L[] = { &LightInstructionInfo::ISUB_R, &LightInstructionInfo::IXOR_R, &LightInstructionInfo::IMULH_R, &LightInstructionInfo::ISMULH_R };
|
||||
|
@ -472,7 +464,7 @@ namespace RandomX {
|
|||
case 4:
|
||||
return create(slot_4[gen.getByte() & 3], gen);
|
||||
case 7:
|
||||
if (isLast) {
|
||||
if (false && isLast) {
|
||||
return create(slot_7L, gen);
|
||||
}
|
||||
else {
|
||||
|
@ -595,7 +587,7 @@ namespace RandomX {
|
|||
bool selectDestination(int cycle, RegisterInfo (®isters)[8], Blake2Generator& gen) {
|
||||
std::vector<int> availableRegisters;
|
||||
for (unsigned i = 0; i < 8; ++i) {
|
||||
if (registers[i].latency <= cycle && (canReuse_ || i != src_) && (registers[i].lastOpGroup != opGroup_ || registers[i].lastOpPar != opGroupPar_))
|
||||
if (registers[i].latency <= cycle && (canReuse_ || i != src_) && (registers[i].lastOpGroup != opGroup_ || registers[i].lastOpPar != opGroupPar_) && (info_.getType() != LightInstructionType::IADD_RS || i != 5))
|
||||
availableRegisters.push_back(i);
|
||||
}
|
||||
return selectRegister(availableRegisters, gen, dst_);
|
||||
|
@ -607,6 +599,12 @@ namespace RandomX {
|
|||
if (registers[i].latency <= cycle)
|
||||
availableRegisters.push_back(i);
|
||||
}
|
||||
if (availableRegisters.size() == 2 && info_.getType() == LightInstructionType::IADD_RS) {
|
||||
if (availableRegisters[0] == 5 || availableRegisters[1] == 5) {
|
||||
opGroupPar_ = src_ = 5;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
if (selectRegister(availableRegisters, gen, src_)) {
|
||||
if (groupParIsSource_)
|
||||
opGroupPar_ = src_;
|
||||
|
@ -666,7 +664,7 @@ namespace RandomX {
|
|||
constexpr int V4_SRC_INDEX_BITS = 3;
|
||||
constexpr int V4_DST_INDEX_BITS = 3;
|
||||
constexpr int CYCLE_MAP_SIZE = RANDOMX_LPROG_LATENCY + 3;
|
||||
constexpr bool TRACE = true;
|
||||
constexpr bool TRACE = false;
|
||||
|
||||
static int blakeCounter = 0;
|
||||
|
||||
|
@ -782,15 +780,14 @@ namespace RandomX {
|
|||
}
|
||||
}
|
||||
|
||||
void generateLightProg2(LightProgram& prog, const void* seed, int indexRegister, int nonce) {
|
||||
double generateLightProg2(LightProgram& prog, Blake2Generator& gen) {
|
||||
|
||||
ExecutionPort::type portBusy[CYCLE_MAP_SIZE][3];
|
||||
memset(portBusy, 0, sizeof(portBusy));
|
||||
RegisterInfo registers[8];
|
||||
Blake2Generator gen(seed, nonce);
|
||||
std::vector<LightInstruction> instructions;
|
||||
|
||||
DecoderBuffer& fetchLine = DecoderBuffer::Default;
|
||||
const DecoderBuffer* fetchLine = &DecoderBuffer::Default;
|
||||
LightInstruction currentInstruction = LightInstruction::Null;
|
||||
int instrIndex = 0;
|
||||
int codeSize = 0;
|
||||
|
@ -806,24 +803,24 @@ namespace RandomX {
|
|||
constexpr int MAX_ATTEMPTS = 4;
|
||||
|
||||
while(!portsSaturated) {
|
||||
fetchLine = fetchLine.fetchNext(currentInstruction.getType(), gen);
|
||||
if (TRACE) std::cout << "; ------------- fetch cycle " << cycle << " (" << fetchLine.getName() << ")" << std::endl;
|
||||
fetchLine = fetchLine->fetchNext(currentInstruction.getType(), gen);
|
||||
if (TRACE) std::cout << "; ------------- fetch cycle " << cycle << " (" << fetchLine->getName() << ")" << std::endl;
|
||||
|
||||
mopIndex = 0;
|
||||
|
||||
while (mopIndex < fetchLine.getSize()) {
|
||||
while (mopIndex < fetchLine->getSize()) {
|
||||
int topCycle = cycle;
|
||||
if (instrIndex >= currentInstruction.getInfo().getSize()) {
|
||||
if (portsSaturated)
|
||||
break;
|
||||
currentInstruction = LightInstruction::createForSlot(gen, fetchLine.getCounts()[mopIndex], fetchLine.getSize() == mopIndex + 1, fetchLine.getIndex() == 0 && mopIndex == 0);
|
||||
currentInstruction = LightInstruction::createForSlot(gen, fetchLine->getCounts()[mopIndex], fetchLine->getSize() == mopIndex + 1, fetchLine->getIndex() == 0 && mopIndex == 0);
|
||||
instrIndex = 0;
|
||||
if (TRACE) std::cout << "; " << currentInstruction.getInfo().getName() << std::endl;
|
||||
}
|
||||
MacroOp& mop = currentInstruction.getInfo().getOp(instrIndex);
|
||||
if (fetchLine.getCounts()[mopIndex] != mop.getSize()) {
|
||||
if (TRACE) std::cout << "ERROR instruction " << mop.getName() << " doesn't fit into slot of size " << fetchLine.getCounts()[mopIndex] << std::endl;
|
||||
return;
|
||||
if (fetchLine->getCounts()[mopIndex] != mop.getSize()) {
|
||||
if (TRACE) std::cout << "ERROR instruction " << mop.getName() << " doesn't fit into slot of size " << fetchLine->getCounts()[mopIndex] << std::endl;
|
||||
return DBL_MIN;
|
||||
}
|
||||
|
||||
if (TRACE) std::cout << mop.getName() << " ";
|
||||
|
@ -831,7 +828,7 @@ namespace RandomX {
|
|||
mop.setCycle(scheduleCycle);
|
||||
if (scheduleCycle < 0) {
|
||||
if (TRACE) std::cout << "; Failed at cycle " << cycle << std::endl;
|
||||
return;
|
||||
return DBL_MIN;
|
||||
}
|
||||
|
||||
if (instrIndex == currentInstruction.getInfo().getSrcOp()) {
|
||||
|
@ -893,25 +890,29 @@ namespace RandomX {
|
|||
std::cout << "; (* = in use, _ = idle)" << std::endl;
|
||||
|
||||
int portCycles = 0;
|
||||
for (int i = 0; i < CYCLE_MAP_SIZE; ++i) {
|
||||
/*for (int i = 0; i < CYCLE_MAP_SIZE; ++i) {
|
||||
std::cout << "; " << std::setw(3) << i << " ";
|
||||
for (int j = 0; j < 3; ++j) {
|
||||
std::cout << (portBusy[i][j] ? '*' : '_');
|
||||
portCycles += !!portBusy[i][j];
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
}*/
|
||||
|
||||
double ipc = (macroOpCount / (double)retireCycle);
|
||||
|
||||
std::cout << "; code size " << codeSize << " bytes" << std::endl;
|
||||
std::cout << "; x86 macro-ops: " << macroOpCount << std::endl;
|
||||
std::cout << "; RandomX instructions: " << outIndex << std::endl;
|
||||
std::cout << "; Execution time: " << retireCycle << " cycles" << std::endl;
|
||||
std::cout << "; IPC = " << (macroOpCount / (double)retireCycle) << std::endl;
|
||||
std::cout << "; IPC = " << ipc << std::endl;
|
||||
std::cout << "; Port-cycles: " << portCycles << std::endl;
|
||||
std::cout << "; Multiplications: " << mulCount << std::endl;
|
||||
|
||||
int asicLatency[8];
|
||||
memset(asicLatency, 0, sizeof(asicLatency));
|
||||
|
||||
|
||||
for (int i = 0; i < outIndex; ++i) {
|
||||
Instruction& instr = prog(i);
|
||||
int latDst = asicLatency[instr.dst] + 1;
|
||||
|
@ -919,7 +920,16 @@ namespace RandomX {
|
|||
asicLatency[instr.dst] = std::max(latDst, latSrc);
|
||||
}
|
||||
|
||||
std::cout << "; Multiplications: " << mulCount << std::endl;
|
||||
int asicLatencyFinal = 0;
|
||||
int addressReg = 0;
|
||||
for (int i = 0; i < 8; ++i) {
|
||||
if (asicLatency[i] > asicLatencyFinal) {
|
||||
asicLatencyFinal = asicLatency[i];
|
||||
addressReg = i;
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << "; ASIC latency: " << asicLatencyFinal << std::endl;
|
||||
|
||||
std::cout << "; ASIC latency:" << std::endl;
|
||||
for (int i = 0; i < 8; ++i) {
|
||||
|
@ -931,5 +941,7 @@ namespace RandomX {
|
|||
}
|
||||
|
||||
prog.setSize(outIndex);
|
||||
prog.setAddressRegister(addressReg);
|
||||
return addressReg;
|
||||
}
|
||||
}
|
|
@ -20,6 +20,18 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
|||
#include "Program.hpp"
|
||||
|
||||
namespace RandomX {
|
||||
void generateLightProgram(LightProgram& prog, const void* seed, int indexRegister, int nonce);
|
||||
void generateLightProg2(LightProgram& prog, const void* seed, int indexRegister, int nonce);
|
||||
|
||||
// Generator object constructed from a seed buffer and a nonce that hands out
// 8-bit and 32-bit values through getByte()/getInt32(). Only the declaration is
// visible here; the member definitions live elsewhere.
// NOTE(review): presumably backed by a BLAKE2 hash of the seed, judging by the
// name — confirm against the implementation.
class Blake2Generator {
|
||||
public:
|
||||
// seed: opaque input buffer (length is not expressed in the signature);
// nonce: integer that distinguishes generators built from the same seed
// (presumably mixed into the initial state — confirm).
Blake2Generator(const void* seed, int nonce);
|
||||
// Returns one uint8_t per call.
uint8_t getByte();
|
||||
// Returns one uint32_t per call.
uint32_t getInt32();
|
||||
private:
|
||||
// 64-byte internal buffer.
uint8_t data[64];
|
||||
// Index into data (presumably the next unread byte — confirm).
size_t dataIndex;
|
||||

||||
// Internal helper taking a byte count.
// NOTE(review): presumably refills data when fewer than that many bytes remain — confirm.
void checkData(const size_t);
|
||||
};
|
||||
|
||||
double generateLightProg2(LightProgram& prog, Blake2Generator& gen);
|
||||
}
|
|
@ -68,6 +68,12 @@ namespace RandomX {
|
|||
void setSize(uint32_t val) {
|
||||
size = val;
|
||||
}
|
||||
// Returns the value currently stored in addrReg.
// NOTE(review): the body does not modify the object, so this accessor could be
// declared const — confirm no caller relies on the non-const overload.
int getAddressRegister() {
|
||||
return addrReg;
|
||||
}
|
||||
// Stores val in addrReg.
// NOTE(review): val is uint32_t while the addrReg member is declared int, so
// the assignment performs an implicit unsigned-to-signed conversion.
void setAddressRegister(uint32_t val) {
|
||||
addrReg = val;
|
||||
}
|
||||
private:
|
||||
void print(std::ostream& os) const {
|
||||
for (unsigned i = 0; i < size; ++i) {
|
||||
|
@ -77,6 +83,7 @@ namespace RandomX {
|
|||
}
|
||||
Instruction programBuffer[RANDOMX_LPROG_MAX_SIZE];
|
||||
uint32_t size;
|
||||
int addrReg;
|
||||
};
|
||||
|
||||
static_assert(sizeof(Program) % 64 == 0, "Invalid size of class Program");
|
||||
|
|
16
src/asm/program_sshash_constants.inc
Normal file
16
src/asm/program_sshash_constants.inc
Normal file
|
@ -0,0 +1,16 @@
|
|||
;# Eight labelled 64-bit constants, one per register r0..r7.
;# Each value is emitted as 8 bytes in little-endian order; the decimal value is
;# given in the comment on the label line (checked for r0_mul:
;# 6364136223846793005 = 0x5851F42D4C957F2D -> 2D 7F 95 4C 2D F4 51 58).
;# NOTE(review): the label suffixes suggest r0 is multiplied by its constant and
;# r1..r7 have theirs added; the consuming code is not visible here — confirm.
r0_mul: ;# 6364136223846793005
|
||||
db 45, 127, 149, 76, 45, 244, 81, 88
|
||||
r1_add: ;# 9298410992540426048
|
||||
db 64, 159, 245, 89, 136, 151, 10, 129
|
||||
r2_add: ;# 12065312585734608966
|
||||
db 70, 216, 194, 56, 223, 153, 112, 167
|
||||
r3_add: ;# 9306329213124610396
|
||||
db 92, 9, 34, 191, 28, 185, 38, 129
|
||||
r4_add: ;# 5281919268842080866
|
||||
db 98, 138, 159, 23, 151, 37, 77, 73
|
||||
r5_add: ;# 10536153434571861004
|
||||
db 12, 236, 170, 206, 185, 239, 55, 146
|
||||
r6_add: ;# 3398623926847679864
|
||||
db 120, 45, 230, 108, 116, 86, 42, 47
|
||||
r7_add: ;# 9549104520008361294
|
||||
db 78, 229, 44, 182, 247, 59, 133, 132
|
8
src/asm/program_sshash_load.inc
Normal file
8
src/asm/program_sshash_load.inc
Normal file
|
@ -0,0 +1,8 @@
|
|||
;# Load step: XOR registers r8..r15 with the eight consecutive qwords at
;# [rbx+0] .. [rbx+56] (64 bytes in total).
;# All eight instructions are commented out in this revision.
;# NOTE(review): these lines are disabled with a bare ';', whereas
;# program_sshash_constants.inc uses ';#'. If this file is also assembled by an
;# assembler that treats ';' as a statement separator (e.g. GNU as), the xor
;# instructions would NOT be disabled — confirm which assemblers include it.
;xor r8, qword ptr [rbx+0]
|
||||
;xor r9, qword ptr [rbx+8]
|
||||
;xor r10, qword ptr [rbx+16]
|
||||
;xor r11, qword ptr [rbx+24]
|
||||
;xor r12, qword ptr [rbx+32]
|
||||
;xor r13, qword ptr [rbx+40]
|
||||
;xor r14, qword ptr [rbx+48]
|
||||
;xor r15, qword ptr [rbx+56]
|
4
src/asm/program_sshash_prefetch.inc
Normal file
4
src/asm/program_sshash_prefetch.inc
Normal file
|
@ -0,0 +1,4 @@
|
|||
;# Turn the index in rbx into an address:
;#   keep the low 22 bits (4194303 = 2^22 - 1),
;#   scale by 64 (shl 6), giving a byte offset below 2^28,
;#   then add the base pointer held in rdi.
;# NOTE(review): 64 presumably matches the cache-line / item size and rdi the
;# start of the cache memory — confirm against the code that includes this file.
and rbx, 4194303
|
||||
shl rbx, 6
|
||||
add rbx, rdi
|
||||
;# The prefetch of the computed address is disabled in this revision.
; prefetchnta byte ptr [rbx]
|
|
@ -41,7 +41,7 @@ namespace RandomX {
|
|||
static_assert((RANDOMX_SCRATCHPAD_L1 & (RANDOMX_SCRATCHPAD_L1 - 1)) == 0, "RANDOMX_SCRATCHPAD_L1 must be a power of 2.");
|
||||
static_assert(RANDOMX_CACHE_ACCESSES > 1, "RANDOMX_CACHE_ACCESSES must be greater than 1");
|
||||
|
||||
constexpr int wtSum = RANDOMX_FREQ_IADD_R + RANDOMX_FREQ_IADD_M + RANDOMX_FREQ_IADD_RC + RANDOMX_FREQ_ISUB_R + \
|
||||
constexpr int wtSum = RANDOMX_FREQ_IADD_RS + RANDOMX_FREQ_IADD_M + RANDOMX_FREQ_IADD_RC + RANDOMX_FREQ_ISUB_R + \
|
||||
RANDOMX_FREQ_ISUB_M + RANDOMX_FREQ_IMUL_9C + RANDOMX_FREQ_IMUL_R + RANDOMX_FREQ_IMUL_M + RANDOMX_FREQ_IMULH_R + \
|
||||
RANDOMX_FREQ_IMULH_M + RANDOMX_FREQ_ISMULH_R + RANDOMX_FREQ_ISMULH_M + RANDOMX_FREQ_IMUL_RCP + \
|
||||
RANDOMX_FREQ_INEG_R + RANDOMX_FREQ_IXOR_R + RANDOMX_FREQ_IXOR_M + RANDOMX_FREQ_IROR_R + RANDOMX_FREQ_ISWAP_R + \
|
||||
|
@ -141,6 +141,7 @@ namespace RandomX {
|
|||
typedef void(*DatasetReadFunc)(addr_t, MemoryRegisters&, int_reg_t(®)[RegistersCount]);
|
||||
|
||||
typedef void(*ProgramFunc)(RegisterFile&, MemoryRegisters&, uint8_t* /* scratchpad */, uint64_t);
|
||||
typedef void(*DatasetInitFunc)(uint8_t* cache, uint8_t* dataset, uint32_t startBlock, uint32_t endBlock);
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, const RandomX::RegisterFile& rf);
|
||||
|
|
|
@ -37,7 +37,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
|||
//Number of random Cache accesses per Dataset block. Minimum is 2.
|
||||
#define RANDOMX_CACHE_ACCESSES 8
|
||||
|
||||
#define RANDOMX_LPROG_LATENCY 168
|
||||
#define RANDOMX_LPROG_LATENCY 130
|
||||
#define RANDOMX_LPROG_ASIC_LATENCY 84
|
||||
#define RANDOMX_LPROG_MIN_SIZE 225
|
||||
#define RANDOMX_LPROG_MAX_SIZE 512
|
||||
|
@ -80,12 +80,12 @@ Instruction frequencies (per 256 opcodes)
|
|||
Total sum of frequencies must be 256
|
||||
*/
|
||||
|
||||
#define RANDOMX_FREQ_IADD_R 12
|
||||
#define RANDOMX_FREQ_IADD_RS 32
|
||||
#define RANDOMX_FREQ_IADD_M 7
|
||||
#define RANDOMX_FREQ_IADD_RC 16
|
||||
#define RANDOMX_FREQ_ISUB_R 12
|
||||
#define RANDOMX_FREQ_IADD_RC 0
|
||||
#define RANDOMX_FREQ_ISUB_R 17
|
||||
#define RANDOMX_FREQ_ISUB_M 7
|
||||
#define RANDOMX_FREQ_IMUL_9C 9
|
||||
#define RANDOMX_FREQ_IMUL_9C 0
|
||||
#define RANDOMX_FREQ_IMUL_R 16
|
||||
#define RANDOMX_FREQ_IMUL_M 4
|
||||
#define RANDOMX_FREQ_IMULH_R 4
|
||||
|
|
26
src/main.cpp
26
src/main.cpp
|
@ -37,6 +37,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
|||
#include "Cache.hpp"
|
||||
#include "hashAes1Rx4.hpp"
|
||||
#include "LightProgramGenerator.hpp"
|
||||
#include "JitCompilerX86.hpp"
|
||||
|
||||
const uint8_t seed[32] = { 191, 182, 222, 175, 249, 89, 134, 104, 241, 68, 191, 62, 162, 166, 61, 64, 123, 191, 227, 193, 118, 60, 188, 53, 223, 133, 175, 24, 123, 230, 55, 74 };
|
||||
|
||||
|
@ -204,7 +205,7 @@ void mine(RandomX::VirtualMachine* vm, std::atomic<uint32_t>& atomicNonce, Atomi
|
|||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
bool softAes, genAsm, miningMode, verificationMode, help, largePages, async, genNative, jit, genLight;
|
||||
bool softAes, genAsm, miningMode, verificationMode, help, largePages, async, genNative, jit, genLight, useSuperscalar;
|
||||
int programCount, threadCount, initThreadCount, epoch;
|
||||
|
||||
readOption("--softAes", argc, argv, softAes);
|
||||
|
@ -220,14 +221,16 @@ int main(int argc, char** argv) {
|
|||
readOption("--genNative", argc, argv, genNative);
|
||||
readOption("--help", argc, argv, help);
|
||||
readOption("--genLight", argc, argv, genLight);
|
||||
readOption("--useSuperscalar", argc, argv, useSuperscalar);
|
||||
|
||||
if (genLight) {
|
||||
RandomX::LightProgram p;
|
||||
RandomX::generateLightProg2(p, seed, 0, programCount);
|
||||
//RandomX::AssemblyGeneratorX86 asmX86;
|
||||
//asmX86.generateProgram(p);
|
||||
RandomX::Blake2Generator gen(seed, programCount);
|
||||
RandomX::generateLightProg2(p, gen);
|
||||
RandomX::AssemblyGeneratorX86 asmX86;
|
||||
asmX86.generateProgram(p);
|
||||
//std::ofstream file("lightProg2.asm");
|
||||
//asmX86.printCode(std::cout);
|
||||
asmX86.printCode(std::cout);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -287,6 +290,17 @@ int main(int argc, char** argv) {
|
|||
dataset.dataset.size = datasetSize;
|
||||
RandomX::datasetAlloc(dataset, largePages);
|
||||
const uint64_t datasetBlockCount = datasetSize / RandomX::CacheLineSize;
|
||||
if (useSuperscalar) {
|
||||
RandomX::Blake2Generator gen(seed, programCount);
|
||||
RandomX::LightProgram programs[RANDOMX_CACHE_ACCESSES];
|
||||
for (int i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) {
|
||||
RandomX::generateLightProg2(programs[i], gen);
|
||||
}
|
||||
RandomX::JitCompilerX86 jit86;
|
||||
jit86.generateSuperScalarHash(programs);
|
||||
jit86.getDatasetInitFunc()(cache.memory, dataset.dataset.memory, 0, datasetBlockCount);
|
||||
}
|
||||
else {
|
||||
if (initThreadCount > 1) {
|
||||
auto perThread = datasetBlockCount / initThreadCount;
|
||||
auto remainder = datasetBlockCount % initThreadCount;
|
||||
|
@ -301,10 +315,12 @@ int main(int argc, char** argv) {
|
|||
else {
|
||||
RandomX::datasetInit(cache, dataset.dataset, 0, datasetBlockCount);
|
||||
}
|
||||
}
|
||||
RandomX::deallocCache(cache, largePages);
|
||||
threads.clear();
|
||||
std::cout << "Dataset (" << datasetSize << " bytes) initialized in " << sw.getElapsed() << " s" << std::endl;
|
||||
}
|
||||
return 0;
|
||||
std::cout << "Initializing " << threadCount << " virtual machine(s) ..." << std::endl;
|
||||
for (int i = 0; i < threadCount; ++i) {
|
||||
RandomX::VirtualMachine* vm;
|
||||
|
|
Loading…
Reference in a new issue