mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2024-08-15 00:23:14 +00:00
Fixed cache alignment
Performance tuning
This commit is contained in:
parent
77dbe14658
commit
6e3136b37f
6 changed files with 43 additions and 30 deletions
|
@@ -34,7 +34,7 @@ namespace RandomX {
|
||||||
return (uint8_t*)allocLargePagesMemory(size);
|
return (uint8_t*)allocLargePagesMemory(size);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
void* ptr = _mm_malloc(size, sizeof(__m128i));
|
void* ptr = _mm_malloc(size, CacheLineSize);
|
||||||
if (ptr == nullptr)
|
if (ptr == nullptr)
|
||||||
throw std::bad_alloc();
|
throw std::bad_alloc();
|
||||||
return (uint8_t*)ptr;
|
return (uint8_t*)ptr;
|
||||||
|
|
|
@@ -628,7 +628,7 @@ namespace RandomX {
|
||||||
emitByte(0xc8 + instr.dst);
|
emitByte(0xc8 + instr.dst);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if (NOP_TEST) {
|
if (false && NOP_TEST) {
|
||||||
emit(NOP4);
|
emit(NOP4);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
|
@@ -216,7 +216,7 @@ namespace RandomX {
|
||||||
const MacroOp MacroOp::Sub_ri = MacroOp("sub r,i", 7, 1, ExecutionPort::P015);
|
const MacroOp MacroOp::Sub_ri = MacroOp("sub r,i", 7, 1, ExecutionPort::P015);
|
||||||
const MacroOp MacroOp::Imul_rr = MacroOp("imul r,r", 4, 3, ExecutionPort::P1);
|
const MacroOp MacroOp::Imul_rr = MacroOp("imul r,r", 4, 3, ExecutionPort::P1);
|
||||||
const MacroOp MacroOp::Imul_rri = MacroOp("imul r,r,i", 7, 3, ExecutionPort::P1);
|
const MacroOp MacroOp::Imul_rri = MacroOp("imul r,r,i", 7, 3, ExecutionPort::P1);
|
||||||
const MacroOp MacroOp::Imul_r = MacroOp("imul r", 3, 3, ExecutionPort::P1, ExecutionPort::P5);
|
const MacroOp MacroOp::Imul_r = MacroOp("imul r", 3, 4, ExecutionPort::P1, ExecutionPort::P5);
|
||||||
const MacroOp MacroOp::Mul_r = MacroOp("mul r", 3, 3, ExecutionPort::P1, ExecutionPort::P5);
|
const MacroOp MacroOp::Mul_r = MacroOp("mul r", 3, 3, ExecutionPort::P1, ExecutionPort::P5);
|
||||||
const MacroOp MacroOp::Mov_rr = MacroOp("mov r,r", 3);
|
const MacroOp MacroOp::Mov_rr = MacroOp("mov r,r", 3);
|
||||||
const MacroOp MacroOp::Mov_ri64 = MacroOp("mov rax,i64", 10, 1, ExecutionPort::P015);
|
const MacroOp MacroOp::Mov_ri64 = MacroOp("mov rax,i64", 10, 1, ExecutionPort::P015);
|
||||||
|
@@ -357,9 +357,11 @@ namespace RandomX {
|
||||||
const char* getName() const {
|
const char* getName() const {
|
||||||
return name_;
|
return name_;
|
||||||
}
|
}
|
||||||
const DecoderBuffer* fetchNext(int prevType, Blake2Generator& gen) const {
|
const DecoderBuffer* fetchNext(int instrType, int cycle, int mulCount, Blake2Generator& gen) const {
|
||||||
if (prevType == LightInstructionType::IMULH_R || prevType == LightInstructionType::ISMULH_R)
|
if (instrType == LightInstructionType::IMULH_R || instrType == LightInstructionType::ISMULH_R)
|
||||||
return &decodeBuffer3310; //2-1-1 decode
|
return &decodeBuffer3310; //2-1-1 decode
|
||||||
|
if (mulCount < cycle)
|
||||||
|
return &decodeBuffer4444_mul;
|
||||||
if (index_ == 0) {
|
if (index_ == 0) {
|
||||||
return &decodeBuffer4444; //IMUL_RCP end
|
return &decodeBuffer4444; //IMUL_RCP end
|
||||||
}
|
}
|
||||||
|
@@ -381,15 +383,16 @@ namespace RandomX {
|
||||||
static const DecoderBuffer decodeBuffer7333;
|
static const DecoderBuffer decodeBuffer7333;
|
||||||
static const DecoderBuffer decodeBuffer3337;
|
static const DecoderBuffer decodeBuffer3337;
|
||||||
static const DecoderBuffer decodeBuffer4444;
|
static const DecoderBuffer decodeBuffer4444;
|
||||||
|
static const DecoderBuffer decodeBuffer4444_mul;
|
||||||
static const DecoderBuffer decodeBuffer3733;
|
static const DecoderBuffer decodeBuffer3733;
|
||||||
static const DecoderBuffer decodeBuffer3373;
|
static const DecoderBuffer decodeBuffer3373;
|
||||||
static const DecoderBuffer decodeBuffer133;
|
static const DecoderBuffer decodeBuffer133;
|
||||||
static const DecoderBuffer* decodeBuffers[7];
|
static const DecoderBuffer* decodeBuffers[7];
|
||||||
const DecoderBuffer* fetchNextDefault(Blake2Generator& gen) const {
|
const DecoderBuffer* fetchNextDefault(Blake2Generator& gen) const {
|
||||||
int select;
|
int select;
|
||||||
do {
|
//do {
|
||||||
select = gen.getByte() & 7;
|
select = gen.getByte() & 3;
|
||||||
} while (select == 7);
|
//} while (select == 7);
|
||||||
return decodeBuffers[select];
|
return decodeBuffers[select];
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@@ -397,17 +400,16 @@ namespace RandomX {
|
||||||
const DecoderBuffer DecoderBuffer::decodeBuffer3310 = DecoderBuffer("3,3,10", 0, buffer0);
|
const DecoderBuffer DecoderBuffer::decodeBuffer3310 = DecoderBuffer("3,3,10", 0, buffer0);
|
||||||
const DecoderBuffer DecoderBuffer::decodeBuffer7333 = DecoderBuffer("7,3,3,3", 1, buffer1);
|
const DecoderBuffer DecoderBuffer::decodeBuffer7333 = DecoderBuffer("7,3,3,3", 1, buffer1);
|
||||||
const DecoderBuffer DecoderBuffer::decodeBuffer3337 = DecoderBuffer("3,3,3,7", 2, buffer2);
|
const DecoderBuffer DecoderBuffer::decodeBuffer3337 = DecoderBuffer("3,3,3,7", 2, buffer2);
|
||||||
|
const DecoderBuffer DecoderBuffer::decodeBuffer4444_mul = DecoderBuffer("4,4,4,4-MUL", 3, buffer4);
|
||||||
const DecoderBuffer DecoderBuffer::decodeBuffer4444 = DecoderBuffer("4,4,4,4", 4, buffer4);
|
const DecoderBuffer DecoderBuffer::decodeBuffer4444 = DecoderBuffer("4,4,4,4", 4, buffer4);
|
||||||
|
|
||||||
const DecoderBuffer DecoderBuffer::decodeBuffer3733 = DecoderBuffer("3,7,3,3", 5, buffer5);
|
const DecoderBuffer DecoderBuffer::decodeBuffer3733 = DecoderBuffer("3,7,3,3", 5, buffer5);
|
||||||
const DecoderBuffer DecoderBuffer::decodeBuffer3373 = DecoderBuffer("3,3,7,3", 6, buffer6);
|
const DecoderBuffer DecoderBuffer::decodeBuffer3373 = DecoderBuffer("3,3,7,3", 6, buffer6);
|
||||||
const DecoderBuffer DecoderBuffer::decodeBuffer133 = DecoderBuffer("13,3", 7, buffer7);
|
const DecoderBuffer DecoderBuffer::decodeBuffer133 = DecoderBuffer("13,3", 7, buffer7);
|
||||||
|
|
||||||
const DecoderBuffer* DecoderBuffer::decodeBuffers[7] = {
|
const DecoderBuffer* DecoderBuffer::decodeBuffers[7] = {
|
||||||
&DecoderBuffer::decodeBuffer3310,
|
&DecoderBuffer::decodeBuffer3310,
|
||||||
&DecoderBuffer::decodeBuffer7333,
|
|
||||||
&DecoderBuffer::decodeBuffer3337,
|
&DecoderBuffer::decodeBuffer3337,
|
||||||
&DecoderBuffer::decodeBuffer4444,
|
|
||||||
&DecoderBuffer::decodeBuffer4444,
|
|
||||||
&DecoderBuffer::decodeBuffer3733,
|
&DecoderBuffer::decodeBuffer3733,
|
||||||
&DecoderBuffer::decodeBuffer3373,
|
&DecoderBuffer::decodeBuffer3373,
|
||||||
};
|
};
|
||||||
|
@@ -417,8 +419,8 @@ namespace RandomX {
|
||||||
const LightInstructionInfo* slot_3[] = { &LightInstructionInfo::ISUB_R, &LightInstructionInfo::IXOR_R };
|
const LightInstructionInfo* slot_3[] = { &LightInstructionInfo::ISUB_R, &LightInstructionInfo::IXOR_R };
|
||||||
const LightInstructionInfo* slot_3L[] = { &LightInstructionInfo::ISUB_R, &LightInstructionInfo::IXOR_R, &LightInstructionInfo::IMULH_R, &LightInstructionInfo::ISMULH_R };
|
const LightInstructionInfo* slot_3L[] = { &LightInstructionInfo::ISUB_R, &LightInstructionInfo::IXOR_R, &LightInstructionInfo::IMULH_R, &LightInstructionInfo::ISMULH_R };
|
||||||
const LightInstructionInfo* slot_3C[] = { &LightInstructionInfo::ISUB_R, &LightInstructionInfo::IXOR_R, &LightInstructionInfo::IROR_R, &LightInstructionInfo::IXOR_R };
|
const LightInstructionInfo* slot_3C[] = { &LightInstructionInfo::ISUB_R, &LightInstructionInfo::IXOR_R, &LightInstructionInfo::IROR_R, &LightInstructionInfo::IXOR_R };
|
||||||
const LightInstructionInfo* slot_4[] = { &LightInstructionInfo::IMUL_R, &LightInstructionInfo::IROR_C, &LightInstructionInfo::IADD_RS, &LightInstructionInfo::IMUL_R };
|
const LightInstructionInfo* slot_4[] = { &LightInstructionInfo::IROR_C, &LightInstructionInfo::IADD_RS };
|
||||||
const LightInstructionInfo* slot_7[] = { &LightInstructionInfo::ISUB_C, &LightInstructionInfo::IMUL_C, &LightInstructionInfo::IXOR_C, &LightInstructionInfo::ISUB_C };
|
const LightInstructionInfo* slot_7[] = { &LightInstructionInfo::IXOR_C, &LightInstructionInfo::ISUB_C };
|
||||||
const LightInstructionInfo* slot_7L = &LightInstructionInfo::COND_R;
|
const LightInstructionInfo* slot_7L = &LightInstructionInfo::COND_R;
|
||||||
const LightInstructionInfo* slot_10 = &LightInstructionInfo::IMUL_RCP;
|
const LightInstructionInfo* slot_10 = &LightInstructionInfo::IMUL_RCP;
|
||||||
|
|
||||||
|
@@ -448,27 +450,34 @@ namespace RandomX {
|
||||||
instr.setImm32(imm32_);
|
instr.setImm32(imm32_);
|
||||||
}
|
}
|
||||||
|
|
||||||
static LightInstruction createForSlot(Blake2Generator& gen, int slotSize, bool isLast = false, bool complex = false) {
|
static LightInstruction createForSlot(Blake2Generator& gen, int slotSize, int fetchType, bool isLast, bool isFirst) {
|
||||||
switch (slotSize)
|
switch (slotSize)
|
||||||
{
|
{
|
||||||
case 3:
|
case 3:
|
||||||
if (isLast) {
|
if (isLast) {
|
||||||
return create(slot_3L[gen.getByte() & 3], gen);
|
return create(slot_3L[gen.getByte() & 3], gen);
|
||||||
}
|
}
|
||||||
else if (complex) {
|
else if (false && isFirst && fetchType == 0) {
|
||||||
return create(slot_3C[gen.getByte() & 3], gen);
|
return create(slot_3C[gen.getByte() & 3], gen);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
return create(slot_3[gen.getByte() & 1], gen);
|
return create(slot_3[gen.getByte() & 1], gen);
|
||||||
}
|
}
|
||||||
case 4:
|
case 4:
|
||||||
return create(slot_4[gen.getByte() & 3], gen);
|
if (fetchType == 3 && !isLast) {
|
||||||
|
return create(&LightInstructionInfo::IMUL_R, gen);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
return create(slot_4[gen.getByte() & 1], gen);
|
||||||
|
}
|
||||||
case 7:
|
case 7:
|
||||||
if (false && isLast) {
|
if (false && isLast) {
|
||||||
return create(slot_7L, gen);
|
return create(slot_7L, gen);
|
||||||
}
|
}
|
||||||
else {
|
if (false && isFirst) {
|
||||||
return create(slot_7[gen.getByte() & 3], gen);
|
return create(&LightInstructionInfo::IMUL_C, gen);
|
||||||
|
} else {
|
||||||
|
return create(slot_7[gen.getByte() & 1], gen);
|
||||||
}
|
}
|
||||||
case 10:
|
case 10:
|
||||||
return create(slot_10, gen);
|
return create(slot_10, gen);
|
||||||
|
@@ -664,7 +673,11 @@ namespace RandomX {
|
||||||
constexpr int V4_SRC_INDEX_BITS = 3;
|
constexpr int V4_SRC_INDEX_BITS = 3;
|
||||||
constexpr int V4_DST_INDEX_BITS = 3;
|
constexpr int V4_DST_INDEX_BITS = 3;
|
||||||
constexpr int CYCLE_MAP_SIZE = RANDOMX_LPROG_LATENCY + 3;
|
constexpr int CYCLE_MAP_SIZE = RANDOMX_LPROG_LATENCY + 3;
|
||||||
|
#ifndef _DEBUG
|
||||||
constexpr bool TRACE = false;
|
constexpr bool TRACE = false;
|
||||||
|
#else
|
||||||
|
constexpr bool TRACE = true;
|
||||||
|
#endif
|
||||||
|
|
||||||
static int blakeCounter = 0;
|
static int blakeCounter = 0;
|
||||||
|
|
||||||
|
@@ -803,7 +816,7 @@ namespace RandomX {
|
||||||
constexpr int MAX_ATTEMPTS = 4;
|
constexpr int MAX_ATTEMPTS = 4;
|
||||||
|
|
||||||
while(!portsSaturated) {
|
while(!portsSaturated) {
|
||||||
fetchLine = fetchLine->fetchNext(currentInstruction.getType(), gen);
|
fetchLine = fetchLine->fetchNext(currentInstruction.getType(), cycle, mulCount, gen);
|
||||||
if (TRACE) std::cout << "; ------------- fetch cycle " << cycle << " (" << fetchLine->getName() << ")" << std::endl;
|
if (TRACE) std::cout << "; ------------- fetch cycle " << cycle << " (" << fetchLine->getName() << ")" << std::endl;
|
||||||
|
|
||||||
mopIndex = 0;
|
mopIndex = 0;
|
||||||
|
@@ -813,7 +826,7 @@ namespace RandomX {
|
||||||
if (instrIndex >= currentInstruction.getInfo().getSize()) {
|
if (instrIndex >= currentInstruction.getInfo().getSize()) {
|
||||||
if (portsSaturated)
|
if (portsSaturated)
|
||||||
break;
|
break;
|
||||||
currentInstruction = LightInstruction::createForSlot(gen, fetchLine->getCounts()[mopIndex], fetchLine->getSize() == mopIndex + 1, fetchLine->getIndex() == 0 && mopIndex == 0);
|
currentInstruction = LightInstruction::createForSlot(gen, fetchLine->getCounts()[mopIndex], fetchLine->getIndex(), fetchLine->getSize() == mopIndex + 1, mopIndex == 0);
|
||||||
instrIndex = 0;
|
instrIndex = 0;
|
||||||
if (TRACE) std::cout << "; " << currentInstruction.getInfo().getName() << std::endl;
|
if (TRACE) std::cout << "; " << currentInstruction.getInfo().getName() << std::endl;
|
||||||
}
|
}
|
||||||
|
|
|
@@ -1,8 +1,8 @@
|
||||||
;xor r8, qword ptr [rbx+0]
|
xor r8, qword ptr [rbx+0]
|
||||||
;xor r9, qword ptr [rbx+8]
|
xor r9, qword ptr [rbx+8]
|
||||||
;xor r10, qword ptr [rbx+16]
|
xor r10, qword ptr [rbx+16]
|
||||||
;xor r11, qword ptr [rbx+24]
|
xor r11, qword ptr [rbx+24]
|
||||||
;xor r12, qword ptr [rbx+32]
|
xor r12, qword ptr [rbx+32]
|
||||||
;xor r13, qword ptr [rbx+40]
|
xor r13, qword ptr [rbx+40]
|
||||||
;xor r14, qword ptr [rbx+48]
|
xor r14, qword ptr [rbx+48]
|
||||||
;xor r15, qword ptr [rbx+56]
|
xor r15, qword ptr [rbx+56]
|
|
@@ -1,4 +1,4 @@
|
||||||
and rbx, 4194303
|
and rbx, 4194303
|
||||||
shl rbx, 6
|
shl rbx, 6
|
||||||
add rbx, rdi
|
add rbx, rdi
|
||||||
; prefetchnta byte ptr [rbx]
|
prefetchnta byte ptr [rbx]
|
|
@@ -37,7 +37,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||||
//Number of random Cache accesses per Dataset block. Minimum is 2.
|
//Number of random Cache accesses per Dataset block. Minimum is 2.
|
||||||
#define RANDOMX_CACHE_ACCESSES 8
|
#define RANDOMX_CACHE_ACCESSES 8
|
||||||
|
|
||||||
#define RANDOMX_LPROG_LATENCY 130
|
#define RANDOMX_LPROG_LATENCY 170
|
||||||
#define RANDOMX_LPROG_ASIC_LATENCY 84
|
#define RANDOMX_LPROG_ASIC_LATENCY 84
|
||||||
#define RANDOMX_LPROG_MIN_SIZE 225
|
#define RANDOMX_LPROG_MIN_SIZE 225
|
||||||
#define RANDOMX_LPROG_MAX_SIZE 512
|
#define RANDOMX_LPROG_MAX_SIZE 512
|
||||||
|
|
Loading…
Reference in a new issue