mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2024-08-15 00:23:14 +00:00
Port mapping
This commit is contained in:
parent
2fd0a125b5
commit
acef5ea0d7
1 changed files with 261 additions and 113 deletions
|
@ -23,6 +23,8 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
|||
#include "blake2/endian.h";
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include <stdexcept>
|
||||
|
||||
namespace RandomX {
|
||||
// Intel Ivy Bridge reference
|
||||
|
@ -101,9 +103,9 @@ namespace RandomX {
|
|||
constexpr type Null = 0;
|
||||
constexpr type P0 = 1;
|
||||
constexpr type P1 = 2;
|
||||
constexpr type P5 = 4;
|
||||
constexpr type P05 = 6;
|
||||
constexpr type P015 = 7;
|
||||
constexpr type P5 = 3;
|
||||
constexpr type P05 = 4;
|
||||
constexpr type P015 = 5;
|
||||
}
|
||||
|
||||
class Blake2Generator {
|
||||
|
@ -138,6 +140,15 @@ namespace RandomX {
|
|||
}
|
||||
};
|
||||
|
||||
// Per-register bookkeeping record used by the light-program generator.
//
// All members are public plain integers. A default-constructed record has
// lastOpGroup == -1, source == -1, value == 0 and latency == 0.
class RegisterInfo {
public:
	RegisterInfo() : lastOpGroup(-1), source(-1), value(0), latency(0) {}

	// -1 until set. NOTE(review): presumably the op group of the last
	// instruction that wrote this register -- confirm against the generator.
	int lastOpGroup;
	// -1 until set. NOTE(review): presumably the source register of that
	// last instruction -- confirm.
	int source;
	// 0 until set; meaning is not visible in this part of the file.
	int value;
	// Compared against the current fetch cycle by the generator: a register
	// is treated as available once latency <= cycle.
	int latency;
};
|
||||
|
||||
class MacroOp {
|
||||
public:
|
||||
MacroOp(const char* name, int size)
|
||||
|
@ -146,6 +157,8 @@ namespace RandomX {
|
|||
: name_(name), size_(size), latency_(latency), uop1_(uop), uop2_(ExecutionPort::Null) {}
|
||||
MacroOp(const char* name, int size, int latency, ExecutionPort::type uop1, ExecutionPort::type uop2)
|
||||
: name_(name), size_(size), latency_(latency), uop1_(uop1), uop2_(uop2) {}
|
||||
MacroOp(const MacroOp& parent, bool dependent)
|
||||
: name_(parent.name_), size_(parent.size_), latency_(parent.latency_), uop1_(parent.uop1_), uop2_(parent.uop2_), dependent_(dependent) {}
|
||||
const char* getName() const {
|
||||
return name_;
|
||||
}
|
||||
|
@ -167,6 +180,27 @@ namespace RandomX {
|
|||
bool isEliminated() const {
|
||||
return uop1_ == ExecutionPort::Null;
|
||||
}
|
||||
bool isDependent() const {
|
||||
return dependent_;
|
||||
}
|
||||
int getCycle() const {
|
||||
return cycle_;
|
||||
}
|
||||
void setCycle(int cycle) {
|
||||
cycle_ = cycle;
|
||||
}
|
||||
MacroOp* getSrcDep() const {
|
||||
return depSrc_;
|
||||
}
|
||||
void setSrcDep(MacroOp* src) {
|
||||
depSrc_ = src;
|
||||
}
|
||||
MacroOp* getDstDep() const {
|
||||
return depDst_;
|
||||
}
|
||||
void setDstDep(MacroOp* dst) {
|
||||
depDst_ = dst;
|
||||
}
|
||||
static const MacroOp Add_rr;
|
||||
static const MacroOp Add_ri;
|
||||
static const MacroOp Lea_sib;
|
||||
|
@ -191,6 +225,10 @@ namespace RandomX {
|
|||
int latency_;
|
||||
ExecutionPort::type uop1_;
|
||||
ExecutionPort::type uop2_;
|
||||
int cycle_;
|
||||
bool dependent_ = false;
|
||||
MacroOp* depDst_ = nullptr;
|
||||
MacroOp* depSrc_ = nullptr;
|
||||
};
|
||||
|
||||
const MacroOp MacroOp::Add_rr = MacroOp("add r,r", 3, 1, ExecutionPort::P015);
|
||||
|
@ -212,49 +250,56 @@ namespace RandomX {
|
|||
const MacroOp MacroOp::Setcc_r = MacroOp("setcc cl", 3, 1, ExecutionPort::P05);
|
||||
const MacroOp MacroOp::TestJmp_fused = MacroOp("testjmp r,i", 13, 0, ExecutionPort::P5);
|
||||
|
||||
// Returns a pointer to the first element of the built-in array `arr`.
// The array is taken by reference so its extent N is deduced at compile time.
template <typename T, size_t N>
T* begin(T(&arr)[N]) { return &arr[0]; }

// Returns a pointer one past the last element of the built-in array `arr`,
// so that end(arr) - begin(arr) == N.
template <typename T, size_t N>
T* end(T(&arr)[N]) { return &arr[0] + N; }
|
||||
|
||||
const MacroOp* IMULH_R_ops_array[] = { &MacroOp::Mov_rr, &MacroOp::Mul_r, &MacroOp::Mov_rr };
|
||||
const MacroOp* ISMULH_R_ops_array[] = { &MacroOp::Mov_rr, &MacroOp::Imul_r, &MacroOp::Mov_rr };
|
||||
const MacroOp* IMUL_RCP_ops_array[] = { &MacroOp::Mov_ri64, &MacroOp::Imul_rr };
|
||||
const MacroOp* IROR_R_ops_array[] = { &MacroOp::Mov_rr, &MacroOp::Ror_rcl };
|
||||
const MacroOp* COND_R_ops_array[] = { &MacroOp::Add_ri, &MacroOp::TestJmp_fused, &MacroOp::Xor_self, &MacroOp::Cmp_ri, &MacroOp::Setcc_r, &MacroOp::Add_rr };
|
||||
const MacroOp IMULH_R_ops_array[] = { MacroOp::Mov_rr, MacroOp::Mul_r, MacroOp::Mov_rr };
|
||||
const MacroOp ISMULH_R_ops_array[] = { MacroOp::Mov_rr, MacroOp::Imul_r, MacroOp::Mov_rr };
|
||||
const MacroOp IMUL_RCP_ops_array[] = { MacroOp::Mov_ri64, MacroOp(MacroOp::Imul_rr, true) };
|
||||
const MacroOp IROR_R_ops_array[] = { MacroOp::Mov_rr, MacroOp::Ror_rcl };
|
||||
const MacroOp COND_R_ops_array[] = { MacroOp::Add_ri, MacroOp(MacroOp::TestJmp_fused, true), MacroOp::Xor_self, MacroOp::Cmp_ri, MacroOp(MacroOp::Setcc_r, true), MacroOp(MacroOp::Add_rr, true) };
|
||||
|
||||
|
||||
class LightInstructionInfo {
|
||||
public:
|
||||
LightInstructionInfo(const char* name, const MacroOp* op)
|
||||
: name_(name), op_(op), opsCount_(1), latency_(op->getLatency()) {}
|
||||
LightInstructionInfo(const char* name, int type, const MacroOp& op)
|
||||
: name_(name), type_(type), latency_(op.getLatency()) {
|
||||
ops_.push_back(MacroOp(op));
|
||||
}
|
||||
template <size_t N>
|
||||
LightInstructionInfo(const char* name, const MacroOp*(&arr)[N])
|
||||
: name_(name), ops_(arr), opsCount_(N), latency_(0) {
|
||||
LightInstructionInfo(const char* name, int type, const MacroOp(&arr)[N])
|
||||
: name_(name), type_(type), latency_(0) {
|
||||
for (unsigned i = 0; i < N; ++i) {
|
||||
latency_ += arr[i]->getLatency();
|
||||
ops_.push_back(MacroOp(arr[i]));
|
||||
latency_ += ops_.back().getLatency();
|
||||
}
|
||||
static_assert(N > 1, "Invalid array size");
|
||||
}
|
||||
template <size_t N>
|
||||
LightInstructionInfo(const char* name, const MacroOp*(&arr)[N], int latency)
|
||||
: name_(name), ops_(arr), opsCount_(N), latency_(latency) {
|
||||
LightInstructionInfo(const char* name, int type, const MacroOp*(&arr)[N], int latency)
|
||||
: name_(name), type_(type), latency_(latency) {
|
||||
for (unsigned i = 0; i < N; ++i) {
|
||||
ops_.push_back(MacroOp(arr[i]));
|
||||
if (arr[i].isDependent()) {
|
||||
ops_[i].setSrcDep(&ops_[i - 1]);
|
||||
}
|
||||
}
|
||||
static_assert(N > 1, "Invalid array size");
|
||||
}
|
||||
const char* getName() const {
|
||||
return name_;
|
||||
}
|
||||
int getSize() const {
|
||||
return opsCount_;
|
||||
return ops_.size();
|
||||
}
|
||||
bool isSimple() const {
|
||||
return opsCount_ == 1;
|
||||
return getSize() == 1;
|
||||
}
|
||||
int getLatency() const {
|
||||
return latency_;
|
||||
}
|
||||
const MacroOp* getOp(int index) const {
|
||||
return opsCount_ > 1 ? ops_[index] : op_;
|
||||
MacroOp& getOp(int index) {
|
||||
return ops_[index];
|
||||
}
|
||||
int getType() const {
|
||||
return type_;
|
||||
}
|
||||
static const LightInstructionInfo IADD_R;
|
||||
static const LightInstructionInfo IADD_C;
|
||||
|
@ -274,32 +319,29 @@ namespace RandomX {
|
|||
static const LightInstructionInfo NOP;
|
||||
private:
|
||||
const char* name_;
|
||||
union {
|
||||
const MacroOp** ops_;
|
||||
const MacroOp* op_;
|
||||
};
|
||||
int opsCount_;
|
||||
int type_;
|
||||
std::vector<MacroOp> ops_;
|
||||
int latency_;
|
||||
|
||||
LightInstructionInfo(const char* name)
|
||||
: name_(name), opsCount_(0), latency_(0) {}
|
||||
: name_(name), type_(-1), latency_(0) {}
|
||||
};
|
||||
|
||||
const LightInstructionInfo LightInstructionInfo::IADD_R = LightInstructionInfo("IADD_R", &MacroOp::Add_rr);
|
||||
const LightInstructionInfo LightInstructionInfo::IADD_C = LightInstructionInfo("IADD_C", &MacroOp::Add_ri);
|
||||
const LightInstructionInfo LightInstructionInfo::IADD_RC = LightInstructionInfo("IADD_RC", &MacroOp::Lea_sib);
|
||||
const LightInstructionInfo LightInstructionInfo::ISUB_R = LightInstructionInfo("ISUB_R", &MacroOp::Sub_rr);
|
||||
const LightInstructionInfo LightInstructionInfo::IMUL_9C = LightInstructionInfo("IMUL_9C", &MacroOp::Lea_sib);
|
||||
const LightInstructionInfo LightInstructionInfo::IMUL_R = LightInstructionInfo("IMUL_R", &MacroOp::Imul_rr);
|
||||
const LightInstructionInfo LightInstructionInfo::IMUL_C = LightInstructionInfo("IMUL_C", &MacroOp::Imul_rri);
|
||||
const LightInstructionInfo LightInstructionInfo::IMULH_R = LightInstructionInfo("IMULH_R", IMULH_R_ops_array);
|
||||
const LightInstructionInfo LightInstructionInfo::ISMULH_R = LightInstructionInfo("ISMULH_R", ISMULH_R_ops_array);
|
||||
const LightInstructionInfo LightInstructionInfo::IMUL_RCP = LightInstructionInfo("IMUL_RCP", IMUL_RCP_ops_array);
|
||||
const LightInstructionInfo LightInstructionInfo::IXOR_R = LightInstructionInfo("IXOR_R", &MacroOp::Xor_rr);
|
||||
const LightInstructionInfo LightInstructionInfo::IXOR_C = LightInstructionInfo("IXOR_C", &MacroOp::Xor_ri);
|
||||
const LightInstructionInfo LightInstructionInfo::IROR_R = LightInstructionInfo("IROR_R", IROR_R_ops_array);
|
||||
const LightInstructionInfo LightInstructionInfo::IROR_C = LightInstructionInfo("IROR_C", &MacroOp::Ror_ri);
|
||||
const LightInstructionInfo LightInstructionInfo::COND_R = LightInstructionInfo("COND_R", COND_R_ops_array);
|
||||
const LightInstructionInfo LightInstructionInfo::IADD_R = LightInstructionInfo("IADD_R", LightInstructionType::IADD_R, MacroOp::Add_rr);
|
||||
const LightInstructionInfo LightInstructionInfo::IADD_C = LightInstructionInfo("IADD_C", LightInstructionType::IADD_C, MacroOp::Add_ri);
|
||||
const LightInstructionInfo LightInstructionInfo::IADD_RC = LightInstructionInfo("IADD_RC", LightInstructionType::IADD_RC, MacroOp::Lea_sib);
|
||||
const LightInstructionInfo LightInstructionInfo::ISUB_R = LightInstructionInfo("ISUB_R", LightInstructionType::ISUB_R, MacroOp::Sub_rr);
|
||||
const LightInstructionInfo LightInstructionInfo::IMUL_9C = LightInstructionInfo("IMUL_9C", LightInstructionType::IMUL_9C, MacroOp::Lea_sib);
|
||||
const LightInstructionInfo LightInstructionInfo::IMUL_R = LightInstructionInfo("IMUL_R", LightInstructionType::IMUL_R, MacroOp::Imul_rr);
|
||||
const LightInstructionInfo LightInstructionInfo::IMUL_C = LightInstructionInfo("IMUL_C", LightInstructionType::IMUL_C, MacroOp::Imul_rri);
|
||||
const LightInstructionInfo LightInstructionInfo::IMULH_R = LightInstructionInfo("IMULH_R", LightInstructionType::IMULH_R, IMULH_R_ops_array);
|
||||
const LightInstructionInfo LightInstructionInfo::ISMULH_R = LightInstructionInfo("ISMULH_R", LightInstructionType::ISMULH_R, ISMULH_R_ops_array);
|
||||
const LightInstructionInfo LightInstructionInfo::IMUL_RCP = LightInstructionInfo("IMUL_RCP", LightInstructionType::IMUL_RCP, IMUL_RCP_ops_array);
|
||||
const LightInstructionInfo LightInstructionInfo::IXOR_R = LightInstructionInfo("IXOR_R", LightInstructionType::IXOR_R, MacroOp::Xor_rr);
|
||||
const LightInstructionInfo LightInstructionInfo::IXOR_C = LightInstructionInfo("IXOR_C", LightInstructionType::IXOR_C, MacroOp::Xor_ri);
|
||||
const LightInstructionInfo LightInstructionInfo::IROR_R = LightInstructionInfo("IROR_R", LightInstructionType::IROR_R, IROR_R_ops_array);
|
||||
const LightInstructionInfo LightInstructionInfo::IROR_C = LightInstructionInfo("IROR_C", LightInstructionType::IROR_C, MacroOp::Ror_ri);
|
||||
const LightInstructionInfo LightInstructionInfo::COND_R = LightInstructionInfo("COND_R", LightInstructionType::COND_R, COND_R_ops_array);
|
||||
const LightInstructionInfo LightInstructionInfo::NOP = LightInstructionInfo("NOP");
|
||||
|
||||
const int buffer0[] = { 3, 3, 10 };
|
||||
|
@ -375,20 +417,37 @@ namespace RandomX {
|
|||
|
||||
DecoderBuffer DecoderBuffer::Default = DecoderBuffer();
|
||||
|
||||
const int slot_3[] = { LightInstructionType::IADD_R, LightInstructionType::ISUB_R, LightInstructionType::IXOR_R, LightInstructionType::IADD_R };
|
||||
const int slot_3L[] = { LightInstructionType::IADD_R, LightInstructionType::ISUB_R, LightInstructionType::IXOR_R, LightInstructionType::IMULH_R, LightInstructionType::ISMULH_R, LightInstructionType::IXOR_R, LightInstructionType::IMULH_R, LightInstructionType::ISMULH_R };
|
||||
const int slot_3F[] = { LightInstructionType::IADD_R, LightInstructionType::ISUB_R, LightInstructionType::IXOR_R, LightInstructionType::IROR_R };
|
||||
const int slot_4[] = { LightInstructionType::IMUL_R, LightInstructionType::IROR_C };
|
||||
const int slot_7[] = { LightInstructionType::IADD_C, LightInstructionType::IMUL_C, LightInstructionType::IXOR_C, LightInstructionType::IXOR_C };
|
||||
const int slot_7L = LightInstructionType::COND_R;
|
||||
const int slot_8[] = { LightInstructionType::IADD_RC, LightInstructionType::IMUL_9C };
|
||||
const int slot_10 = LightInstructionType::IMUL_RCP;
|
||||
const LightInstructionInfo* slot_3[] = { &LightInstructionInfo::IADD_R, &LightInstructionInfo::ISUB_R, &LightInstructionInfo::IXOR_R, &LightInstructionInfo::IADD_R };
|
||||
const LightInstructionInfo* slot_3L[] = { &LightInstructionInfo::IADD_R, &LightInstructionInfo::ISUB_R, &LightInstructionInfo::IXOR_R, &LightInstructionInfo::IMULH_R, &LightInstructionInfo::ISMULH_R, &LightInstructionInfo::IXOR_R, &LightInstructionInfo::IMULH_R, &LightInstructionInfo::ISMULH_R };
|
||||
const LightInstructionInfo* slot_3F[] = { &LightInstructionInfo::IADD_R, &LightInstructionInfo::ISUB_R, &LightInstructionInfo::IXOR_R, &LightInstructionInfo::IROR_R };
|
||||
const LightInstructionInfo* slot_4[] = { &LightInstructionInfo::IMUL_R, &LightInstructionInfo::IROR_C };
|
||||
const LightInstructionInfo* slot_7[] = { &LightInstructionInfo::IADD_C, &LightInstructionInfo::IMUL_C, &LightInstructionInfo::IXOR_C, &LightInstructionInfo::IXOR_C };
|
||||
const LightInstructionInfo* slot_7L = &LightInstructionInfo::COND_R;
|
||||
const LightInstructionInfo* slot_8[] = { &LightInstructionInfo::IADD_RC, &LightInstructionInfo::IMUL_9C };
|
||||
const LightInstructionInfo* slot_10 = &LightInstructionInfo::IMUL_RCP;
|
||||
|
||||
template<bool erase>
|
||||
static int selectRegister(std::vector<int>& availableRegisters, Blake2Generator& gen) {
|
||||
if (availableRegisters.size() == 0)
|
||||
throw std::runtime_error("No avialable registers");
|
||||
int index;
|
||||
if (availableRegisters.size() > 1) {
|
||||
index = gen.getInt32() % availableRegisters.size();
|
||||
}
|
||||
else {
|
||||
index = 0;
|
||||
}
|
||||
int select = availableRegisters[index];
|
||||
if (erase)
|
||||
availableRegisters.erase(availableRegisters.begin() + index);
|
||||
return select;
|
||||
}
|
||||
|
||||
class LightInstruction {
|
||||
public:
|
||||
Instruction toInstr() {
|
||||
Instruction instr;
|
||||
instr.opcode = lightInstructionOpcode[type_];
|
||||
instr.opcode = lightInstructionOpcode[getType()];
|
||||
instr.dst = dst_;
|
||||
instr.src = src_ >= 0 ? src_ : dst_;
|
||||
instr.mod = mod_;
|
||||
|
@ -396,42 +455,40 @@ namespace RandomX {
|
|||
return instr;
|
||||
}
|
||||
|
||||
static LightInstruction createForSlot(Blake2Generator& gen, int slotSize, bool isLast = false, bool isFirst = false) {
|
||||
static LightInstruction createForSlot(Blake2Generator& gen, int slotSize, std::vector<int>& availableRegisters, bool isLast = false, bool isFirst = false) {
|
||||
switch (slotSize)
|
||||
{
|
||||
case 3:
|
||||
if (isLast) {
|
||||
return create(slot_3L[gen.getByte() & 7], gen);
|
||||
return create(slot_3L[gen.getByte() & 7], availableRegisters, gen);
|
||||
}
|
||||
else if (isFirst) {
|
||||
return create(slot_3F[gen.getByte() & 3], gen);
|
||||
return create(slot_3F[gen.getByte() & 3], availableRegisters, gen);
|
||||
}
|
||||
else {
|
||||
return create(slot_3[gen.getByte() & 3], gen);
|
||||
return create(slot_3[gen.getByte() & 3], availableRegisters, gen);
|
||||
}
|
||||
case 4:
|
||||
return create(slot_4[gen.getByte() & 1], gen);
|
||||
return create(slot_4[gen.getByte() & 1], availableRegisters, gen);
|
||||
case 7:
|
||||
if (isLast) {
|
||||
return create(slot_7L, gen);
|
||||
return create(slot_7L, availableRegisters, gen);
|
||||
}
|
||||
else {
|
||||
return create(slot_7[gen.getByte() & 3], gen);
|
||||
return create(slot_7[gen.getByte() & 3], availableRegisters, gen);
|
||||
}
|
||||
case 8:
|
||||
return create(slot_8[gen.getByte() & 1], gen);
|
||||
return create(slot_8[gen.getByte() & 1], availableRegisters, gen);
|
||||
case 10:
|
||||
return create(slot_10, gen);
|
||||
return create(slot_10, availableRegisters, gen);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static LightInstruction create(int type, Blake2Generator& gen) {
|
||||
LightInstruction li;
|
||||
li.type_ = type;
|
||||
li.opGroup_ = type;
|
||||
switch (type)
|
||||
static LightInstruction create(const LightInstructionInfo* info, std::vector<int>& availableRegisters, Blake2Generator& gen) {
|
||||
LightInstruction li(info);
|
||||
switch (info->getType())
|
||||
{
|
||||
case LightInstructionType::IADD_R: {
|
||||
li.dst_ = gen.getByte() & 7;
|
||||
|
@ -440,7 +497,6 @@ namespace RandomX {
|
|||
} while (li.dst_ == li.src_);
|
||||
li.mod_ = 0;
|
||||
li.imm32_ = 0;
|
||||
li.info_ = &LightInstructionInfo::IADD_R;
|
||||
li.opGroup_ = LightInstructionType::IADD_R;
|
||||
li.opGroupPar_ = li.src_;
|
||||
} break;
|
||||
|
@ -450,7 +506,6 @@ namespace RandomX {
|
|||
li.src_ = -1;
|
||||
li.mod_ = 0;
|
||||
li.imm32_ = gen.getInt32();
|
||||
li.info_ = &LightInstructionInfo::IADD_C;
|
||||
li.opGroup_ = LightInstructionType::IADD_R;
|
||||
li.opGroupPar_ = li.src_;
|
||||
} break;
|
||||
|
@ -462,7 +517,6 @@ namespace RandomX {
|
|||
} while (li.dst_ == li.src_);
|
||||
li.mod_ = 0;
|
||||
li.imm32_ = gen.getInt32();
|
||||
li.info_ = &LightInstructionInfo::IADD_RC;
|
||||
li.opGroup_ = LightInstructionType::IADD_R;
|
||||
li.opGroupPar_ = li.src_;
|
||||
} break;
|
||||
|
@ -474,7 +528,6 @@ namespace RandomX {
|
|||
} while (li.dst_ == li.src_);
|
||||
li.mod_ = 0;
|
||||
li.imm32_ = 0;
|
||||
li.info_ = &LightInstructionInfo::ISUB_R;
|
||||
li.opGroup_ = LightInstructionType::IADD_R;
|
||||
li.opGroupPar_ = li.src_;
|
||||
} break;
|
||||
|
@ -486,7 +539,6 @@ namespace RandomX {
|
|||
} while (li.dst_ == li.src_);
|
||||
li.mod_ = 0;
|
||||
li.imm32_ = gen.getInt32();
|
||||
li.info_ = &LightInstructionInfo::IMUL_9C;
|
||||
li.opGroup_ = LightInstructionType::IMUL_C;
|
||||
li.opGroupPar_ = -1;
|
||||
} break;
|
||||
|
@ -498,7 +550,6 @@ namespace RandomX {
|
|||
} while (li.dst_ == li.src_);
|
||||
li.mod_ = 0;
|
||||
li.imm32_ = 0;
|
||||
li.info_ = &LightInstructionInfo::IMUL_R;
|
||||
li.opGroup_ = LightInstructionType::IMUL_R;
|
||||
li.opGroupPar_ = gen.getInt32();
|
||||
} break;
|
||||
|
@ -508,7 +559,6 @@ namespace RandomX {
|
|||
li.src_ = -1;
|
||||
li.mod_ = 0;
|
||||
li.imm32_ = gen.getInt32();
|
||||
li.info_ = &LightInstructionInfo::IMUL_C;
|
||||
li.opGroup_ = LightInstructionType::IMUL_C;
|
||||
li.opGroupPar_ = li.src_;
|
||||
} break;
|
||||
|
@ -518,7 +568,6 @@ namespace RandomX {
|
|||
li.src_ = gen.getByte() & 7;
|
||||
li.mod_ = 0;
|
||||
li.imm32_ = 0;
|
||||
li.info_ = &LightInstructionInfo::IMULH_R;
|
||||
li.opGroup_ = LightInstructionType::IMULH_R;
|
||||
li.opGroupPar_ = gen.getInt32();
|
||||
} break;
|
||||
|
@ -528,7 +577,6 @@ namespace RandomX {
|
|||
li.src_ = gen.getByte() & 7;
|
||||
li.mod_ = 0;
|
||||
li.imm32_ = 0;
|
||||
li.info_ = &LightInstructionInfo::ISMULH_R;
|
||||
li.opGroup_ = LightInstructionType::ISMULH_R;
|
||||
li.opGroupPar_ = gen.getInt32();
|
||||
} break;
|
||||
|
@ -538,7 +586,6 @@ namespace RandomX {
|
|||
li.src_ = -1;
|
||||
li.mod_ = 0;
|
||||
li.imm32_ = gen.getInt32();
|
||||
li.info_ = &LightInstructionInfo::IMUL_RCP;
|
||||
li.opGroup_ = LightInstructionType::IMUL_C;
|
||||
li.opGroupPar_ = -1;
|
||||
} break;
|
||||
|
@ -550,7 +597,6 @@ namespace RandomX {
|
|||
} while (li.dst_ == li.src_);
|
||||
li.mod_ = 0;
|
||||
li.imm32_ = 0;
|
||||
li.info_ = &LightInstructionInfo::IXOR_R;
|
||||
li.opGroup_ = LightInstructionType::IXOR_R;
|
||||
li.opGroupPar_ = li.src_;
|
||||
} break;
|
||||
|
@ -560,7 +606,6 @@ namespace RandomX {
|
|||
li.src_ = -1;
|
||||
li.mod_ = 0;
|
||||
li.imm32_ = gen.getInt32();
|
||||
li.info_ = &LightInstructionInfo::IXOR_C;
|
||||
li.opGroup_ = LightInstructionType::IXOR_R;
|
||||
li.opGroupPar_ = li.src_;
|
||||
} break;
|
||||
|
@ -572,7 +617,6 @@ namespace RandomX {
|
|||
} while (li.dst_ == li.src_);
|
||||
li.mod_ = 0;
|
||||
li.imm32_ = 0;
|
||||
li.info_ = &LightInstructionInfo::IROR_R;
|
||||
li.opGroup_ = LightInstructionType::IROR_R;
|
||||
li.opGroupPar_ = -1;
|
||||
} break;
|
||||
|
@ -582,7 +626,6 @@ namespace RandomX {
|
|||
li.src_ = -1;
|
||||
li.mod_ = 0;
|
||||
li.imm32_ = gen.getByte();
|
||||
li.info_ = &LightInstructionInfo::IROR_C;
|
||||
li.opGroup_ = LightInstructionType::IROR_R;
|
||||
li.opGroupPar_ = -1;
|
||||
} break;
|
||||
|
@ -592,7 +635,6 @@ namespace RandomX {
|
|||
li.src_ = gen.getByte() & 7;
|
||||
li.mod_ = gen.getByte();
|
||||
li.imm32_ = gen.getInt32();
|
||||
li.info_ = &LightInstructionInfo::COND_R;
|
||||
li.opGroup_ = LightInstructionType::COND_R;
|
||||
li.opGroupPar_ = li.imm32_;
|
||||
} break;
|
||||
|
@ -605,7 +647,7 @@ namespace RandomX {
|
|||
}
|
||||
|
||||
int getType() {
|
||||
return type_;
|
||||
return info_.getType();
|
||||
}
|
||||
int getSource() {
|
||||
return src_;
|
||||
|
@ -620,37 +662,32 @@ namespace RandomX {
|
|||
return opGroupPar_;
|
||||
}
|
||||
|
||||
const LightInstructionInfo* getInfo() {
|
||||
LightInstructionInfo& getInfo() {
|
||||
return info_;
|
||||
}
|
||||
|
||||
static const LightInstruction Null;
|
||||
|
||||
private:
|
||||
int type_;
|
||||
LightInstructionInfo info_;
|
||||
int src_;
|
||||
int dst_;
|
||||
int mod_;
|
||||
uint32_t imm32_;
|
||||
|
||||
const LightInstructionInfo* info_;
|
||||
int opGroup_;
|
||||
int opGroupPar_;
|
||||
|
||||
LightInstruction() {}
|
||||
LightInstruction(int type, const LightInstructionInfo* info) : type_(type), info_(info) {}
|
||||
LightInstruction(const LightInstructionInfo* info) : info_(*info) {
|
||||
for (unsigned i = 0; i < info_.getSize(); ++i) {
|
||||
MacroOp& mop = info_.getOp(i);
|
||||
if (mop.isDependent()) {
|
||||
mop.setSrcDep(&info_.getOp(i - 1));
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// NOTE(review): this definition is token-for-token identical to the
// RegisterInfo class that also appears earlier in this view, right after
// Blake2Generator. Presumably this is the pre-move copy shown on the removed
// side of the diff -- confirm before treating it as live code, since keeping
// both would be a duplicate class definition.
// A default-constructed record has lastOpGroup == -1, source == -1,
// value == 0 and latency == 0.
class RegisterInfo {
|
||||
public:
|
||||
RegisterInfo() : lastOpGroup(-1), source(-1), value(0), latency(0) {}
|
||||
int lastOpGroup;
|
||||
int source;
|
||||
int value;
|
||||
int latency;
|
||||
};
|
||||
|
||||
const LightInstruction LightInstruction::Null = LightInstruction(-1, &LightInstructionInfo::NOP);
|
||||
const LightInstruction LightInstruction::Null = LightInstruction(&LightInstructionInfo::NOP);
|
||||
|
||||
constexpr int ALU_COUNT_MUL = 1;
|
||||
constexpr int ALU_COUNT = 4;
|
||||
|
@ -660,6 +697,73 @@ namespace RandomX {
|
|||
|
||||
static int blakeCounter = 0;
|
||||
|
||||
static int scheduleUop(const MacroOp& mop, ExecutionPort::type(&portBusy)[RANDOMX_LPROG_LATENCY + 1][3], int cycle, int depCycle) {
|
||||
if (mop.isDependent()) {
|
||||
cycle = std::max(cycle, depCycle);
|
||||
}
|
||||
if (mop.isEliminated()) {
|
||||
std::cout << "; (eliminated)" << std::endl;
|
||||
return cycle;
|
||||
}
|
||||
else if (mop.isSimple()) {
|
||||
if (mop.getUop1() <= ExecutionPort::P5) {
|
||||
for (; cycle <= RANDOMX_LPROG_LATENCY; ++cycle) {
|
||||
if (!portBusy[cycle][mop.getUop1() - 1]) {
|
||||
std::cout << "; P" << mop.getUop1() - 1 << " at cycle " << cycle << std::endl;
|
||||
portBusy[cycle][mop.getUop1() - 1] = mop.getUop1();
|
||||
return cycle;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (mop.getUop1() == ExecutionPort::P05) {
|
||||
for (; cycle <= RANDOMX_LPROG_LATENCY; ++cycle) {
|
||||
if (!portBusy[cycle][0]) {
|
||||
std::cout << "; P0 at cycle " << cycle << std::endl;
|
||||
portBusy[cycle][0] = mop.getUop1();
|
||||
return cycle;
|
||||
}
|
||||
if (!portBusy[cycle][2]) {
|
||||
std::cout << "; P2 at cycle " << cycle << std::endl;
|
||||
portBusy[cycle][2] = mop.getUop1();
|
||||
return cycle;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
for (; cycle <= RANDOMX_LPROG_LATENCY; ++cycle) {
|
||||
if (!portBusy[cycle][0]) {
|
||||
std::cout << "; P0 at cycle " << cycle << std::endl;
|
||||
portBusy[cycle][0] = mop.getUop1();
|
||||
return cycle;
|
||||
}
|
||||
if (!portBusy[cycle][2]) {
|
||||
std::cout << "; P2 at cycle " << cycle << std::endl;
|
||||
portBusy[cycle][2] = mop.getUop1();
|
||||
return cycle;
|
||||
}
|
||||
if (!portBusy[cycle][1]) {
|
||||
std::cout << "; P1 at cycle " << cycle << std::endl;
|
||||
portBusy[cycle][1] = mop.getUop1();
|
||||
return cycle;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
for (; cycle <= RANDOMX_LPROG_LATENCY; ++cycle) {
|
||||
if (!portBusy[cycle][mop.getUop1() - 1] && !portBusy[cycle][mop.getUop2() - 1]) {
|
||||
std::cout << "; P" << mop.getUop1() - 1 << " P" << mop.getUop2() - 1 << " at cycle " << cycle << std::endl;
|
||||
portBusy[cycle][mop.getUop1() - 1] = mop.getUop1();
|
||||
portBusy[cycle][mop.getUop2() - 1] = mop.getUop2();
|
||||
return cycle;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << "Unable to map operation '" << mop.getName() << "' to execution port";
|
||||
return -1;
|
||||
}
|
||||
|
||||
// If we don't have enough data available, generate more
|
||||
static FORCE_INLINE void check_data(size_t& data_index, const size_t bytes_needed, uint8_t* data, const size_t data_size)
|
||||
{
|
||||
|
@ -673,11 +777,12 @@ namespace RandomX {
|
|||
|
||||
void generateLightProg2(LightProgram& prog, const void* seed, int indexRegister) {
|
||||
|
||||
bool portBusy[RANDOMX_LPROG_LATENCY][3];
|
||||
ExecutionPort::type portBusy[RANDOMX_LPROG_LATENCY + 1][3];
|
||||
memset(portBusy, 0, sizeof(portBusy));
|
||||
RegisterInfo registers[8];
|
||||
bool decoderBusy[RANDOMX_LPROG_LATENCY][4];
|
||||
Blake2Generator gen(seed);
|
||||
std::vector<LightInstruction> instructions;
|
||||
std::vector<int> availableRegisters;
|
||||
|
||||
DecoderBuffer& fetchLine = DecoderBuffer::Default;
|
||||
LightInstruction currentInstruction = LightInstruction::Null;
|
||||
|
@ -685,35 +790,78 @@ namespace RandomX {
|
|||
int codeSize = 0;
|
||||
int macroOpCount = 0;
|
||||
int rxOpCount = 0;
|
||||
|
||||
for (int cycle = 0; cycle < 170; ++cycle) {
|
||||
fetchLine = fetchLine.fetchNext(currentInstruction.getType(), gen);
|
||||
std::cout << "; cycle " << cycle << " buffer " << fetchLine.getName() << std::endl;
|
||||
|
||||
int cycle = 0;
|
||||
int depCycle = 0;
|
||||
int mopIndex = 0;
|
||||
bool portsSaturated = false;
|
||||
|
||||
while (mopIndex < fetchLine.getSize()) {
|
||||
if (instrIndex >= currentInstruction.getInfo()->getSize()) {
|
||||
currentInstruction = LightInstruction::createForSlot(gen, fetchLine.getCounts()[mopIndex], fetchLine.getSize() == mopIndex + 1, fetchLine.getIndex() == 0 && mopIndex == 0);
|
||||
while(!portsSaturated) {
|
||||
fetchLine = fetchLine.fetchNext(currentInstruction.getType(), gen);
|
||||
std::cout << "; ------------- fetch cycle " << cycle << " (" << fetchLine.getName() << ")" << std::endl;
|
||||
|
||||
availableRegisters.clear();
|
||||
for (unsigned i = 0; i < 8; ++i) {
|
||||
if (registers[i].latency <= cycle)
|
||||
availableRegisters.push_back(i);
|
||||
}
|
||||
|
||||
mopIndex = 0;
|
||||
|
||||
while (!portsSaturated && mopIndex < fetchLine.getSize()) {
|
||||
if (instrIndex >= currentInstruction.getInfo().getSize()) {
|
||||
currentInstruction = LightInstruction::createForSlot(gen, fetchLine.getCounts()[mopIndex], availableRegisters, fetchLine.getSize() == mopIndex + 1, fetchLine.getIndex() == 0 && mopIndex == 0);
|
||||
instrIndex = 0;
|
||||
std::cout << "; " << currentInstruction.getInfo()->getName() << std::endl;
|
||||
std::cout << "; " << currentInstruction.getInfo().getName() << std::endl;
|
||||
rxOpCount++;
|
||||
}
|
||||
if (fetchLine.getCounts()[mopIndex] != currentInstruction.getInfo()->getOp(instrIndex)->getSize()) {
|
||||
std::cout << "ERROR instruction " << currentInstruction.getInfo()->getOp(instrIndex)->getName() << " doesn't fit into slot of size " << fetchLine.getCounts()[mopIndex] << std::endl;
|
||||
MacroOp& mop = currentInstruction.getInfo().getOp(instrIndex);
|
||||
if (fetchLine.getCounts()[mopIndex] != mop.getSize()) {
|
||||
std::cout << "ERROR instruction " << mop.getName() << " doesn't fit into slot of size " << fetchLine.getCounts()[mopIndex] << std::endl;
|
||||
return;
|
||||
}
|
||||
std::cout << currentInstruction.getInfo()->getOp(instrIndex)->getName() << std::endl;
|
||||
codeSize += currentInstruction.getInfo()->getOp(instrIndex)->getSize();
|
||||
|
||||
std::cout << mop.getName() << " ";
|
||||
codeSize += mop.getSize();
|
||||
mopIndex++;
|
||||
instrIndex++;
|
||||
macroOpCount++;
|
||||
int scheduleCycle = scheduleUop(mop, portBusy, cycle, depCycle);
|
||||
if (scheduleCycle >= RANDOMX_LPROG_LATENCY) {
|
||||
portsSaturated = true;
|
||||
}
|
||||
mop.setCycle(scheduleCycle);
|
||||
depCycle = scheduleCycle + mop.getLatency();
|
||||
}
|
||||
++cycle;
|
||||
}
|
||||
|
||||
while (instrIndex < currentInstruction.getInfo().getSize()) {
|
||||
if (mopIndex >= fetchLine.getSize()) {
|
||||
fetchLine = fetchLine.fetchNext(currentInstruction.getType(), gen);
|
||||
std::cout << "; cycle " << cycle++ << " buffer " << fetchLine.getName() << std::endl;
|
||||
mopIndex = 0;
|
||||
}
|
||||
MacroOp& mop = currentInstruction.getInfo().getOp(instrIndex);
|
||||
std::cout << mop.getName() << " ";
|
||||
codeSize += mop.getSize();
|
||||
mopIndex++;
|
||||
instrIndex++;
|
||||
macroOpCount++;
|
||||
int scheduleCycle = scheduleUop(mop, portBusy, cycle, depCycle);
|
||||
mop.setCycle(scheduleCycle);
|
||||
depCycle = scheduleCycle + mop.getLatency();
|
||||
}
|
||||
|
||||
std::cout << "; code size " << codeSize << std::endl;
|
||||
std::cout << "; x86 macro-ops: " << macroOpCount << std::endl;
|
||||
std::cout << "; RandomX instructions: " << rxOpCount << std::endl;
|
||||
|
||||
for (int i = 0; i < RANDOMX_LPROG_LATENCY + 1; ++i) {
|
||||
for (int j = 0; j < 3; ++j) {
|
||||
std::cout << (portBusy[i][j] ? '*' : '_');
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
void generateLightProgram(LightProgram& prog, const void* seed, int indexRegister) {
|
||||
|
|
Loading…
Reference in a new issue