Fix header dependency of superscalar_program.hpp

Fix tests
Fix a typo in design.md
This commit is contained in:
tevador 2019-06-24 13:58:41 +02:00
parent e6ba0a1b7d
commit 1f62d787ad
5 changed files with 290 additions and 266 deletions

View File

@ -426,7 +426,7 @@ The following 10 designs were simulated and the average number of clock cycles t
|-------|-----------|----------|---------------|-----------------------|---| |-------|-----------|----------|---------------|-----------------------|---|
|#1|1 EXU + 1 MEM|in-order|non-speculative|293|0.87| |#1|1 EXU + 1 MEM|in-order|non-speculative|293|0.87|
|#2|1 EXU + 1 MEM|in-order|speculative|262|0.98| |#2|1 EXU + 1 MEM|in-order|speculative|262|0.98|
|#3|1 EXU + 1 MEM|in-order|non-speculative|197|1.3| |#3|2 EXU + 1 MEM|in-order|non-speculative|197|1.3|
|#4|2 EXU + 1 MEM|in-order|speculative|161|1.6| |#4|2 EXU + 1 MEM|in-order|speculative|161|1.6|
|#5|2 EXU + 1 MEM|out-of-order|non-speculative|144|1.8| |#5|2 EXU + 1 MEM|out-of-order|non-speculative|144|1.8|
|#6|2 EXU + 1 MEM|out-of-order|speculative|122|2.1| |#6|2 EXU + 1 MEM|out-of-order|speculative|122|2.1|

View File

@ -51,6 +51,7 @@ namespace randomx {
ISMULH_R = 12, //1+2+1 0+(p1,p5)+0 3 3+3+3 (mov+imul+mov) ISMULH_R = 12, //1+2+1 0+(p1,p5)+0 3 3+3+3 (mov+imul+mov)
IMUL_RCP = 13, //1+1 p015+p1 4 10+4 (mov+imul) IMUL_RCP = 13, //1+1 p015+p1 4 10+4 (mov+imul)
COUNT = 14,
INVALID = -1 INVALID = -1
}; };

View File

@ -30,7 +30,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <cstdint> #include <cstdint>
#include "instruction.hpp" #include "instruction.hpp"
#include "configuration.h" #include "common.hpp"
namespace randomx { namespace randomx {

View File

@ -321,7 +321,7 @@ int executeOutOfOrder(randomx::Program& p, randomx::Program& original, bool prin
return cycle; return cycle;
} }
#include "../instruction_weights.hpp" #include "../bytecode_machine.hpp"
//old register selection //old register selection
struct RegisterUsage { struct RegisterUsage {
@ -355,284 +355,307 @@ int analyze(randomx::Program& p) {
auto& instr = p(i); auto& instr = p(i);
int opcode = instr.opcode; int opcode = instr.opcode;
instr.opcode = 0; instr.opcode = 0;
switch (opcode) {
CASE_REP(IADD_RS) {
instr.dst = instr.dst % randomx::RegistersCount;
instr.src = instr.src % randomx::RegistersCount;
instr.opcode |= SRC_INT;
instr.opcode |= DST_INT;
registerUsage[instr.dst].lastUsed = i;
} break;
CASE_REP(IADD_M) { if (opcode < randomx::ceil_IADD_RS) {
instr.dst = instr.dst % randomx::RegistersCount; instr.dst = instr.dst % randomx::RegistersCount;
instr.src = instr.src % randomx::RegistersCount; instr.src = instr.src % randomx::RegistersCount;
instr.opcode |= SRC_MEM; instr.opcode |= SRC_INT;
instr.opcode |= DST_INT; instr.opcode |= DST_INT;
if (instr.src != instr.dst) { registerUsage[instr.dst].lastUsed = i;
instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); continue;
} }
else {
instr.imm32 &= randomx::ScratchpadL3Mask;
}
registerUsage[instr.dst].lastUsed = i;
} break;
CASE_REP(ISUB_R) { if (opcode < randomx::ceil_IADD_M) {
instr.dst = instr.dst % randomx::RegistersCount; instr.dst = instr.dst % randomx::RegistersCount;
instr.src = instr.src % randomx::RegistersCount; instr.src = instr.src % randomx::RegistersCount;
instr.opcode |= DST_INT; instr.opcode |= SRC_MEM;
instr.opcode |= SRC_INT; instr.opcode |= DST_INT;
registerUsage[instr.dst].lastUsed = i; if (instr.src != instr.dst) {
} break;
CASE_REP(ISUB_M) {
instr.dst = instr.dst % randomx::RegistersCount;
instr.src = instr.src % randomx::RegistersCount;
instr.opcode |= SRC_MEM;
instr.opcode |= DST_INT;
if (instr.src != instr.dst) {
instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask);
}
else {
instr.imm32 &= randomx::ScratchpadL3Mask;
}
registerUsage[instr.dst].lastUsed = i;
} break;
CASE_REP(IMUL_R) {
instr.dst = instr.dst % randomx::RegistersCount;
instr.src = instr.src % randomx::RegistersCount;
instr.opcode |= DST_INT;
instr.opcode |= SRC_INT;
registerUsage[instr.dst].lastUsed = i;
} break;
CASE_REP(IMUL_M) {
instr.dst = instr.dst % randomx::RegistersCount;
instr.src = instr.src % randomx::RegistersCount;
instr.opcode |= SRC_MEM;
instr.opcode |= DST_INT;
if (instr.src != instr.dst) {
instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask);
}
else {
instr.imm32 &= randomx::ScratchpadL3Mask;
}
registerUsage[instr.dst].lastUsed = i;
} break;
CASE_REP(IMULH_R) {
instr.dst = instr.dst % randomx::RegistersCount;
instr.src = instr.src % randomx::RegistersCount;
instr.opcode |= DST_INT;
instr.opcode |= SRC_INT;
registerUsage[instr.dst].lastUsed = i;
} break;
CASE_REP(IMULH_M) {
instr.dst = instr.dst % randomx::RegistersCount;
instr.src = instr.src % randomx::RegistersCount;
instr.opcode |= SRC_MEM;
instr.opcode |= DST_INT;
if (instr.src != instr.dst) {
instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask);
}
else {
instr.imm32 &= randomx::ScratchpadL3Mask;
}
registerUsage[instr.dst].lastUsed = i;
} break;
CASE_REP(ISMULH_R) {
instr.dst = instr.dst % randomx::RegistersCount;
instr.src = instr.src % randomx::RegistersCount;
instr.opcode |= DST_INT;
instr.opcode |= SRC_INT;
registerUsage[instr.dst].lastUsed = i;
} break;
CASE_REP(ISMULH_M) {
instr.dst = instr.dst % randomx::RegistersCount;
instr.src = instr.src % randomx::RegistersCount;
instr.opcode |= SRC_MEM;
instr.opcode |= DST_INT;
if (instr.src != instr.dst) {
instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask);
}
else {
instr.imm32 &= randomx::ScratchpadL3Mask;
}
registerUsage[instr.dst].lastUsed = i;
} break;
CASE_REP(IMUL_RCP) {
uint64_t divisor = instr.getImm32();
if (!randomx::isPowerOf2(divisor)) {
instr.dst = instr.dst % randomx::RegistersCount;
instr.opcode |= DST_INT;
registerUsage[instr.dst].lastUsed = i;
}
} break;
CASE_REP(INEG_R) {
instr.dst = instr.dst % randomx::RegistersCount;
instr.opcode |= DST_INT;
registerUsage[instr.dst].lastUsed = i;
} break;
CASE_REP(IXOR_R) {
instr.dst = instr.dst % randomx::RegistersCount;
instr.src = instr.src % randomx::RegistersCount;
instr.opcode |= DST_INT;
instr.opcode |= SRC_INT;
registerUsage[instr.dst].lastUsed = i;
} break;
CASE_REP(IXOR_M) {
instr.dst = instr.dst % randomx::RegistersCount;
instr.src = instr.src % randomx::RegistersCount;
instr.opcode |= SRC_MEM;
instr.opcode |= DST_INT;
if (instr.src != instr.dst) {
instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask);
}
else {
instr.imm32 &= randomx::ScratchpadL3Mask;
}
registerUsage[instr.dst].lastUsed = i;
} break;
CASE_REP(IROR_R) {
instr.dst = instr.dst % randomx::RegistersCount;
instr.src = instr.src % randomx::RegistersCount;
instr.opcode |= DST_INT;
instr.opcode |= SRC_INT;
registerUsage[instr.dst].lastUsed = i;
} break;
CASE_REP(IROL_R) {
instr.dst = instr.dst % randomx::RegistersCount;
instr.src = instr.src % randomx::RegistersCount;
instr.opcode |= DST_INT;
instr.opcode |= SRC_INT;
registerUsage[instr.dst].lastUsed = i;
} break;
CASE_REP(ISWAP_R) {
instr.dst = instr.dst % randomx::RegistersCount;
instr.src = instr.src % randomx::RegistersCount;
if (instr.src != instr.dst) {
instr.opcode |= DST_INT;
instr.opcode |= SRC_INT;
instr.opcode |= OP_SWAP;
registerUsage[instr.dst].lastUsed = i;
registerUsage[instr.src].lastUsed = i;
}
} break;
CASE_REP(FSWAP_R) {
instr.dst = instr.dst % randomx::RegistersCount;
instr.opcode |= DST_FLT;
} break;
CASE_REP(FADD_R) {
instr.dst = instr.dst % randomx::RegisterCountFlt;
instr.opcode |= DST_FLT;
instr.opcode |= OP_FLOAT;
} break;
CASE_REP(FADD_M) {
instr.dst = instr.dst % randomx::RegisterCountFlt;
instr.src = instr.src % randomx::RegistersCount;
instr.opcode |= DST_FLT;
instr.opcode |= SRC_MEM;
instr.opcode |= OP_FLOAT;
instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask);
} break; }
else {
instr.imm32 &= randomx::ScratchpadL3Mask;
}
registerUsage[instr.dst].lastUsed = i;
continue;
}
CASE_REP(FSUB_R) { if (opcode < randomx::ceil_ISUB_R) {
instr.dst = instr.dst % randomx::RegisterCountFlt; instr.dst = instr.dst % randomx::RegistersCount;
instr.opcode |= DST_FLT; instr.src = instr.src % randomx::RegistersCount;
instr.opcode |= OP_FLOAT; instr.opcode |= DST_INT;
} break; instr.opcode |= SRC_INT;
registerUsage[instr.dst].lastUsed = i;
continue;
}
CASE_REP(FSUB_M) { if (opcode < randomx::ceil_ISUB_M) {
instr.dst = instr.dst % randomx::RegisterCountFlt; instr.dst = instr.dst % randomx::RegistersCount;
instr.src = instr.src % randomx::RegistersCount; instr.src = instr.src % randomx::RegistersCount;
instr.opcode |= DST_FLT; instr.opcode |= SRC_MEM;
instr.opcode |= SRC_MEM; instr.opcode |= DST_INT;
instr.opcode |= OP_FLOAT; if (instr.src != instr.dst) {
instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask);
} break; }
else {
instr.imm32 &= randomx::ScratchpadL3Mask;
}
registerUsage[instr.dst].lastUsed = i;
continue;
}
CASE_REP(FSCAL_R) { if (opcode < randomx::ceil_IMUL_R) {
instr.dst = instr.dst % randomx::RegisterCountFlt; instr.dst = instr.dst % randomx::RegistersCount;
instr.opcode |= DST_FLT; instr.src = instr.src % randomx::RegistersCount;
} break; instr.opcode |= DST_INT;
instr.opcode |= SRC_INT;
registerUsage[instr.dst].lastUsed = i;
continue;
}
CASE_REP(FMUL_R) { if (opcode < randomx::ceil_IMUL_M) {
instr.dst = 4 + instr.dst % randomx::RegisterCountFlt; instr.dst = instr.dst % randomx::RegistersCount;
instr.opcode |= DST_FLT; instr.src = instr.src % randomx::RegistersCount;
instr.opcode |= OP_FLOAT; instr.opcode |= SRC_MEM;
} break; instr.opcode |= DST_INT;
if (instr.src != instr.dst) {
CASE_REP(FDIV_M) {
instr.dst = 4 + instr.dst % randomx::RegisterCountFlt;
instr.src = instr.src % randomx::RegistersCount;
instr.opcode |= DST_FLT;
instr.opcode |= SRC_MEM;
instr.opcode |= OP_FLOAT;
instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask);
} break; }
else {
instr.imm32 &= randomx::ScratchpadL3Mask;
}
registerUsage[instr.dst].lastUsed = i;
continue;
}
CASE_REP(FSQRT_R) { if (opcode < randomx::ceil_IMULH_R) {
instr.dst = 4 + instr.dst % randomx::RegisterCountFlt; instr.dst = instr.dst % randomx::RegistersCount;
instr.opcode |= DST_FLT; instr.src = instr.src % randomx::RegistersCount;
instr.opcode |= OP_FLOAT; instr.opcode |= DST_INT;
} break; instr.opcode |= SRC_INT;
registerUsage[instr.dst].lastUsed = i;
continue;
}
CASE_REP(CBRANCH) { if (opcode < randomx::ceil_IMULH_M) {
instr.opcode |= OP_BRANCH; instr.dst = instr.dst % randomx::RegistersCount;
instr.opcode |= DST_INT; instr.src = instr.src % randomx::RegistersCount;
//jump condition instr.opcode |= SRC_MEM;
//int reg = getConditionRegister(registerUsage); instr.opcode |= DST_INT;
int reg = instr.dst % randomx::RegistersCount; if (instr.src != instr.dst) {
int target = registerUsage[reg].lastUsed; instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask);
int offset = (i - target); }
instr.mod = offset; else {
jumpCount += offset; instr.imm32 &= randomx::ScratchpadL3Mask;
p(target + 1).opcode |= BRANCH_TARGET; }
registerUsage[reg].count++; registerUsage[instr.dst].lastUsed = i;
instr.dst = reg; continue;
//mark all registers as used }
for (unsigned j = 0; j < randomx::RegistersCount; ++j) {
registerUsage[j].lastUsed = i;
}
} break;
CASE_REP(CFROUND) { if (opcode < randomx::ceil_ISMULH_R) {
instr.src = instr.src % randomx::RegistersCount; instr.dst = instr.dst % randomx::RegistersCount;
instr.opcode |= SRC_INT; instr.src = instr.src % randomx::RegistersCount;
instr.opcode |= OP_CFROUND; instr.opcode |= DST_INT;
} break; instr.opcode |= SRC_INT;
registerUsage[instr.dst].lastUsed = i;
continue;
}
CASE_REP(ISTORE) { if (opcode < randomx::ceil_ISMULH_M) {
instr.dst = instr.dst % randomx::RegistersCount;
instr.src = instr.src % randomx::RegistersCount;
instr.opcode |= SRC_MEM;
instr.opcode |= DST_INT;
if (instr.src != instr.dst) {
instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask);
}
else {
instr.imm32 &= randomx::ScratchpadL3Mask;
}
registerUsage[instr.dst].lastUsed = i;
continue;
}
if (opcode < randomx::ceil_IMUL_RCP) {
uint64_t divisor = instr.getImm32();
if (!randomx::isPowerOf2(divisor)) {
instr.dst = instr.dst % randomx::RegistersCount; instr.dst = instr.dst % randomx::RegistersCount;
instr.src = instr.src % randomx::RegistersCount; instr.opcode |= DST_INT;
instr.opcode |= DST_MEM; registerUsage[instr.dst].lastUsed = i;
if (instr.getModCond() < randomx::StoreL3Condition) }
instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); continue;
else }
instr.imm32 &= randomx::ScratchpadL3Mask;
} break;
CASE_REP(NOP) { if (opcode < randomx::ceil_INEG_R) {
instr.dst = instr.dst % randomx::RegistersCount;
instr.opcode |= DST_INT;
registerUsage[instr.dst].lastUsed = i;
continue;
}
} break; if (opcode < randomx::ceil_IXOR_R) {
instr.dst = instr.dst % randomx::RegistersCount;
instr.src = instr.src % randomx::RegistersCount;
instr.opcode |= DST_INT;
instr.opcode |= SRC_INT;
registerUsage[instr.dst].lastUsed = i;
continue;
}
if (opcode < randomx::ceil_IXOR_M) {
instr.dst = instr.dst % randomx::RegistersCount;
instr.src = instr.src % randomx::RegistersCount;
instr.opcode |= SRC_MEM;
instr.opcode |= DST_INT;
if (instr.src != instr.dst) {
instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask);
}
else {
instr.imm32 &= randomx::ScratchpadL3Mask;
}
registerUsage[instr.dst].lastUsed = i;
continue;
}
if (opcode < randomx::ceil_IROR_R) {
instr.dst = instr.dst % randomx::RegistersCount;
instr.src = instr.src % randomx::RegistersCount;
instr.opcode |= DST_INT;
instr.opcode |= SRC_INT;
registerUsage[instr.dst].lastUsed = i;
continue;
}
if (opcode < randomx::ceil_IROL_R) {
instr.dst = instr.dst % randomx::RegistersCount;
instr.src = instr.src % randomx::RegistersCount;
instr.opcode |= DST_INT;
instr.opcode |= SRC_INT;
registerUsage[instr.dst].lastUsed = i;
continue;
}
if (opcode < randomx::ceil_ISWAP_R) {
instr.dst = instr.dst % randomx::RegistersCount;
instr.src = instr.src % randomx::RegistersCount;
if (instr.src != instr.dst) {
instr.opcode |= DST_INT;
instr.opcode |= SRC_INT;
instr.opcode |= OP_SWAP;
registerUsage[instr.dst].lastUsed = i;
registerUsage[instr.src].lastUsed = i;
}
continue;
}
if (opcode < randomx::ceil_FSWAP_R) {
instr.dst = instr.dst % randomx::RegistersCount;
instr.opcode |= DST_FLT;
continue;
}
if (opcode < randomx::ceil_FADD_R) {
instr.dst = instr.dst % randomx::RegisterCountFlt;
instr.opcode |= DST_FLT;
instr.opcode |= OP_FLOAT;
continue;
}
if (opcode < randomx::ceil_FADD_M) {
instr.dst = instr.dst % randomx::RegisterCountFlt;
instr.src = instr.src % randomx::RegistersCount;
instr.opcode |= DST_FLT;
instr.opcode |= SRC_MEM;
instr.opcode |= OP_FLOAT;
instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask);
continue;
}
if (opcode < randomx::ceil_FSUB_R) {
instr.dst = instr.dst % randomx::RegisterCountFlt;
instr.opcode |= DST_FLT;
instr.opcode |= OP_FLOAT;
continue;
}
if (opcode < randomx::ceil_FSUB_M) {
instr.dst = instr.dst % randomx::RegisterCountFlt;
instr.src = instr.src % randomx::RegistersCount;
instr.opcode |= DST_FLT;
instr.opcode |= SRC_MEM;
instr.opcode |= OP_FLOAT;
instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask);
continue;
}
if (opcode < randomx::ceil_FSCAL_R) {
instr.dst = instr.dst % randomx::RegisterCountFlt;
instr.opcode |= DST_FLT;
continue;
}
if (opcode < randomx::ceil_FMUL_R) {
instr.dst = 4 + instr.dst % randomx::RegisterCountFlt;
instr.opcode |= DST_FLT;
instr.opcode |= OP_FLOAT;
continue;
}
if (opcode < randomx::ceil_FDIV_M) {
instr.dst = 4 + instr.dst % randomx::RegisterCountFlt;
instr.src = instr.src % randomx::RegistersCount;
instr.opcode |= DST_FLT;
instr.opcode |= SRC_MEM;
instr.opcode |= OP_FLOAT;
instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask);
continue;
}
if (opcode < randomx::ceil_FSQRT_R) {
instr.dst = 4 + instr.dst % randomx::RegisterCountFlt;
instr.opcode |= DST_FLT;
instr.opcode |= OP_FLOAT;
continue;
}
if (opcode < randomx::ceil_CBRANCH) {
instr.opcode |= OP_BRANCH;
instr.opcode |= DST_INT;
int reg = instr.dst % randomx::RegistersCount;
int target = registerUsage[reg].lastUsed;
int offset = (i - target);
instr.mod = offset;
jumpCount += offset;
p(target + 1).opcode |= BRANCH_TARGET;
registerUsage[reg].count++;
instr.dst = reg;
//mark all registers as used
for (unsigned j = 0; j < randomx::RegistersCount; ++j) {
registerUsage[j].lastUsed = i;
}
continue;
}
if (opcode < randomx::ceil_CFROUND) {
instr.src = instr.src % randomx::RegistersCount;
instr.opcode |= SRC_INT;
instr.opcode |= OP_CFROUND;
continue;
}
if (opcode < randomx::ceil_ISTORE) {
instr.dst = instr.dst % randomx::RegistersCount;
instr.src = instr.src % randomx::RegistersCount;
instr.opcode |= DST_MEM;
if (instr.getModCond() < randomx::StoreL3Condition)
instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask);
else
instr.imm32 &= randomx::ScratchpadL3Mask;
continue;
}
if (opcode < randomx::ceil_NOP) {
default:
UNREACHABLE;
} }
} }
return jumpCount; return jumpCount;

View File

@ -8,7 +8,7 @@ const uint8_t seed[32] = { 191, 182, 222, 175, 249, 89, 134, 104, 241, 68, 191,
int main() { int main() {
constexpr int count = 1000000; constexpr int count = 1000000;
int isnCounts[randomx::SuperscalarInstructionType::COUNT] = { 0 }; int isnCounts[(int)randomx::SuperscalarInstructionType::COUNT] = { 0 };
int64_t asicLatency = 0; int64_t asicLatency = 0;
int64_t codesize = 0; int64_t codesize = 0;
int64_t cpuLatency = 0; int64_t cpuLatency = 0;
@ -44,7 +44,7 @@ int main() {
std::cout << "Avg. RandomX ops: " << (size / (double)count) << std::endl; std::cout << "Avg. RandomX ops: " << (size / (double)count) << std::endl;
std::cout << "Frequencies: " << std::endl; std::cout << "Frequencies: " << std::endl;
for (unsigned j = 0; j < randomx::SuperscalarInstructionType::COUNT; ++j) { for (unsigned j = 0; j < (int)randomx::SuperscalarInstructionType::COUNT; ++j) {
std::cout << j << " " << isnCounts[j] << " " << isnCounts[j] / (double)size << std::endl; std::cout << j << " " << isnCounts[j] << " " << isnCounts[j] / (double)size << std::endl;
} }