From 1f62d787ad762abe60b1945d298081fb16e29b2b Mon Sep 17 00:00:00 2001 From: tevador Date: Mon, 24 Jun 2019 13:58:41 +0200 Subject: [PATCH] Fix header dependency of superscalar_program.hpp Fix tests Fix a typo in design.md --- doc/design.md | 2 +- src/superscalar.hpp | 1 + src/superscalar_program.hpp | 2 +- src/tests/perf-simulation.cpp | 547 +++++++++++++++++--------------- src/tests/superscalar-stats.cpp | 4 +- 5 files changed, 290 insertions(+), 266 deletions(-) diff --git a/doc/design.md b/doc/design.md index 16dc895..1a77458 100644 --- a/doc/design.md +++ b/doc/design.md @@ -426,7 +426,7 @@ The following 10 designs were simulated and the average number of clock cycles t |-------|-----------|----------|---------------|-----------------------|---| |#1|1 EXU + 1 MEM|in-order|non-speculative|293|0.87| |#2|1 EXU + 1 MEM|in-order|speculative|262|0.98| -|#3|1 EXU + 1 MEM|in-order|non-speculative|197|1.3| +|#3|2 EXU + 1 MEM|in-order|non-speculative|197|1.3| |#4|2 EXU + 1 MEM|in-order|speculative|161|1.6| |#5|2 EXU + 1 MEM|out-of-order|non-speculative|144|1.8| |#6|2 EXU + 1 MEM|out-of-order|speculative|122|2.1| diff --git a/src/superscalar.hpp b/src/superscalar.hpp index 2e55533..bc101c4 100644 --- a/src/superscalar.hpp +++ b/src/superscalar.hpp @@ -51,6 +51,7 @@ namespace randomx { ISMULH_R = 12, //1+2+1 0+(p1,p5)+0 3 3+3+3 (mov+imul+mov) IMUL_RCP = 13, //1+1 p015+p1 4 10+4 (mov+imul) + COUNT = 14, INVALID = -1 }; diff --git a/src/superscalar_program.hpp b/src/superscalar_program.hpp index bff586c..38c2ae4 100644 --- a/src/superscalar_program.hpp +++ b/src/superscalar_program.hpp @@ -30,7 +30,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include "instruction.hpp" -#include "configuration.h" +#include "common.hpp" namespace randomx { diff --git a/src/tests/perf-simulation.cpp b/src/tests/perf-simulation.cpp index dfac0ff..1f9b3f2 100644 --- a/src/tests/perf-simulation.cpp +++ b/src/tests/perf-simulation.cpp @@ -321,7 +321,7 @@ int executeOutOfOrder(randomx::Program& p, randomx::Program& original, bool prin return cycle; } -#include "../instruction_weights.hpp" +#include "../bytecode_machine.hpp" //old register selection struct RegisterUsage { @@ -355,284 +355,307 @@ int analyze(randomx::Program& p) { auto& instr = p(i); int opcode = instr.opcode; instr.opcode = 0; - switch (opcode) { - CASE_REP(IADD_RS) { - instr.dst = instr.dst % randomx::RegistersCount; - instr.src = instr.src % randomx::RegistersCount; - instr.opcode |= SRC_INT; - instr.opcode |= DST_INT; - registerUsage[instr.dst].lastUsed = i; - } break; - CASE_REP(IADD_M) { - instr.dst = instr.dst % randomx::RegistersCount; - instr.src = instr.src % randomx::RegistersCount; - instr.opcode |= SRC_MEM; - instr.opcode |= DST_INT; - if (instr.src != instr.dst) { - instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); - } - else { - instr.imm32 &= randomx::ScratchpadL3Mask; - } - registerUsage[instr.dst].lastUsed = i; - } break; + if (opcode < randomx::ceil_IADD_RS) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= SRC_INT; + instr.opcode |= DST_INT; + registerUsage[instr.dst].lastUsed = i; + continue; + } - CASE_REP(ISUB_R) { - instr.dst = instr.dst % randomx::RegistersCount; - instr.src = instr.src % randomx::RegistersCount; - instr.opcode |= DST_INT; - instr.opcode |= SRC_INT; - registerUsage[instr.dst].lastUsed = i; - } break; - - CASE_REP(ISUB_M) { - instr.dst = instr.dst % randomx::RegistersCount; - instr.src = instr.src % randomx::RegistersCount; - instr.opcode |= SRC_MEM; - instr.opcode |= DST_INT; - if (instr.src != instr.dst) { - instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); - } - else { - instr.imm32 &= randomx::ScratchpadL3Mask; - } - registerUsage[instr.dst].lastUsed = i; - } break; - - CASE_REP(IMUL_R) { - instr.dst = instr.dst % randomx::RegistersCount; - instr.src = instr.src % randomx::RegistersCount; - instr.opcode |= DST_INT; - instr.opcode |= SRC_INT; - registerUsage[instr.dst].lastUsed = i; - } break; - - CASE_REP(IMUL_M) { - instr.dst = instr.dst % randomx::RegistersCount; - instr.src = instr.src % randomx::RegistersCount; - instr.opcode |= SRC_MEM; - instr.opcode |= DST_INT; - if (instr.src != instr.dst) { - instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); - } - else { - instr.imm32 &= randomx::ScratchpadL3Mask; - } - registerUsage[instr.dst].lastUsed = i; - } break; - - CASE_REP(IMULH_R) { - instr.dst = instr.dst % randomx::RegistersCount; - instr.src = instr.src % randomx::RegistersCount; - instr.opcode |= DST_INT; - instr.opcode |= SRC_INT; - registerUsage[instr.dst].lastUsed = i; - } break; - - CASE_REP(IMULH_M) { - instr.dst = instr.dst % randomx::RegistersCount; - instr.src = instr.src % randomx::RegistersCount; - instr.opcode |= SRC_MEM; - instr.opcode |= DST_INT; - if (instr.src != instr.dst) { - instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); - } - else { - instr.imm32 &= randomx::ScratchpadL3Mask; - } - registerUsage[instr.dst].lastUsed = i; - } break; - - CASE_REP(ISMULH_R) { - instr.dst = instr.dst % randomx::RegistersCount; - instr.src = instr.src % randomx::RegistersCount; - instr.opcode |= DST_INT; - instr.opcode |= SRC_INT; - registerUsage[instr.dst].lastUsed = i; - } break; - - CASE_REP(ISMULH_M) { - instr.dst = instr.dst % randomx::RegistersCount; - instr.src = instr.src % randomx::RegistersCount; - instr.opcode |= SRC_MEM; - instr.opcode |= DST_INT; - if (instr.src != instr.dst) { - instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); - } - else { - instr.imm32 &= randomx::ScratchpadL3Mask; - } - registerUsage[instr.dst].lastUsed = i; - } break; - - CASE_REP(IMUL_RCP) { - uint64_t divisor = instr.getImm32(); - if (!randomx::isPowerOf2(divisor)) { - instr.dst = instr.dst % randomx::RegistersCount; - instr.opcode |= DST_INT; - registerUsage[instr.dst].lastUsed = i; - } - } break; - - CASE_REP(INEG_R) { - instr.dst = instr.dst % randomx::RegistersCount; - instr.opcode |= DST_INT; - registerUsage[instr.dst].lastUsed = i; - } break; - - CASE_REP(IXOR_R) { - instr.dst = instr.dst % randomx::RegistersCount; - instr.src = instr.src % randomx::RegistersCount; - instr.opcode |= DST_INT; - instr.opcode |= SRC_INT; - registerUsage[instr.dst].lastUsed = i; - } break; - - CASE_REP(IXOR_M) { - instr.dst = instr.dst % randomx::RegistersCount; - instr.src = instr.src % randomx::RegistersCount; - instr.opcode |= SRC_MEM; - instr.opcode |= DST_INT; - if (instr.src != instr.dst) { - instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); - } - else { - instr.imm32 &= randomx::ScratchpadL3Mask; - } - registerUsage[instr.dst].lastUsed = i; - } break; - - CASE_REP(IROR_R) { - instr.dst = instr.dst % randomx::RegistersCount; - instr.src = instr.src % randomx::RegistersCount; - instr.opcode |= DST_INT; - instr.opcode |= SRC_INT; - registerUsage[instr.dst].lastUsed = i; - } break; - - CASE_REP(IROL_R) { - instr.dst = instr.dst % randomx::RegistersCount; - instr.src = instr.src % randomx::RegistersCount; - instr.opcode |= DST_INT; - instr.opcode |= SRC_INT; - registerUsage[instr.dst].lastUsed = i; - } break; - - CASE_REP(ISWAP_R) { - instr.dst = instr.dst % randomx::RegistersCount; - instr.src = instr.src % randomx::RegistersCount; - if (instr.src != instr.dst) { - instr.opcode |= DST_INT; - instr.opcode |= SRC_INT; - instr.opcode |= OP_SWAP; - registerUsage[instr.dst].lastUsed = i; - registerUsage[instr.src].lastUsed = i; - } - } break; - - CASE_REP(FSWAP_R) { - instr.dst = instr.dst % randomx::RegistersCount; - instr.opcode |= DST_FLT; - } break; - - CASE_REP(FADD_R) { - instr.dst = instr.dst % randomx::RegisterCountFlt; - instr.opcode |= DST_FLT; - instr.opcode |= OP_FLOAT; - } break; - - CASE_REP(FADD_M) { - instr.dst = instr.dst % randomx::RegisterCountFlt; - instr.src = instr.src % randomx::RegistersCount; - instr.opcode |= DST_FLT; - instr.opcode |= SRC_MEM; - instr.opcode |= OP_FLOAT; + if (opcode < randomx::ceil_IADD_M) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= SRC_MEM; + instr.opcode |= DST_INT; + if (instr.src != instr.dst) { instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); - } break; + } + else { + instr.imm32 &= randomx::ScratchpadL3Mask; + } + registerUsage[instr.dst].lastUsed = i; + continue; + } - CASE_REP(FSUB_R) { - instr.dst = instr.dst % randomx::RegisterCountFlt; - instr.opcode |= DST_FLT; - instr.opcode |= OP_FLOAT; - } break; + if (opcode < randomx::ceil_ISUB_R) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= DST_INT; + instr.opcode |= SRC_INT; + registerUsage[instr.dst].lastUsed = i; + continue; + } - CASE_REP(FSUB_M) { - instr.dst = instr.dst % randomx::RegisterCountFlt; - instr.src = instr.src % randomx::RegistersCount; - instr.opcode |= DST_FLT; - instr.opcode |= SRC_MEM; - instr.opcode |= OP_FLOAT; + if (opcode < randomx::ceil_ISUB_M) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= SRC_MEM; + instr.opcode |= DST_INT; + if (instr.src != instr.dst) { instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); - } break; + } + else { + instr.imm32 &= randomx::ScratchpadL3Mask; + } + registerUsage[instr.dst].lastUsed = i; + continue; + } - CASE_REP(FSCAL_R) { - instr.dst = instr.dst % randomx::RegisterCountFlt; - instr.opcode |= DST_FLT; - } break; + if (opcode < randomx::ceil_IMUL_R) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= DST_INT; + instr.opcode |= SRC_INT; + registerUsage[instr.dst].lastUsed = i; + continue; + } - CASE_REP(FMUL_R) { - instr.dst = 4 + instr.dst % randomx::RegisterCountFlt; - instr.opcode |= DST_FLT; - instr.opcode |= OP_FLOAT; - } break; - - CASE_REP(FDIV_M) { - instr.dst = 4 + instr.dst % randomx::RegisterCountFlt; - instr.src = instr.src % randomx::RegistersCount; - instr.opcode |= DST_FLT; - instr.opcode |= SRC_MEM; - instr.opcode |= OP_FLOAT; + if (opcode < randomx::ceil_IMUL_M) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= SRC_MEM; + instr.opcode |= DST_INT; + if (instr.src != instr.dst) { instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); - } break; + } + else { + instr.imm32 &= randomx::ScratchpadL3Mask; + } + registerUsage[instr.dst].lastUsed = i; + continue; + } - CASE_REP(FSQRT_R) { - instr.dst = 4 + instr.dst % randomx::RegisterCountFlt; - instr.opcode |= DST_FLT; - instr.opcode |= OP_FLOAT; - } break; + if (opcode < randomx::ceil_IMULH_R) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= DST_INT; + instr.opcode |= SRC_INT; + registerUsage[instr.dst].lastUsed = i; + continue; + } - CASE_REP(CBRANCH) { - instr.opcode |= OP_BRANCH; - instr.opcode |= DST_INT; - //jump condition - //int reg = getConditionRegister(registerUsage); - int reg = instr.dst % randomx::RegistersCount; - int target = registerUsage[reg].lastUsed; - int offset = (i - target); - instr.mod = offset; - jumpCount += offset; - p(target + 1).opcode |= BRANCH_TARGET; - registerUsage[reg].count++; - instr.dst = reg; - //mark all registers as used - for (unsigned j = 0; j < randomx::RegistersCount; ++j) { - registerUsage[j].lastUsed = i; - } - } break; + if (opcode < randomx::ceil_IMULH_M) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= SRC_MEM; + instr.opcode |= DST_INT; + if (instr.src != instr.dst) { + instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); + } + else { + instr.imm32 &= randomx::ScratchpadL3Mask; + } + registerUsage[instr.dst].lastUsed = i; + continue; + } - CASE_REP(CFROUND) { - instr.src = instr.src % randomx::RegistersCount; - instr.opcode |= SRC_INT; - instr.opcode |= OP_CFROUND; - } break; + if (opcode < randomx::ceil_ISMULH_R) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= DST_INT; + instr.opcode |= SRC_INT; + registerUsage[instr.dst].lastUsed = i; + continue; + } - CASE_REP(ISTORE) { + if (opcode < randomx::ceil_ISMULH_M) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= SRC_MEM; + instr.opcode |= DST_INT; + if (instr.src != instr.dst) { + instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); + } + else { + instr.imm32 &= randomx::ScratchpadL3Mask; + } + registerUsage[instr.dst].lastUsed = i; + continue; + } + + if (opcode < randomx::ceil_IMUL_RCP) { + uint64_t divisor = instr.getImm32(); + if (!randomx::isPowerOf2(divisor)) { instr.dst = instr.dst % randomx::RegistersCount; - instr.src = instr.src % randomx::RegistersCount; - instr.opcode |= DST_MEM; - if (instr.getModCond() < randomx::StoreL3Condition) - instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); - else - instr.imm32 &= randomx::ScratchpadL3Mask; - } break; + instr.opcode |= DST_INT; + registerUsage[instr.dst].lastUsed = i; + } + continue; + } - CASE_REP(NOP) { + if (opcode < randomx::ceil_INEG_R) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.opcode |= DST_INT; + registerUsage[instr.dst].lastUsed = i; + continue; + } - } break; + if (opcode < randomx::ceil_IXOR_R) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= DST_INT; + instr.opcode |= SRC_INT; + registerUsage[instr.dst].lastUsed = i; + continue; + } + + if (opcode < randomx::ceil_IXOR_M) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= SRC_MEM; + instr.opcode |= DST_INT; + if (instr.src != instr.dst) { + instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); + } + else { + instr.imm32 &= randomx::ScratchpadL3Mask; + } + registerUsage[instr.dst].lastUsed = i; + continue; + } + + if (opcode < randomx::ceil_IROR_R) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= DST_INT; + instr.opcode |= SRC_INT; + registerUsage[instr.dst].lastUsed = i; + continue; + } + + if (opcode < randomx::ceil_IROL_R) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= DST_INT; + instr.opcode |= SRC_INT; + registerUsage[instr.dst].lastUsed = i; + continue; + } + + if (opcode < randomx::ceil_ISWAP_R) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.src = instr.src % randomx::RegistersCount; + if (instr.src != instr.dst) { + instr.opcode |= DST_INT; + instr.opcode |= SRC_INT; + instr.opcode |= OP_SWAP; + registerUsage[instr.dst].lastUsed = i; + registerUsage[instr.src].lastUsed = i; + } + continue; + } + + if (opcode < randomx::ceil_FSWAP_R) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.opcode |= DST_FLT; + continue; + } + + if (opcode < randomx::ceil_FADD_R) { + instr.dst = instr.dst % randomx::RegisterCountFlt; + instr.opcode |= DST_FLT; + instr.opcode |= OP_FLOAT; + continue; + } + + if (opcode < randomx::ceil_FADD_M) { + instr.dst = instr.dst % randomx::RegisterCountFlt; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= DST_FLT; + instr.opcode |= SRC_MEM; + instr.opcode |= OP_FLOAT; + instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); + continue; + } + + if (opcode < randomx::ceil_FSUB_R) { + instr.dst = instr.dst % randomx::RegisterCountFlt; + instr.opcode |= DST_FLT; + instr.opcode |= OP_FLOAT; + continue; + } + + if (opcode < randomx::ceil_FSUB_M) { + instr.dst = instr.dst % randomx::RegisterCountFlt; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= DST_FLT; + instr.opcode |= SRC_MEM; + instr.opcode |= OP_FLOAT; + instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); + continue; + } + + if (opcode < randomx::ceil_FSCAL_R) { + instr.dst = instr.dst % randomx::RegisterCountFlt; + instr.opcode |= DST_FLT; + continue; + } + + if (opcode < randomx::ceil_FMUL_R) { + instr.dst = 4 + instr.dst % randomx::RegisterCountFlt; + instr.opcode |= DST_FLT; + instr.opcode |= OP_FLOAT; + continue; + } + + if (opcode < randomx::ceil_FDIV_M) { + instr.dst = 4 + instr.dst % randomx::RegisterCountFlt; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= DST_FLT; + instr.opcode |= SRC_MEM; + instr.opcode |= OP_FLOAT; + instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); + continue; + } + + if (opcode < randomx::ceil_FSQRT_R) { + instr.dst = 4 + instr.dst % randomx::RegisterCountFlt; + instr.opcode |= DST_FLT; + instr.opcode |= OP_FLOAT; + continue; + } + + if (opcode < randomx::ceil_CBRANCH) { + instr.opcode |= OP_BRANCH; + instr.opcode |= DST_INT; + int reg = instr.dst % randomx::RegistersCount; + int target = registerUsage[reg].lastUsed; + int offset = (i - target); + instr.mod = offset; + jumpCount += offset; + p(target + 1).opcode |= BRANCH_TARGET; + registerUsage[reg].count++; + instr.dst = reg; + //mark all registers as used + for (unsigned j = 0; j < randomx::RegistersCount; ++j) { + registerUsage[j].lastUsed = i; + } + continue; + } + + if (opcode < randomx::ceil_CFROUND) { + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= SRC_INT; + instr.opcode |= OP_CFROUND; + continue; + } + + if (opcode < randomx::ceil_ISTORE) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= DST_MEM; + if (instr.getModCond() < randomx::StoreL3Condition) + instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); + else + instr.imm32 &= randomx::ScratchpadL3Mask; + continue; + } + + if (opcode < randomx::ceil_NOP) { - default: - UNREACHABLE; } } return jumpCount; diff --git a/src/tests/superscalar-stats.cpp b/src/tests/superscalar-stats.cpp index 956580f..d0322a4 100644 --- a/src/tests/superscalar-stats.cpp +++ b/src/tests/superscalar-stats.cpp @@ -8,7 +8,7 @@ const uint8_t seed[32] = { 191, 182, 222, 175, 249, 89, 134, 104, 241, 68, 191, int main() { constexpr int count = 1000000; - int isnCounts[randomx::SuperscalarInstructionType::COUNT] = { 0 }; + int isnCounts[(int)randomx::SuperscalarInstructionType::COUNT] = { 0 }; int64_t asicLatency = 0; int64_t codesize = 0; int64_t cpuLatency = 0; @@ -44,7 +44,7 @@ int main() { std::cout << "Avg. RandomX ops: " << (size / (double)count) << std::endl; std::cout << "Frequencies: " << std::endl; - for (unsigned j = 0; j < randomx::SuperscalarInstructionType::COUNT; ++j) { + for (unsigned j = 0; j < (int)randomx::SuperscalarInstructionType::COUNT; ++j) { std::cout << j << " " << isnCounts[j] << " " << isnCounts[j] / (double)size << std::endl; }