diff --git a/randomx.sln b/randomx.sln index abd1c69..bc93b7f 100644 --- a/randomx.sln +++ b/randomx.sln @@ -25,6 +25,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "scratchpad-entropy", "vcxpr EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "jit-performance", "vcxproj\jit-performance.vcxproj", "{535F2111-FA81-4C76-A354-EDD2F9AA00E3}" EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "perf-simulation", "vcxproj\perf-simulation.vcxproj", "{F1FC7AC0-2773-4A57-AFA7-56BB07216AA2}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|x64 = Debug|x64 @@ -113,6 +115,14 @@ Global {535F2111-FA81-4C76-A354-EDD2F9AA00E3}.Release|x64.Build.0 = Release|x64 {535F2111-FA81-4C76-A354-EDD2F9AA00E3}.Release|x86.ActiveCfg = Release|Win32 {535F2111-FA81-4C76-A354-EDD2F9AA00E3}.Release|x86.Build.0 = Release|Win32 + {F1FC7AC0-2773-4A57-AFA7-56BB07216AA2}.Debug|x64.ActiveCfg = Debug|x64 + {F1FC7AC0-2773-4A57-AFA7-56BB07216AA2}.Debug|x64.Build.0 = Debug|x64 + {F1FC7AC0-2773-4A57-AFA7-56BB07216AA2}.Debug|x86.ActiveCfg = Debug|Win32 + {F1FC7AC0-2773-4A57-AFA7-56BB07216AA2}.Debug|x86.Build.0 = Debug|Win32 + {F1FC7AC0-2773-4A57-AFA7-56BB07216AA2}.Release|x64.ActiveCfg = Release|x64 + {F1FC7AC0-2773-4A57-AFA7-56BB07216AA2}.Release|x64.Build.0 = Release|x64 + {F1FC7AC0-2773-4A57-AFA7-56BB07216AA2}.Release|x86.ActiveCfg = Release|Win32 + {F1FC7AC0-2773-4A57-AFA7-56BB07216AA2}.Release|x86.Build.0 = Release|Win32 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -127,6 +137,7 @@ Global {3E490DEC-1874-43AA-92DA-1AC57C217EAC} = {4A4A689F-86AF-41C0-A974-1080506D0923} {FF8BD408-AFD8-43C6-BE98-4D03B37E840B} = {4A4A689F-86AF-41C0-A974-1080506D0923} {535F2111-FA81-4C76-A354-EDD2F9AA00E3} = {4A4A689F-86AF-41C0-A974-1080506D0923} + {F1FC7AC0-2773-4A57-AFA7-56BB07216AA2} = {4A4A689F-86AF-41C0-A974-1080506D0923} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = 
{4EBC03DB-AE37-4141-8147-692F16E0ED02} diff --git a/src/tests/perf-simulation.cpp b/src/tests/perf-simulation.cpp new file mode 100644 index 0000000..3a8f6f4 --- /dev/null +++ b/src/tests/perf-simulation.cpp @@ -0,0 +1,645 @@ +/* +Copyright (c) 2018-2019, tevador + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#include "utility.hpp" +#include "../common.hpp" +#include "../aes_hash.hpp" +#include "../program.hpp" +#include "../blake2/blake2.h" +#include <algorithm> +#include <iomanip> + +int analyze(randomx::Program& p); +int executeInOrder(randomx::Program& p, randomx::Program& original, bool print, int executionPorts, int memoryPorts, bool speculate, int pipeline); +int executeOutOfOrder(randomx::Program& p, randomx::Program& original, bool print, int executionPorts, int memoryPorts, bool speculate, int pipeline); + +constexpr uint32_t DST_NOP = 0; +constexpr uint32_t DST_INT = 1; +constexpr uint32_t DST_FLT = 2; +constexpr uint32_t DST_MEM = 3; +constexpr uint32_t MASK_DST = 3; + +constexpr uint32_t SRC_NOP = 0; +constexpr uint32_t SRC_INT = 4; +constexpr uint32_t SRC_FLT = 8; +constexpr uint32_t SRC_MEM = 12; +constexpr uint32_t MASK_SRC = 12; + +constexpr uint32_t OP_CFROUND = 16; +constexpr uint32_t OP_SWAP = 32; +constexpr uint32_t OP_BRANCH = 48; +constexpr uint32_t MASK_EXT = 48; + +constexpr uint32_t OP_FLOAT = 64; +constexpr uint32_t BRANCH_TARGET = 128; + +//template +void generate(randomx::Program& p, uint32_t nonce) { + alignas(16) uint64_t hash[8]; + blake2b(hash, sizeof(hash), &nonce, sizeof(nonce), nullptr, 0); + fillAes1Rx4((void*)hash, sizeof(p), &p); +} + +bool has(randomx::Instruction& instr, uint32_t mask, uint32_t prop) { + return (instr.opcode & mask) == prop; +} + +bool has(randomx::Instruction& instr, uint32_t prop) { + return (instr.opcode & prop) != 0; +} + +int main(int argc, char** argv) { + int nonces, seed, executionPorts, memoryPorts, pipeline; + bool print, reorder, speculate; + readOption("--print", argc, argv, print); + readOption("--reorder", argc, argv, reorder); + readOption("--speculate", argc, argv, speculate); + readIntOption("--nonces", argc, argv, nonces, 1); + readIntOption("--seed", argc, argv, seed, 0); + readIntOption("--executionPorts", argc, argv, executionPorts, 4); + readIntOption("--memoryPorts", argc, argv, memoryPorts, 2); +
readIntOption("--pipeline", argc, argv, pipeline, 3 + speculate); + randomx::Program p, original; + double totalCycles = 0.0; + double jumpCount = 0; + for (int i = 0; i < nonces; ++i) { + generate(original, i ^ seed); + memcpy(&p, &original, sizeof(p)); + jumpCount += analyze(p); + totalCycles += + reorder + ? + executeOutOfOrder(p, original, print, executionPorts, memoryPorts, speculate, pipeline) + : + executeInOrder(p, original, print, executionPorts, memoryPorts, speculate, pipeline); + } + totalCycles /= nonces; + jumpCount /= nonces; + std::cout << "Execution took " << totalCycles << " cycles per program" << std::endl; + //std::cout << "Jump count: " << jumpCount << std::endl; + return 0; +} + +int executeInOrder(randomx::Program& p, randomx::Program& original, bool print, int executionPorts, int memoryPorts, bool speculate, int pipeline) { + int cycle = pipeline - 1; + int index = 0; + int branchCount = 0; + int int_reg_ready[randomx::RegistersCount] = { 0 }; + int flt_reg_ready[randomx::RegistersCount] = { 0 }; + //each workgroup takes 1 or 2 cycles (2 cycles if any instruction has a memory operand) + while (index < RANDOMX_PROGRAM_SIZE) { + int memoryReads = 0; + int memoryAccesses = 0; + bool hasRound = false; + int workers = 0; + //std::cout << "-----------" << std::endl; + for (; workers < executionPorts && memoryAccesses < memoryPorts && index < RANDOMX_PROGRAM_SIZE; ++workers) { + auto& instr = p(index); + auto& origi = original(index); + origi.dst %= randomx::RegistersCount; + origi.src %= randomx::RegistersCount; + + //check dependencies + if (has(instr, MASK_SRC, SRC_INT) && int_reg_ready[instr.src] > cycle) + break; + + if (has(instr, MASK_SRC, SRC_MEM) && int_reg_ready[instr.src] > cycle) + break; + + if (has(instr, MASK_DST, DST_FLT) && flt_reg_ready[instr.dst] > cycle) + break; + + if (has(instr, MASK_DST, DST_INT) && int_reg_ready[instr.dst] > cycle) + break; + + if (hasRound && has(instr, OP_FLOAT)) + break; + + //execute + index++; + + if 
(has(instr, MASK_EXT, OP_BRANCH)) { + branchCount++; + } + + if (has(instr, MASK_DST, DST_FLT)) + flt_reg_ready[instr.dst] = cycle + 1; + + if (has(instr, MASK_DST, DST_INT)) + int_reg_ready[instr.dst] = cycle + 1; + + if (has(instr, MASK_EXT, OP_SWAP)) { + int_reg_ready[instr.src] = cycle + 1; + } + + if (has(instr, MASK_EXT, OP_CFROUND)) + hasRound = true; + + if (has(instr, MASK_SRC, SRC_MEM)) { + memoryReads++; + memoryAccesses++; + if (print) + std::cout << std::setw(2) << (cycle + 2) << ": " << origi; + } + else { + if (print) + std::cout << std::setw(2) << (cycle + 1) << ": " << origi; + } + + if (has(instr, MASK_DST, DST_MEM)) { + memoryAccesses++; + } + + //non-speculative execution must stall after branch + if (!speculate && has(instr, MASK_EXT, OP_BRANCH)) { + cycle += pipeline - 1; + break; + } + } + //std::cout << " workers: " << workers << std::endl; + cycle++; + if (memoryReads) + cycle++; + } + if (speculate) { + //account for mispredicted branches + int i = 0; + while (branchCount--) { + auto entropy = p.getEntropy(i / 8); + entropy >> (i % 8) * 8; + if ((entropy & 0xff) == 0) // 1/256 chance to flush the pipeline + cycle += pipeline - 1; + } + } + return cycle; +} + +int executeOutOfOrder(randomx::Program& p, randomx::Program& original, bool print, int executionPorts, int memoryPorts, bool speculate, int pipeline) { + int index = 0; + int busyExecutionPorts[RANDOMX_PROGRAM_SIZE] = { 0 }; + int busyMemoryPorts[RANDOMX_PROGRAM_SIZE] = { 0 }; + int int_reg_ready[randomx::RegistersCount] = { 0 }; + int flt_reg_ready[randomx::RegistersCount] = { 0 }; + int fprcReady = 0; + int lastBranch = 0; + int branchCount = 0; + for (; index < RANDOMX_PROGRAM_SIZE; ++index) { + auto& instr = p(index); + int retireCycle = pipeline - 1; + + //non-speculative execution cannot reorder across branches + if (!speculate && !has(instr, MASK_EXT, OP_BRANCH)) + retireCycle = std::max(lastBranch + pipeline - 1, retireCycle); + + //check dependencies + if (has(instr, 
MASK_SRC, SRC_INT)) { + retireCycle = std::max(retireCycle, int_reg_ready[instr.src]); + } + + if (has(instr, MASK_SRC, SRC_MEM)) { + retireCycle = std::max(retireCycle, int_reg_ready[instr.src]); + //find free memory port + do { + retireCycle++; + } while (busyMemoryPorts[retireCycle - 1] >= memoryPorts); + busyMemoryPorts[retireCycle - 1]++; + } + + if (has(instr, MASK_DST, DST_FLT)) { + retireCycle = std::max(retireCycle, flt_reg_ready[instr.dst]); + } + + if (has(instr, MASK_DST, DST_INT)) { + retireCycle = std::max(retireCycle, int_reg_ready[instr.dst]); + } + + //floating point operations depend on the fprc register + if (has(instr, OP_FLOAT)) + retireCycle = std::max(retireCycle, fprcReady); + + //execute + if (has(instr, MASK_DST, DST_MEM)) { + //find free memory port + do { + retireCycle++; + } while (busyMemoryPorts[retireCycle - 1] >= memoryPorts); + busyMemoryPorts[retireCycle - 1]++; + } + + if (has(instr, MASK_DST, DST_FLT)) { + //find free execution port + do { + retireCycle++; + } while (busyExecutionPorts[retireCycle - 1] >= executionPorts); + busyExecutionPorts[retireCycle - 1]++; + flt_reg_ready[instr.dst] = retireCycle; + } + + if (has(instr, MASK_DST, DST_INT)) { + //find free execution port + do { + retireCycle++; + } while (busyExecutionPorts[retireCycle - 1] >= executionPorts); + busyExecutionPorts[retireCycle - 1]++; + int_reg_ready[instr.dst] = retireCycle; + } + + if (has(instr, MASK_EXT, OP_SWAP)) { + int_reg_ready[instr.src] = retireCycle; + } + + if (has(instr, MASK_EXT, OP_CFROUND)) { + do { + retireCycle++; + } while (busyExecutionPorts[retireCycle - 1] >= executionPorts); + busyExecutionPorts[retireCycle - 1]++; + fprcReady = retireCycle; + } + + if (has(instr, MASK_EXT, OP_BRANCH)) { + /*if (!speculate && instr.mod == 1) { //simulated predication + do { + retireCycle++; + } while (busyExecutionPorts[retireCycle - 1] >= executionPorts); + busyExecutionPorts[retireCycle - 1]++; + int_reg_ready[instr.dst] = retireCycle; + }*/ + //else 
{ + lastBranch = std::max(lastBranch, retireCycle); + branchCount++; + //} + } + + //print + auto& origi = original(index); + origi.dst %= randomx::RegistersCount; + origi.src %= randomx::RegistersCount; + if (print) { + std::cout << std::setw(2) << retireCycle << ": " << origi; + if (has(instr, MASK_EXT, OP_BRANCH)) { + std::cout << " jump: " << (int)instr.mod << std::endl; + } + } + } + int cycle = 0; + for (int i = 0; i < randomx::RegistersCount; ++i) { + cycle = std::max(cycle, int_reg_ready[i]); + } + for (int i = 0; i < randomx::RegistersCount; ++i) { + cycle = std::max(cycle, flt_reg_ready[i]); + } + if (speculate) { + //account for mispredicted branches + int i = 0; + while (branchCount--) { + auto entropy = p.getEntropy(i / 8); + entropy >> (i % 8) * 8; + if ((entropy & 0xff) == 0) // 1/256 chance to flush the pipeline + cycle += pipeline - 1; + } + } + return cycle; +} + +#include "../instruction_weights.hpp" + +//old register selection +struct RegisterUsage { + int32_t lastUsed; + int32_t count; +}; + +inline int getConditionRegister(RegisterUsage(&registerUsage)[randomx::RegistersCount]) { + int min = INT_MAX; + int minCount = 0; + int minIndex; + //prefer registers that have been used as a condition register fewer times + for (unsigned i = 0; i < randomx::RegistersCount; ++i) { + if (registerUsage[i].lastUsed < min || (registerUsage[i].lastUsed == min && registerUsage[i].count < minCount)) { + min = registerUsage[i].lastUsed; + minCount = registerUsage[i].count; + minIndex = i; + } + } + return minIndex; +} + +int analyze(randomx::Program& p) { + int jumpCount = 0; + RegisterUsage registerUsage[randomx::RegistersCount]; + for (unsigned i = 0; i < randomx::RegistersCount; ++i) { + registerUsage[i].lastUsed = -1; + registerUsage[i].count = 0; + } + for (unsigned i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) { + auto& instr = p(i); + int opcode = instr.opcode; + instr.opcode = 0; + switch (opcode) { + CASE_REP(IADD_RS) { + instr.dst = instr.dst %
randomx::RegistersCount; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= SRC_INT; + instr.opcode |= DST_INT; + registerUsage[instr.dst].lastUsed = i; + } break; + + CASE_REP(IADD_M) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= SRC_MEM; + instr.opcode |= DST_INT; + if (instr.src != instr.dst) { + instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); + } + else { + instr.imm32 &= randomx::ScratchpadL3Mask; + } + registerUsage[instr.dst].lastUsed = i; + } break; + + CASE_REP(ISUB_R) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= DST_INT; + instr.opcode |= SRC_INT; + registerUsage[instr.dst].lastUsed = i; + } break; + + CASE_REP(ISUB_M) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= SRC_MEM; + instr.opcode |= DST_INT; + if (instr.src != instr.dst) { + instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); + } + else { + instr.imm32 &= randomx::ScratchpadL3Mask; + } + registerUsage[instr.dst].lastUsed = i; + } break; + + CASE_REP(IMUL_R) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= DST_INT; + instr.opcode |= SRC_INT; + registerUsage[instr.dst].lastUsed = i; + } break; + + CASE_REP(IMUL_M) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= SRC_MEM; + instr.opcode |= DST_INT; + if (instr.src != instr.dst) { + instr.imm32 = (instr.getModMem() ? 
randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); + } + else { + instr.imm32 &= randomx::ScratchpadL3Mask; + } + registerUsage[instr.dst].lastUsed = i; + } break; + + CASE_REP(IMULH_R) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= DST_INT; + instr.opcode |= SRC_INT; + registerUsage[instr.dst].lastUsed = i; + } break; + + CASE_REP(IMULH_M) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= SRC_MEM; + instr.opcode |= DST_INT; + if (instr.src != instr.dst) { + instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); + } + else { + instr.imm32 &= randomx::ScratchpadL3Mask; + } + registerUsage[instr.dst].lastUsed = i; + } break; + + CASE_REP(ISMULH_R) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= DST_INT; + instr.opcode |= SRC_INT; + registerUsage[instr.dst].lastUsed = i; + } break; + + CASE_REP(ISMULH_M) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= SRC_MEM; + instr.opcode |= DST_INT; + if (instr.src != instr.dst) { + instr.imm32 = (instr.getModMem() ? 
randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); + } + else { + instr.imm32 &= randomx::ScratchpadL3Mask; + } + registerUsage[instr.dst].lastUsed = i; + } break; + + CASE_REP(IMUL_RCP) { + uint64_t divisor = instr.getImm32(); + if (!randomx::isPowerOf2(divisor)) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.opcode |= DST_INT; + registerUsage[instr.dst].lastUsed = i; + } + } break; + + CASE_REP(INEG_R) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.opcode |= DST_INT; + registerUsage[instr.dst].lastUsed = i; + } break; + + CASE_REP(IXOR_R) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= DST_INT; + instr.opcode |= SRC_INT; + registerUsage[instr.dst].lastUsed = i; + } break; + + CASE_REP(IXOR_M) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= SRC_MEM; + instr.opcode |= DST_INT; + if (instr.src != instr.dst) { + instr.imm32 = (instr.getModMem() ? 
randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); + } + else { + instr.imm32 &= randomx::ScratchpadL3Mask; + } + registerUsage[instr.dst].lastUsed = i; + } break; + + CASE_REP(IROR_R) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= DST_INT; + instr.opcode |= SRC_INT; + registerUsage[instr.dst].lastUsed = i; + } break; + + CASE_REP(IROL_R) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= DST_INT; + instr.opcode |= SRC_INT; + registerUsage[instr.dst].lastUsed = i; + } break; + + CASE_REP(ISWAP_R) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.src = instr.src % randomx::RegistersCount; + if (instr.src != instr.dst) { + instr.opcode |= DST_INT; + instr.opcode |= SRC_INT; + instr.opcode |= OP_SWAP; + registerUsage[instr.dst].lastUsed = i; + registerUsage[instr.src].lastUsed = i; + } + } break; + + CASE_REP(FSWAP_R) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.opcode |= DST_FLT; + } break; + + CASE_REP(FADD_R) { + instr.dst = instr.dst % randomx::RegisterCountFlt; + instr.opcode |= DST_FLT; + instr.opcode |= OP_FLOAT; + } break; + + CASE_REP(FADD_M) { + instr.dst = instr.dst % randomx::RegisterCountFlt; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= DST_FLT; + instr.opcode |= SRC_MEM; + instr.opcode |= OP_FLOAT; + instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); + } break; + + CASE_REP(FSUB_R) { + instr.dst = instr.dst % randomx::RegisterCountFlt; + instr.opcode |= DST_FLT; + instr.opcode |= OP_FLOAT; + } break; + + CASE_REP(FSUB_M) { + instr.dst = instr.dst % randomx::RegisterCountFlt; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= DST_FLT; + instr.opcode |= SRC_MEM; + instr.opcode |= OP_FLOAT; + instr.imm32 = (instr.getModMem() ? 
randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); + } break; + + CASE_REP(FSCAL_R) { + instr.dst = instr.dst % randomx::RegisterCountFlt; + instr.opcode |= DST_FLT; + } break; + + CASE_REP(FMUL_R) { + instr.dst = 4 + instr.dst % randomx::RegisterCountFlt; + instr.opcode |= DST_FLT; + instr.opcode |= OP_FLOAT; + } break; + + CASE_REP(FDIV_M) { + instr.dst = 4 + instr.dst % randomx::RegisterCountFlt; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= DST_FLT; + instr.opcode |= SRC_MEM; + instr.opcode |= OP_FLOAT; + instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); + } break; + + CASE_REP(FSQRT_R) { + instr.dst = 4 + instr.dst % randomx::RegisterCountFlt; + instr.opcode |= DST_FLT; + instr.opcode |= OP_FLOAT; + } break; + + CASE_REP(CBRANCH) { + instr.opcode |= OP_BRANCH; + instr.opcode |= DST_INT; + //jump condition + //int reg = getConditionRegister(registerUsage); + int reg = instr.dst % randomx::RegistersCount; + int target = registerUsage[reg].lastUsed; + int offset = (i - target); + instr.mod = offset; + jumpCount += offset; + p(target + 1).opcode |= BRANCH_TARGET; + registerUsage[reg].count++; + instr.dst = reg; + //mark all registers as used + for (unsigned j = 0; j < randomx::RegistersCount; ++j) { + registerUsage[j].lastUsed = i; + } + } break; + + CASE_REP(CFROUND) { + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= SRC_INT; + instr.opcode |= OP_CFROUND; + } break; + + CASE_REP(ISTORE) { + instr.dst = instr.dst % randomx::RegistersCount; + instr.src = instr.src % randomx::RegistersCount; + instr.opcode |= SRC_INT; + instr.opcode |= DST_MEM; + if (instr.getModCond() < randomx::StoreL3Condition) + instr.imm32 = (instr.getModMem() ? 
randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask); + else + instr.imm32 &= randomx::ScratchpadL3Mask; + } break; + + CASE_REP(NOP) { + + } break; + + default: + UNREACHABLE; + } + } + return jumpCount; +} diff --git a/vcxproj/perf-simulation.vcxproj b/vcxproj/perf-simulation.vcxproj new file mode 100644 index 0000000..7570a8b --- /dev/null +++ b/vcxproj/perf-simulation.vcxproj @@ -0,0 +1,128 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + 15.0 + {F1FC7AC0-2773-4A57-AFA7-56BB07216AA2} + perfsimulation + 10.0.17763.0 + + + + Application + true + v141 + MultiByte + + + Application + false + v141 + true + MultiByte + + + Application + true + v141 + MultiByte + + + Application + false + v141 + true + MultiByte + + + + + + + + + + + + + + + + + + + + + + + Level3 + MaxSpeed + true + true + true + true + + + true + true + + + + + Level3 + Disabled + true + true + + + + + Level3 + Disabled + true + true + + + + + Level3 + MaxSpeed + true + true + true + true + + + true + true + + + + + + + + {3346a4ad-c438-4324-8b77-47a16452954b} + + + + + + \ No newline at end of file diff --git a/vcxproj/perf-simulation.vcxproj.filters b/vcxproj/perf-simulation.vcxproj.filters new file mode 100644 index 0000000..5870291 --- /dev/null +++ b/vcxproj/perf-simulation.vcxproj.filters @@ -0,0 +1,22 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;ipp;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + \ No newline at end of file