From 9404516dd87a722b2aa763a36d6c9045b4fc4fbf Mon Sep 17 00:00:00 2001 From: tevador Date: Fri, 12 Apr 2019 14:56:20 +0200 Subject: [PATCH] Refactoring --- src/LightProgramGenerator.cpp | 47 +++++++++++++++---- src/LightProgramGenerator.hpp | 2 +- src/common.hpp | 1 + src/main.cpp | 6 +-- vcxproj/randomx.vcxproj | 29 ++++++++++++ vcxproj/randomx.vcxproj.filters | 83 +++++++++++++++++++++++++++++++++ 6 files changed, 154 insertions(+), 14 deletions(-) diff --git a/src/LightProgramGenerator.cpp b/src/LightProgramGenerator.cpp index 8692dc3..40a767b 100644 --- a/src/LightProgramGenerator.cpp +++ b/src/LightProgramGenerator.cpp @@ -507,8 +507,16 @@ namespace RandomX { bool selectDestination(int cycle, RegisterInfo (®isters)[8], Blake2Generator& gen) { std::vector availableRegisters; + //Conditions for the destination register: + // * value must be ready at the required cycle + // * cannot be the same as the source register unless the instruction allows it + // - this avoids optimizable instructions such as "xor r, r" or "sub r, r" + // * either the last instruction applied to the register or its source must be different than this instruction + // - this avoids optimizable instruction sequences such as "xor r1, r2; xor r1, r2" or "ror r, C1; ror r, C2" or "add r, C1; add r, C2" + // - it also avoids accumulation of trailing zeroes in registers due to excessive multiplication + // * register r5 cannot be the destination of the IADD_RS instruction (limitation of the x86 lea instruction) for (unsigned i = 0; i < 8; ++i) { - if (registers[i].latency <= cycle && (canReuse_ || i != src_) && (registers[i].lastOpGroup != opGroup_ || registers[i].lastOpPar != opGroupPar_) && (info_->getType() != SuperscalarInstructionType::IADD_RS || i != 5)) + if (registers[i].latency <= cycle && (canReuse_ || i != src_) && (registers[i].lastOpGroup != opGroup_ || registers[i].lastOpPar != opGroupPar_) && (info_->getType() != SuperscalarInstructionType::IADD_RS || i != LimitedAddressRegister)) availableRegisters.push_back(i); } return selectRegister(availableRegisters, gen, dst_); @@ -516,13 +524,15 @@ namespace RandomX { bool selectSource(int cycle, RegisterInfo(®isters)[8], Blake2Generator& gen) { std::vector availableRegisters; + //all registers that are ready at the cycle for (unsigned i = 0; i < 8; ++i) { if (registers[i].latency <= cycle) availableRegisters.push_back(i); } + //if there are only 2 available registers for IADD_RS and one of them is r5, select it as the source because it cannot be the destination if (availableRegisters.size() == 2 && info_->getType() == SuperscalarInstructionType::IADD_RS) { - if (availableRegisters[0] == 5 || availableRegisters[1] == 5) { - opGroupPar_ = src_ = 5; + if (availableRegisters[0] == LimitedAddressRegister || availableRegisters[1] == LimitedAddressRegister) { + opGroupPar_ = src_ = LimitedAddressRegister; return true; } } @@ -656,7 +666,7 @@ namespace RandomX { return -1; } - double generateLightProg2(LightProgram& prog, Blake2Generator& gen) { + double generateSuperscalar(LightProgram& prog, Blake2Generator& gen) { ExecutionPort::type portBusy[CYCLE_MAP_SIZE][3]; memset(portBusy, 0, sizeof(portBusy)); @@ -674,6 +684,7 @@ namespace RandomX { int programSize = 0; int mulCount = 0; int decodeCycle; + int throwAwayCount = 0; //decode instructions for RANDOMX_SUPERSCALAR_LATENCY cycles or until an execution port is saturated. //Each decode cycle decodes 16 bytes of x86 code. @@ -722,12 +733,20 @@ namespace RandomX { } //if no register was found, throw the instruction away and try another one if (forward == LOOK_FORWARD_CYCLES) { - macroOpIndex = currentInstruction.getInfo().getSize(); - if (TRACE) std::cout << "; THROW away " << currentInstruction.getInfo().getName() << std::endl; - continue; + if (throwAwayCount < MAX_THROWAWAY_COUNT) { + throwAwayCount++; + macroOpIndex = currentInstruction.getInfo().getSize(); + if (TRACE) std::cout << "; THROW away " << currentInstruction.getInfo().getName() << std::endl; + continue; + } + //abort this decode buffer + /*if (TRACE)*/ std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - source registers not available" << std::endl; + currentInstruction = LightInstruction::Null; + break; } if (TRACE) std::cout << "; src = r" << currentInstruction.getSource() << std::endl; } + throwAwayCount = 0; //find a destination register that will be ready when this instruction executes if (macroOpIndex == currentInstruction.getInfo().getDstOp()) { int forward; @@ -737,12 +756,20 @@ namespace RandomX { ++cycle; } if (forward == LOOK_FORWARD_CYCLES) { //throw instruction away - macroOpIndex = currentInstruction.getInfo().getSize(); - if (TRACE) std::cout << "; THROW away " << currentInstruction.getInfo().getName() << std::endl; - continue; + if (throwAwayCount < MAX_THROWAWAY_COUNT) { + throwAwayCount++; + macroOpIndex = currentInstruction.getInfo().getSize(); + if (TRACE) std::cout << "; THROW away " << currentInstruction.getInfo().getName() << std::endl; + continue; + } + //abort this decode buffer + /*if (TRACE)*/ std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - destination registers not available" << std::endl; + currentInstruction = LightInstruction::Null; + break; } if (TRACE) std::cout << "; dst = r" << currentInstruction.getDestination() << std::endl; } + throwAwayCount = 0; //recalculate when the instruction can be scheduled for execution based on operand availability scheduleCycle = scheduleMop(mop, portBusy, scheduleCycle, scheduleCycle); diff --git a/src/LightProgramGenerator.hpp b/src/LightProgramGenerator.hpp index 7030d10..beb7974 100644 --- a/src/LightProgramGenerator.hpp +++ b/src/LightProgramGenerator.hpp @@ -54,5 +54,5 @@ namespace RandomX { void checkData(const size_t); }; - double generateLightProg2(LightProgram& prog, Blake2Generator& gen); + double generateSuperscalar(LightProgram& prog, Blake2Generator& gen); } \ No newline at end of file diff --git a/src/common.hpp b/src/common.hpp index 83a9bc7..034c10f 100644 --- a/src/common.hpp +++ b/src/common.hpp @@ -95,6 +95,7 @@ namespace RandomX { constexpr int ScratchpadL3Mask = (ScratchpadL3 - 1) * 8; constexpr int ScratchpadL3Mask64 = (ScratchpadL3 / 8 - 1) * 64; constexpr int RegistersCount = 8; + constexpr int LimitedAddressRegister = 5; //x86 r13 register struct Cache { uint8_t* memory; diff --git a/src/main.cpp b/src/main.cpp index 4866804..a120cf9 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -228,11 +228,11 @@ int main(int argc, char** argv) { if (genSuperscalar) { RandomX::LightProgram p; RandomX::Blake2Generator gen(seed, programCount); - RandomX::generateLightProg2(p, gen); + RandomX::generateSuperscalar(p, gen); RandomX::AssemblyGeneratorX86 asmX86; asmX86.generateAsm(p); //std::ofstream file("lightProg2.asm"); - asmX86.printCode(std::cout); + //asmX86.printCode(std::cout); return 0; } @@ -288,7 +288,7 @@ int main(int argc, char** argv) { if (!legacy) { RandomX::Blake2Generator gen(seed, programCount); for (int i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) { - RandomX::generateLightProg2(programs[i], gen); + RandomX::generateSuperscalar(programs[i], gen); } } if (!miningMode) { diff --git a/vcxproj/randomx.vcxproj b/vcxproj/randomx.vcxproj index 3dc09c8..1c1cae0 100644 --- a/vcxproj/randomx.vcxproj +++ b/vcxproj/randomx.vcxproj @@ -149,6 +149,35 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/vcxproj/randomx.vcxproj.filters b/vcxproj/randomx.vcxproj.filters index 9f33e02..5b821c8 100644 --- a/vcxproj/randomx.vcxproj.filters +++ b/vcxproj/randomx.vcxproj.filters @@ -84,4 +84,87 @@ Source Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + \ No newline at end of file