From 4c66b2305ac69bfe1980d1b098c3cc875cca979c Mon Sep 17 00:00:00 2001 From: tevador Date: Mon, 22 Apr 2019 18:20:46 +0200 Subject: [PATCH] Formatting & refactoring --- src/assembly_generator_x86.cpp | 6 --- src/configuration.h | 6 --- src/dataset.cpp | 7 --- src/instruction.hpp | 45 ++++++++-------- src/instructions_portable.cpp | 20 +++---- src/jit_compiler_x86.cpp | 98 ++++------------------------------ src/randomx.h | 8 +-- src/superscalar.cpp | 6 +-- src/vm_compiled.cpp | 1 - src/vm_compiled.hpp | 4 -- src/vm_interpreted.cpp | 31 ----------- src/vm_interpreted.hpp | 2 - 12 files changed, 47 insertions(+), 187 deletions(-) diff --git a/src/assembly_generator_x86.cpp b/src/assembly_generator_x86.cpp index 485c549..18fa18e 100644 --- a/src/assembly_generator_x86.cpp +++ b/src/assembly_generator_x86.cpp @@ -34,8 +34,6 @@ namespace randomx { static const char* regE[4] = { "xmm4", "xmm5", "xmm6", "xmm7" }; static const char* regA[4] = { "xmm8", "xmm9", "xmm10", "xmm11" }; - static const char* fsumInstr[4] = { "paddb", "paddw", "paddd", "paddq" }; - static const char* regA4 = "xmm12"; static const char* dblMin = "xmm13"; static const char* absMask = "xmm14"; @@ -58,7 +56,6 @@ namespace randomx { instr.src %= RegistersCount; instr.dst %= RegistersCount; generateCode(instr, i); - //asmCode << std::endl; } } @@ -494,7 +491,6 @@ namespace randomx { //2 uOPs void AssemblyGeneratorX86::h_ISWAP_R(Instruction& instr, int i) { if (instr.src != instr.dst) { - //std::swap(registerUsage[instr.dst], registerUsage[instr.src]); registerUsage[instr.dst] = i; registerUsage[instr.src] = i; asmCode << "\txchg " << regR[instr.dst] << ", " << regR[instr.src] << std::endl; @@ -516,7 +512,6 @@ namespace randomx { instr.dst %= 4; instr.src %= 4; asmCode << "\taddpd " << regF[instr.dst] << ", " << regA[instr.src] << std::endl; - //asmCode << "\t" << fsumInstr[instr.mod % 4] << " " << signMask << ", " << regF[instr.dst] << std::endl; traceflt(instr); } @@ -534,7 +529,6 @@ namespace randomx { instr.dst %= 4; instr.src %= 4; asmCode << "\tsubpd " << regF[instr.dst] << ", " << regA[instr.src] << std::endl; - //asmCode << "\t" << fsumInstr[instr.mod % 4] << " " << signMask << ", " << regF[instr.dst] << std::endl; traceflt(instr); } diff --git a/src/configuration.h b/src/configuration.h index 44db843..ef57775 100644 --- a/src/configuration.h +++ b/src/configuration.h @@ -40,12 +40,6 @@ along with RandomX. If not, see. //Dataset size in bytes. Must be a power of 2. #define RANDOMX_DATASET_SIZE (2ULL * 1024 * 1024 * 1024) -//Number of blocks per epoch -#define RANDOMX_EPOCH_BLOCKS 2048 - -//Number of blocks between the seed block and the start of new epoch -#define RANDOMX_EPOCH_LAG 64 - //Number of instructions in a RandomX program #define RANDOMX_PROGRAM_SIZE 256 diff --git a/src/dataset.cpp b/src/dataset.cpp index 1e4ab09..5858115 100644 --- a/src/dataset.cpp +++ b/src/dataset.cpp @@ -40,13 +40,6 @@ along with RandomX. If not, see. #include "argon2.h" #include "argon2_core.h" -#if defined(__SSE2__) -#include -#define PREFETCHNTA(x) _mm_prefetch((const char *)(x), _MM_HINT_NTA) -#else -#define PREFETCH(memory) -#endif - randomx_dataset::~randomx_dataset() { } diff --git a/src/instruction.hpp b/src/instruction.hpp index 2fef4de..323c1f5 100644 --- a/src/instruction.hpp +++ b/src/instruction.hpp @@ -43,30 +43,27 @@ namespace randomx { constexpr int ISMULH_R = 10; constexpr int ISMULH_M = 11; constexpr int IMUL_RCP = 12; - //constexpr int ISDIV_C = 13; - constexpr int INEG_R = 14; - constexpr int IXOR_R = 15; - constexpr int IXOR_M = 16; - constexpr int IROR_R = 17; - constexpr int IROL_R = 18; - constexpr int ISWAP_R = 19; - constexpr int FSWAP_R = 20; - constexpr int FADD_R = 21; - constexpr int FADD_M = 22; - constexpr int FSUB_R = 23; - constexpr int FSUB_M = 24; - constexpr int FSCAL_R = 25; - constexpr int FMUL_R = 26; - constexpr int FMUL_M = 27; - constexpr int FDIV_R = 28; - constexpr int FDIV_M = 29; - constexpr int FSQRT_R = 30; - constexpr int COND_R = 31; - constexpr int COND_M = 32; - constexpr int CFROUND = 33; - constexpr int ISTORE = 34; - constexpr int FSTORE = 35; - constexpr int NOP = 36; + constexpr int INEG_R = 13; + constexpr int IXOR_R = 14; + constexpr int IXOR_M = 15; + constexpr int IROR_R = 16; + constexpr int IROL_R = 17; + constexpr int ISWAP_R = 18; + constexpr int FSWAP_R = 19; + constexpr int FADD_R = 20; + constexpr int FADD_M = 21; + constexpr int FSUB_R = 22; + constexpr int FSUB_M = 23; + constexpr int FSCAL_R = 24; + constexpr int FMUL_R = 25; + constexpr int FDIV_M = 26; + constexpr int FSQRT_R = 27; + constexpr int COND_R = 28; + constexpr int COND_M = 29; + constexpr int CFROUND = 30; + constexpr int ISTORE = 31; + constexpr int FSTORE = 32; + constexpr int NOP = 33; } class Instruction { diff --git a/src/instructions_portable.cpp b/src/instructions_portable.cpp index ac12eed..cfa20ab 100644 --- a/src/instructions_portable.cpp +++ b/src/instructions_portable.cpp @@ -73,14 +73,14 @@ along with RandomX. If not, see. #define HAVE_SMULH #endif - static void setRoundMode__(uint32_t mode) { + static void setRoundMode_(uint32_t mode) { _controlfp(mode, _MCW_RC); } #define HAVE_SETROUNDMODE_IMPL #endif #ifndef HAVE_SETROUNDMODE_IMPL - static void setRoundMode__(uint32_t mode) { + static void setRoundMode_(uint32_t mode) { fesetround(mode); } #endif @@ -135,7 +135,7 @@ along with RandomX. If not, see. #if defined(__has_builtin) #if __has_builtin(__builtin_sub_overflow) - static inline bool subOverflow__(uint32_t a, uint32_t b) { + static inline bool subOverflow_(uint32_t a, uint32_t b) { int32_t temp; return __builtin_sub_overflow(unsigned32ToSigned2sCompl(a), unsigned32ToSigned2sCompl(b), &temp); } @@ -144,7 +144,7 @@ along with RandomX. If not, see. #endif #ifndef HAVE_SUB_OVERFLOW - static inline bool subOverflow__(uint32_t a, uint32_t b) { + static inline bool subOverflow_(uint32_t a, uint32_t b) { auto c = unsigned32ToSigned2sCompl(a - b); return (c < unsigned32ToSigned2sCompl(a)) != (unsigned32ToSigned2sCompl(b) > 0); } @@ -166,16 +166,16 @@ static inline double FlushNaN(double x) { void setRoundMode(uint32_t rcflag) { switch (rcflag & 3) { case RoundDown: - setRoundMode__(FE_DOWNWARD); + setRoundMode_(FE_DOWNWARD); break; case RoundUp: - setRoundMode__(FE_UPWARD); + setRoundMode_(FE_UPWARD); break; case RoundToZero: - setRoundMode__(FE_TOWARDZERO); + setRoundMode_(FE_TOWARDZERO); break; case RoundToNearest: - setRoundMode__(FE_TONEAREST); + setRoundMode_(FE_TONEAREST); break; default: UNREACHABLE; @@ -194,9 +194,9 @@ bool condition(uint32_t type, uint32_t value, uint32_t imm32) { case 3: return unsigned32ToSigned2sCompl(value - imm32) >= 0; case 4: - return subOverflow__(value, imm32); + return subOverflow_(value, imm32); case 5: - return !subOverflow__(value, imm32); + return !subOverflow_(value, imm32); case 6: return unsigned32ToSigned2sCompl(value) < unsigned32ToSigned2sCompl(imm32); case 7: diff --git a/src/jit_compiler_x86.cpp b/src/jit_compiler_x86.cpp index 865f932..1d1f80b 100644 --- a/src/jit_compiler_x86.cpp +++ b/src/jit_compiler_x86.cpp @@ -103,13 +103,11 @@ namespace randomx { ; xmm11 -> "a3" ; xmm12 -> temporary ; xmm13 -> mantissa mask = 0x000fffffffffffff000fffffffffffff - ; xmm14 -> exponent 2**-240 = 0x30f000000000000030f0000000000000 + ; xmm14 -> exponent 2**-240 = 0x30f00000000xxxxx30f00000000xxxxx ; xmm15 -> scale mask = 0x81f000000000000081f0000000000000 */ -#define NOP_TEST true - const uint8_t* codePrologue = (uint8_t*)&randomx_program_prologue; const uint8_t* codeLoopBegin = (uint8_t*)&randomx_program_loop_begin; const uint8_t* codeLoopLoad = (uint8_t*)&randomx_program_loop_load; @@ -254,18 +252,10 @@ namespace randomx { void JitCompilerX86::generateProgramLight(Program& prog, ProgramConfiguration& pcfg) { generateProgramPrologue(prog, pcfg); - //if (superscalar) { emit(codeReadDatasetLightSshInit, readDatasetLightInitSize); emitByte(CALL); emit32(superScalarHashOffset - (codePos + 4)); emit(codeReadDatasetLightSshFin, readDatasetLightFinSize); - /*} - else { - memcpy(code + codePos, codeReadDatasetLight, readDatasetLightSize); - codePos += readDatasetLightSize; - emitByte(CALL); - emit32(readDatasetLightSubOffset - (codePos + 4)); - }*/ generateProgramEpilogue(prog); } @@ -483,10 +473,6 @@ namespace randomx { emitByte(0xc0 + instr.dst); emit32(instr.getImm32()); }*/ - if (false && NOP_TEST) { - emit(NOP4); - return; - } emit(REX_LEA); if (instr.dst == RegisterNeedsDisplacement) emitByte(0xac); @@ -527,18 +513,10 @@ namespace randomx { void JitCompilerX86::h_ISUB_R(Instruction& instr, int i) { registerUsage[instr.dst] = i; if (instr.src != instr.dst) { - if (false && NOP_TEST) { - emit(NOP3); - return; - } emit(REX_SUB_RR); emitByte(0xc0 + 8 * instr.dst + instr.src); } else { - if (false && NOP_TEST) { - emit(NOP7); - return; - } emit(REX_81); emitByte(0xe8 + instr.dst); emit32(instr.getImm32()); @@ -571,18 +549,10 @@ namespace randomx { void JitCompilerX86::h_IMUL_R(Instruction& instr, int i) { registerUsage[instr.dst] = i; if (instr.src != instr.dst) { - if (false && NOP_TEST) { - emit(NOP4); - return; - } emit(REX_IMUL_RR); emitByte(0xc0 + 8 * instr.dst + instr.src); } else { - if (false && NOP_TEST) { - emit(NOP7); - return; - } emit(REX_IMUL_RRI); emitByte(0xc0 + 9 * instr.dst); emit32(instr.getImm32()); @@ -606,12 +576,6 @@ namespace randomx { void JitCompilerX86::h_IMULH_R(Instruction& instr, int i) { registerUsage[instr.dst] = i; - if (false && NOP_TEST) { - emit(NOP3); - emit(NOP3); - emit(NOP3); - return; - } emit(REX_MOV_RR64); emitByte(0xc0 + instr.dst); emit(REX_MUL_R); @@ -641,12 +605,6 @@ namespace randomx { void JitCompilerX86::h_ISMULH_R(Instruction& instr, int i) { registerUsage[instr.dst] = i; - if (false && NOP_TEST) { - emit(NOP3); - emit(NOP3); - emit(NOP3); - return; - } emit(REX_MOV_RR64); emitByte(0xc0 + instr.dst); emit(REX_MUL_R); @@ -676,13 +634,6 @@ namespace randomx { void JitCompilerX86::h_IMUL_RCP(Instruction& instr, int i) { if (instr.getImm32() != 0) { - if (false && NOP_TEST) { - emitByte(0x66); - emitByte(0x66); - emit(NOP8); - emit(NOP4); - return; - } registerUsage[instr.dst] = i; emit(MOV_RAX_I); emit64(randomx_reciprocal(instr.getImm32())); @@ -704,18 +655,10 @@ namespace randomx { void JitCompilerX86::h_IXOR_R(Instruction& instr, int i) { registerUsage[instr.dst] = i; if (instr.src != instr.dst) { - if (false && NOP_TEST) { - emit(NOP3); - return; - } emit(REX_XOR_RR); emitByte(0xc0 + 8 * instr.dst + instr.src); } else { - if (false && NOP_TEST) { - emit(NOP7); - return; - } emit(REX_XOR_RI); emitByte(0xf0 + instr.dst); emit32(instr.getImm32()); @@ -740,21 +683,12 @@ namespace randomx { void JitCompilerX86::h_IROR_R(Instruction& instr, int i) { registerUsage[instr.dst] = i; if (instr.src != instr.dst) { - if (false && NOP_TEST) { - emit(NOP3); - emit(NOP3); - return; - } emit(REX_MOV_RR); emitByte(0xc8 + instr.src); emit(REX_ROT_CL); emitByte(0xc8 + instr.dst); } else { - if (false && NOP_TEST) { - emit(NOP4); - return; - } emit(REX_ROT_I8); emitByte(0xc8 + instr.dst); emitByte(instr.getImm32() & 63); @@ -949,21 +883,14 @@ namespace randomx { const int conditionMask = ((1 << RANDOMX_CONDITION_BITS) - 1) << shift; int reg = getConditionRegister(); int target = registerUsage[reg] + 1; - if (false && NOP_TEST) { - emit(NOP7); - emit(NOP7); - emit(NOP6); - } - else { - emit(REX_ADD_I); - emitByte(0xc0 + reg); - emit32(1 << shift); - emit(REX_TEST); - emitByte(0xc0 + reg); - emit32(conditionMask); - emit(JZ); - emit32(instructionOffsets[target] - (codePos + 4)); - } + emit(REX_ADD_I); + emitByte(0xc0 + reg); + emit32(1 << shift); + emit(REX_TEST); + emitByte(0xc0 + reg); + emit32(conditionMask); + emit(JZ); + emit32(instructionOffsets[target] - (codePos + 4)); for (unsigned j = 0; j < 8; ++j) { //mark all registers as used registerUsage[j] = i; } @@ -973,13 +900,6 @@ namespace randomx { #ifdef RANDOMX_JUMP handleCondition(instr, i); #endif - if (false && NOP_TEST) { - emit(NOP3); - emit(NOP7); - emit(NOP3); - emit(NOP3); - return; - } emit(XOR_ECX_ECX); emit(REX_CMP_R32I); emitByte(0xf8 + instr.src); diff --git a/src/randomx.h b/src/randomx.h index 37365bd..f5dec33 100644 --- a/src/randomx.h +++ b/src/randomx.h @@ -50,8 +50,8 @@ extern "C" { * subsequent Dataset initialization faster * * @return Pointer to an allocated randomx_cache structure. - NULL is returned if memory allocation fails or if the RANDOMX_FLAG_JIT - is set and JIT compilation is not supported on the current platform. + * NULL is returned if memory allocation fails or if the RANDOMX_FLAG_JIT + * is set and JIT compilation is not supported on the current platform. */ randomx_cache *randomx_alloc_cache(randomx_flags flags); @@ -76,9 +76,9 @@ void randomx_release_cache(randomx_cache* cache); * * @param flags is the initialization flags. Only one flag is supported (can be set or not set): * RANDOMX_FLAG_LARGE_PAGES - allocate memory in large pages - + * * @return Pointer to an allocated randomx_cache structure. - NULL is returned if memory allocation fails. + * NULL is returned if memory allocation fails. */ randomx_dataset *randomx_alloc_dataset(randomx_flags flags); diff --git a/src/superscalar.cpp b/src/superscalar.cpp index f71c1fe..8bf757e 100644 --- a/src/superscalar.cpp +++ b/src/superscalar.cpp @@ -700,7 +700,7 @@ namespace randomx { //calculate the earliest cycle when this macro-op (all of its uOPs) can be scheduled for execution int scheduleCycle = scheduleMop(mop, portBusy, cycle, depCycle); if (scheduleCycle < 0) { - /*if (TRACE)*/ std::cout << "Unable to map operation '" << mop.getName() << "' to execution port (cycle " << cycle << ")" << std::endl; + if (TRACE) std::cout << "Unable to map operation '" << mop.getName() << "' to execution port (cycle " << cycle << ")" << std::endl; //__debugbreak(); portsSaturated = true; break; @@ -725,7 +725,7 @@ namespace randomx { continue; } //abort this decode buffer - /*if (TRACE)*/ std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - source registers not available for operation " << currentInstruction.getInfo().getName() << std::endl; + if (TRACE) std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - source registers not available for operation " << currentInstruction.getInfo().getName() << std::endl; currentInstruction = SuperscalarInstruction::Null; break; } @@ -748,7 +748,7 @@ namespace randomx { continue; } //abort this decode buffer - /*if (TRACE)*/ std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - destination registers not available" << std::endl; + if (TRACE) std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - destination registers not available" << std::endl; currentInstruction = SuperscalarInstruction::Null; break; } diff --git a/src/vm_compiled.cpp b/src/vm_compiled.cpp index 098245e..7e4ef92 100644 --- a/src/vm_compiled.cpp +++ b/src/vm_compiled.cpp @@ -42,7 +42,6 @@ namespace randomx { template void CompiledVm::execute() { - //executeProgram(reg, mem, scratchpad, InstructionCount); compiler.getProgramFunc()(reg, mem, scratchpad, RANDOMX_PROGRAM_ITERATIONS); } diff --git a/src/vm_compiled.hpp b/src/vm_compiled.hpp index 0f4b2f6..5dcf2ae 100644 --- a/src/vm_compiled.hpp +++ b/src/vm_compiled.hpp @@ -28,10 +28,6 @@ along with RandomX. If not, see. namespace randomx { - extern "C" { - void executeProgram(RegisterFile&, MemoryRegisters&, uint8_t* /* scratchpad */, uint64_t); - } - template class CompiledVm : public VmBase { public: diff --git a/src/vm_interpreted.cpp b/src/vm_interpreted.cpp index 55b7fbd..4508330 100644 --- a/src/vm_interpreted.cpp +++ b/src/vm_interpreted.cpp @@ -115,7 +115,6 @@ namespace randomx { void InterpretedVm::executeBytecode(int& ic, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) { auto& ibc = byteCode[ic]; if (trace) std::cout << std::dec << std::setw(3) << ic << " " << program(ic); - //if(trace) printState(r, f, e, a); switch (ibc.type) { case InstructionType::IADD_RS: { @@ -237,15 +236,9 @@ namespace randomx { *ibc.creg += (1 << ibc.shift); const uint64_t conditionMask = ((1ULL << RANDOMX_CONDITION_BITS) - 1) << ibc.shift; if ((*ibc.creg & conditionMask) == 0) { -#ifdef STATS - count_JUMP_taken++; -#endif ic = ibc.target; break; } -#ifdef STATS - count_JUMP_not_taken++; -#endif #endif *ibc.idst += condition(ibc.condition, *ibc.isrc, ibc.imm) ? 1 : 0; } break; @@ -255,15 +248,9 @@ namespace randomx { *ibc.creg += (1uLL << ibc.shift); const uint64_t conditionMask = ((1ULL << RANDOMX_CONDITION_BITS) - 1) << ibc.shift; if ((*ibc.creg & conditionMask) == 0) { -#ifdef STATS - count_JUMP_taken++; -#endif ic = ibc.target; break; } -#ifdef STATS - count_JUMP_not_taken++; -#endif #endif *ibc.idst += condition(ibc.condition, load64(getScratchpadAddress(ibc)), ibc.imm) ? 1 : 0; } break; @@ -328,7 +315,6 @@ namespace randomx { } for(unsigned ic = 0; ic < RANDOMX_PROGRAM_ITERATIONS; ++ic) { - //std::cout << "Iteration " << iter << std::endl; uint64_t spMix = r[config.readReg0] ^ r[config.readReg1]; spAddr0 ^= spMix; spAddr0 &= ScratchpadL3Mask64; @@ -366,7 +352,6 @@ namespace randomx { mem.mx ^= r[config.readReg2] ^ r[config.readReg3]; mem.mx &= CacheLineAlignMask; datasetRead(mem.ma, r); - //executeSuperscalar(datasetBase + mem.ma / CacheLineSize, r); std::swap(mem.mx, mem.ma); if (trace) { @@ -450,22 +435,6 @@ namespace randomx { r[i] ^= datasetLine[i]; } - /*template - void InterpretedVirtualMachine::precompileSuperscalar(SuperscalarProgram* programs) { - memcpy(superScalarPrograms, programs, sizeof(superScalarPrograms)); - reciprocals.clear(); - for (unsigned i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) { - for (unsigned j = 0; j < superScalarPrograms[i].getSize(); ++j) { - Instruction& instr = superScalarPrograms[i](j); - if (instr.opcode == SuperscalarInstructionType::IMUL_RCP) { - auto rcp = reciprocal(instr.getImm32()); - instr.setImm32(reciprocals.size()); - reciprocals.push_back(rcp); - } - } - } - }*/ - #include "instruction_weights.hpp" template diff --git a/src/vm_interpreted.hpp b/src/vm_interpreted.hpp index 3a69de4..e45b4fd 100644 --- a/src/vm_interpreted.hpp +++ b/src/vm_interpreted.hpp @@ -49,8 +49,6 @@ namespace randomx { uint16_t shift; }; - constexpr int asedwfagdewsa = sizeof(InstructionByteCode); - template class InterpretedVm : public VmBase { public: