From 447e8a1d4fe3d28a7af355b47b4a800460c952a2 Mon Sep 17 00:00:00 2001 From: tevador Date: Fri, 15 Feb 2019 10:41:02 +0100 Subject: [PATCH] Simplified division in interpreted mode Fixed incorrect condition code in JitCompilerX86 Refactoring --- src/AssemblyGeneratorX86.cpp | 4 +-- src/InterpretedVirtualMachine.cpp | 57 ++++++++++++++----------------- src/InterpretedVirtualMachine.hpp | 15 +++----- src/JitCompilerX86.cpp | 4 +-- src/main.cpp | 2 +- 5 files changed, 35 insertions(+), 47 deletions(-) diff --git a/src/AssemblyGeneratorX86.cpp b/src/AssemblyGeneratorX86.cpp index 1e51fac..27ec601 100644 --- a/src/AssemblyGeneratorX86.cpp +++ b/src/AssemblyGeneratorX86.cpp @@ -431,8 +431,8 @@ namespace RandomX { asmCode << "\tldmxcsr dword ptr [rsp-8]" << std::endl; } - static inline const char* condition(Instruction& instr, bool invert = false) { - switch (((instr.mod >> 2) & 7) ^ invert) + static inline const char* condition(Instruction& instr) { + switch ((instr.mod >> 2) & 7) { case 0: return "be"; diff --git a/src/InterpretedVirtualMachine.cpp b/src/InterpretedVirtualMachine.cpp index 71c03af..455c089 100644 --- a/src/InterpretedVirtualMachine.cpp +++ b/src/InterpretedVirtualMachine.cpp @@ -33,7 +33,6 @@ along with RandomX. If not, see. #ifdef STATS #include #endif -#include "divideByConstantCodegen.h" #ifdef FPUCHECK constexpr bool fpuCheck = true; @@ -136,23 +135,21 @@ namespace RandomX { } break; case InstructionType::IDIV_C: { - if (ibc.signedMultiplier != 0) { - int_reg_t dividend = *ibc.idst; - int_reg_t quotient = dividend >> ibc.preShift; - if (ibc.increment) { - quotient = quotient == UINT64_MAX ? UINT64_MAX : quotient + 1; - } - quotient = mulh(quotient, ibc.signedMultiplier); - quotient >>= ibc.postShift; - *ibc.idst += quotient; - } - else { - *ibc.idst += *ibc.idst >> ibc.shift; - } + uint64_t dividend = *ibc.idst; + uint64_t quotient = dividend / ibc.imm; + *ibc.idst += quotient; } break; case InstructionType::ISDIV_C: { - + if (ibc.simm != -1) { + int64_t dividend = unsigned64ToSigned2sCompl(*ibc.idst); + int64_t quotient = dividend / ibc.simm; + *ibc.idst += quotient; + } + else { + uint64_t quotient = ~(*ibc.idst) + 1; + *ibc.idst += quotient; + } } break; case InstructionType::INEG_R: { @@ -204,8 +201,8 @@ namespace RandomX { } break; case InstructionType::FSCAL_R: { - const __m128d signMask = _mm_castsi128_pd(_mm_set1_epi64x(0x81F0000000000000)); - *ibc.fdst = _mm_xor_pd(*ibc.fdst, signMask); + const __m128d mask = _mm_castsi128_pd(_mm_set1_epi64x(0x81F0000000000000)); + *ibc.fdst = _mm_xor_pd(*ibc.fdst, mask); } break; case InstructionType::FMUL_R: { @@ -516,20 +513,7 @@ namespace RandomX { auto dst = instr.dst % RegistersCount; ibc.type = InstructionType::IDIV_C; ibc.idst = &r[dst]; - if (divisor & (divisor - 1)) { - magicu_info mi = compute_unsigned_magic_info(divisor, sizeof(uint64_t) * 8); - ibc.signedMultiplier = mi.multiplier; - ibc.preShift = mi.pre_shift; - ibc.postShift = mi.post_shift; - ibc.increment = mi.increment; - } - else { - ibc.signedMultiplier = 0; - int shift = 0; - while (divisor >>= 1) - ++shift; - ibc.shift = shift; - } + ibc.imm = divisor; } else { ibc.type = InstructionType::NOP; @@ -537,7 +521,16 @@ namespace RandomX { } break; CASE_REP(ISDIV_C) { - ibc.type = InstructionType::NOP; + int32_t divisor = unsigned32ToSigned2sCompl(instr.imm32); + if (divisor != 0) { + auto dst = instr.dst % RegistersCount; + ibc.type = InstructionType::ISDIV_C; + ibc.idst = &r[dst]; + ibc.simm = divisor; + } + else { + ibc.type = InstructionType::NOP; + } } break; CASE_REP(INEG_R) { diff --git a/src/InterpretedVirtualMachine.hpp b/src/InterpretedVirtualMachine.hpp index 4db4ae4..ac82ea5 100644 --- a/src/InterpretedVirtualMachine.hpp +++ b/src/InterpretedVirtualMachine.hpp @@ -38,23 +38,18 @@ namespace RandomX { typedef void(InterpretedVirtualMachine::*InstructionHandler)(Instruction&); - struct alignas(16) InstructionByteCode { + struct alignas(8) InstructionByteCode { int_reg_t* idst; int_reg_t* isrc; - int_reg_t imm; + union { + uint64_t imm; + int64_t simm; + }; __m128d* fdst; __m128d* fsrc; uint32_t condition; uint32_t memMask; uint32_t type; - union { - uint64_t unsignedMultiplier; - int64_t signedMultiplier; - }; - unsigned shift; - unsigned preShift; - unsigned postShift; - bool increment; }; constexpr int asedwfagdewsa = sizeof(InstructionByteCode); diff --git a/src/JitCompilerX86.cpp b/src/JitCompilerX86.cpp index c725c6e..dc812f2 100644 --- a/src/JitCompilerX86.cpp +++ b/src/JitCompilerX86.cpp @@ -666,8 +666,8 @@ namespace RandomX { emit(AND_OR_MOV_LDMXCSR); } - static inline uint8_t condition(Instruction& instr, bool invert = false) { - switch ((instr.mod & 7) ^ invert) + static inline uint8_t condition(Instruction& instr) { + switch ((instr.mod >> 2) & 7) { case 0: return 0x96; //setbe diff --git a/src/main.cpp b/src/main.cpp index 552675f..51df7f6 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -102,7 +102,7 @@ public: os << std::endl; } private: - void print(std::atomic& hash, std::ostream& os) { + static void print(std::atomic& hash, std::ostream& os) { auto h = hash.load(); outputHex(std::cout, (char*)&h, sizeof(h)); }