From ca96270509d31a45a525d9145990c11aab24a451 Mon Sep 17 00:00:00 2001 From: tevador Date: Tue, 30 Apr 2019 21:14:50 +0200 Subject: [PATCH] Group E exponent changed from a static value (-240) to dynamic --- src/asm/program_xmm_constants.inc | 4 +-- src/assembly_generator_x86.cpp | 13 ++++--- src/common.hpp | 10 ++++++ src/intrin_portable.h | 6 ---- src/tests/benchmark.cpp | 2 +- src/virtual_machine.cpp | 56 ++++++++++++++++++------------- src/vm_interpreted.cpp | 9 +++-- vcxproj/benchmark.vcxproj | 2 +- 8 files changed, 56 insertions(+), 46 deletions(-) diff --git a/src/asm/program_xmm_constants.inc b/src/asm/program_xmm_constants.inc index 5c2600b..e5219ac 100644 --- a/src/asm/program_xmm_constants.inc +++ b/src/asm/program_xmm_constants.inc @@ -1,6 +1,6 @@ mantissaMask: - db 255, 255, 255, 255, 255, 255, 15, 0, 255, 255, 255, 255, 255, 255, 15, 0 + db 255, 255, 255, 255, 255, 255, 255, 0, 255, 255, 255, 255, 255, 255, 255, 0 exp240: - db 0, 0, 0, 0, 0, 0, 240, 48, 0, 0, 0, 0, 0, 0, 240, 48 + db 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 scaleMask: db 0, 0, 0, 0, 0, 0, 240, 129, 0, 0, 0, 0, 0, 0, 240, 129 \ No newline at end of file diff --git a/src/assembly_generator_x86.cpp b/src/assembly_generator_x86.cpp index be3e359..8af12c4 100644 --- a/src/assembly_generator_x86.cpp +++ b/src/assembly_generator_x86.cpp @@ -35,9 +35,9 @@ namespace randomx { static const char* regA[] = { "xmm8", "xmm9", "xmm10", "xmm11" }; static const char* tempRegx = "xmm12"; - static const char* mantissaMask = "xmm13"; - static const char* exponentMask = "xmm14"; - static const char* scaleMask = "xmm15"; + static const char* mantissaMaskReg = "xmm13"; + static const char* exponentMaskReg = "xmm14"; + static const char* scaleMaskReg = "xmm15"; static const char* regIc = "rbx"; static const char* regIc32 = "ebx"; static const char* regIc8 = "bl"; @@ -328,7 +328,6 @@ namespace randomx { traceint(instr); } - //4 uOPs void AssemblyGeneratorX86::h_IMULH_R(Instruction& instr, int i) { 
registerUsage[instr.dst].lastUsed = i; asmCode << "\tmov rax, " << regR[instr.dst] << std::endl; @@ -489,7 +488,7 @@ namespace randomx { void AssemblyGeneratorX86::h_FSCAL_R(Instruction& instr, int i) { instr.dst %= RegisterCountFlt; - asmCode << "\txorps " << regF[instr.dst] << ", " << scaleMask << std::endl; + asmCode << "\txorps " << regF[instr.dst] << ", " << scaleMaskReg << std::endl; traceflt(instr); } @@ -504,8 +503,8 @@ namespace randomx { instr.dst %= RegisterCountFlt; genAddressReg(instr); asmCode << "\tcvtdq2pd " << tempRegx << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl; - asmCode << "\tandps " << tempRegx << ", " << mantissaMask << std::endl; - asmCode << "\torps " << tempRegx << ", " << exponentMask << std::endl; + asmCode << "\tandps " << tempRegx << ", " << mantissaMaskReg << std::endl; + asmCode << "\torps " << tempRegx << ", " << exponentMaskReg << std::endl; asmCode << "\tdivpd " << regE[instr.dst] << ", " << tempRegx << std::endl; traceflt(instr); } diff --git a/src/common.hpp b/src/common.hpp index 671e24d..f5e9361 100644 --- a/src/common.hpp +++ b/src/common.hpp @@ -122,6 +122,16 @@ namespace randomx { return minIndex; } + constexpr int mantissaSize = 52; + constexpr int exponentSize = 11; + constexpr uint64_t mantissaMask = (1ULL << mantissaSize) - 1; + constexpr uint64_t exponentMask = (1ULL << exponentSize) - 1; + constexpr int exponentBias = 1023; + constexpr int dynamicExponentBits = 4; + constexpr int staticExponentBits = 4; + constexpr uint64_t constExponentBits = 0x300; + constexpr uint64_t dynamicMantissaMask = (1ULL << (mantissaSize + dynamicExponentBits)) - 1; + struct MemoryRegisters { addr_t mx, ma; uint8_t* memory = nullptr; diff --git a/src/intrin_portable.h b/src/intrin_portable.h index 4ad9726..4364610 100644 --- a/src/intrin_portable.h +++ b/src/intrin_portable.h @@ -312,12 +312,6 @@ inline __m128d load_cvt_i32x2(const void* addr) { return _mm_cvtepi32_pd(ix); } -template<int E> -constexpr uint64_t 
ieee_get_exponent_mask() { - static_assert(E > -1023, "Invalid exponent value"); - return (uint64_t)(E + 1023U) << 52; -} - double loadDoublePortable(const void* addr); uint64_t mulh(uint64_t, uint64_t); int64_t smulh(int64_t, int64_t); diff --git a/src/tests/benchmark.cpp b/src/tests/benchmark.cpp index 0b96f5d..dc335f4 100644 --- a/src/tests/benchmark.cpp +++ b/src/tests/benchmark.cpp @@ -229,7 +229,7 @@ int main(int argc, char** argv) { std::cout << "Calculated result: "; result.print(std::cout); if (noncesCount == 1000 && seedValue == 0) - std::cout << "Reference result: 092868e4cee629a5b3848b97a52199d8a158e5b56ab9064764cda7ff656f3741" << std::endl; + std::cout << "Reference result: 6d95d8d07fa3a80771f33d1b20452b61ab2d0bf21058b5e586fad38bf3e1e0ca" << std::endl; if (!miningMode) { std::cout << "Performance: " << 1000 * elapsed / noncesCount << " ms per hash" << std::endl; } diff --git a/src/virtual_machine.cpp b/src/virtual_machine.cpp index 3707ba7..086d438 100644 --- a/src/virtual_machine.cpp +++ b/src/virtual_machine.cpp @@ -35,30 +35,40 @@ void randomx_vm::resetRoundingMode() { initFpu(); } -constexpr int mantissaSize = 52; -constexpr int exponentSize = 11; -constexpr uint64_t mantissaMask = (1ULL << mantissaSize) - 1; -constexpr uint64_t exponentMask = (1ULL << exponentSize) - 1; -constexpr int exponentBias = 1023; +namespace randomx { + + static inline uint64_t getSmallPositiveFloatBits(uint64_t entropy) { + auto exponent = entropy >> 59; //0..31 + auto mantissa = entropy & mantissaMask; + exponent += exponentBias; + exponent &= exponentMask; + exponent <<= mantissaSize; + return exponent | mantissa; + } + + static inline uint64_t getStaticExponent(uint64_t entropy) { + auto exponent = constExponentBits; + exponent |= (entropy >> (64 - staticExponentBits)) << dynamicExponentBits; + exponent <<= mantissaSize; + return exponent; + } + + static inline uint64_t getFloatMask(uint64_t entropy) { + constexpr uint64_t mask22bit = (1ULL << 22) - 1; + return 
(entropy & mask22bit) | getStaticExponent(entropy); + } -static inline uint64_t getSmallPositiveFloatBits(uint64_t entropy) { - auto exponent = entropy >> 59; //0..31 - auto mantissa = entropy & mantissaMask; - exponent += exponentBias; - exponent &= exponentMask; - exponent <<= mantissaSize; - return exponent | mantissa; } void randomx_vm::initialize() { - store64(&reg.a[0].lo, getSmallPositiveFloatBits(program.getEntropy(0))); - store64(&reg.a[0].hi, getSmallPositiveFloatBits(program.getEntropy(1))); - store64(&reg.a[1].lo, getSmallPositiveFloatBits(program.getEntropy(2))); - store64(&reg.a[1].hi, getSmallPositiveFloatBits(program.getEntropy(3))); - store64(&reg.a[2].lo, getSmallPositiveFloatBits(program.getEntropy(4))); - store64(&reg.a[2].hi, getSmallPositiveFloatBits(program.getEntropy(5))); - store64(&reg.a[3].lo, getSmallPositiveFloatBits(program.getEntropy(6))); - store64(&reg.a[3].hi, getSmallPositiveFloatBits(program.getEntropy(7))); + store64(&reg.a[0].lo, randomx::getSmallPositiveFloatBits(program.getEntropy(0))); + store64(&reg.a[0].hi, randomx::getSmallPositiveFloatBits(program.getEntropy(1))); + store64(&reg.a[1].lo, randomx::getSmallPositiveFloatBits(program.getEntropy(2))); + store64(&reg.a[1].hi, randomx::getSmallPositiveFloatBits(program.getEntropy(3))); + store64(&reg.a[2].lo, randomx::getSmallPositiveFloatBits(program.getEntropy(4))); + store64(&reg.a[2].hi, randomx::getSmallPositiveFloatBits(program.getEntropy(5))); + store64(&reg.a[3].lo, randomx::getSmallPositiveFloatBits(program.getEntropy(6))); + store64(&reg.a[3].hi, randomx::getSmallPositiveFloatBits(program.getEntropy(7))); mem.ma = program.getEntropy(8) & randomx::CacheLineAlignMask; mem.mx = program.getEntropy(10); auto addressRegisters = program.getEntropy(12); @@ -70,10 +80,8 @@ void randomx_vm::initialize() { addressRegisters >>= 1; config.readReg3 = 6 + (addressRegisters & 1); datasetOffset = (program.getEntropy(13) & randomx::DatasetExtraItems) * randomx::CacheLineSize; - constexpr uint64_t mask22bit = (1ULL << 22) - 1; - 
constexpr uint64_t maskExp240 = ieee_get_exponent_mask<-240>(); - store64(&config.eMask[0], (program.getEntropy(14) & mask22bit) | maskExp240); - store64(&config.eMask[1], (program.getEntropy(15) & mask22bit) | maskExp240); + store64(&config.eMask[0], randomx::getFloatMask(program.getEntropy(14))); + store64(&config.eMask[1], randomx::getFloatMask(program.getEntropy(15))); } namespace randomx { diff --git a/src/vm_interpreted.cpp b/src/vm_interpreted.cpp index 25e2a54..6891ad4 100644 --- a/src/vm_interpreted.cpp +++ b/src/vm_interpreted.cpp @@ -61,11 +61,10 @@ namespace randomx { template FORCE_INLINE __m128d InterpretedVm::maskRegisterExponentMantissa(__m128d x) { - constexpr uint64_t mantissaMask64 = (1ULL << 52) - 1; - const __m128d mantissaMask = _mm_castsi128_pd(_mm_set_epi64x(mantissaMask64, mantissaMask64)); - const __m128d exponentMask = _mm_load_pd((const double*)&config.eMask); - x = _mm_and_pd(x, mantissaMask); - x = _mm_or_pd(x, exponentMask); + const __m128d xmantissaMask = _mm_castsi128_pd(_mm_set_epi64x(dynamicMantissaMask, dynamicMantissaMask)); + const __m128d xexponentMask = _mm_load_pd((const double*)&config.eMask); + x = _mm_and_pd(x, xmantissaMask); + x = _mm_or_pd(x, xexponentMask); return x; } diff --git a/vcxproj/benchmark.vcxproj b/vcxproj/benchmark.vcxproj index 27031e3..165f100 100644 --- a/vcxproj/benchmark.vcxproj +++ b/vcxproj/benchmark.vcxproj @@ -96,7 +96,7 @@ Level3 Disabled - true + false true