From a22e3b3cb009b425b59ff711c9a59a99f83778d4 Mon Sep 17 00:00:00 2001 From: tevador Date: Sat, 4 May 2019 19:40:25 +0200 Subject: [PATCH] 30% faster JIT compiler --- makefile | 3 +- randomx.sln | 11 ++ src/asm/randomx_reciprocal.inc | 7 ++ src/jit_compiler_x86.cpp | 2 +- src/jit_compiler_x86.hpp | 18 +--- src/jit_compiler_x86_static.S | 5 + src/jit_compiler_x86_static.asm | 5 + src/reciprocal.c | 2 +- src/reciprocal.h | 1 + src/tests/jit-performance.cpp | 44 ++++++++ vcxproj/jit-performance.vcxproj | 128 ++++++++++++++++++++++++ vcxproj/jit-performance.vcxproj.filters | 22 ++++ 12 files changed, 231 insertions(+), 17 deletions(-) create mode 100644 src/asm/randomx_reciprocal.inc create mode 100644 src/tests/jit-performance.cpp create mode 100644 vcxproj/jit-performance.vcxproj create mode 100644 vcxproj/jit-performance.vcxproj.filters diff --git a/makefile b/makefile index 3b39f4b..8dcefed 100644 --- a/makefile +++ b/makefile @@ -95,7 +95,8 @@ $(OBJDIR)/jit_compiler_x86_static.o: $(SRCDIR)/jit_compiler_x86_static.S \ $(SRCDIR)/asm/program_read_dataset_sshash_fin.inc \ $(SRCDIR)/asm/program_loop_store.inc $(SRCDIR)/asm/program_epilogue_linux.inc \ $(SRCDIR)/asm/program_epilogue_store.inc $(SRCDIR)/asm/program_sshash_load.inc \ - $(SRCDIR)/asm/program_sshash_prefetch.inc $(SRCDIR)/asm/program_sshash_constants.inc + $(SRCDIR)/asm/program_sshash_prefetch.inc $(SRCDIR)/asm/program_sshash_constants.inc \ + $(SRCDIR)/asm/randomx_reciprocal.inc $(OBJDIR)/soft_aes.o: $(SRCDIR)/soft_aes.cpp $(SRCDIR)/soft_aes.h $(SRCDIR)/intrin_portable.h $(OBJDIR)/virtual_memory.o: $(SRCDIR)/virtual_memory.cpp $(SRCDIR)/virtual_memory.hpp $(OBJDIR)/vm_interpreted.o: $(SRCDIR)/vm_interpreted.cpp $(SRCDIR)/vm_interpreted.hpp \ diff --git a/randomx.sln b/randomx.sln index 024e742..abd1c69 100644 --- a/randomx.sln +++ b/randomx.sln @@ -23,6 +23,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "code-generator", "vcxproj\c EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "scratchpad-entropy", "vcxproj\scratchpad-entropy.vcxproj", "{FF8BD408-AFD8-43C6-BE98-4D03B37E840B}" EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "jit-performance", "vcxproj\jit-performance.vcxproj", "{535F2111-FA81-4C76-A354-EDD2F9AA00E3}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|x64 = Debug|x64 @@ -103,6 +105,14 @@ Global {FF8BD408-AFD8-43C6-BE98-4D03B37E840B}.Release|x64.Build.0 = Release|x64 {FF8BD408-AFD8-43C6-BE98-4D03B37E840B}.Release|x86.ActiveCfg = Release|Win32 {FF8BD408-AFD8-43C6-BE98-4D03B37E840B}.Release|x86.Build.0 = Release|Win32 + {535F2111-FA81-4C76-A354-EDD2F9AA00E3}.Debug|x64.ActiveCfg = Debug|x64 + {535F2111-FA81-4C76-A354-EDD2F9AA00E3}.Debug|x64.Build.0 = Debug|x64 + {535F2111-FA81-4C76-A354-EDD2F9AA00E3}.Debug|x86.ActiveCfg = Debug|Win32 + {535F2111-FA81-4C76-A354-EDD2F9AA00E3}.Debug|x86.Build.0 = Debug|Win32 + {535F2111-FA81-4C76-A354-EDD2F9AA00E3}.Release|x64.ActiveCfg = Release|x64 + {535F2111-FA81-4C76-A354-EDD2F9AA00E3}.Release|x64.Build.0 = Release|x64 + {535F2111-FA81-4C76-A354-EDD2F9AA00E3}.Release|x86.ActiveCfg = Release|Win32 + {535F2111-FA81-4C76-A354-EDD2F9AA00E3}.Release|x86.Build.0 = Release|Win32 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -116,6 +126,7 @@ Global {44947B9C-E6B1-4C06-BD01-F8EF43B59223} = {4A4A689F-86AF-41C0-A974-1080506D0923} {3E490DEC-1874-43AA-92DA-1AC57C217EAC} = {4A4A689F-86AF-41C0-A974-1080506D0923} {FF8BD408-AFD8-43C6-BE98-4D03B37E840B} = {4A4A689F-86AF-41C0-A974-1080506D0923} + {535F2111-FA81-4C76-A354-EDD2F9AA00E3} = {4A4A689F-86AF-41C0-A974-1080506D0923} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {4EBC03DB-AE37-4141-8147-692F16E0ED02} diff --git a/src/asm/randomx_reciprocal.inc b/src/asm/randomx_reciprocal.inc new file mode 100644 index 0000000..e1f22fd --- /dev/null +++ b/src/asm/randomx_reciprocal.inc @@ -0,0 +1,7 @@ + mov edx, 1 + mov r8, rcx + xor eax, eax + bsr rcx, rcx + shl rdx, cl + div r8 + ret \ No newline at end of file diff --git a/src/jit_compiler_x86.cpp b/src/jit_compiler_x86.cpp index 82a5503..4918fc5 100644 --- a/src/jit_compiler_x86.cpp +++ b/src/jit_compiler_x86.cpp @@ -616,7 +616,7 @@ namespace randomx { if (!isPowerOf2(divisor)) { registerUsage[instr.dst].lastUsed = i; emit(MOV_RAX_I); - emit64(randomx_reciprocal(divisor)); + emit64(randomx_reciprocal_fast(divisor)); emit(REX_IMUL_RM); emitByte(0xc0 + 8 * instr.dst); } diff --git a/src/jit_compiler_x86.hpp b/src/jit_compiler_x86.hpp index 5e64cb2..d814281 100644 --- a/src/jit_compiler_x86.hpp +++ b/src/jit_compiler_x86.hpp @@ -78,23 +78,13 @@ namespace randomx { } void emit32(uint32_t val) { - code[codePos + 0] = val; - code[codePos + 1] = val >> 8; - code[codePos + 2] = val >> 16; - code[codePos + 3] = val >> 24; - codePos += 4; + memcpy(code + codePos, &val, sizeof val); + codePos += sizeof val; } void emit64(uint64_t val) { - code[codePos + 0] = val; - code[codePos + 1] = val >> 8; - code[codePos + 2] = val >> 16; - code[codePos + 3] = val >> 24; - code[codePos + 4] = val >> 32; - code[codePos + 5] = val >> 40; - code[codePos + 6] = val >> 48; - code[codePos + 7] = val >> 56; - codePos += 8; + memcpy(code + codePos, &val, sizeof val); + codePos += sizeof val; } template diff --git a/src/jit_compiler_x86_static.S b/src/jit_compiler_x86_static.S index 04dbaa9..3254c4c 100644 --- a/src/jit_compiler_x86_static.S +++ b/src/jit_compiler_x86_static.S @@ -42,6 +42,7 @@ .global DECL(randomx_sshash_end) .global DECL(randomx_sshash_init) .global DECL(randomx_program_end) +.global DECL(randomx_reciprocal_fast) #define db .byte @@ -158,3 +159,7 @@ DECL(randomx_sshash_init): .balign 64 DECL(randomx_program_end): nop + +DECL(randomx_reciprocal_fast): + mov rcx, rdi + #include "asm/randomx_reciprocal.inc" diff --git a/src/jit_compiler_x86_static.asm b/src/jit_compiler_x86_static.asm index 92d2ebd..d515828 100644 --- a/src/jit_compiler_x86_static.asm +++ b/src/jit_compiler_x86_static.asm @@ -35,6 +35,7 @@ PUBLIC randomx_sshash_prefetch PUBLIC randomx_sshash_end PUBLIC randomx_sshash_init PUBLIC randomx_program_end +PUBLIC randomx_reciprocal_fast ALIGN 64 randomx_program_prologue PROC @@ -169,6 +170,10 @@ randomx_program_end PROC nop randomx_program_end ENDP +randomx_reciprocal_fast PROC + include asm/randomx_reciprocal.inc +randomx_reciprocal_fast ENDP + _RANDOMX_JITX86_STATIC ENDS ENDIF diff --git a/src/reciprocal.c b/src/reciprocal.c index 862d7ec..ebe02ee 100644 --- a/src/reciprocal.c +++ b/src/reciprocal.c @@ -56,4 +56,4 @@ uint64_t randomx_reciprocal(uint64_t divisor) { } return quotient; -} \ No newline at end of file +} diff --git a/src/reciprocal.h b/src/reciprocal.h index 4132d6e..68afe29 100644 --- a/src/reciprocal.h +++ b/src/reciprocal.h @@ -26,6 +26,7 @@ extern "C" { #endif uint64_t randomx_reciprocal(uint64_t); +uint64_t randomx_reciprocal_fast(uint64_t); #if defined(__cplusplus) } diff --git a/src/tests/jit-performance.cpp b/src/tests/jit-performance.cpp new file mode 100644 index 0000000..57e8c7d --- /dev/null +++ b/src/tests/jit-performance.cpp @@ -0,0 +1,44 @@ +#include "../aes_hash.hpp" +#include "../jit_compiler_x86.hpp" +#include "../program.hpp" +#include "utility.hpp" +#include "stopwatch.hpp" +#include "../blake2/blake2.h" +#include "../reciprocal.h" + +int main(int argc, char** argv) { + int count; + readInt(argc, argv, count, 1000000); + + const char seed[] = "JIT performance test seed"; + uint8_t hash[64]; + + blake2b(&hash, sizeof hash, &seed, sizeof seed, nullptr, 0); + + randomx::ProgramConfiguration config; + + randomx::Program program; + randomx::JitCompilerX86 jit; + + std::cout << "Compiling " << count << " programs..." << std::endl; + + Stopwatch sw(true); + + for (int i = 0; i < count; ++i) { + fillAes1Rx4(hash, sizeof(program), &program); + auto addressRegisters = program.getEntropy(12); + config.readReg0 = 0 + (addressRegisters & 1); + addressRegisters >>= 1; + config.readReg1 = 2 + (addressRegisters & 1); + addressRegisters >>= 1; + config.readReg2 = 4 + (addressRegisters & 1); + addressRegisters >>= 1; + config.readReg3 = 6 + (addressRegisters & 1); + jit.generateProgram(program, config); + } + + std::cout << "Elapsed: " << sw.getElapsed() << " s" << std::endl; + + dump((const char*)jit.getProgramFunc(), randomx::CodeSize, "program.bin"); + return 0; +} \ No newline at end of file diff --git a/vcxproj/jit-performance.vcxproj b/vcxproj/jit-performance.vcxproj new file mode 100644 index 0000000..5028e93 --- /dev/null +++ b/vcxproj/jit-performance.vcxproj @@ -0,0 +1,128 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + 15.0 + {535F2111-FA81-4C76-A354-EDD2F9AA00E3} + jitperformance + 10.0.17763.0 + + + + Application + true + v141 + MultiByte + + + Application + false + v141 + true + MultiByte + + + Application + true + v141 + MultiByte + + + Application + false + v141 + true + MultiByte + + + + + + + + + + + + + + + + + + + + + + + Level3 + MaxSpeed + true + true + true + true + + + true + true + + + + + Level3 + Disabled + true + true + + + + + Level3 + Disabled + true + true + + + + + Level3 + MaxSpeed + true + true + true + true + + + true + true + + + + + + + + {3346a4ad-c438-4324-8b77-47a16452954b} + + + + + + \ No newline at end of file diff --git a/vcxproj/jit-performance.vcxproj.filters b/vcxproj/jit-performance.vcxproj.filters new file mode 100644 index 0000000..46a0be0 --- /dev/null +++ b/vcxproj/jit-performance.vcxproj.filters @@ -0,0 +1,22 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;ipp;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + \ No newline at end of file