Light JIT compiler - Linux

This commit is contained in:
tevador 2019-03-22 11:00:21 +01:00
parent 73a11f5c01
commit 28ed776fbe
3 changed files with 23 additions and 158 deletions

View file

@ -9,7 +9,7 @@ OBJDIR=obj
LDFLAGS=-lpthread LDFLAGS=-lpthread
CPPSRC=src/argon2_core.c src/Cache.cpp src/divideByConstantCodegen.c src/Instruction.cpp src/JitCompilerX86.cpp src/Program.cpp src/VirtualMachine.cpp src/argon2_ref.c src/CompiledVirtualMachine.cpp src/executeProgram-linux.cpp src/instructionsPortable.cpp src/LightClientAsyncWorker.cpp src/softAes.cpp src/virtualMemory.cpp src/AssemblyGeneratorX86.cpp src/dataset.cpp src/hashAes1Rx4.cpp src/InterpretedVirtualMachine.cpp src/main.cpp src/TestAluFpu.cpp src/blake2/blake2b.c CPPSRC=src/argon2_core.c src/Cache.cpp src/divideByConstantCodegen.c src/Instruction.cpp src/JitCompilerX86.cpp src/Program.cpp src/VirtualMachine.cpp src/argon2_ref.c src/CompiledVirtualMachine.cpp src/executeProgram-linux.cpp src/instructionsPortable.cpp src/LightClientAsyncWorker.cpp src/softAes.cpp src/virtualMemory.cpp src/AssemblyGeneratorX86.cpp src/dataset.cpp src/hashAes1Rx4.cpp src/InterpretedVirtualMachine.cpp src/main.cpp src/TestAluFpu.cpp src/blake2/blake2b.c
TOBJS=$(addprefix $(OBJDIR)/,instructionsPortable.o TestAluFpu.o) TOBJS=$(addprefix $(OBJDIR)/,instructionsPortable.o TestAluFpu.o)
ROBJS=$(addprefix $(OBJDIR)/,argon2_core.o argon2_ref.o AssemblyGeneratorX86.o blake2b.o CompiledVirtualMachine.o dataset.o JitCompilerX86.o instructionsPortable.o Instruction.o InterpretedVirtualMachine.o main.o Program.o softAes.o VirtualMachine.o Cache.o virtualMemory.o reciprocal.o LightClientAsyncWorker.o hashAes1Rx4.o) ROBJS=$(addprefix $(OBJDIR)/,argon2_core.o argon2_ref.o AssemblyGeneratorX86.o blake2b.o CompiledVirtualMachine.o CompiledLightVirtualMachine.o dataset.o JitCompilerX86.o instructionsPortable.o Instruction.o InterpretedVirtualMachine.o main.o Program.o softAes.o VirtualMachine.o Cache.o virtualMemory.o reciprocal.o LightClientAsyncWorker.o hashAes1Rx4.o)
ifeq ($(PLATFORM),amd64) ifeq ($(PLATFORM),amd64)
ROBJS += $(OBJDIR)/JitCompilerX86-static.o $(OBJDIR)/squareHash.o ROBJS += $(OBJDIR)/JitCompilerX86-static.o $(OBJDIR)/squareHash.o
CXXFLAGS += -maes CXXFLAGS += -maes
@ -23,8 +23,13 @@ all: release
release: CXXFLAGS += -march=native -O3 -flto release: CXXFLAGS += -march=native -O3 -flto
release: CCFLAGS += -march=native -O3 -flto release: CCFLAGS += -march=native -O3 -flto
release: LDFLAGS += -flto
release: $(BINDIR)/randomx release: $(BINDIR)/randomx
# nolto: builds the same $(BINDIR)/randomx binary as `release`, with the same
# -march=native -O3 flags, but without -flto on the compile flags.
nolto: CXXFLAGS += -march=native -O3
nolto: CCFLAGS += -march=native -O3
nolto: $(BINDIR)/randomx
debug: CXXFLAGS += -g debug: CXXFLAGS += -g
debug: CCFLAGS += -g debug: CCFLAGS += -g
debug: LDFLAGS += -g debug: LDFLAGS += -g
@ -59,8 +64,11 @@ $(OBJDIR)/AssemblyGeneratorX86.o: $(addprefix $(SRCDIR)/,AssemblyGeneratorX86.cp
$(OBJDIR)/blake2b.o: $(addprefix $(SRCDIR)/blake2/,blake2b.c blake2.h blake2-impl.h endian.h) | $(OBJDIR) $(OBJDIR)/blake2b.o: $(addprefix $(SRCDIR)/blake2/,blake2b.c blake2.h blake2-impl.h endian.h) | $(OBJDIR)
$(CC) $(CCFLAGS) -c $(SRCDIR)/blake2/blake2b.c -o $@ $(CC) $(CCFLAGS) -c $(SRCDIR)/blake2/blake2b.c -o $@
$(OBJDIR)/CompiledVirtualMachine.o: $(addprefix $(SRCDIR)/,CompiledVirtualMachine.cpp CompiledVirtualMachine.hpp common.hpp configuration.h) | $(OBJDIR) $(OBJDIR)/CompiledVirtualMachine.o: $(addprefix $(SRCDIR)/,CompiledVirtualMachine.cpp CompiledVirtualMachine.hpp common.hpp configuration.h JitCompilerX86.hpp) | $(OBJDIR)
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/CompiledVirtualMachine.cpp -o $@ $(CXX) $(CXXFLAGS) -c $(SRCDIR)/CompiledVirtualMachine.cpp -o $@
# Compiles CompiledLightVirtualMachine.cpp with $(CXX). The object is rebuilt
# when the source, its own header, common.hpp, configuration.h or
# JitCompilerX86.hpp changes. $(OBJDIR) is an order-only prerequisite: it must
# exist, but its timestamp never triggers a rebuild.
$(OBJDIR)/CompiledLightVirtualMachine.o: $(addprefix $(SRCDIR)/,CompiledLightVirtualMachine.cpp CompiledLightVirtualMachine.hpp common.hpp configuration.h JitCompilerX86.hpp) | $(OBJDIR)
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/CompiledLightVirtualMachine.cpp -o $@
$(OBJDIR)/dataset.o: $(addprefix $(SRCDIR)/,dataset.cpp common.hpp blake2/endian.h dataset.hpp intrinPortable.h Cache.hpp virtualMemory.hpp configuration.h) | $(OBJDIR) $(OBJDIR)/dataset.o: $(addprefix $(SRCDIR)/,dataset.cpp common.hpp blake2/endian.h dataset.hpp intrinPortable.h Cache.hpp virtualMemory.hpp configuration.h) | $(OBJDIR)
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/dataset.cpp -o $@ $(CXX) $(CXXFLAGS) -c $(SRCDIR)/dataset.cpp -o $@
@ -74,10 +82,10 @@ $(OBJDIR)/hashAes1Rx4.o: $(addprefix $(SRCDIR)/,hashAes1Rx4.cpp softAes.h intrin
$(OBJDIR)/JitCompilerX86.o: $(addprefix $(SRCDIR)/,JitCompilerX86.cpp JitCompilerX86.hpp Instruction.hpp instructionWeights.hpp common.hpp blake2/endian.h Program.hpp reciprocal.h virtualMemory.hpp configuration.h) | $(OBJDIR) $(OBJDIR)/JitCompilerX86.o: $(addprefix $(SRCDIR)/,JitCompilerX86.cpp JitCompilerX86.hpp Instruction.hpp instructionWeights.hpp common.hpp blake2/endian.h Program.hpp reciprocal.h virtualMemory.hpp configuration.h) | $(OBJDIR)
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/JitCompilerX86.cpp -o $@ $(CXX) $(CXXFLAGS) -c $(SRCDIR)/JitCompilerX86.cpp -o $@
$(OBJDIR)/JitCompilerX86-static.o: $(addprefix $(SRCDIR)/,JitCompilerX86-static.S $(addprefix asm/program_, prologue_linux.inc prologue_load.inc epilogue_linux.inc epilogue_store.inc read_dataset.inc loop_load.inc loop_store.inc xmm_constants.inc)) | $(OBJDIR) $(OBJDIR)/JitCompilerX86-static.o: $(addprefix $(SRCDIR)/,JitCompilerX86-static.S $(addprefix asm/program_, prologue_linux.inc prologue_load.inc epilogue_linux.inc epilogue_store.inc read_dataset.inc loop_load.inc loop_store.inc xmm_constants.inc read_dataset_light.inc read_dataset_light_sub.inc)) | $(OBJDIR)
$(CXX) -x assembler-with-cpp -c $(SRCDIR)/JitCompilerX86-static.S -o $@ $(CXX) -x assembler-with-cpp -c $(SRCDIR)/JitCompilerX86-static.S -o $@
$(OBJDIR)/squareHash.o: $(addprefix $(SRCDIR)/,squareHash.S $(addprefix asm/, squareHash.inc) configuration.h) | $(OBJDIR) $(OBJDIR)/squareHash.o: $(addprefix $(SRCDIR)/,squareHash.S $(addprefix asm/, squareHash.inc initBlock.inc) configuration.h) | $(OBJDIR)
$(CXX) -x assembler-with-cpp -c $(SRCDIR)/squareHash.S -o $@ $(CXX) -x assembler-with-cpp -c $(SRCDIR)/squareHash.S -o $@
$(OBJDIR)/instructionsPortable.o: $(addprefix $(SRCDIR)/,instructionsPortable.cpp intrinPortable.h blake2/endian.h common.hpp) | $(OBJDIR) $(OBJDIR)/instructionsPortable.o: $(addprefix $(SRCDIR)/,instructionsPortable.cpp intrinPortable.h blake2/endian.h common.hpp) | $(OBJDIR)

View file

@ -31,8 +31,10 @@
.global DECL(randomx_program_loop_load) .global DECL(randomx_program_loop_load)
.global DECL(randomx_program_start) .global DECL(randomx_program_start)
.global DECL(randomx_program_read_dataset) .global DECL(randomx_program_read_dataset)
.global DECL(randomx_program_read_dataset_light)
.global DECL(randomx_program_loop_store) .global DECL(randomx_program_loop_store)
.global DECL(randomx_program_loop_end) .global DECL(randomx_program_loop_end)
.global DECL(randomx_program_read_dataset_light_sub)
.global DECL(randomx_program_epilogue) .global DECL(randomx_program_epilogue)
.global DECL(randomx_program_end) .global DECL(randomx_program_end)
@ -58,12 +60,21 @@ DECL(randomx_program_start):
DECL(randomx_program_read_dataset): DECL(randomx_program_read_dataset):
#include "asm/program_read_dataset.inc" #include "asm/program_read_dataset.inc"
DECL(randomx_program_read_dataset_light):
#include "asm/program_read_dataset_light.inc"
DECL(randomx_program_loop_store): DECL(randomx_program_loop_store):
#include "asm/program_loop_store.inc" #include "asm/program_loop_store.inc"
DECL(randomx_program_loop_end): DECL(randomx_program_loop_end):
nop nop
.balign 64
DECL(randomx_program_read_dataset_light_sub):
#include "asm/program_read_dataset_light_sub.inc"
squareHashSub:
#include "asm/squareHash.inc"
.balign 64 .balign 64
DECL(randomx_program_epilogue): DECL(randomx_program_epilogue):
#include "asm/program_epilogue_linux.inc" #include "asm/program_epilogue_linux.inc"

View file

@ -1,154 +0,0 @@
;# 90 address transformations
;# forced REX prefix is used to make all transformations 4 bytes long
;#
;# Every step rewrites eax in place and takes one of three forms:
;#   lea eax, [rax+rax*8+c]   eax = rax*9 + c          (28 steps)
;#   db 64 / xor eax, c       eax = eax XOR c          (62 steps together
;#   db 64 / add eax, c       eax = eax + c             with the xor form)
;# "db 64" emits the single byte 0x40 (a REX prefix with no bits set) right
;# in front of each xor/add; the lea steps carry no such padding byte.
;# Every constant c lies in the signed 8-bit range (-128..127).
;# The steps are applied one after another to the same register, so their
;# order determines the final value -- do not reorder or drop entries.
lea eax, [rax+rax*8+109]
db 64
xor eax, 96
lea eax, [rax+rax*8-19]
db 64
add eax, -98
db 64
add eax, -21
db 64
xor eax, -80
lea eax, [rax+rax*8-92]
db 64
add eax, 113
lea eax, [rax+rax*8+100]
db 64
add eax, -39
db 64
xor eax, 120
lea eax, [rax+rax*8-119]
db 64
add eax, -113
db 64
add eax, 111
db 64
xor eax, 104
lea eax, [rax+rax*8-83]
lea eax, [rax+rax*8+127]
db 64
xor eax, -112
db 64
add eax, 89
db 64
add eax, -32
db 64
add eax, 104
db 64
xor eax, -120
db 64
xor eax, 24
lea eax, [rax+rax*8+9]
db 64
add eax, -31
db 64
xor eax, -16
db 64
add eax, 68
lea eax, [rax+rax*8-110]
db 64
xor eax, 64
db 64
xor eax, -40
db 64
xor eax, -8
db 64
add eax, -10
db 64
xor eax, -32
db 64
add eax, 14
lea eax, [rax+rax*8-46]
db 64
xor eax, -104
lea eax, [rax+rax*8+36]
db 64
add eax, 100
lea eax, [rax+rax*8-65]
lea eax, [rax+rax*8+27]
lea eax, [rax+rax*8+91]
db 64
add eax, -101
db 64
add eax, -94
lea eax, [rax+rax*8-10]
db 64
xor eax, 80
db 64
add eax, -108
db 64
add eax, -58
db 64
xor eax, 48
lea eax, [rax+rax*8+73]
db 64
xor eax, -48
db 64
xor eax, 32
db 64
xor eax, -96
db 64
add eax, 118
db 64
add eax, 91
lea eax, [rax+rax*8+18]
db 64
add eax, -11
lea eax, [rax+rax*8+63]
db 64
add eax, 114
lea eax, [rax+rax*8+45]
db 64
add eax, -67
db 64
add eax, 53
lea eax, [rax+rax*8-101]
lea eax, [rax+rax*8-1]
db 64
xor eax, 16
lea eax, [rax+rax*8-37]
lea eax, [rax+rax*8-28]
lea eax, [rax+rax*8-55]
db 64
xor eax, -88
db 64
xor eax, -72
db 64
add eax, 36
db 64
xor eax, -56
db 64
add eax, 116
db 64
xor eax, 88
db 64
xor eax, -128
db 64
add eax, 50
db 64
add eax, 105
db 64
add eax, -37
db 64
xor eax, 112
db 64
xor eax, 8
db 64
xor eax, -24
lea eax, [rax+rax*8+118]
db 64
xor eax, 72
db 64
xor eax, -64
db 64
add eax, 40
lea eax, [rax+rax*8-74]
lea eax, [rax+rax*8+82]
lea eax, [rax+rax*8+54]
db 64
xor eax, 56
db 64
xor eax, 40
db 64
add eax, 87