diff --git a/doc/program.asm b/doc/program.asm index cced195..a1909c4 100644 --- a/doc/program.asm +++ b/doc/program.asm @@ -80,7 +80,7 @@ randomx_isn_20: add r8, 2 test r8, 254 jz randomx_isn_0 - xor rcx, rcx + xor ecx, ecx cmp r11d, 1593588996 seto cl add r14, rcx @@ -102,7 +102,7 @@ randomx_isn_24: add r8, 64 test r8, 8128 jz randomx_isn_21 - xor rcx, rcx + xor ecx, ecx cmp r8d, 149087159 setno cl add r14, rcx @@ -197,7 +197,7 @@ randomx_isn_51: add r12, 128 test r12, 16256 jz randomx_isn_25 - xor rcx, rcx + xor ecx, ecx cmp r11d, -1975981803 setbe cl add r10, rcx @@ -212,7 +212,7 @@ randomx_isn_54: add r8, 64 test r8, 8128 jz randomx_isn_52 - xor rcx, rcx + xor ecx, ecx cmp r9d, 1917049931 setns cl add r13, rcx @@ -290,7 +290,7 @@ randomx_isn_74: add r9, 4 test r9, 508 jz randomx_isn_55 - xor rcx, rcx + xor ecx, ecx cmp r11d, -1200328848 setns cl add r14, rcx @@ -352,7 +352,7 @@ randomx_isn_89: add r8, 64 test r8, 8128 jz randomx_isn_75 - xor rcx, rcx + xor ecx, ecx cmp r8d, -122257389 setno cl add r10, rcx @@ -556,7 +556,7 @@ randomx_isn_143: add r14, 4 test r14, 508 jz randomx_isn_110 - xor rcx, rcx + xor ecx, ecx cmp r9d, 880467599 setge cl add r13, rcx @@ -960,7 +960,7 @@ randomx_isn_247: add r9, 4 test r9, 508 jz randomx_isn_223 - xor rcx, rcx + xor ecx, ecx cmp r13d, -8545330 setbe cl add r10, rcx diff --git a/makefile b/makefile index e295359..5585b2b 100644 --- a/makefile +++ b/makefile @@ -4,6 +4,7 @@ AR=gcc-ar PLATFORM=$(shell uname -m) CXXFLAGS=-std=c++11 CCFLAGS= +ARFLAGS=rcs BINDIR=bin SRCDIR=src TESTDIR=src/tests @@ -21,13 +22,17 @@ ifeq ($(PLATFORM),x86_64) CXXFLAGS += -maes endif -release: CXXFLAGS += -march=native -O3 -flto -release: CCFLAGS += -march=native -O3 -flto +release: CXXFLAGS += -O3 -flto +release: CCFLAGS += -O3 -flto release: LDFLAGS += -flto release: $(BINARIES) -nolto: CXXFLAGS += -march=native -O3 -nolto: CCFLAGS += -march=native -O3 +native: CXXFLAGS += -march=native -O3 -flto +native: CCFLAGS += -march=native -O3 -flto +native: $(BINARIES) + +nolto: CXXFLAGS += -O3 +nolto: CCFLAGS += -O3 nolto: $(BINARIES) debug: CXXFLAGS += -g @@ -42,10 +47,8 @@ profile: $(BINDIR)/benchmark test: CXXFLAGS += -O0 -$(RXA): $(RXOBJS) - $(AR) rcs $@ $(RXOBJS) -$(OBJDIR)/%.o: | $(OBJDIR) -$(BINDIR)/%: | $(BINDIR) +$(RXA): $(RXOBJS) | $(BINDIR) + $(AR) $(ARFLAGS) $@ $(RXOBJS) $(OBJDIR): mkdir $(OBJDIR) $(BINDIR): @@ -65,7 +68,7 @@ $(OBJDIR)/code-generator.o: $(TESTDIR)/code-generator.cpp $(TESTDIR)/utility.hpp $(CXX) $(CXXFLAGS) -c $< -o $@ $(BINDIR)/code-generator: $(OBJDIR)/code-generator.o $(RXA) $(CXX) $(LDFLAGS) $< $(RXA) -o $@ -$(OBJDIR)/aes_hash.o: $(SRCDIR)/aes_hash.cpp $(SRCDIR)/soft_aes.h $(SRCDIR)/intrin_portable.h +$(OBJDIR)/aes_hash.o: $(SRCDIR)/aes_hash.cpp $(SRCDIR)/soft_aes.h $(SRCDIR)/intrin_portable.h | $(OBJDIR) $(OBJDIR)/argon2_ref.o: $(SRCDIR)/argon2_ref.c $(SRCDIR)/argon2.h $(SRCDIR)/argon2_core.h \ $(SRCDIR)/blake2/blamka-round-ref.h $(SRCDIR)/blake2/blake2.h \ $(SRCDIR)/blake2/blake2-impl.h $(SRCDIR)/blake2/endian.h $(SRCDIR)/blake2/blake2-impl.h \ diff --git a/src/assembly_generator_x86.cpp b/src/assembly_generator_x86.cpp index 18fa18e..0d93752 100644 --- a/src/assembly_generator_x86.cpp +++ b/src/assembly_generator_x86.cpp @@ -34,15 +34,13 @@ namespace randomx { static const char* regE[4] = { "xmm4", "xmm5", "xmm6", "xmm7" }; static const char* regA[4] = { "xmm8", "xmm9", "xmm10", "xmm11" }; - static const char* regA4 = "xmm12"; - static const char* dblMin = "xmm13"; - static const char* absMask = "xmm14"; - static const char* signMask = "xmm15"; - static const char* regMx = "rbp"; + static const char* tempRegx = "xmm12"; + static const char* mantissaMask = "xmm13"; + static const char* exponentMask = "xmm14"; + static const char* scaleMask = "xmm15"; static const char* regIc = "rbx"; static const char* regIc32 = "ebx"; static const char* regIc8 = "bl"; - static const char* regDatasetAddr = "rdi"; static const char* regScratchpadAddr = "rsi"; void AssemblyGeneratorX86::generateProgram(Program& prog) { @@ -274,7 +272,6 @@ namespace randomx { return (int32_t)instr.getImm32() & ScratchpadL3Mask; } - //1 uOP void AssemblyGeneratorX86::h_IADD_RS(Instruction& instr, int i) { registerUsage[instr.dst] = i; if(instr.dst == RegisterNeedsDisplacement) @@ -284,27 +281,18 @@ namespace randomx { traceint(instr); } - //2.75 uOP void AssemblyGeneratorX86::h_IADD_M(Instruction& instr, int i) { registerUsage[instr.dst] = i; if (instr.src != instr.dst) { genAddressReg(instr); - asmCode << "\tadd " << regR[instr.dst] << ", qword ptr [rsi+rax]" << std::endl; + asmCode << "\tadd " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl; } else { - asmCode << "\tadd " << regR[instr.dst] << ", qword ptr [rsi+" << genAddressImm(instr) << "]" << std::endl; + asmCode << "\tadd " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+" << genAddressImm(instr) << "]" << std::endl; } traceint(instr); } - //1 uOP - void AssemblyGeneratorX86::h_IADD_RC(Instruction& instr, int i) { - registerUsage[instr.dst] = i; - asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl; - traceint(instr); - } - - //1 uOP void AssemblyGeneratorX86::h_ISUB_R(Instruction& instr, int i) { registerUsage[instr.dst] = i; if (instr.src != instr.dst) { @@ -316,27 +304,18 @@ namespace randomx { traceint(instr); } - //2.75 uOP void AssemblyGeneratorX86::h_ISUB_M(Instruction& instr, int i) { registerUsage[instr.dst] = i; if (instr.src != instr.dst) { genAddressReg(instr); - asmCode << "\tsub " << regR[instr.dst] << ", qword ptr [rsi+rax]" << std::endl; + asmCode << "\tsub " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl; } else { - asmCode << "\tsub " << regR[instr.dst] << ", qword ptr [rsi+" << genAddressImm(instr) << "]" << std::endl; + asmCode << "\tsub " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+" << genAddressImm(instr) << "]" << std::endl; } traceint(instr); } - //1 uOP - void AssemblyGeneratorX86::h_IMUL_9C(Instruction& instr, int i) { - registerUsage[instr.dst] = i; - asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.dst] << "*8" << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl; - traceint(instr); - } - - //1 uOP void AssemblyGeneratorX86::h_IMUL_R(Instruction& instr, int i) { registerUsage[instr.dst] = i; if (instr.src != instr.dst) { @@ -348,15 +327,14 @@ namespace randomx { traceint(instr); } - //2.75 uOP void AssemblyGeneratorX86::h_IMUL_M(Instruction& instr, int i) { registerUsage[instr.dst] = i; if (instr.src != instr.dst) { genAddressReg(instr); - asmCode << "\timul " << regR[instr.dst] << ", qword ptr [rsi+rax]" << std::endl; + asmCode << "\timul " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl; } else { - asmCode << "\timul " << regR[instr.dst] << ", qword ptr [rsi+" << genAddressImm(instr) << "]" << std::endl; + asmCode << "\timul " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+" << genAddressImm(instr) << "]" << std::endl; } traceint(instr); } @@ -370,23 +348,21 @@ namespace randomx { traceint(instr); } - //5.75 uOPs void AssemblyGeneratorX86::h_IMULH_M(Instruction& instr, int i) { registerUsage[instr.dst] = i; if (instr.src != instr.dst) { genAddressReg(instr, "ecx"); asmCode << "\tmov rax, " << regR[instr.dst] << std::endl; - asmCode << "\tmul qword ptr [rsi+rcx]" << std::endl; + asmCode << "\tmul qword ptr [" << regScratchpadAddr << "+rcx]" << std::endl; } else { asmCode << "\tmov rax, " << regR[instr.dst] << std::endl; - asmCode << "\tmul qword ptr [rsi+" << genAddressImm(instr) << "]" << std::endl; + asmCode << "\tmul qword ptr [" << regScratchpadAddr << "+" << genAddressImm(instr) << "]" << std::endl; } asmCode << "\tmov " << regR[instr.dst] << ", rdx" << std::endl; traceint(instr); } - //4 uOPs void AssemblyGeneratorX86::h_ISMULH_R(Instruction& instr, int i) { registerUsage[instr.dst] = i; asmCode << "\tmov rax, " << regR[instr.dst] << std::endl; @@ -395,30 +371,27 @@ namespace randomx { traceint(instr); } - //5.75 uOPs void AssemblyGeneratorX86::h_ISMULH_M(Instruction& instr, int i) { registerUsage[instr.dst] = i; if (instr.src != instr.dst) { genAddressReg(instr, "ecx"); asmCode << "\tmov rax, " << regR[instr.dst] << std::endl; - asmCode << "\timul qword ptr [rsi+rcx]" << std::endl; + asmCode << "\timul qword ptr [" << regScratchpadAddr << "+rcx]" << std::endl; } else { asmCode << "\tmov rax, " << regR[instr.dst] << std::endl; - asmCode << "\timul qword ptr [rsi+" << genAddressImm(instr) << "]" << std::endl; + asmCode << "\timul qword ptr [" << regScratchpadAddr << "+" << genAddressImm(instr) << "]" << std::endl; } asmCode << "\tmov " << regR[instr.dst] << ", rdx" << std::endl; traceint(instr); } - //1 uOP void AssemblyGeneratorX86::h_INEG_R(Instruction& instr, int i) { registerUsage[instr.dst] = i; asmCode << "\tneg " << regR[instr.dst] << std::endl; traceint(instr); } - //1 uOP void AssemblyGeneratorX86::h_IXOR_R(Instruction& instr, int i) { registerUsage[instr.dst] = i; if (instr.src != instr.dst) { @@ -430,20 +403,18 @@ namespace randomx { traceint(instr); } - //2.75 uOP void AssemblyGeneratorX86::h_IXOR_M(Instruction& instr, int i) { registerUsage[instr.dst] = i; if (instr.src != instr.dst) { genAddressReg(instr); - asmCode << "\txor " << regR[instr.dst] << ", qword ptr [rsi+rax]" << std::endl; + asmCode << "\txor " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl; } else { - asmCode << "\txor " << regR[instr.dst] << ", qword ptr [rsi+" << genAddressImm(instr) << "]" << std::endl; + asmCode << "\txor " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+" << genAddressImm(instr) << "]" << std::endl; } traceint(instr); } - //1.75 uOPs void AssemblyGeneratorX86::h_IROR_R(Instruction& instr, int i) { registerUsage[instr.dst] = i; if (instr.src != instr.dst) { @@ -456,7 +427,6 @@ namespace randomx { traceint(instr); } - //1.75 uOPs void AssemblyGeneratorX86::h_IROL_R(Instruction& instr, int i) { registerUsage[instr.dst] = i; if (instr.src != instr.dst) { @@ -469,7 +439,6 @@ namespace randomx { traceint(instr); } - //2 uOPs void AssemblyGeneratorX86::h_IMUL_RCP(Instruction& instr, int i) { if (instr.getImm32() != 0) { registerUsage[instr.dst] = i; @@ -483,12 +452,6 @@ namespace randomx { } } - //~8.5 uOPs - void AssemblyGeneratorX86::h_ISDIV_C(Instruction& instr, int i) { - tracenop(instr); - } - - //2 uOPs void AssemblyGeneratorX86::h_ISWAP_R(Instruction& instr, int i) { if (instr.src != instr.dst) { registerUsage[instr.dst] = i; @@ -501,13 +464,11 @@ namespace randomx { } } - //1 uOPs void AssemblyGeneratorX86::h_FSWAP_R(Instruction& instr, int i) { asmCode << "\tshufpd " << regFE[instr.dst] << ", " << regFE[instr.dst] << ", 1" << std::endl; traceflt(instr); } - //1 uOP void AssemblyGeneratorX86::h_FADD_R(Instruction& instr, int i) { instr.dst %= 4; instr.src %= 4; @@ -515,16 +476,14 @@ namespace randomx { traceflt(instr); } - //5 uOPs void AssemblyGeneratorX86::h_FADD_M(Instruction& instr, int i) { instr.dst %= 4; genAddressReg(instr); - asmCode << "\tcvtdq2pd xmm12, qword ptr [rsi+rax]" << std::endl; - asmCode << "\taddpd " << regF[instr.dst] << ", xmm12" << std::endl; + asmCode << "\tcvtdq2pd " << tempRegx << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl; + asmCode << "\taddpd " << regF[instr.dst] << ", " << tempRegx << std::endl; traceflt(instr); } - //1 uOP void AssemblyGeneratorX86::h_FSUB_R(Instruction& instr, int i) { instr.dst %= 4; instr.src %= 4; @@ -532,23 +491,20 @@ namespace randomx { traceflt(instr); } - //5 uOPs void AssemblyGeneratorX86::h_FSUB_M(Instruction& instr, int i) { instr.dst %= 4; genAddressReg(instr); - asmCode << "\tcvtdq2pd xmm12, qword ptr [rsi+rax]" << std::endl; - asmCode << "\tsubpd " << regF[instr.dst] << ", xmm12" << std::endl; + asmCode << "\tcvtdq2pd " << tempRegx << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl; + asmCode << "\tsubpd " << regF[instr.dst] << ", " << tempRegx << std::endl; traceflt(instr); } - //1 uOP void AssemblyGeneratorX86::h_FSCAL_R(Instruction& instr, int i) { instr.dst %= 4; - asmCode << "\txorps " << regF[instr.dst] << ", " << signMask << std::endl; + asmCode << "\txorps " << regF[instr.dst] << ", " << scaleMask << std::endl; traceflt(instr); } - //1 uOPs void AssemblyGeneratorX86::h_FMUL_R(Instruction& instr, int i) { instr.dst %= 4; instr.src %= 4; @@ -556,45 +512,22 @@ namespace randomx { traceflt(instr); } - //7 uOPs - void AssemblyGeneratorX86::h_FMUL_M(Instruction& instr, int i) { - instr.dst %= 4; - genAddressReg(instr); - asmCode << "\tcvtdq2pd xmm12, qword ptr [rsi+rax]" << std::endl; - asmCode << "\tandps xmm12, xmm14" << std::endl; - asmCode << "\tmulpd " << regE[instr.dst] << ", xmm12" << std::endl; - asmCode << "\tmaxpd " << regE[instr.dst] << ", " << dblMin << std::endl; - traceflt(instr); - } - - //2 uOPs - void AssemblyGeneratorX86::h_FDIV_R(Instruction& instr, int i) { - instr.dst %= 4; - instr.src %= 4; - asmCode << "\tdivpd " << regE[instr.dst] << ", " << regA[instr.src] << std::endl; - asmCode << "\tmaxpd " << regE[instr.dst] << ", " << dblMin << std::endl; - traceflt(instr); - } - - //7 uOPs void AssemblyGeneratorX86::h_FDIV_M(Instruction& instr, int i) { instr.dst %= 4; genAddressReg(instr); - asmCode << "\tcvtdq2pd xmm12, qword ptr [rsi+rax]" << std::endl; - asmCode << "\tandps xmm12, xmm13" << std::endl; - asmCode << "\torps xmm12, xmm14" << std::endl; - asmCode << "\tdivpd " << regE[instr.dst] << ", xmm12" << std::endl; + asmCode << "\tcvtdq2pd " << tempRegx << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl; + asmCode << "\tandps " << tempRegx << ", " << mantissaMask << std::endl; + asmCode << "\torps " << tempRegx << ", " << exponentMask << std::endl; + asmCode << "\tdivpd " << regE[instr.dst] << ", " << tempRegx << std::endl; traceflt(instr); } - //1 uOP void AssemblyGeneratorX86::h_FSQRT_R(Instruction& instr, int i) { instr.dst %= 4; asmCode << "\tsqrtpd " << regE[instr.dst] << ", " << regE[instr.dst] << std::endl; traceflt(instr); } - //6 uOPs void AssemblyGeneratorX86::h_CFROUND(Instruction& instr, int i) { asmCode << "\tmov rax, " << regR[instr.src] << std::endl; int rotate = (13 - (instr.getImm32() & 63)) & 63; @@ -645,7 +578,6 @@ namespace randomx { } } - //4 uOPs void AssemblyGeneratorX86::h_COND_R(Instruction& instr, int i) { handleCondition(instr, i); asmCode << "\txor ecx, ecx" << std::endl; @@ -655,28 +587,9 @@ namespace randomx { traceint(instr); } - //6 uOPs - void AssemblyGeneratorX86::h_COND_M(Instruction& instr, int i) { - handleCondition(instr, i); - asmCode << "\txor ecx, ecx" << std::endl; - genAddressReg(instr); - asmCode << "\tcmp dword ptr [rsi+rax], " << (int32_t)instr.getImm32() << std::endl; - asmCode << "\tset" << condition(instr) << " cl" << std::endl; - asmCode << "\tadd " << regR[instr.dst] << ", rcx" << std::endl; - traceint(instr); - } - - //3 uOPs void AssemblyGeneratorX86::h_ISTORE(Instruction& instr, int i) { genAddressRegDst(instr); - asmCode << "\tmov qword ptr [rsi+rax], " << regR[instr.src] << std::endl; - tracenop(instr); - } - - //3 uOPs - void AssemblyGeneratorX86::h_FSTORE(Instruction& instr, int i) { - genAddressRegDst(instr, 16); - asmCode << "\tmovapd xmmword ptr [rsi+rax], " << regFE[instr.src] << std::endl; + asmCode << "\tmov qword ptr [" << regScratchpadAddr << "+rax], " << regR[instr.src] << std::endl; tracenop(instr); } @@ -692,10 +605,8 @@ namespace randomx { //Integer INST_HANDLE(IADD_RS) INST_HANDLE(IADD_M) - INST_HANDLE(IADD_RC) INST_HANDLE(ISUB_R) INST_HANDLE(ISUB_M) - INST_HANDLE(IMUL_9C) INST_HANDLE(IMUL_R) INST_HANDLE(IMUL_M) INST_HANDLE(IMULH_R) @@ -727,7 +638,6 @@ namespace randomx { //Control INST_HANDLE(COND_R) - INST_HANDLE(COND_M) INST_HANDLE(CFROUND) INST_HANDLE(ISTORE) diff --git a/src/assembly_generator_x86.hpp b/src/assembly_generator_x86.hpp index f5b523f..60ea7ab 100644 --- a/src/assembly_generator_x86.hpp +++ b/src/assembly_generator_x86.hpp @@ -50,10 +50,8 @@ namespace randomx { void tracenop(Instruction&); void h_IADD_RS(Instruction&, int); void h_IADD_M(Instruction&, int); - void h_IADD_RC(Instruction&, int); void h_ISUB_R(Instruction&, int); void h_ISUB_M(Instruction&, int); - void h_IMUL_9C(Instruction&, int); void h_IMUL_R(Instruction&, int); void h_IMUL_M(Instruction&, int); void h_IMULH_R(Instruction&, int); @@ -75,15 +73,11 @@ namespace randomx { void h_FSUB_M(Instruction&, int); void h_FSCAL_R(Instruction&, int); void h_FMUL_R(Instruction&, int); - void h_FMUL_M(Instruction&, int); - void h_FDIV_R(Instruction&, int); void h_FDIV_M(Instruction&, int); void h_FSQRT_R(Instruction&, int); void h_COND_R(Instruction&, int); - void h_COND_M(Instruction&, int); void h_CFROUND(Instruction&, int); void h_ISTORE(Instruction&, int); - void h_FSTORE(Instruction&, int); void h_NOP(Instruction&, int); static InstructionGenerator engine[256]; diff --git a/src/common.hpp b/src/common.hpp index a35f8ec..84d0e26 100644 --- a/src/common.hpp +++ b/src/common.hpp @@ -40,13 +40,13 @@ namespace randomx { static_assert((RANDOMX_SCRATCHPAD_L1 & (RANDOMX_SCRATCHPAD_L1 - 1)) == 0, "RANDOMX_SCRATCHPAD_L1 must be a power of 2."); static_assert(RANDOMX_CACHE_ACCESSES > 1, "RANDOMX_CACHE_ACCESSES must be greater than 1"); - constexpr int wtSum = RANDOMX_FREQ_IADD_RS + RANDOMX_FREQ_IADD_M + RANDOMX_FREQ_IADD_RC + RANDOMX_FREQ_ISUB_R + \ - RANDOMX_FREQ_ISUB_M + RANDOMX_FREQ_IMUL_9C + RANDOMX_FREQ_IMUL_R + RANDOMX_FREQ_IMUL_M + RANDOMX_FREQ_IMULH_R + \ + constexpr int wtSum = RANDOMX_FREQ_IADD_RS + RANDOMX_FREQ_IADD_M + RANDOMX_FREQ_ISUB_R + \ + RANDOMX_FREQ_ISUB_M + RANDOMX_FREQ_IMUL_R + RANDOMX_FREQ_IMUL_M + RANDOMX_FREQ_IMULH_R + \ RANDOMX_FREQ_IMULH_M + RANDOMX_FREQ_ISMULH_R + RANDOMX_FREQ_ISMULH_M + RANDOMX_FREQ_IMUL_RCP + \ RANDOMX_FREQ_INEG_R + RANDOMX_FREQ_IXOR_R + RANDOMX_FREQ_IXOR_M + RANDOMX_FREQ_IROR_R + RANDOMX_FREQ_ISWAP_R + \ RANDOMX_FREQ_FSWAP_R + RANDOMX_FREQ_FADD_R + RANDOMX_FREQ_FADD_M + RANDOMX_FREQ_FSUB_R + RANDOMX_FREQ_FSUB_M + \ RANDOMX_FREQ_FSCAL_R + RANDOMX_FREQ_FMUL_R + RANDOMX_FREQ_FDIV_M + RANDOMX_FREQ_FSQRT_R + RANDOMX_FREQ_COND_R + \ - RANDOMX_FREQ_COND_M + RANDOMX_FREQ_CFROUND + RANDOMX_FREQ_ISTORE + RANDOMX_FREQ_NOP; + RANDOMX_FREQ_CFROUND + RANDOMX_FREQ_ISTORE + RANDOMX_FREQ_NOP; static_assert(wtSum == 256, "Sum of instruction frequencies must be 256."); diff --git a/src/configuration.h b/src/configuration.h index ef57775..bf10f51 100644 --- a/src/configuration.h +++ b/src/configuration.h @@ -68,10 +68,8 @@ Total sum of frequencies must be 256 #define RANDOMX_FREQ_IADD_RS 32 #define RANDOMX_FREQ_IADD_M 7 -#define RANDOMX_FREQ_IADD_RC 0 #define RANDOMX_FREQ_ISUB_R 17 #define RANDOMX_FREQ_ISUB_M 7 -#define RANDOMX_FREQ_IMUL_9C 0 #define RANDOMX_FREQ_IMUL_R 16 #define RANDOMX_FREQ_IMUL_M 4 #define RANDOMX_FREQ_IMULH_R 4 @@ -97,7 +95,6 @@ Total sum of frequencies must be 256 #define RANDOMX_FREQ_FSQRT_R 6 #define RANDOMX_FREQ_COND_R 8 -#define RANDOMX_FREQ_COND_M 0 #define RANDOMX_FREQ_CFROUND 1 #define RANDOMX_FREQ_ISTORE 16 diff --git a/src/instruction.cpp b/src/instruction.cpp index 24a44b7..9f1b681 100644 --- a/src/instruction.cpp +++ b/src/instruction.cpp @@ -65,10 +65,6 @@ namespace randomx { } } - void Instruction::h_IADD_RC(std::ostream& os) const { - os << "r" << (int)dst << ", r" << (int)src << ", " << (int32_t)getImm32() << std::endl; - } - //1 uOP void Instruction::h_ISUB_R(std::ostream& os) const { if (src != dst) { @@ -92,10 +88,6 @@ namespace randomx { } } - void Instruction::h_IMUL_9C(std::ostream& os) const { - os << "r" << (int)dst << ", " << (int32_t)getImm32() << std::endl; - } - void Instruction::h_IMUL_R(std::ostream& os) const { if (src != dst) { os << "r" << (int)dst << ", r" << (int)src << std::endl; @@ -200,10 +192,6 @@ namespace randomx { os << "r" << (int)dst << ", " << getImm32() << std::endl; } - void Instruction::h_ISDIV_C(std::ostream& os) const { - os << "r" << (int)dst << ", " << (int32_t)getImm32() << std::endl; - } - void Instruction::h_ISWAP_R(std::ostream& os) const { os << "r" << (int)dst << ", r" << (int)src << std::endl; } @@ -251,19 +239,6 @@ namespace randomx { os << "e" << dstIndex << ", a" << srcIndex << std::endl; } - void Instruction::h_FMUL_M(std::ostream& os) const { - auto dstIndex = dst % 4; - os << "e" << dstIndex << ", "; - genAddressReg(os); - os << std::endl; - } - - void Instruction::h_FDIV_R(std::ostream& os) const { - auto dstIndex = dst % 4; - auto srcIndex = src % 4; - os << "e" << dstIndex << ", a" << srcIndex << std::endl; - } - void Instruction::h_FDIV_M(std::ostream& os) const { auto dstIndex = dst % 4; os << "e" << dstIndex << ", "; @@ -308,24 +283,11 @@ namespace randomx { os << "r" << (int)dst << ", " << condition((mod >> 2) & 7) << "(r" << (int)src << ", " << (int32_t)getImm32() << "), LSH " << (int)(mod >> 5) << std::endl; } - void Instruction::h_COND_M(std::ostream& os) const { - os << "r" << (int)dst << ", " << condition((mod >> 2) & 7) << "("; - genAddressReg(os); - os << ", " << (int32_t)getImm32() << "), LSH " << (int)(mod >> 5) << std::endl; - } - void Instruction::h_ISTORE(std::ostream& os) const { genAddressRegDst(os); os << ", r" << (int)src << std::endl; } - void Instruction::h_FSTORE(std::ostream& os) const { - const char reg = (src >= 4) ? 'e' : 'f'; - genAddressRegDst(os); - auto srcIndex = src % 4; - os << ", " << reg << srcIndex << std::endl; - } - void Instruction::h_NOP(std::ostream& os) const { os << std::endl; } @@ -338,10 +300,8 @@ namespace randomx { //Integer INST_NAME(IADD_RS) INST_NAME(IADD_M) - INST_NAME(IADD_RC) INST_NAME(ISUB_R) INST_NAME(ISUB_M) - INST_NAME(IMUL_9C) INST_NAME(IMUL_R) INST_NAME(IMUL_M) INST_NAME(IMULH_R) @@ -372,7 +332,6 @@ namespace randomx { //Control INST_NAME(COND_R) - INST_NAME(COND_M) INST_NAME(CFROUND) INST_NAME(ISTORE) @@ -384,10 +343,8 @@ namespace randomx { //Integer INST_HANDLE(IADD_RS) INST_HANDLE(IADD_M) - INST_HANDLE(IADD_RC) INST_HANDLE(ISUB_R) INST_HANDLE(ISUB_M) - INST_HANDLE(IMUL_9C) INST_HANDLE(IMUL_R) INST_HANDLE(IMUL_M) INST_HANDLE(IMULH_R) @@ -417,13 +374,9 @@ namespace randomx { INST_HANDLE(FDIV_M) INST_HANDLE(FSQRT_R) - //Control INST_HANDLE(COND_R) - INST_HANDLE(COND_M) INST_HANDLE(CFROUND) - INST_HANDLE(ISTORE) - INST_HANDLE(NOP) }; diff --git a/src/instruction.hpp b/src/instruction.hpp index 323c1f5..3420cda 100644 --- a/src/instruction.hpp +++ b/src/instruction.hpp @@ -32,38 +32,34 @@ namespace randomx { namespace InstructionType { constexpr int IADD_RS = 0; constexpr int IADD_M = 1; - constexpr int IADD_RC = 2; - constexpr int ISUB_R = 3; - constexpr int ISUB_M = 4; - constexpr int IMUL_9C = 5; - constexpr int IMUL_R = 6; - constexpr int IMUL_M = 7; - constexpr int IMULH_R = 8; - constexpr int IMULH_M = 9; - constexpr int ISMULH_R = 10; - constexpr int ISMULH_M = 11; - constexpr int IMUL_RCP = 12; - constexpr int INEG_R = 13; - constexpr int IXOR_R = 14; - constexpr int IXOR_M = 15; - constexpr int IROR_R = 16; - constexpr int IROL_R = 17; - constexpr int ISWAP_R = 18; - constexpr int FSWAP_R = 19; - constexpr int FADD_R = 20; - constexpr int FADD_M = 21; - constexpr int FSUB_R = 22; - constexpr int FSUB_M = 23; - constexpr int FSCAL_R = 24; - constexpr int FMUL_R = 25; - constexpr int FDIV_M = 26; - constexpr int FSQRT_R = 27; - constexpr int COND_R = 28; - constexpr int COND_M = 29; - constexpr int CFROUND = 30; - constexpr int ISTORE = 31; - constexpr int FSTORE = 32; - constexpr int NOP = 33; + constexpr int ISUB_R = 2; + constexpr int ISUB_M = 3; + constexpr int IMUL_R = 4; + constexpr int IMUL_M = 5; + constexpr int IMULH_R = 6; + constexpr int IMULH_M = 7; + constexpr int ISMULH_R = 8; + constexpr int ISMULH_M = 9; + constexpr int IMUL_RCP = 10; + constexpr int INEG_R = 11; + constexpr int IXOR_R = 12; + constexpr int IXOR_M = 13; + constexpr int IROR_R = 14; + constexpr int IROL_R = 15; + constexpr int ISWAP_R = 16; + constexpr int FSWAP_R = 17; + constexpr int FADD_R = 18; + constexpr int FADD_M = 19; + constexpr int FSUB_R = 20; + constexpr int FSUB_M = 21; + constexpr int FSCAL_R = 22; + constexpr int FMUL_R = 23; + constexpr int FDIV_M = 24; + constexpr int FSQRT_R = 25; + constexpr int COND_R = 26; + constexpr int CFROUND = 27; + constexpr int ISTORE = 28; + constexpr int NOP = 29; } class Instruction { @@ -112,10 +108,8 @@ namespace randomx { void genAddressRegDst(std::ostream&) const; void h_IADD_RS(std::ostream&) const; void h_IADD_M(std::ostream&) const; - void h_IADD_RC(std::ostream&) const; void h_ISUB_R(std::ostream&) const; void h_ISUB_M(std::ostream&) const; - void h_IMUL_9C(std::ostream&) const; void h_IMUL_R(std::ostream&) const; void h_IMUL_M(std::ostream&) const; void h_IMULH_R(std::ostream&) const; @@ -123,7 +117,6 @@ namespace randomx { void h_ISMULH_R(std::ostream&) const; void h_ISMULH_M(std::ostream&) const; void h_IMUL_RCP(std::ostream&) const; - void h_ISDIV_C(std::ostream&) const; void h_INEG_R(std::ostream&) const; void h_IXOR_R(std::ostream&) const; void h_IXOR_M(std::ostream&) const; @@ -137,15 +130,11 @@ namespace randomx { void h_FSUB_M(std::ostream&) const; void h_FSCAL_R(std::ostream&) const; void h_FMUL_R(std::ostream&) const; - void h_FMUL_M(std::ostream&) const; - void h_FDIV_R(std::ostream&) const; void h_FDIV_M(std::ostream&) const; void h_FSQRT_R(std::ostream&) const; void h_COND_R(std::ostream&) const; - void h_COND_M(std::ostream&) const; void h_CFROUND(std::ostream&) const; void h_ISTORE(std::ostream&) const; - void h_FSTORE(std::ostream&) const; void h_NOP(std::ostream&) const; }; diff --git a/src/instructions_portable.cpp b/src/instructions_portable.cpp index cfa20ab..b77e93b 100644 --- a/src/instructions_portable.cpp +++ b/src/instructions_portable.cpp @@ -19,7 +19,7 @@ along with RandomX. If not, see. //#define DEBUG -#pragma STDC FENV_ACCESS on +#pragma STDC FENV_ACCESS ON #include #include #ifdef DEBUG diff --git a/src/jit_compiler_x86.cpp b/src/jit_compiler_x86.cpp index 1d1f80b..b24eba3 100644 --- a/src/jit_compiler_x86.cpp +++ b/src/jit_compiler_x86.cpp @@ -953,10 +953,8 @@ namespace randomx { InstructionGeneratorX86 JitCompilerX86::engine[256] = { INST_HANDLE(IADD_RS) INST_HANDLE(IADD_M) - INST_HANDLE(IADD_RC) INST_HANDLE(ISUB_R) INST_HANDLE(ISUB_M) - INST_HANDLE(IMUL_9C) INST_HANDLE(IMUL_R) INST_HANDLE(IMUL_M) INST_HANDLE(IMULH_R) @@ -980,7 +978,6 @@ namespace randomx { INST_HANDLE(FDIV_M) INST_HANDLE(FSQRT_R) INST_HANDLE(COND_R) - INST_HANDLE(COND_M) INST_HANDLE(CFROUND) INST_HANDLE(ISTORE) INST_HANDLE(NOP) diff --git a/src/vm_interpreted.cpp b/src/vm_interpreted.cpp index 22109d3..3d82b18 100644 --- a/src/vm_interpreted.cpp +++ b/src/vm_interpreted.cpp @@ -125,10 +125,6 @@ namespace randomx { *ibc.idst += load64(getScratchpadAddress(ibc)); } break; - case InstructionType::IADD_RC: { - *ibc.idst += *ibc.isrc + ibc.imm; - } break; - case InstructionType::ISUB_R: { *ibc.idst -= *ibc.isrc; } break; @@ -137,10 +133,6 @@ namespace randomx { *ibc.idst -= load64(getScratchpadAddress(ibc)); } break; - case InstructionType::IMUL_9C: { - *ibc.idst += 8 * *ibc.idst + ibc.imm; - } break; - case InstructionType::IMUL_R: { //also handles IMUL_RCP *ibc.idst *= *ibc.isrc; } break; @@ -243,18 +235,6 @@ namespace randomx { *ibc.idst += condition(ibc.condition, *ibc.isrc, ibc.imm) ? 1 : 0; } break; - case InstructionType::COND_M: { -#ifdef RANDOMX_JUMP - *ibc.creg += (1uLL << ibc.shift); - const uint64_t conditionMask = ((1ULL << RANDOMX_CONDITION_BITS) - 1) << ibc.shift; - if ((*ibc.creg & conditionMask) == 0) { - ic = ibc.target; - break; - } -#endif - *ibc.idst += condition(ibc.condition, load64(getScratchpadAddress(ibc)), ibc.imm) ? 1 : 0; - } break; - case InstructionType::CFROUND: { setRoundMode(rotr(*ibc.isrc, ibc.imm) % 4); } break; @@ -482,16 +462,6 @@ namespace randomx { registerUsage[instr.dst] = i; } break; - CASE_REP(IADD_RC) { - auto dst = instr.dst % RegistersCount; - auto src = instr.src % RegistersCount; - ibc.type = InstructionType::IADD_RC; - ibc.idst = &r[dst]; - ibc.isrc = &r[src]; - ibc.imm = signExtend2sCompl(instr.getImm32()); - registerUsage[instr.dst] = i; - } break; - CASE_REP(ISUB_R) { auto dst = instr.dst % RegistersCount; auto src = instr.src % RegistersCount; @@ -524,14 +494,6 @@ namespace randomx { registerUsage[instr.dst] = i; } break; - CASE_REP(IMUL_9C) { - auto dst = instr.dst % RegistersCount; - ibc.type = InstructionType::IMUL_9C; - ibc.idst = &r[dst]; - ibc.imm = signExtend2sCompl(instr.getImm32()); - registerUsage[instr.dst] = i; - } break; - CASE_REP(IMUL_R) { auto dst = instr.dst % RegistersCount; auto src = instr.src % RegistersCount; @@ -808,25 +770,6 @@ namespace randomx { } } break; - CASE_REP(COND_M) { - auto dst = instr.dst % RegistersCount; - auto src = instr.src % RegistersCount; - ibc.type = InstructionType::COND_M; - ibc.idst = &r[dst]; - ibc.isrc = &r[src]; - ibc.condition = instr.getModCond(); - ibc.imm = instr.getImm32(); - ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask); - //jump condition - int reg = getConditionRegister(registerUsage); - ibc.target = registerUsage[reg]; - ibc.shift = instr.getModShift3(); - ibc.creg = &r[reg]; - for (unsigned j = 0; j < 8; ++j) { //mark all registers as used - registerUsage[j] = i; - } - } break; - CASE_REP(CFROUND) { auto src = instr.src % 8; ibc.isrc = &r[src]; diff --git a/src/vm_interpreted_light.hpp b/src/vm_interpreted_light.hpp index 67bf49a..9e68fee 100644 --- a/src/vm_interpreted_light.hpp +++ b/src/vm_interpreted_light.hpp @@ -40,7 +40,7 @@ namespace randomx { void setDataset(randomx_dataset* dataset) override { } void setCache(randomx_cache* cache) override; protected: - virtual void datasetRead(uint32_t address, int_reg_t(&r)[8]); + void datasetRead(uint32_t address, int_reg_t(&r)[8]) override; private: randomx_cache* cachePtr; };