Print average program code size

Fixed assembly for MUL_64 and IMUL_32. Division weight 4 -> 8.
2024-08-15 00:23:14 +00:00 · 2019-01-12 16:05:09 +01:00 · 2019-01-12 16:05:09 +01:00 · 1426fcbab5
commit 1426fcbab5
parent 2756bcdcfe
8 changed files with 337 additions and 225 deletions
--- a/src/AssemblyGeneratorX86.cpp
+++ b/src/AssemblyGeneratorX86.cpp
@ -222,7 +222,7 @@ namespace RandomX {
 	void AssemblyGeneratorX86::h_MUL_64(Instruction& instr, int i) {
 		genar(instr, i);
 		asmCode << "\timul rax, ";
-		if ((instr.locb & 7) >= 6) {
+		if ((instr.locb & 3) == 0) {
 			asmCode << "rax, ";
 		}
 		genbia(instr);
@ -250,7 +250,7 @@ namespace RandomX {
 	void AssemblyGeneratorX86::h_IMUL_32(Instruction& instr, int i) {
 		genar(instr, i);
 		asmCode << "\tmovsxd rcx, eax" << std::endl;
-		if ((instr.locb & 7) >= 6) {
+		if ((instr.locb & 3) == 0) {
 			asmCode << "\tmov rax, " << instr.imm32 << std::endl;
 		}
 		else {
--- a/src/CompiledVirtualMachine.cpp
+++ b/src/CompiledVirtualMachine.cpp
@ -26,7 +26,7 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 namespace RandomX {
 	CompiledVirtualMachine::CompiledVirtualMachine(bool softAes) : VirtualMachine(softAes) {
-
+		totalSize = 0;
 	}
 	void CompiledVirtualMachine::setDataset(dataset_t ds, bool lightClient) {
@ -48,6 +48,7 @@ namespace RandomX {
 	void CompiledVirtualMachine::execute() {
 		//executeProgram(reg, mem, scratchpad, readDataset);
 		totalSize += compiler.getCodeSize();
 		compiler.getProgramFunc()(reg, mem, scratchpad);
 #ifdef TRACEVM
 		for (int32_t i = InstructionCount - 1; i >= 0; --i) {
--- a/src/CompiledVirtualMachine.hpp
+++ b/src/CompiledVirtualMachine.hpp
@ -44,10 +44,14 @@ namespace RandomX {
 		// Returns the compiler's code pointer (compiler.getCode()) as an untyped pointer.
 		void* getProgram() {
 			return compiler.getCode();
 		}
 		// Returns totalSize, the running sum of compiler.getCodeSize() values that
 		// execute() adds on every call (the counter starts at 0 in the constructor).
 		// Callers divide this by the number of programs to get an average size.
 		uint64_t getTotalSize() {
 			return totalSize;
 		}
 	private:
 #ifdef TRACEVM
 		convertible_t tracepad[InstructionCount];
 #endif
 		JitCompilerX86 compiler;
 		uint64_t totalSize;
 	};
 }
--- a/src/JitCompilerX86.cpp
+++ b/src/JitCompilerX86.cpp
@ -116,6 +116,10 @@ namespace RandomX {
 	const int32_t readDatasetL1Offset = readDatasetL2Offset - readDatasetL1Size;
 	const int32_t epilogueOffset = readDatasetL1Offset - epilogueSize;
 	// Reports the size figure used for the "average program size" statistic:
 	// codePos minus prologueSize, plus the sizes of the two readDataset routines.
 	// NOTE(review): presumably a byte count of the generated program excluding the
 	// prologue; confirm against how codePos is initialised and advanced.
 	size_t JitCompilerX86::getCodeSize() {
 		return codePos - prologueSize + readDatasetL1Size + readDatasetL2Size;
 	}
 	JitCompilerX86::JitCompilerX86() {
 #ifdef _WIN32
 		code = (uint8_t*)VirtualAlloc(nullptr, CodeSize, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
@ -196,6 +200,7 @@ namespace RandomX {
 	// Emits the operand-A load used by the instruction handlers: runs gena(instr)
 	// first, then appends the raw machine code below.
 	// NOTE(review): assumes gena() leaves the source index in rcx; confirm in gena().
 	void JitCompilerX86::genar(Instruction& instr) {
 		gena(instr);
 		emit(0xce048b48); //mov rax,QWORD PTR [rsi+rcx*8]
 		emit(0xdc580f66); // NOTE(review): bytes 66 0f 58 dc appear to decode as addpd xmm3,xmm4; confirm intent
 	}
 	void JitCompilerX86::genaf(Instruction& instr) {
@ -437,7 +442,7 @@ namespace RandomX {
 	void JitCompilerX86::h_DIV_64(Instruction& instr, int i) {
 		genar(instr);
-		if (instr.locb & 3) {
+		if (instr.locb & 7) {
 #ifdef MAGIC_DIVISION
 			if (instr.imm32 != 0) {
 				uint32_t divisor = instr.imm32;
@ -496,7 +501,7 @@ namespace RandomX {
 	void JitCompilerX86::h_IDIV_64(Instruction& instr, int i) {
 		genar(instr);
-		if (instr.locb & 3) {
+		if (instr.locb & 7) {
 #ifdef MAGIC_DIVISION
 			int64_t divisor = instr.imm32;
 			if ((divisor & -divisor) == divisor || (divisor & -divisor) == -divisor) {
@ -566,8 +571,8 @@ namespace RandomX {
 #ifndef MAGIC_DIVISION
 		}
 #endif
-		emit(0xc88b480b75fffa83);
+		emit(0xd8f7480575fffa83); //cmp edx,-1
-		emit(0x1274c9ff48c1d148);
+		emit(uint16_t(0x12eb)); //jmp result
 		emit(0x0fd28500000001b9);
 		emit(0x489948c96348ca45);
 		emit(uint16_t(0xf9f7)); //idiv rcx
@ -766,6 +771,10 @@ namespace RandomX {
 		emitByte(0xc3); //ret
 	}
 	// Handler for the NOP instruction: emits only the common genar() sequence and
 	// nothing else. The index parameter i is unused.
 	void JitCompilerX86::h_NOP(Instruction& instr, int i) {
 		genar(instr);
 	}
 #include "instructionWeights.hpp"
 #define INST_HANDLE(x) REPN(&JitCompilerX86::h_##x, WT(x))
@ -801,6 +810,7 @@ namespace RandomX {
 		INST_HANDLE(JUMP)
 		INST_HANDLE(CALL)
 		INST_HANDLE(RET)
 		INST_HANDLE(NOP)
 	};
 #endif
--- a/src/JitCompilerX86.hpp
+++ b/src/JitCompilerX86.hpp
@ -51,6 +51,7 @@ namespace RandomX {
 		// Returns the compiler's code buffer pointer (the `code` member).
 		uint8_t* getCode() {
 			return code;
 		}
 		size_t getCodeSize();
 	private:
 		static InstructionGeneratorX86 engine[256];
 		uint8_t* code;
@ -114,6 +115,7 @@ namespace RandomX {
 		void h_JUMP(Instruction&, int);
 		void h_CALL(Instruction&, int);
 		void h_RET(Instruction&, int);
 		void h_NOP(Instruction&, int);
 	};
 }
--- a/src/instructionWeights.hpp
+++ b/src/instructionWeights.hpp
@ -24,12 +24,12 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 #define WT_SUB_64 12
 #define WT_SUB_32 2
 #define WT_MUL_64 23
-#define WT_MULH_64 10
+#define WT_MULH_64 5
 #define WT_MUL_32 15
 #define WT_IMUL_32 15
-#define WT_IMULH_64 6
+#define WT_IMULH_64 3
-#define WT_DIV_64 4
+#define WT_DIV_64 8
-#define WT_IDIV_64 4
+#define WT_IDIV_64 8
 #define WT_AND_64 4
 #define WT_AND_32 2
 #define WT_OR_64 4
@ -50,6 +50,7 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 #define WT_JUMP 11
 #define WT_CALL 11
 #define WT_RET 12
 #define WT_NOP 0
 constexpr int wtSum = WT_ADD_64 + WT_ADD_32 + WT_SUB_64 + WT_SUB_32 + \
@ -57,7 +58,7 @@ WT_MUL_64 + WT_MULH_64 + WT_MUL_32 + WT_IMUL_32 + WT_IMULH_64 + \
 WT_DIV_64 + WT_IDIV_64 + WT_AND_64 + WT_AND_32 + WT_OR_64 + \
 WT_OR_32 + WT_XOR_64 + WT_XOR_32 + WT_SHL_64 + WT_SHR_64 + \
 WT_SAR_64 + WT_ROL_64 + WT_ROR_64 + WT_FPADD + WT_FPSUB + WT_FPMUL \
-+ WT_FPDIV + WT_FPSQRT + WT_FPROUND + WT_JUMP + WT_CALL + WT_RET;
+ WT_FPDIV + WT_FPSQRT + WT_FPROUND + WT_JUMP + WT_CALL + WT_RET + WT_NOP;
 static_assert(wtSum == 256,
 	"Sum of instruction weights must be 256");
--- a/src/main.cpp
+++ b/src/main.cpp
@ -270,6 +270,8 @@ int main(int argc, char** argv) {
 		}
 		else {
 			mine(vms[0], std::ref(atomicNonce), std::ref(result), programCount, 0);
 			if (compiled)
 				std::cout << "Average program size: " << ((RandomX::CompiledVirtualMachine*)vms[0])->getTotalSize() / programCount << std::endl;
 		}
 		double elapsed = sw.getElapsed();
 		std::cout << "Calculated result: ";
--- a/src/program.inc
+++ b/src/program.inc