diff --git a/.gitignore b/.gitignore
index 35c1e9a..dd437d1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,4 +3,6 @@ obj/
 *.user
 *.suo
 .vs
-x64
+x64/
+Release/
+Debug/
\ No newline at end of file
diff --git a/makefile b/makefile
index 5585b2b..3b39f4b 100644
--- a/makefile
+++ b/makefile
@@ -3,7 +3,7 @@
 AR=gcc-ar
 PLATFORM=$(shell uname -m)
 CXXFLAGS=-std=c++11
-CCFLAGS=
+CCFLAGS=-std=c99
 ARFLAGS=rcs
 BINDIR=bin
 SRCDIR=src
@@ -80,7 +80,8 @@ $(OBJDIR)/dataset.o: $(SRCDIR)/dataset.cpp $(SRCDIR)/common.hpp $(SRCDIR)/blake2
  $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h $(SRCDIR)/dataset.hpp \
  $(SRCDIR)/superscalar_program.hpp $(SRCDIR)/instruction.hpp $(SRCDIR)/jit_compiler_x86.hpp \
  $(SRCDIR)/allocator.hpp $(SRCDIR)/virtual_memory.hpp $(SRCDIR)/superscalar.hpp \
- $(SRCDIR)/blake2_generator.hpp $(SRCDIR)/reciprocal.h $(SRCDIR)/argon2.h $(SRCDIR)/argon2_core.h
+ $(SRCDIR)/blake2_generator.hpp $(SRCDIR)/reciprocal.h $(SRCDIR)/argon2.h $(SRCDIR)/argon2_core.h \
+ $(SRCDIR)/intrin_portable.h
 $(OBJDIR)/jit_compiler_x86.o: $(SRCDIR)/jit_compiler_x86.cpp $(SRCDIR)/jit_compiler_x86.hpp \
  $(SRCDIR)/common.hpp $(SRCDIR)/blake2/endian.h $(SRCDIR)/configuration.h $(SRCDIR)/randomx.h \
  $(SRCDIR)/jit_compiler_x86_static.hpp $(SRCDIR)/superscalar.hpp \
@@ -90,7 +91,6 @@ $(OBJDIR)/jit_compiler_x86.o: $(SRCDIR)/jit_compiler_x86.cpp $(SRCDIR)/jit_compi
 $(OBJDIR)/jit_compiler_x86_static.o: $(SRCDIR)/jit_compiler_x86_static.S \
  $(SRCDIR)/asm/program_prologue_linux.inc $(SRCDIR)/asm/program_xmm_constants.inc \
  $(SRCDIR)/asm/program_loop_load.inc $(SRCDIR)/asm/program_read_dataset.inc \
- $(SRCDIR)/asm/program_read_dataset_light.inc \
  $(SRCDIR)/asm/program_read_dataset_sshash_init.inc \
  $(SRCDIR)/asm/program_read_dataset_sshash_fin.inc \
  $(SRCDIR)/asm/program_loop_store.inc $(SRCDIR)/asm/program_epilogue_linux.inc \
diff --git a/src/asm/program_read_dataset_light.inc b/src/asm/program_read_dataset_light.inc
deleted file mode 100644
index 65d2b8d..0000000
--- a/src/asm/program_read_dataset_light.inc
+++ /dev/null
@@ -1,5 +0,0 @@
-	xor rbp, rax                       ;# modify "mx"
-	ror rbp, 32                        ;# swap "ma" and "mx"
-	mov ecx, ebp                       ;# ecx = ma
-	and ecx, 2147483584                ;# align "ma" to the start of a cache line
-	shr ecx, 6                         ;# ecx = Dataset block number
diff --git a/src/assembly_generator_x86.cpp b/src/assembly_generator_x86.cpp
index b73f3a8..165d016 100644
--- a/src/assembly_generator_x86.cpp
+++ b/src/assembly_generator_x86.cpp
@@ -27,12 +27,12 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 
 namespace randomx {
 
-	static const char* regR[8] = { "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" };
-	static const char* regR32[8] = { "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d" };
-	static const char* regFE[8] = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" };
-	static const char* regF[4] = { "xmm0", "xmm1", "xmm2", "xmm3" };
-	static const char* regE[4] = { "xmm4", "xmm5", "xmm6", "xmm7" };
-	static const char* regA[4] = { "xmm8", "xmm9", "xmm10", "xmm11" };
+	static const char* regR[] = { "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" }; //64-bit integer registers
+	static const char* regR32[] = { "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d" }; //32-bit names of the same integer registers
+	static const char* regFE[] = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" }; //regF followed by regE in a single table
+	static const char* regF[] = { "xmm0", "xmm1", "xmm2", "xmm3" }; //destinations of FADD, FSUB and FSCAL
+	static const char* regE[] = { "xmm4", "xmm5", "xmm6", "xmm7" }; //destinations of FMUL, FDIV and FSQRT
+	static const char* regA[] = { "xmm8", "xmm9", "xmm10", "xmm11" }; //source operands of FADD_R, FSUB_R and FMUL_R
 
 	static const char* tempRegx = "xmm12";
 	static const char* mantissaMask = "xmm13";
@@ -49,7 +49,9 @@ namespace randomx {
 		}
 		asmCode.str(std::string()); //clear
 		for (unsigned i = 0; i < prog.getSize(); ++i) {
+#if RANDOMX_JUMP
 			asmCode << "randomx_isn_" << i << ":" << std::endl;
+#endif
 			Instruction& instr = prog(i);
 			instr.src %= RegistersCount;
 			instr.dst %= RegistersCount;
@@ -469,14 +471,14 @@ namespace randomx {
 	}
 
 	void AssemblyGeneratorX86::h_FADD_R(Instruction& instr, int i) {
-		instr.dst %= 4;
-		instr.src %= 4;
+		instr.dst %= RegisterCountFlt;
+		instr.src %= RegisterCountFlt;
 		asmCode << "\taddpd " << regF[instr.dst] << ", " << regA[instr.src] << std::endl;
 		traceflt(instr);
 	}
 
 	void AssemblyGeneratorX86::h_FADD_M(Instruction& instr, int i) {
-		instr.dst %= 4;
+		instr.dst %= RegisterCountFlt;
 		genAddressReg(instr);
 		asmCode << "\tcvtdq2pd " << tempRegx << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl;
 		asmCode << "\taddpd " << regF[instr.dst] << ", " << tempRegx << std::endl;
@@ -484,14 +486,14 @@ namespace randomx {
 	}
 
 	void AssemblyGeneratorX86::h_FSUB_R(Instruction& instr, int i) {
-		instr.dst %= 4;
-		instr.src %= 4;
+		instr.dst %= RegisterCountFlt;
+		instr.src %= RegisterCountFlt;
 		asmCode << "\tsubpd " << regF[instr.dst] << ", " << regA[instr.src] << std::endl;
 		traceflt(instr);
 	}
 
 	void AssemblyGeneratorX86::h_FSUB_M(Instruction& instr, int i) {
-		instr.dst %= 4;
+		instr.dst %= RegisterCountFlt;
 		genAddressReg(instr);
 		asmCode << "\tcvtdq2pd " << tempRegx << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl;
 		asmCode << "\tsubpd " << regF[instr.dst] << ", " << tempRegx << std::endl;
@@ -499,20 +501,20 @@ namespace randomx {
 	}
 
 	void AssemblyGeneratorX86::h_FSCAL_R(Instruction& instr, int i) {
-		instr.dst %= 4;
+		instr.dst %= RegisterCountFlt;
 		asmCode << "\txorps " << regF[instr.dst] << ", " << scaleMask << std::endl;
 		traceflt(instr);
 	}
 
 	void AssemblyGeneratorX86::h_FMUL_R(Instruction& instr, int i) {
-		instr.dst %= 4;
-		instr.src %= 4;
+		instr.dst %= RegisterCountFlt;
+		instr.src %= RegisterCountFlt;
 		asmCode << "\tmulpd " << regE[instr.dst] << ", " << regA[instr.src] << std::endl;
 		traceflt(instr);
 	}
 
 	void AssemblyGeneratorX86::h_FDIV_M(Instruction& instr, int i) {
-		instr.dst %= 4;
+		instr.dst %= RegisterCountFlt;
 		genAddressReg(instr);
 		asmCode << "\tcvtdq2pd " << tempRegx << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl;
 		asmCode << "\tandps " << tempRegx << ", " << mantissaMask << std::endl;
@@ -522,7 +524,7 @@ namespace randomx {
 	}
 
 	void AssemblyGeneratorX86::h_FSQRT_R(Instruction& instr, int i) {
-		instr.dst %= 4;
+		instr.dst %= RegisterCountFlt;
 		asmCode << "\tsqrtpd " << regE[instr.dst] << ", " << regE[instr.dst] << std::endl;
 		traceflt(instr);
 	}	
@@ -566,7 +568,7 @@ namespace randomx {
 
 	void AssemblyGeneratorX86::handleCondition(Instruction& instr, int i) {
 		const int shift = instr.getModShift();
-		const int conditionMask = ((1 << RANDOMX_CONDITION_BITS) - 1) << shift;
+		const int conditionMask = ((1 << RANDOMX_JUMP_BITS) - 1) << shift;
 		int reg = getConditionRegister();
 		int target = registerUsage[reg] + 1;
 		registerUsage[reg] = i;
@@ -579,7 +581,9 @@ namespace randomx {
 	}
 
 	void AssemblyGeneratorX86::h_COND_R(Instruction& instr, int i) {
+#if RANDOMX_JUMP
 		handleCondition(instr, i);
+#endif
 		asmCode << "\txor ecx, ecx" << std::endl;
 		asmCode << "\tcmp " << regR32[instr.src] << ", " << (int32_t)instr.getImm32() << std::endl;
 		asmCode << "\tset" << condition(instr) << " cl" << std::endl;
@@ -602,7 +606,6 @@ namespace randomx {
 #define INST_HANDLE(x) REPN(&AssemblyGeneratorX86::h_##x, WT(x))
 
 	InstructionGenerator AssemblyGeneratorX86::engine[256] = {
-		//Integer
 		INST_HANDLE(IADD_RS)
 		INST_HANDLE(IADD_M)
 		INST_HANDLE(ISUB_R)
@@ -620,27 +623,18 @@ namespace randomx {
 		INST_HANDLE(IROR_R)
 		INST_HANDLE(IROL_R)
 		INST_HANDLE(ISWAP_R)
-
-		//Common floating point
 		INST_HANDLE(FSWAP_R)
-
-		//Floating point group F
 		INST_HANDLE(FADD_R)
 		INST_HANDLE(FADD_M)
 		INST_HANDLE(FSUB_R)
 		INST_HANDLE(FSUB_M)
 		INST_HANDLE(FSCAL_R)
-
-		//Floating point group E
 		INST_HANDLE(FMUL_R)
 		INST_HANDLE(FDIV_M)
 		INST_HANDLE(FSQRT_R)
-
-		//Control
 		INST_HANDLE(COND_R)
 		INST_HANDLE(CFROUND)
 		INST_HANDLE(ISTORE)
-
 		INST_HANDLE(NOP)
 	};
 }
\ No newline at end of file
diff --git a/src/assembly_generator_x86.hpp b/src/assembly_generator_x86.hpp
index 60ea7ab..1c27364 100644
--- a/src/assembly_generator_x86.hpp
+++ b/src/assembly_generator_x86.hpp
@@ -19,6 +19,7 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 
 #pragma once
 
+#include "common.hpp"
 #include <sstream>
 
 namespace randomx {
@@ -48,40 +49,40 @@ namespace randomx {
 		void traceint(Instruction&);
 		void traceflt(Instruction&);
 		void tracenop(Instruction&);
-		void  h_IADD_RS(Instruction&, int);
-		void  h_IADD_M(Instruction&, int);
-		void  h_ISUB_R(Instruction&, int);
-		void  h_ISUB_M(Instruction&, int);
-		void  h_IMUL_R(Instruction&, int);
-		void  h_IMUL_M(Instruction&, int);
-		void  h_IMULH_R(Instruction&, int);
-		void  h_IMULH_M(Instruction&, int);
-		void  h_ISMULH_R(Instruction&, int);
-		void  h_ISMULH_M(Instruction&, int);
-		void  h_IMUL_RCP(Instruction&, int);
-		void  h_ISDIV_C(Instruction&, int);
-		void  h_INEG_R(Instruction&, int);
-		void  h_IXOR_R(Instruction&, int);
-		void  h_IXOR_M(Instruction&, int);
-		void  h_IROR_R(Instruction&, int);
-		void  h_IROL_R(Instruction&, int);
-		void  h_ISWAP_R(Instruction&, int);
-		void  h_FSWAP_R(Instruction&, int);
-		void  h_FADD_R(Instruction&, int);
-		void  h_FADD_M(Instruction&, int);
-		void  h_FSUB_R(Instruction&, int);
-		void  h_FSUB_M(Instruction&, int);
-		void  h_FSCAL_R(Instruction&, int);
-		void  h_FMUL_R(Instruction&, int);
-		void  h_FDIV_M(Instruction&, int);
-		void  h_FSQRT_R(Instruction&, int);
-		void  h_COND_R(Instruction&, int);
-		void  h_CFROUND(Instruction&, int);
-		void  h_ISTORE(Instruction&, int);
-		void  h_NOP(Instruction&, int);
+		void h_IADD_RS(Instruction&, int);
+		void h_IADD_M(Instruction&, int);
+		void h_ISUB_R(Instruction&, int);
+		void h_ISUB_M(Instruction&, int);
+		void h_IMUL_R(Instruction&, int);
+		void h_IMUL_M(Instruction&, int);
+		void h_IMULH_R(Instruction&, int);
+		void h_IMULH_M(Instruction&, int);
+		void h_ISMULH_R(Instruction&, int);
+		void h_ISMULH_M(Instruction&, int);
+		void h_IMUL_RCP(Instruction&, int);
+		void h_ISDIV_C(Instruction&, int);
+		void h_INEG_R(Instruction&, int);
+		void h_IXOR_R(Instruction&, int);
+		void h_IXOR_M(Instruction&, int);
+		void h_IROR_R(Instruction&, int);
+		void h_IROL_R(Instruction&, int);
+		void h_ISWAP_R(Instruction&, int);
+		void h_FSWAP_R(Instruction&, int);
+		void h_FADD_R(Instruction&, int);
+		void h_FADD_M(Instruction&, int);
+		void h_FSUB_R(Instruction&, int);
+		void h_FSUB_M(Instruction&, int);
+		void h_FSCAL_R(Instruction&, int);
+		void h_FMUL_R(Instruction&, int);
+		void h_FDIV_M(Instruction&, int);
+		void h_FSQRT_R(Instruction&, int);
+		void h_COND_R(Instruction&, int);
+		void h_CFROUND(Instruction&, int);
+		void h_ISTORE(Instruction&, int);
+		void h_NOP(Instruction&, int);
 
 		static InstructionGenerator engine[256];
 		std::stringstream asmCode;
-		int registerUsage[8];
+		int registerUsage[RegistersCount]; //per-register instruction index; handleCondition uses entry + 1 as its target
 	};
 }
\ No newline at end of file
diff --git a/src/common.hpp b/src/common.hpp
index f7a6b1a..3c483bf 100644
--- a/src/common.hpp
+++ b/src/common.hpp
@@ -51,8 +51,6 @@ namespace randomx {
 
 	static_assert(wtSum == 256,	"Sum of instruction frequencies must be 256.");
 
-	using addr_t = uint32_t;
-
 	constexpr int ArgonBlockSize = 1024;
 	constexpr int ArgonSaltSize = sizeof(RANDOMX_ARGON_SALT) - 1;
 	constexpr int CacheLineSize = RANDOMX_DATASET_ITEM_SIZE;
@@ -78,6 +76,10 @@ namespace randomx {
 #endif
 #endif
 
+#define RANDOMX_JUMP (RANDOMX_JUMP_BITS > 0) //true when RANDOMX_JUMP_BITS is nonzero; tested with #if to compile jump handling in or out
+
+	using addr_t = uint32_t;
+
 	using int_reg_t = uint64_t;
 
 	struct fpu_reg_t {
@@ -95,6 +97,7 @@ namespace randomx {
 	constexpr int ScratchpadL3Mask = (ScratchpadL3 - 1) * 8;
 	constexpr int ScratchpadL3Mask64 = (ScratchpadL3 / 8 - 1) * 64;
 	constexpr int RegistersCount = 8;
+	constexpr int RegisterCountFlt = RegistersCount / 2; //floating point handlers reduce dst/src modulo this value
 	constexpr int RegisterNeedsDisplacement = 5; //x86 r13 register
 	constexpr int RegisterNeedsSib = 4; //x86 r12 register
 
@@ -118,5 +121,3 @@ namespace randomx {
 	typedef void(*CacheDeallocFunc)(randomx_cache*);
 	typedef void(*CacheInitializeFunc)(randomx_cache*, const void*, size_t);
 }
-
-std::ostream& operator<<(std::ostream& os, const randomx::RegisterFile& rf);
diff --git a/src/configuration.h b/src/configuration.h
index e25b061..d155e4e 100644
--- a/src/configuration.h
+++ b/src/configuration.h
@@ -34,7 +34,10 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 //Number of random Cache accesses per Dataset item. Minimum is 2.
 #define RANDOMX_CACHE_ACCESSES     8
 
+//Target latency for SuperscalarHash (in cycles of the reference CPU).
 #define RANDOMX_SUPERSCALAR_LATENCY   170
+
+//The maximum size of a SuperscalarHash program (number of instructions).
 #define RANDOMX_SUPERSCALAR_MAX_SIZE  512
 
 //Dataset base size in bytes. Must be a power of 2.
@@ -61,8 +64,8 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 //Scratchpad L1 size in bytes. Must be a power of two and less than or equal to RANDOMX_SCRATCHPAD_L2.
 #define RANDOMX_SCRATCHPAD_L1      (16 * 1024)
 
-//How many register bits must be zero for a jump condition to be triggered
-#define RANDOMX_CONDITION_BITS     7
+//Number of register bits that must be zero for a jump condition to be triggered. If set to 0, jumps are disabled.
+#define RANDOMX_JUMP_BITS          7
 
 /*
 Instruction frequencies (per 256 opcodes)
diff --git a/src/dataset.cpp b/src/dataset.cpp
index 8321797..31c2adb 100644
--- a/src/dataset.cpp
+++ b/src/dataset.cpp
@@ -39,6 +39,8 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 #include "blake2/endian.h"
 #include "argon2.h"
 #include "argon2_core.h"
+#include "jit_compiler_x86.hpp"
+#include "intrin_portable.h"
 
 static_assert(RANDOMX_ARGON_MEMORY % (RANDOMX_ARGON_LANES * ARGON2_SYNC_POINTS) == 0, "RANDOMX_ARGON_MEMORY - invalid value");
 static_assert(ARGON2_BLOCK_SIZE == randomx::ArgonBlockSize, "Unpexpected value of ARGON2_BLOCK_SIZE");
@@ -146,6 +148,7 @@ namespace randomx {
 		rl[7] = rl[0] ^ superscalarAdd7;
 		for (unsigned i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) {
 			mixBlock = getMixBlock(registerValue, cache->memory);
+			PREFETCHNTA(mixBlock);
 			SuperscalarProgram& prog = cache->programs[i];
 
 			executeSuperscalar(rl, prog, &cache->reciprocalCache);
diff --git a/src/dataset.hpp b/src/dataset.hpp
index 4e072ff..4458017 100644
--- a/src/dataset.hpp
+++ b/src/dataset.hpp
@@ -24,7 +24,6 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 #include <type_traits>
 #include "common.hpp"
 #include "superscalar_program.hpp"
-#include "jit_compiler_x86.hpp"
 #include "allocator.hpp"
 
 /* Global scope for C binding */
@@ -33,6 +32,10 @@ struct randomx_dataset {
 	randomx::DatasetDeallocFunc dealloc;
 };
 
+namespace randomx {
+	class JitCompilerX86; //forward declaration; replaces the former jit_compiler_x86.hpp include
+}
+
 /* Global scope for C binding */
 struct randomx_cache {
 	uint8_t* memory = nullptr;
diff --git a/src/instruction.cpp b/src/instruction.cpp
index 9f1b681..e1dc557 100644
--- a/src/instruction.cpp
+++ b/src/instruction.cpp
@@ -29,12 +29,12 @@ namespace randomx {
 	}
 
 	void Instruction::genAddressReg(std::ostream& os) const {
-		os << ((mod % 4) ? "L1" : "L2") << "[r" << (int)src << std::showpos << (int32_t)getImm32() << std::noshowpos << "]";
+		os << (getModMem() ? "L1" : "L2") << "[r" << (int)src << std::showpos << (int32_t)getImm32() << std::noshowpos << "]";
 	}
 
 	void Instruction::genAddressRegDst(std::ostream& os) const {
 		if (getModCond())
-			os << ((mod % 4) ? "L1" : "L2");
+			os << (getModMem() ? "L1" : "L2");
 		else
 			os << "L3";
 		os << "[r" << (int)dst << std::showpos << (int32_t)getImm32() << std::noshowpos << "]";
@@ -49,7 +49,7 @@ namespace randomx {
 		if(dst == RegisterNeedsDisplacement) {
 			os << ", " << (int32_t)getImm32();
 		}
-		os << ", LSH " << (int)(mod % 4) << std::endl;
+		os << ", LSH " << (int)getModMem() << std::endl;
 	}
 
 	void Instruction::h_IADD_M(std::ostream& os) const {
@@ -65,7 +65,6 @@ namespace randomx {
 		}
 	}
 
-	//1 uOP
 	void Instruction::h_ISUB_R(std::ostream& os) const {
 		if (src != dst) {
 			os << "r" << (int)dst << ", r" << (int)src << std::endl;
@@ -197,57 +196,57 @@ namespace randomx {
 	}
 
 	void Instruction::h_FSWAP_R(std::ostream& os) const {
-		const char reg = (dst >= 4) ? 'e' : 'f';
-		auto dstIndex = dst % 4;
+		const char reg = (dst >= RegisterCountFlt) ? 'e' : 'f';
+		auto dstIndex = dst % RegisterCountFlt;
 		os << reg << dstIndex << std::endl;
 	}
 
 	void Instruction::h_FADD_R(std::ostream& os) const {
-		auto dstIndex = dst % 4;
-		auto srcIndex = src % 4;
+		auto dstIndex = dst % RegisterCountFlt;
+		auto srcIndex = src % RegisterCountFlt;
 		os << "f" << dstIndex << ", a" << srcIndex << std::endl;
 	}
 
 	void Instruction::h_FADD_M(std::ostream& os) const {
-		auto dstIndex = dst % 4;
+		auto dstIndex = dst % RegisterCountFlt;
 		os << "f" << dstIndex << ", ";
 		genAddressReg(os);
 		os << std::endl;
 	}
 
 	void Instruction::h_FSUB_R(std::ostream& os) const {
-		auto dstIndex = dst % 4;
-		auto srcIndex = src % 4;
+		auto dstIndex = dst % RegisterCountFlt;
+		auto srcIndex = src % RegisterCountFlt;
 		os << "f" << dstIndex << ", a" << srcIndex << std::endl;
 	}
 
 	void Instruction::h_FSUB_M(std::ostream& os) const {
-		auto dstIndex = dst % 4;
+		auto dstIndex = dst % RegisterCountFlt;
 		os << "f" << dstIndex << ", ";
 		genAddressReg(os);
 		os << std::endl;
 	}
 
 	void Instruction::h_FSCAL_R(std::ostream& os) const {
-		auto dstIndex = dst % 4;
+		auto dstIndex = dst % RegisterCountFlt;
 		os << "f" << dstIndex << std::endl;
 	}
 
 	void Instruction::h_FMUL_R(std::ostream& os) const {
-		auto dstIndex = dst % 4;
-		auto srcIndex = src % 4;
+		auto dstIndex = dst % RegisterCountFlt;
+		auto srcIndex = src % RegisterCountFlt;
 		os << "e" << dstIndex << ", a" << srcIndex << std::endl;
 	}
 
 	void Instruction::h_FDIV_M(std::ostream& os) const {
-		auto dstIndex = dst % 4;
+		auto dstIndex = dst % RegisterCountFlt;
 		os << "e" << dstIndex << ", ";
 		genAddressReg(os);
 		os << std::endl;
 	}
 
 	void Instruction::h_FSQRT_R(std::ostream& os) const {
-		auto dstIndex = dst % 4;
+		auto dstIndex = dst % RegisterCountFlt;
 		os << "e" << dstIndex << std::endl;
 	}
 
@@ -280,7 +279,7 @@ namespace randomx {
 	}
 
 	void Instruction::h_COND_R(std::ostream& os) const {
-		os << "r" << (int)dst << ", " << condition((mod >> 2) & 7) << "(r" << (int)src << ", " << (int32_t)getImm32() << "), LSH " << (int)(mod >> 5) << std::endl;
+		os << "r" << (int)dst << ", " << condition(getModCond()) << "(r" << (int)src << ", " << (int32_t)getImm32() << "), LSH " << (int)(getModShift()) << std::endl;
 	}
 
 	void  Instruction::h_ISTORE(std::ostream& os) const {
@@ -297,7 +296,6 @@ namespace randomx {
 #define INST_HANDLE(x) REPN(&Instruction::h_##x, WT(x))
 
 	const char* Instruction::names[256] = {
-		//Integer
 		INST_NAME(IADD_RS)
 		INST_NAME(IADD_M)
 		INST_NAME(ISUB_R)
@@ -314,33 +312,22 @@ namespace randomx {
 		INST_NAME(IXOR_M)
 		INST_NAME(IROR_R)
 		INST_NAME(ISWAP_R)
-
-		//Common floating point
 		INST_NAME(FSWAP_R)
-
-		//Floating point group F
 		INST_NAME(FADD_R)
 		INST_NAME(FADD_M)
 		INST_NAME(FSUB_R)
 		INST_NAME(FSUB_M)
 		INST_NAME(FSCAL_R)
-
-		//Floating point group E
 		INST_NAME(FMUL_R)
 		INST_NAME(FDIV_M)
 		INST_NAME(FSQRT_R)
-
-		//Control
 		INST_NAME(COND_R)
 		INST_NAME(CFROUND)
-
 		INST_NAME(ISTORE)
-
 		INST_NAME(NOP)
 	};
 
 	InstructionFormatter Instruction::engine[256] = {
-		//Integer
 		INST_HANDLE(IADD_RS)
 		INST_HANDLE(IADD_M)
 		INST_HANDLE(ISUB_R)
@@ -358,22 +345,15 @@ namespace randomx {
 		INST_HANDLE(IROR_R)
 		INST_HANDLE(IROL_R)
 		INST_HANDLE(ISWAP_R)
-
-		//Common floating point
 		INST_HANDLE(FSWAP_R)
-
-		//Floating point group F
 		INST_HANDLE(FADD_R)
 		INST_HANDLE(FADD_M)
 		INST_HANDLE(FSUB_R)
 		INST_HANDLE(FSUB_M)
 		INST_HANDLE(FSCAL_R)
-
-		//Floating point group E
 		INST_HANDLE(FMUL_R)
 		INST_HANDLE(FDIV_M)
 		INST_HANDLE(FSQRT_R)
-
 		INST_HANDLE(COND_R)
 		INST_HANDLE(CFROUND)
 		INST_HANDLE(ISTORE)
diff --git a/src/instruction.hpp b/src/instruction.hpp
index 0dc382f..f6dbc3b 100644
--- a/src/instruction.hpp
+++ b/src/instruction.hpp
@@ -103,36 +103,36 @@ namespace randomx {
 		void genAddressReg(std::ostream& os) const;
 		void genAddressImm(std::ostream& os) const;
 		void genAddressRegDst(std::ostream&) const;
-		void  h_IADD_RS(std::ostream&) const;
-		void  h_IADD_M(std::ostream&) const;
-		void  h_ISUB_R(std::ostream&) const;
-		void  h_ISUB_M(std::ostream&) const;
-		void  h_IMUL_R(std::ostream&) const;
-		void  h_IMUL_M(std::ostream&) const;
-		void  h_IMULH_R(std::ostream&) const;
-		void  h_IMULH_M(std::ostream&) const;
-		void  h_ISMULH_R(std::ostream&) const;
-		void  h_ISMULH_M(std::ostream&) const;
-		void  h_IMUL_RCP(std::ostream&) const;
-		void  h_INEG_R(std::ostream&) const;
-		void  h_IXOR_R(std::ostream&) const;
-		void  h_IXOR_M(std::ostream&) const;
-		void  h_IROR_R(std::ostream&) const;
-		void  h_IROL_R(std::ostream&) const;
-		void  h_ISWAP_R(std::ostream&) const;
-		void  h_FSWAP_R(std::ostream&) const;
-		void  h_FADD_R(std::ostream&) const;
-		void  h_FADD_M(std::ostream&) const;
-		void  h_FSUB_R(std::ostream&) const;
-		void  h_FSUB_M(std::ostream&) const;
-		void  h_FSCAL_R(std::ostream&) const;
-		void  h_FMUL_R(std::ostream&) const;
-		void  h_FDIV_M(std::ostream&) const;
-		void  h_FSQRT_R(std::ostream&) const;
-		void  h_COND_R(std::ostream&) const;
-		void  h_CFROUND(std::ostream&) const;
-		void  h_ISTORE(std::ostream&) const;
-		void  h_NOP(std::ostream&) const;
+		void h_IADD_RS(std::ostream&) const;
+		void h_IADD_M(std::ostream&) const;
+		void h_ISUB_R(std::ostream&) const;
+		void h_ISUB_M(std::ostream&) const;
+		void h_IMUL_R(std::ostream&) const;
+		void h_IMUL_M(std::ostream&) const;
+		void h_IMULH_R(std::ostream&) const;
+		void h_IMULH_M(std::ostream&) const;
+		void h_ISMULH_R(std::ostream&) const;
+		void h_ISMULH_M(std::ostream&) const;
+		void h_IMUL_RCP(std::ostream&) const;
+		void h_INEG_R(std::ostream&) const;
+		void h_IXOR_R(std::ostream&) const;
+		void h_IXOR_M(std::ostream&) const;
+		void h_IROR_R(std::ostream&) const;
+		void h_IROL_R(std::ostream&) const;
+		void h_ISWAP_R(std::ostream&) const;
+		void h_FSWAP_R(std::ostream&) const;
+		void h_FADD_R(std::ostream&) const;
+		void h_FADD_M(std::ostream&) const;
+		void h_FSUB_R(std::ostream&) const;
+		void h_FSUB_M(std::ostream&) const;
+		void h_FSCAL_R(std::ostream&) const;
+		void h_FMUL_R(std::ostream&) const;
+		void h_FDIV_M(std::ostream&) const;
+		void h_FSQRT_R(std::ostream&) const;
+		void h_COND_R(std::ostream&) const;
+		void h_CFROUND(std::ostream&) const;
+		void h_ISTORE(std::ostream&) const;
+		void h_NOP(std::ostream&) const;
 	};
 
 	static_assert(sizeof(Instruction) == 8, "Invalid size of struct randomx::Instruction");
diff --git a/src/intrin_portable.h b/src/intrin_portable.h
index 32aba08..a28ab66 100644
--- a/src/intrin_portable.h
+++ b/src/intrin_portable.h
@@ -318,18 +318,6 @@ constexpr uint64_t ieee_get_exponent_mask() {
 	return (uint64_t)(E + 1023U) << 52;
 }
 
-template<int E>
-__m128d ieee_set_exponent(__m128d x) {
-	static_assert(E > -1023, "Invalid exponent value");
-	constexpr uint64_t mantissaMask64 = (1ULL << 52) - 1;
-	const __m128d mantissaMask = _mm_castsi128_pd(_mm_set_epi64x(mantissaMask64, mantissaMask64));
-	constexpr uint64_t exponent64 = (uint64_t)(E + 1023U) << 52;
-	const __m128d exponentMask = _mm_castsi128_pd(_mm_set_epi64x(exponent64, exponent64));
-	x = _mm_and_pd(x, mantissaMask);
-	x = _mm_or_pd(x, exponentMask);
-	return x;
-}
-
 double loadDoublePortable(const void* addr);
 uint64_t mulh(uint64_t, uint64_t);
 int64_t smulh(int64_t, int64_t);
diff --git a/src/jit_compiler_x86.cpp b/src/jit_compiler_x86.cpp
index 2480aa2..7ada8e7 100644
--- a/src/jit_compiler_x86.cpp
+++ b/src/jit_compiler_x86.cpp
@@ -20,8 +20,6 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 #include <stdexcept>
 #include "jit_compiler_x86.hpp"
 
-#define RANDOMX_JUMP
-
 #if !defined(_M_X64) && !defined(__x86_64__)
 namespace randomx {
 
@@ -113,7 +111,6 @@ namespace randomx {
 	const uint8_t* codeLoopLoad = (uint8_t*)&randomx_program_loop_load;
 	const uint8_t* codeProgamStart = (uint8_t*)&randomx_program_start;
 	const uint8_t* codeReadDataset = (uint8_t*)&randomx_program_read_dataset;
-	const uint8_t* codeReadDatasetLight = (uint8_t*)&randomx_program_read_dataset_light;
 	const uint8_t* codeReadDatasetLightSshInit = (uint8_t*)&randomx_program_read_dataset_sshash_init;
 	const uint8_t* codeReadDatasetLightSshFin = (uint8_t*)&randomx_program_read_dataset_sshash_fin;
 	const uint8_t* codeDatasetInit = (uint8_t*)&randomx_dataset_init;
@@ -128,8 +125,7 @@ namespace randomx {
 
 	const int32_t prologueSize = codeLoopBegin - codePrologue;
 	const int32_t loopLoadSize = codeProgamStart - codeLoopLoad;
-	const int32_t readDatasetSize = codeReadDatasetLight - codeReadDataset;
-	const int32_t readDatasetLightSize = codeReadDatasetLightSshInit - codeReadDatasetLight;
+	const int32_t readDatasetSize = codeReadDatasetLightSshInit - codeReadDataset;
 	const int32_t readDatasetLightInitSize = codeReadDatasetLightSshFin - codeReadDatasetLightSshInit;
 	const int32_t readDatasetLightFinSize = codeLoopStore - codeReadDatasetLightSshFin;
 	const int32_t loopStoreSize = codeLoopEnd - codeLoopStore;
@@ -299,7 +295,7 @@ namespace randomx {
 	}
 
 	void JitCompilerX86::generateProgramPrologue(Program& prog, ProgramConfiguration& pcfg) {
-#ifdef RANDOMX_JUMP
+#if RANDOMX_JUMP
 		instructionOffsets.clear();
 		for (unsigned i = 0; i < 8; ++i) {
 			registerUsage[i] = -1;
@@ -336,7 +332,7 @@ namespace randomx {
 	}
 
 	void JitCompilerX86::generateCode(Instruction& instr, int i) {
-#ifdef RANDOMX_JUMP
+#if RANDOMX_JUMP
 		instructionOffsets.push_back(codePos);
 #endif
 		auto generator = engine[instr.opcode];
@@ -467,15 +463,6 @@ namespace randomx {
 
 	void JitCompilerX86::h_IADD_RS(Instruction& instr, int i) {
 		registerUsage[instr.dst] = i;
-		/*if (instr.src != instr.dst) {
-			emit(REX_ADD_RR);
-			emitByte(0xc0 + 8 * instr.dst + instr.src);
-		}
-		else {
-			emit(REX_81);
-			emitByte(0xc0 + instr.dst);
-			emit32(instr.getImm32());
-		}*/
 		emit(REX_LEA);
 		if (instr.dst == RegisterNeedsDisplacement)
 			emitByte(0xac);
@@ -505,14 +492,6 @@ namespace randomx {
 		emitByte((scale << 6) | (index << 3) | base);
 	}
 
-	void JitCompilerX86::h_IADD_RC(Instruction& instr, int i) {
-		registerUsage[instr.dst] = i;
-		emit(REX_LEA);
-		emitByte(0x84 + 8 * instr.dst);
-		genSIB(0, instr.src, instr.dst);
-		emit32(instr.getImm32());
-	}
-
 	void JitCompilerX86::h_ISUB_R(Instruction& instr, int i) {
 		registerUsage[instr.dst] = i;
 		if (instr.src != instr.dst) {
@@ -541,14 +520,6 @@ namespace randomx {
 		}
 	}
 
-	void JitCompilerX86::h_IMUL_9C(Instruction& instr, int i) {
-		registerUsage[instr.dst] = i;
-		emit(REX_LEA);
-		emitByte(0x84 + 8 * instr.dst);
-		genSIB(3, instr.dst, instr.dst);
-		emit32(instr.getImm32());
-	}
-
 	void JitCompilerX86::h_IMUL_R(Instruction& instr, int i) {
 		registerUsage[instr.dst] = i;
 		if (instr.src != instr.dst) {
@@ -645,10 +616,6 @@ namespace randomx {
 		}
 	}
 
-	void JitCompilerX86::h_ISDIV_C(Instruction& instr, int i) {
-
-	}
-
 	void JitCompilerX86::h_INEG_R(Instruction& instr, int i) {
 		registerUsage[instr.dst] = i;
 		emit(REX_NEG);
@@ -729,17 +696,14 @@ namespace randomx {
 	}
 
 	void JitCompilerX86::h_FADD_R(Instruction& instr, int i) {
-		instr.dst %= 4;
-		instr.src %= 4;
+		instr.dst %= RegisterCountFlt;
+		instr.src %= RegisterCountFlt;
 		emit(REX_ADDPD);
 		emitByte(0xc0 + instr.src + 8 * instr.dst);
-		//emit(REX_PADD);
-		//emitByte(PADD_OPCODES[instr.mod % 4]);
-		//emitByte(0xf8 + instr.dst);
 	}
 
 	void JitCompilerX86::h_FADD_M(Instruction& instr, int i) {
-		instr.dst %= 4;
+		instr.dst %= RegisterCountFlt;
 		genAddressReg(instr);
 		emit(REX_CVTDQ2PD_XMM12);
 		emit(REX_ADDPD);
@@ -747,17 +711,14 @@ namespace randomx {
 	}
 
 	void JitCompilerX86::h_FSUB_R(Instruction& instr, int i) {
-		instr.dst %= 4;
-		instr.src %= 4;
+		instr.dst %= RegisterCountFlt;
+		instr.src %= RegisterCountFlt;
 		emit(REX_SUBPD);
 		emitByte(0xc0 + instr.src + 8 * instr.dst);
-		//emit(REX_PADD);
-		//emitByte(PADD_OPCODES[instr.mod % 4]);
-		//emitByte(0xf8 + instr.dst);
 	}
 
 	void JitCompilerX86::h_FSUB_M(Instruction& instr, int i) {
-		instr.dst %= 4;
+		instr.dst %= RegisterCountFlt;
 		genAddressReg(instr);
 		emit(REX_CVTDQ2PD_XMM12);
 		emit(REX_SUBPD);
@@ -765,40 +726,20 @@ namespace randomx {
 	}
 
 	void JitCompilerX86::h_FSCAL_R(Instruction& instr, int i) {
-		instr.dst %= 4;
+		instr.dst %= RegisterCountFlt;
 		emit(REX_XORPS);
 		emitByte(0xc7 + 8 * instr.dst);
 	}
 
 	void JitCompilerX86::h_FMUL_R(Instruction& instr, int i) {
-		instr.dst %= 4;
-		instr.src %= 4;
+		instr.dst %= RegisterCountFlt;
+		instr.src %= RegisterCountFlt;
 		emit(REX_MULPD);
 		emitByte(0xe0 + instr.src + 8 * instr.dst);
 	}
 
-	void JitCompilerX86::h_FMUL_M(Instruction& instr, int i) {
-		instr.dst %= 4;
-		genAddressReg(instr);
-		emit(REX_CVTDQ2PD_XMM12);
-		emit(REX_ANDPS_XMM12);
-		emit(REX_MULPD);
-		emitByte(0xe4 + 8 * instr.dst);
-		emit(REX_MAXPD);
-		emitByte(0xe5 + 8 * instr.dst);
-	}
-
-	void JitCompilerX86::h_FDIV_R(Instruction& instr, int i) {
-		instr.dst %= 4;
-		instr.src %= 4;
-		emit(REX_DIVPD);
-		emitByte(0xe0 + instr.src + 8 * instr.dst);
-		emit(REX_MAXPD);
-		emitByte(0xe5 + 8 * instr.dst);
-	}
-
 	void JitCompilerX86::h_FDIV_M(Instruction& instr, int i) {
-		instr.dst %= 4;
+		instr.dst %= RegisterCountFlt;
 		genAddressReg(instr);
 		emit(REX_CVTDQ2PD_XMM12);
 		emit(REX_ANDPS_XMM12);
@@ -807,7 +748,7 @@ namespace randomx {
 	}
 
 	void JitCompilerX86::h_FSQRT_R(Instruction& instr, int i) {
-		instr.dst %= 4;
+		instr.dst %= RegisterCountFlt;
 		emit(SQRTPD);
 		emitByte(0xe4 + 9 * instr.dst);
 	}
@@ -883,7 +824,7 @@ namespace randomx {
 
 	void JitCompilerX86::handleCondition(Instruction& instr, int i) {
 		const int shift = instr.getModShift();
-		const int conditionMask = ((1 << RANDOMX_CONDITION_BITS) - 1) << shift;
+		const int conditionMask = ((1 << RANDOMX_JUMP_BITS) - 1) << shift;
 		int reg = getConditionRegister();
 		int target = registerUsage[reg] + 1;
 		emit(REX_ADD_I);
@@ -900,7 +841,7 @@ namespace randomx {
 	}
 
 	void JitCompilerX86::h_COND_R(Instruction& instr, int i) {
-#ifdef RANDOMX_JUMP
+#if RANDOMX_JUMP
 		handleCondition(instr, i);
 #endif
 		emit(XOR_ECX_ECX);
@@ -914,40 +855,15 @@ namespace randomx {
 		emitByte(0xc1 + 8 * instr.dst);
 	}
 
-	void JitCompilerX86::h_COND_M(Instruction& instr, int i) {
-#ifdef RANDOMX_JUMP
-		handleCondition(instr, i);
-#endif
-		emit(XOR_ECX_ECX);
-		genAddressReg(instr);
-		emit(REX_CMP_M32I);
-		emit32(instr.getImm32());
-		emitByte(0x0f);
-		emitByte(condition(instr));
-		emitByte(0xc1);
-		emit(REX_ADD_RM);
-		emitByte(0xc1 + 8 * instr.dst);
-	}
-
 	void JitCompilerX86::h_ISTORE(Instruction& instr, int i) {
 		genAddressRegDst(instr);
-		//if (instr.getModCond())
 		emit(REX_MOV_MR);
-		//else
-		//	emit(MOVNTI);
-		emitByte(0x04 + 8 * instr.src);
-		emitByte(0x06);
-	}
-
-	void JitCompilerX86::h_FSTORE(Instruction& instr, int i) {
-		genAddressRegDst(instr, true);
-		emit(MOVAPD);
 		emitByte(0x04 + 8 * instr.src);
 		emitByte(0x06);
 	}
 
 	void JitCompilerX86::h_NOP(Instruction& instr, int i) {
-		emitByte(0x90);
+		emit(NOP1);
 	}
 
 #include "instruction_weights.hpp"
diff --git a/src/jit_compiler_x86.hpp b/src/jit_compiler_x86.hpp
index 8bccb1f..964dd93 100644
--- a/src/jit_compiler_x86.hpp
+++ b/src/jit_compiler_x86.hpp
@@ -110,43 +110,36 @@ namespace randomx {
 			codePos += count;
 		}
 
-		void  h_IADD_RS(Instruction&, int);
-		void  h_IADD_M(Instruction&, int);
-		void  h_IADD_RC(Instruction&, int);
-		void  h_ISUB_R(Instruction&, int);
-		void  h_ISUB_M(Instruction&, int);
-		void  h_IMUL_9C(Instruction&, int);
-		void  h_IMUL_R(Instruction&, int);
-		void  h_IMUL_M(Instruction&, int);
-		void  h_IMULH_R(Instruction&, int);
-		void  h_IMULH_M(Instruction&, int);
-		void  h_ISMULH_R(Instruction&, int);
-		void  h_ISMULH_M(Instruction&, int);
-		void  h_IMUL_RCP(Instruction&, int);
-		void  h_ISDIV_C(Instruction&, int);
-		void  h_INEG_R(Instruction&, int);
-		void  h_IXOR_R(Instruction&, int);
-		void  h_IXOR_M(Instruction&, int);
-		void  h_IROR_R(Instruction&, int);
-		void  h_IROL_R(Instruction&, int);
-		void  h_ISWAP_R(Instruction&, int);
-		void  h_FSWAP_R(Instruction&, int);
-		void  h_FADD_R(Instruction&, int);
-		void  h_FADD_M(Instruction&, int);
-		void  h_FSUB_R(Instruction&, int);
-		void  h_FSUB_M(Instruction&, int);
-		void  h_FSCAL_R(Instruction&, int);
-		void  h_FMUL_R(Instruction&, int);
-		void  h_FMUL_M(Instruction&, int);
-		void  h_FDIV_R(Instruction&, int);
-		void  h_FDIV_M(Instruction&, int);
-		void  h_FSQRT_R(Instruction&, int);
-		void  h_COND_R(Instruction&, int);
-		void  h_COND_M(Instruction&, int);
-		void  h_CFROUND(Instruction&, int);
-		void  h_ISTORE(Instruction&, int);
-		void  h_FSTORE(Instruction&, int);
-		void  h_NOP(Instruction&, int);
+		void h_IADD_RS(Instruction&, int);
+		void h_IADD_M(Instruction&, int);
+		void h_ISUB_R(Instruction&, int);
+		void h_ISUB_M(Instruction&, int);
+		void h_IMUL_R(Instruction&, int);
+		void h_IMUL_M(Instruction&, int);
+		void h_IMULH_R(Instruction&, int);
+		void h_IMULH_M(Instruction&, int);
+		void h_ISMULH_R(Instruction&, int);
+		void h_ISMULH_M(Instruction&, int);
+		void h_IMUL_RCP(Instruction&, int);
+		void h_INEG_R(Instruction&, int);
+		void h_IXOR_R(Instruction&, int);
+		void h_IXOR_M(Instruction&, int);
+		void h_IROR_R(Instruction&, int);
+		void h_IROL_R(Instruction&, int);
+		void h_ISWAP_R(Instruction&, int);
+		void h_FSWAP_R(Instruction&, int);
+		void h_FADD_R(Instruction&, int);
+		void h_FADD_M(Instruction&, int);
+		void h_FSUB_R(Instruction&, int);
+		void h_FSUB_M(Instruction&, int);
+		void h_FSCAL_R(Instruction&, int);
+		void h_FMUL_R(Instruction&, int);
+		void h_FDIV_M(Instruction&, int);
+		void h_FSQRT_R(Instruction&, int);
+		void h_COND_R(Instruction&, int);
+		void h_CFROUND(Instruction&, int);
+		void h_ISTORE(Instruction&, int);
+		void h_NOP(Instruction&, int);
 	};
 
 }
\ No newline at end of file
diff --git a/src/jit_compiler_x86_static.S b/src/jit_compiler_x86_static.S
index 3b8e82e..04dbaa9 100644
--- a/src/jit_compiler_x86_static.S
+++ b/src/jit_compiler_x86_static.S
@@ -31,7 +31,6 @@
 .global DECL(randomx_program_loop_load)
 .global DECL(randomx_program_start)
 .global DECL(randomx_program_read_dataset)
-.global DECL(randomx_program_read_dataset_light)
 .global DECL(randomx_program_read_dataset_sshash_init)
 .global DECL(randomx_program_read_dataset_sshash_fin)
 .global DECL(randomx_program_loop_store)
@@ -66,9 +65,6 @@ DECL(randomx_program_start):
 DECL(randomx_program_read_dataset):
 	#include "asm/program_read_dataset.inc"
 
-DECL(randomx_program_read_dataset_light):
-	#include "asm/program_read_dataset_light.inc"
-
 DECL(randomx_program_read_dataset_sshash_init):
 	#include "asm/program_read_dataset_sshash_init.inc"
 
diff --git a/src/jit_compiler_x86_static.asm b/src/jit_compiler_x86_static.asm
index 3153a8f..92d2ebd 100644
--- a/src/jit_compiler_x86_static.asm
+++ b/src/jit_compiler_x86_static.asm
@@ -24,7 +24,6 @@ PUBLIC randomx_program_loop_begin
 PUBLIC randomx_program_loop_load
 PUBLIC randomx_program_start
 PUBLIC randomx_program_read_dataset
-PUBLIC randomx_program_read_dataset_light
 PUBLIC randomx_program_read_dataset_sshash_init
 PUBLIC randomx_program_read_dataset_sshash_fin
 PUBLIC randomx_dataset_init
@@ -62,10 +61,6 @@ randomx_program_read_dataset PROC
 	include asm/program_read_dataset.inc
 randomx_program_read_dataset ENDP
 
-randomx_program_read_dataset_light PROC
-	include asm/program_read_dataset_light.inc
-randomx_program_read_dataset_light ENDP
-
 randomx_program_read_dataset_sshash_init PROC
 	include asm/program_read_dataset_sshash_init.inc
 randomx_program_read_dataset_sshash_init ENDP
diff --git a/src/jit_compiler_x86_static.hpp b/src/jit_compiler_x86_static.hpp
index a3ce44f..09b4703 100644
--- a/src/jit_compiler_x86_static.hpp
+++ b/src/jit_compiler_x86_static.hpp
@@ -25,7 +25,6 @@ extern "C" {
 	void randomx_program_loop_load();
 	void randomx_program_start();
 	void randomx_program_read_dataset();
-	void randomx_program_read_dataset_light();
 	void randomx_program_read_dataset_sshash_init();
 	void randomx_program_read_dataset_sshash_fin();
 	void randomx_program_loop_store();
diff --git a/src/virtual_machine.cpp b/src/virtual_machine.cpp
index e97fad7..3707ba7 100644
--- a/src/virtual_machine.cpp
+++ b/src/virtual_machine.cpp
@@ -76,22 +76,6 @@ void randomx_vm::initialize() {
 	store64(&config.eMask[1], (program.getEntropy(15) & mask22bit) | maskExp240);
 }
 
-//TODO
-std::ostream& operator<<(std::ostream& os, const randomx::RegisterFile& rf) {
-	for (int i = 0; i < randomx::RegistersCount; ++i)
-		os << std::hex << "r" << i << " = " << rf.r[i] << std::endl << std::dec;
-	for (int i = 0; i < 4; ++i)
-		os << std::hex << "f" << i << " = " << *(uint64_t*)&rf.f[i].hi << " (" << rf.f[i].hi << ")" << std::endl
-		<< "   = " << *(uint64_t*)&rf.f[i].lo << " (" << rf.f[i].lo << ")" << std::endl << std::dec;
-	for (int i = 0; i < 4; ++i)
-		os << std::hex << "e" << i << " = " << *(uint64_t*)&rf.e[i].hi << " (" << rf.e[i].hi << ")" << std::endl
-		<< "   = " << *(uint64_t*)&rf.e[i].lo << " (" << rf.e[i].lo << ")" << std::endl << std::dec;
-	for (int i = 0; i < 4; ++i)
-		os << std::hex << "a" << i << " = " << *(uint64_t*)&rf.a[i].hi << " (" << rf.a[i].hi << ")" << std::endl
-		<< "   = " << *(uint64_t*)&rf.a[i].lo << " (" << rf.a[i].lo << ")" << std::endl << std::dec;
-	return os;
-}
-
 namespace randomx {
 
 	alignas(16) volatile static __m128i aesDummy;
diff --git a/src/vm_interpreted.cpp b/src/vm_interpreted.cpp
index a5bba0f..2f69855 100644
--- a/src/vm_interpreted.cpp
+++ b/src/vm_interpreted.cpp
@@ -17,10 +17,6 @@ You should have received a copy of the GNU General Public License
 along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 */
 
-//#define TRACE
-//#define FPUCHECK
-#define RANDOMX_JUMP
-
 #include <iostream>
 #include <iomanip>
 #include <stdexcept>
@@ -33,12 +29,6 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 #include "intrin_portable.h"
 #include "reciprocal.h"
 
-#ifdef FPUCHECK
-constexpr bool fpuCheck = true;
-#else
-constexpr bool fpuCheck = false;
-#endif
-
 namespace randomx {
 
 	static int_reg_t Zero = 0;
@@ -53,49 +43,16 @@ namespace randomx {
 	void InterpretedVm<Allocator, softAes>::run(void* seed) {
 		VmBase<Allocator, softAes>::generateProgram(seed);
 		randomx_vm::initialize();
-		for (unsigned i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) {
-			program(i).src %= RegistersCount;
-			program(i).dst %= RegistersCount;
-		}
 		execute();
 	}
 
 	template<class Allocator, bool softAes>
-	void InterpretedVm<Allocator, softAes>::executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
-		for (int ic = 0; ic < RANDOMX_PROGRAM_SIZE; ++ic) {
-			executeBytecode(ic, r, f, e, a);
+	void InterpretedVm<Allocator, softAes>::executeBytecode(int_reg_t(&r)[RegistersCount], __m128d (&f)[RegisterCountFlt], __m128d (&e)[RegisterCountFlt], __m128d (&a)[RegisterCountFlt]) {
+		for (int pc = 0; pc < RANDOMX_PROGRAM_SIZE; ++pc) {
+			executeBytecode(pc, r, f, e, a);
 		}
 	}
 
-	static void print(int_reg_t r) {
-		std::cout << std::hex << std::setw(16) << std::setfill('0') << r << std::endl;
-	}
-
-	static void print(__m128d f) {
-		uint64_t lo = *(((uint64_t*)&f) + 0);
-		uint64_t hi = *(((uint64_t*)&f) + 1);
-		std::cout << std::hex << std::setw(16) << std::setfill('0') << hi << '-' << std::hex << std::setw(16) << std::setfill('0') << lo << std::endl;
-	}
-
-	static void printState(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
-		for (int i = 0; i < 8; ++i) {
-			std::cout << "r" << i << " = "; print(r[i]);
-		}
-		for (int i = 0; i < 4; ++i) {
-			std::cout << "f" << i << " = "; print(f[i]);
-		}
-		for (int i = 0; i < 4; ++i) {
-			std::cout << "e" << i << " = "; print(e[i]);
-		}
-		for (int i = 0; i < 4; ++i) {
-			std::cout << "a" << i << " = "; print(a[i]);
-		}
-	}
-
-	static bool isDenormal(double x) {
-		return std::fpclassify(x) == FP_SUBNORMAL;
-	}
-
 	template<class Allocator, bool softAes>
 	FORCE_INLINE void* InterpretedVm<Allocator, softAes>::getScratchpadAddress(InstructionByteCode& ibc) {
 		uint32_t addr = (*ibc.isrc + ibc.imm) & ibc.memMask;
@@ -113,9 +70,8 @@ namespace randomx {
 	}
 
 	template<class Allocator, bool softAes>
-	void InterpretedVm<Allocator, softAes>::executeBytecode(int& ic, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
-		auto& ibc = byteCode[ic];
-		if (trace && ibc.type != InstructionType::NOP) std::cout << std::dec << std::setw(3) << ic << " " << program(ic);
+	void InterpretedVm<Allocator, softAes>::executeBytecode(int& pc, int_reg_t(&r)[RegistersCount], __m128d (&f)[RegisterCountFlt], __m128d (&e)[RegisterCountFlt], __m128d (&a)[RegisterCountFlt]) {
+		auto& ibc = byteCode[pc];
 		switch (ibc.type)
 		{
 			case InstructionType::IADD_RS: {
@@ -225,11 +181,11 @@ namespace randomx {
 			} break;
 
 			case InstructionType::COND_R: {
-#ifdef RANDOMX_JUMP
+#if RANDOMX_JUMP
 				*ibc.creg += (1 << ibc.shift);
-				const uint64_t conditionMask = ((1ULL << RANDOMX_CONDITION_BITS) - 1) << ibc.shift;
+				const uint64_t conditionMask = ((1ULL << RANDOMX_JUMP_BITS) - 1) << ibc.shift;
 				if ((*ibc.creg & conditionMask) == 0) {
-					ic = ibc.target;
+					pc = ibc.target;
 					break;
 				}
 #endif
@@ -251,50 +207,23 @@ namespace randomx {
 			default:
 				UNREACHABLE;
 		}
-		if (trace && ibc.type != InstructionType::NOP) {
-			if(ibc.type < 20 || ibc.type == 31 || ibc.type == 32)
-				print(*ibc.idst);
-			else //if(ibc.type >= 20 && ibc.type <= 30)
-				print(0);
-		}
-#ifdef FPUCHECK
-		if (ibc.type >= 26 && ibc.type <= 30) {
-			double lo = *(((double*)ibc.fdst) + 0);
-			double hi = *(((double*)ibc.fdst) + 1);
-			if (lo <= 0 || hi <= 0) {
-				std::stringstream ss;
-				ss << "Underflow in operation " << ibc.type;
-				printState(r, f, e, a);
-				throw std::runtime_error(ss.str());
-			}
-		}
-#endif
 	}
 
 	template<class Allocator, bool softAes>
 	void InterpretedVm<Allocator, softAes>::execute() {
-		int_reg_t r[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
-		__m128d f[4];
-		__m128d e[4];
-		__m128d a[4];
+		int_reg_t r[RegistersCount] = { 0 };
+		__m128d f[RegisterCountFlt];
+		__m128d e[RegisterCountFlt];
+		__m128d a[RegisterCountFlt];
 
-		a[0] = _mm_load_pd(&reg.a[0].lo);
-		a[1] = _mm_load_pd(&reg.a[1].lo);
-		a[2] = _mm_load_pd(&reg.a[2].lo);
-		a[3] = _mm_load_pd(&reg.a[3].lo);
+		for(unsigned i = 0; i < RegisterCountFlt; ++i)
+			a[i] = _mm_load_pd(&reg.a[i].lo);
 
 		precompileProgram(r, f, e, a);
 
 		uint32_t spAddr0 = mem.mx;
 		uint32_t spAddr1 = mem.ma;
 
-		if (trace) {
-			std::cout << "execute (reg: r" << config.readReg0 << ", r" << config.readReg1 << ", r" << config.readReg2 << ", r" << config.readReg3 << ")" << std::endl;
-			std::cout << "spAddr " << std::hex << std::setw(8) << std::setfill('0') << spAddr1 << " / " << std::setw(8) << std::setfill('0') << spAddr0 << std::endl;
-			std::cout << "ma/mx " << std::hex << std::setw(8) << std::setfill('0') << mem.ma << std::setw(8) << std::setfill('0') << mem.mx << std::endl;
-			printState(r, f, e, a);
-		}
-
 		for(unsigned ic = 0; ic < RANDOMX_PROGRAM_ITERATIONS; ++ic) {
 			uint64_t spMix = r[config.readReg0] ^ r[config.readReg1];
 			spAddr0 ^= spMix;
@@ -302,31 +231,14 @@ namespace randomx {
 			spAddr1 ^= spMix >> 32;
 			spAddr1 &= ScratchpadL3Mask64;
 			
-			r[0] ^= load64(scratchpad + spAddr0 + 0);
-			r[1] ^= load64(scratchpad + spAddr0 + 8);
-			r[2] ^= load64(scratchpad + spAddr0 + 16);
-			r[3] ^= load64(scratchpad + spAddr0 + 24);
-			r[4] ^= load64(scratchpad + spAddr0 + 32);
-			r[5] ^= load64(scratchpad + spAddr0 + 40);
-			r[6] ^= load64(scratchpad + spAddr0 + 48);
-			r[7] ^= load64(scratchpad + spAddr0 + 56);
+			for (unsigned i = 0; i < RegistersCount; ++i)
+				r[i] ^= load64(scratchpad + spAddr0 + 8 * i);
 
-			f[0] = load_cvt_i32x2(scratchpad + spAddr1 + 0);
-			f[1] = load_cvt_i32x2(scratchpad + spAddr1 + 8);
-			f[2] = load_cvt_i32x2(scratchpad + spAddr1 + 16);
-			f[3] = load_cvt_i32x2(scratchpad + spAddr1 + 24);
-			e[0] = maskRegisterExponentMantissa(load_cvt_i32x2(scratchpad + spAddr1 + 32));
-			e[1] = maskRegisterExponentMantissa(load_cvt_i32x2(scratchpad + spAddr1 + 40));
-			e[2] = maskRegisterExponentMantissa(load_cvt_i32x2(scratchpad + spAddr1 + 48));
-			e[3] = maskRegisterExponentMantissa(load_cvt_i32x2(scratchpad + spAddr1 + 56));
+			for (unsigned i = 0; i < RegisterCountFlt; ++i)
+				f[i] = load_cvt_i32x2(scratchpad + spAddr1 + 8 * i);
 
-			if (trace) {
-				std::cout << "iteration " << std::dec << ic << std::endl;
-				std::cout << "spAddr " << std::hex << std::setw(8) << std::setfill('0') << spAddr1 << " / " << std::setw(8) << std::setfill('0') << spAddr0 << std::endl;
-				std::cout << "ma/mx " << std::hex << std::setw(8) << std::setfill('0') << mem.ma << std::setw(8) << std::setfill('0') << mem.mx << std::endl;
-				printState(r, f, e, a);
-				std::cout << "-----------------------------------" << std::endl;
-			}
+			for (unsigned i = 0; i < RegisterCountFlt; ++i)
+				e[i] = maskRegisterExponentMantissa(load_cvt_i32x2(scratchpad + spAddr1 + 8 * (RegisterCountFlt + i)));
 
 			executeBytecode(r, f, e, a);
 
@@ -335,72 +247,33 @@ namespace randomx {
 			datasetRead(datasetOffset + mem.ma, r);
 			std::swap(mem.mx, mem.ma);
 
-			if (trace) {
-				std::cout << "iteration " << std::dec << ic << std::endl;
-				std::cout << "spAddr " << std::hex << std::setw(8) << std::setfill('0') << spAddr1 << " / " << std::setw(8) << std::setfill('0') << spAddr0 << std::endl;
-				std::cout << "ma/mx " << std::hex << std::setw(8) << std::setfill('0') << mem.ma << std::setw(8) << std::setfill('0') << mem.mx << std::endl;
-				printState(r, f, e, a);
-				std::cout << "===================================" << std::endl;
-			}
+			for (unsigned i = 0; i < RegistersCount; ++i)
+				store64(scratchpad + spAddr1 + 8 * i, r[i]);
 
-			store64(scratchpad + spAddr1 + 0, r[0]);
-			store64(scratchpad + spAddr1 + 8, r[1]);
-			store64(scratchpad + spAddr1 + 16, r[2]);
-			store64(scratchpad + spAddr1 + 24, r[3]);
-			store64(scratchpad + spAddr1 + 32, r[4]);
-			store64(scratchpad + spAddr1 + 40, r[5]);
-			store64(scratchpad + spAddr1 + 48, r[6]);
-			store64(scratchpad + spAddr1 + 56, r[7]);
+			for (unsigned i = 0; i < RegisterCountFlt; ++i)
+				f[i] = _mm_xor_pd(f[i], e[i]);
 
-			f[0] = _mm_xor_pd(f[0], e[0]);
-			f[1] = _mm_xor_pd(f[1], e[1]);
-			f[2] = _mm_xor_pd(f[2], e[2]);
-			f[3] = _mm_xor_pd(f[3], e[3]);
-
-#ifdef FPUCHECK
-			for(int i = 0; i < 4; ++i) {
-				double lo = *(((double*)&f[i]) + 0);
-				double hi = *(((double*)&f[i]) + 1);
-				if (isDenormal(lo) || isDenormal(hi)) {
-					std::stringstream ss;
-					ss << "Denormal f" << i;
-					throw std::runtime_error(ss.str());
-				}
-			}
-#endif
-
-			_mm_store_pd((double*)(scratchpad + spAddr0 + 0), f[0]);
-			_mm_store_pd((double*)(scratchpad + spAddr0 + 16), f[1]);
-			_mm_store_pd((double*)(scratchpad + spAddr0 + 32), f[2]);
-			_mm_store_pd((double*)(scratchpad + spAddr0 + 48), f[3]);
+			for (unsigned i = 0; i < RegisterCountFlt; ++i)
+				_mm_store_pd((double*)(scratchpad + spAddr0 + 16 * i), f[i]);
 
 			spAddr0 = 0;
 			spAddr1 = 0;
 		}
 
-		store64(&reg.r[0], r[0]);
-		store64(&reg.r[1], r[1]);
-		store64(&reg.r[2], r[2]);
-		store64(&reg.r[3], r[3]);
-		store64(&reg.r[4], r[4]);
-		store64(&reg.r[5], r[5]);
-		store64(&reg.r[6], r[6]);
-		store64(&reg.r[7], r[7]);
+		for (unsigned i = 0; i < RegistersCount; ++i)
+			store64(&reg.r[i], r[i]);
 
-		_mm_store_pd(&reg.f[0].lo, f[0]);
-		_mm_store_pd(&reg.f[1].lo, f[1]);
-		_mm_store_pd(&reg.f[2].lo, f[2]);
-		_mm_store_pd(&reg.f[3].lo, f[3]);
-		_mm_store_pd(&reg.e[0].lo, e[0]);
-		_mm_store_pd(&reg.e[1].lo, e[1]);
-		_mm_store_pd(&reg.e[2].lo, e[2]);
-		_mm_store_pd(&reg.e[3].lo, e[3]);
+		for (unsigned i = 0; i < RegisterCountFlt; ++i)
+			_mm_store_pd(&reg.f[i].lo, f[i]);
+
+		for (unsigned i = 0; i < RegisterCountFlt; ++i)
+			_mm_store_pd(&reg.e[i].lo, e[i]);
 	}
 
-	static int getConditionRegister(int(&registerUsage)[8]) {
+	static int getConditionRegister(int(&registerUsage)[RegistersCount]) {
 		int min = INT_MAX;
 		int minIndex;
-		for (unsigned i = 0; i < 8; ++i) {
+		for (unsigned i = 0; i < RegistersCount; ++i) {
 			if (registerUsage[i] < min) {
 				min = registerUsage[i];
 				minIndex = i;
@@ -410,7 +283,7 @@ namespace randomx {
 	}
 
 	template<class Allocator, bool softAes>
-	void InterpretedVm<Allocator, softAes>::datasetRead(uint32_t address, int_reg_t(&r)[8]) {
+	void InterpretedVm<Allocator, softAes>::datasetRead(uint32_t address, int_reg_t(&r)[RegistersCount]) {
 		uint64_t* datasetLine = (uint64_t*)(mem.memory + address);
 		for (int i = 0; i < RegistersCount; ++i)
 			r[i] ^= datasetLine[i];
@@ -419,9 +292,9 @@ namespace randomx {
 #include "instruction_weights.hpp"
 
 	template<class Allocator, bool softAes>
-	void InterpretedVm<Allocator, softAes>::precompileProgram(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
-		int registerUsage[8];
-		for (unsigned i = 0; i < 8; ++i) {
+	void InterpretedVm<Allocator, softAes>::precompileProgram(int_reg_t(&r)[RegistersCount], __m128d (&f)[RegisterCountFlt], __m128d (&e)[RegisterCountFlt], __m128d (&a)[RegisterCountFlt]) {
+		int registerUsage[RegistersCount];
+		for (unsigned i = 0; i < RegistersCount; ++i) {
 			registerUsage[i] = -1;
 		}
 		for (unsigned i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) {
@@ -443,7 +316,7 @@ namespace randomx {
 						ibc.shift = instr.getModMem();
 						ibc.imm = signExtend2sCompl(instr.getImm32());
 					}
-					registerUsage[instr.dst] = i;
+					registerUsage[dst] = i;
 				} break;
 
 				CASE_REP(IADD_M) {
@@ -452,7 +325,7 @@ namespace randomx {
 					ibc.type = InstructionType::IADD_M;
 					ibc.idst = &r[dst];
 					ibc.imm = signExtend2sCompl(instr.getImm32());
-					if (instr.src != instr.dst) {
+					if (src != dst) {
 						ibc.isrc = &r[src];
 						ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
 					}
@@ -460,7 +333,7 @@ namespace randomx {
 						ibc.isrc = &Zero;
 						ibc.memMask = ScratchpadL3Mask;
 					}
-					registerUsage[instr.dst] = i;
+					registerUsage[dst] = i;
 				} break;
 
 				CASE_REP(ISUB_R) {
@@ -475,7 +348,7 @@ namespace randomx {
 						ibc.imm = signExtend2sCompl(instr.getImm32());
 						ibc.isrc = &ibc.imm;
 					}
-					registerUsage[instr.dst] = i;
+					registerUsage[dst] = i;
 				} break;
 
 				CASE_REP(ISUB_M) {
@@ -484,7 +357,7 @@ namespace randomx {
 					ibc.type = InstructionType::ISUB_M;
 					ibc.idst = &r[dst];
 					ibc.imm = signExtend2sCompl(instr.getImm32());
-					if (instr.src != instr.dst) {
+					if (src != dst) {
 						ibc.isrc = &r[src];
 						ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
 					}
@@ -492,7 +365,7 @@ namespace randomx {
 						ibc.isrc = &Zero;
 						ibc.memMask = ScratchpadL3Mask;
 					}
-					registerUsage[instr.dst] = i;
+					registerUsage[dst] = i;
 				} break;
 
 				CASE_REP(IMUL_R) {
@@ -507,7 +380,7 @@ namespace randomx {
 						ibc.imm = signExtend2sCompl(instr.getImm32());
 						ibc.isrc = &ibc.imm;
 					}
-					registerUsage[instr.dst] = i;
+					registerUsage[dst] = i;
 				} break;
 
 				CASE_REP(IMUL_M) {
@@ -516,7 +389,7 @@ namespace randomx {
 					ibc.type = InstructionType::IMUL_M;
 					ibc.idst = &r[dst];
 					ibc.imm = signExtend2sCompl(instr.getImm32());
-					if (instr.src != instr.dst) {
+					if (src != dst) {
 						ibc.isrc = &r[src];
 						ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
 					}
@@ -524,7 +397,7 @@ namespace randomx {
 						ibc.isrc = &Zero;
 						ibc.memMask = ScratchpadL3Mask;
 					}
-					registerUsage[instr.dst] = i;
+					registerUsage[dst] = i;
 				} break;
 
 				CASE_REP(IMULH_R) {
@@ -533,7 +406,7 @@ namespace randomx {
 					ibc.type = InstructionType::IMULH_R;
 					ibc.idst = &r[dst];
 					ibc.isrc = &r[src];
-					registerUsage[instr.dst] = i;
+					registerUsage[dst] = i;
 				} break;
 
 				CASE_REP(IMULH_M) {
@@ -542,7 +415,7 @@ namespace randomx {
 					ibc.type = InstructionType::IMULH_M;
 					ibc.idst = &r[dst];
 					ibc.imm = signExtend2sCompl(instr.getImm32());
-					if (instr.src != instr.dst) {
+					if (src != dst) {
 						ibc.isrc = &r[src];
 						ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
 					}
@@ -550,7 +423,7 @@ namespace randomx {
 						ibc.isrc = &Zero;
 						ibc.memMask = ScratchpadL3Mask;
 					}
-					registerUsage[instr.dst] = i;
+					registerUsage[dst] = i;
 				} break;
 
 				CASE_REP(ISMULH_R) {
@@ -559,7 +432,7 @@ namespace randomx {
 					ibc.type = InstructionType::ISMULH_R;
 					ibc.idst = &r[dst];
 					ibc.isrc = &r[src];
-					registerUsage[instr.dst] = i;
+					registerUsage[dst] = i;
 				} break;
 
 				CASE_REP(ISMULH_M) {
@@ -568,7 +441,7 @@ namespace randomx {
 					ibc.type = InstructionType::ISMULH_M;
 					ibc.idst = &r[dst];
 					ibc.imm = signExtend2sCompl(instr.getImm32());
-					if (instr.src != instr.dst) {
+					if (src != dst) {
 						ibc.isrc = &r[src];
 						ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
 					}
@@ -576,7 +449,7 @@ namespace randomx {
 						ibc.isrc = &Zero;
 						ibc.memMask = ScratchpadL3Mask;
 					}
-					registerUsage[instr.dst] = i;
+					registerUsage[dst] = i;
 				} break;
 
 				CASE_REP(IMUL_RCP) {
@@ -587,7 +460,7 @@ namespace randomx {
 						ibc.idst = &r[dst];
 						ibc.imm = randomx_reciprocal(divisor);
 						ibc.isrc = &ibc.imm;
-						registerUsage[instr.dst] = i;
+						registerUsage[dst] = i;
 					}
 					else {
 						ibc.type = InstructionType::NOP;
@@ -598,7 +471,7 @@ namespace randomx {
 					auto dst = instr.dst % RegistersCount;
 					ibc.type = InstructionType::INEG_R;
 					ibc.idst = &r[dst];
-					registerUsage[instr.dst] = i;
+					registerUsage[dst] = i;
 				} break;
 
 				CASE_REP(IXOR_R) {
@@ -613,7 +486,7 @@ namespace randomx {
 						ibc.imm = signExtend2sCompl(instr.getImm32());
 						ibc.isrc = &ibc.imm;
 					}
-					registerUsage[instr.dst] = i;
+					registerUsage[dst] = i;
 				} break;
 
 				CASE_REP(IXOR_M) {
@@ -622,7 +495,7 @@ namespace randomx {
 					ibc.type = InstructionType::IXOR_M;
 					ibc.idst = &r[dst];
 					ibc.imm = signExtend2sCompl(instr.getImm32());
-					if (instr.src != instr.dst) {
+					if (src != dst) {
 						ibc.isrc = &r[src];
 						ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
 					}
@@ -630,7 +503,7 @@ namespace randomx {
 						ibc.isrc = &Zero;
 						ibc.memMask = ScratchpadL3Mask;
 					}
-					registerUsage[instr.dst] = i;
+					registerUsage[dst] = i;
 				} break;
 
 				CASE_REP(IROR_R) {
@@ -645,7 +518,7 @@ namespace randomx {
 						ibc.imm = instr.getImm32();
 						ibc.isrc = &ibc.imm;
 					}
-					registerUsage[instr.dst] = i;
+					registerUsage[dst] = i;
 				} break;
 
 				CASE_REP(IROL_R) {
@@ -660,7 +533,7 @@ namespace randomx {
 						ibc.imm = instr.getImm32();
 						ibc.isrc = &ibc.imm;
 					}
-					registerUsage[instr.dst] = i;
+					registerUsage[dst] = i;
 				} break;
 
 				CASE_REP(ISWAP_R) {
@@ -670,8 +543,8 @@ namespace randomx {
 						ibc.idst = &r[dst];
 						ibc.isrc = &r[src];
 						ibc.type = InstructionType::ISWAP_R;
-						registerUsage[instr.dst] = i;
-						registerUsage[instr.src] = i;
+						registerUsage[dst] = i;
+						registerUsage[src] = i;
 					}
 					else {
 						ibc.type = InstructionType::NOP;
@@ -681,23 +554,23 @@ namespace randomx {
 				CASE_REP(FSWAP_R) {
 					auto dst = instr.dst % RegistersCount;
 					ibc.type = InstructionType::FSWAP_R;
-					if (dst < 4)
+					if (dst < RegisterCountFlt)
 						ibc.fdst = &f[dst];
 					else
-						ibc.fdst = &e[dst - 4];
+						ibc.fdst = &e[dst - RegisterCountFlt];
 				} break;
 
 				CASE_REP(FADD_R) {
-					auto dst = instr.dst % 4;
-					auto src = instr.src % 4;
+					auto dst = instr.dst % RegisterCountFlt;
+					auto src = instr.src % RegisterCountFlt;
 					ibc.type = InstructionType::FADD_R;
 					ibc.fdst = &f[dst];
 					ibc.fsrc = &a[src];
 				} break;
 
 				CASE_REP(FADD_M) {
-					auto dst = instr.dst % 4;
-					auto src = instr.src % 8;
+					auto dst = instr.dst % RegisterCountFlt;
+					auto src = instr.src % RegistersCount;
 					ibc.type = InstructionType::FADD_M;
 					ibc.fdst = &f[dst];
 					ibc.isrc = &r[src];
@@ -706,16 +579,16 @@ namespace randomx {
 				} break;
 
 				CASE_REP(FSUB_R) {
-					auto dst = instr.dst % 4;
-					auto src = instr.src % 4;
+					auto dst = instr.dst % RegisterCountFlt;
+					auto src = instr.src % RegisterCountFlt;
 					ibc.type = InstructionType::FSUB_R;
 					ibc.fdst = &f[dst];
 					ibc.fsrc = &a[src];
 				} break;
 
 				CASE_REP(FSUB_M) {
-					auto dst = instr.dst % 4;
-					auto src = instr.src % 8;
+					auto dst = instr.dst % RegisterCountFlt;
+					auto src = instr.src % RegistersCount;
 					ibc.type = InstructionType::FSUB_M;
 					ibc.fdst = &f[dst];
 					ibc.isrc = &r[src];
@@ -724,22 +597,22 @@ namespace randomx {
 				} break;
 
 				CASE_REP(FSCAL_R) {
-					auto dst = instr.dst % 4;
+					auto dst = instr.dst % RegisterCountFlt;
 					ibc.fdst = &f[dst];
 					ibc.type = InstructionType::FSCAL_R;
 				} break;
 
 				CASE_REP(FMUL_R) {
-					auto dst = instr.dst % 4;
-					auto src = instr.src % 4;
+					auto dst = instr.dst % RegisterCountFlt;
+					auto src = instr.src % RegisterCountFlt;
 					ibc.type = InstructionType::FMUL_R;
 					ibc.fdst = &e[dst];
 					ibc.fsrc = &a[src];
 				} break;
 
 				CASE_REP(FDIV_M) {
-					auto dst = instr.dst % 4;
-					auto src = instr.src % 8;
+					auto dst = instr.dst % RegisterCountFlt;
+					auto src = instr.src % RegistersCount;
 					ibc.type = InstructionType::FDIV_M;
 					ibc.fdst = &e[dst];
 					ibc.isrc = &r[src];
@@ -748,7 +621,7 @@ namespace randomx {
 				} break;
 
 				CASE_REP(FSQRT_R) {
-					auto dst = instr.dst % 4;
+					auto dst = instr.dst % RegisterCountFlt;
 					ibc.type = InstructionType::FSQRT_R;
 					ibc.fdst = &e[dst];
 				} break;
@@ -766,13 +639,13 @@ namespace randomx {
 					ibc.target = registerUsage[reg];
 					ibc.shift = instr.getModShift();
 					ibc.creg = &r[reg];
-					for (unsigned j = 0; j < 8; ++j) { //mark all registers as used
+					for (unsigned j = 0; j < RegistersCount; ++j) { //mark all registers as used
 						registerUsage[j] = i;
 					}
 				} break;
 
 				CASE_REP(CFROUND) {
-					auto src = instr.src % 8;
+					auto src = instr.src % RegistersCount;
 					ibc.isrc = &r[src];
 					ibc.type = InstructionType::CFROUND;
 					ibc.imm = instr.getImm32() & 63;
diff --git a/src/vm_interpreted.hpp b/src/vm_interpreted.hpp
index 8a15785..e3a3eb4 100644
--- a/src/vm_interpreted.hpp
+++ b/src/vm_interpreted.hpp
@@ -71,12 +71,12 @@ namespace randomx {
 		void run(void* seed) override;
 		void setDataset(randomx_dataset* dataset) override;
 	protected:
-		virtual void datasetRead(uint32_t blockNumber, int_reg_t(&r)[8]);
+		virtual void datasetRead(uint32_t blockNumber, int_reg_t(&r)[RegistersCount]);
 	private:
 		void execute();
-		void precompileProgram(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
-		void executeBytecode(int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
-		void executeBytecode(int& i, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]);
+		void precompileProgram(int_reg_t(&r)[RegistersCount], __m128d (&f)[RegisterCountFlt], __m128d (&e)[RegisterCountFlt], __m128d (&a)[RegisterCountFlt]);
+		void executeBytecode(int_reg_t(&r)[RegistersCount], __m128d (&f)[RegisterCountFlt], __m128d (&e)[RegisterCountFlt], __m128d (&a)[RegisterCountFlt]);
+		void executeBytecode(int& i, int_reg_t(&r)[RegistersCount], __m128d (&f)[RegisterCountFlt], __m128d (&e)[RegisterCountFlt], __m128d (&a)[RegisterCountFlt]);
 		void* getScratchpadAddress(InstructionByteCode& ibc);
 		__m128d maskRegisterExponentMantissa(__m128d);
 
diff --git a/vcxproj/benchmark.vcxproj b/vcxproj/benchmark.vcxproj
index eba548f..27031e3 100644
--- a/vcxproj/benchmark.vcxproj
+++ b/vcxproj/benchmark.vcxproj
@@ -106,7 +106,7 @@
       <Optimization>MaxSpeed</Optimization>
       <FunctionLevelLinking>true</FunctionLevelLinking>
       <IntrinsicFunctions>true</IntrinsicFunctions>
-      <SDLCheck>true</SDLCheck>
+      <SDLCheck>false</SDLCheck>
       <ConformanceMode>true</ConformanceMode>
     </ClCompile>
     <Link>
diff --git a/vcxproj/randomx.vcxproj b/vcxproj/randomx.vcxproj
index 0ad01ab..218975a 100644
--- a/vcxproj/randomx.vcxproj
+++ b/vcxproj/randomx.vcxproj
@@ -26,20 +26,20 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
-    <ConfigurationType>Application</ConfigurationType>
+    <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
     <PlatformToolset>v141</PlatformToolset>
     <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
-    <ConfigurationType>Application</ConfigurationType>
+    <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
     <PlatformToolset>v141</PlatformToolset>
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>MultiByte</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
-    <ConfigurationType>Application</ConfigurationType>
+    <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
     <PlatformToolset>v141</PlatformToolset>
     <CharacterSet>MultiByte</CharacterSet>