mirror of
				https://git.wownero.com/wownero/RandomWOW.git
				synced 2024-08-15 00:23:14 +00:00 
			
		
		
		
	RandomX portable interpreter
This commit is contained in:
		
							parent
							
								
									072130c774
								
							
						
					
					
						commit
						c9102ee88c
					
				
					 31 changed files with 19262 additions and 0 deletions
				
			
		
							
								
								
									
										6
									
								
								.gitignore
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										6
									
								
								.gitignore
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,6 @@ | |||
| bin/ | ||||
| obj/ | ||||
| *.user | ||||
| *.suo | ||||
| .vs | ||||
| 
 | ||||
							
								
								
									
										77
									
								
								makefile
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										77
									
								
								makefile
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,77 @@ | |||
# Build script for the RandomX portable interpreter and its ALU/FPU test.
#
# Goals:
#   all      build the optimized binary and the ALU/FPU test (default goal)
#   release  $(BINDIR)/randomx compiled with -march=native -O3 and LTO
#   debug    $(BINDIR)/randomx compiled with debug information
#   test     $(BINDIR)/AluFpuTest compiled without optimization
#   clean    remove the binaries and object files created by this makefile
#
# NOTE: object files are shared between goals (instructionsPortable.o is part
# of both binaries) and the flags are goal-specific, so run `make clean` when
# switching between release, debug and test builds.
#
# Uncomment (or pass on the command line) to pin a specific toolchain.
#CXX=g++-8
#CC=gcc-8

# `uname -m` is the POSIX option for the machine type. The previously used
# `uname -i` is a non-standard extension that reports "unknown" or is rejected
# on many systems, in which case the x86_64 check below could never match.
PLATFORM := $(shell uname -m)
CXXFLAGS=-std=c++17
CCFLAGS=
ifeq ($(PLATFORM),x86_64)
    CXXFLAGS += -maes
endif
BINDIR=bin
SRCDIR=src
OBJDIR=obj
LDFLAGS=
TOBJS=$(addprefix $(OBJDIR)/,instructionsPortable.o TestAluFpu.o)
ROBJS=$(addprefix $(OBJDIR)/,argon2_core.o argon2_ref.o blake2b.o dataset.o instructionsPortable.o InterpretedVirtualMachine.o main.o Program.o softAes.o VirtualMachine.o)
SRC1=$(addprefix $(SRCDIR)/,TestAluFpu.cpp instructions.hpp Pcg32.hpp)

# These goals are commands, not files; without this declaration a file named
# e.g. "test" or "clean" in the source tree would silently disable them.
.PHONY: all release debug test clean

all: release test

# Link-time optimization needs the optimization options again at link time.
release: CXXFLAGS += -march=native -O3 -flto
release: CCFLAGS += -march=native -O3 -flto
release: LDFLAGS += -march=native -O3 -flto
release: $(BINDIR)/randomx

debug: CXXFLAGS += -g
debug: CCFLAGS += -g
debug: LDFLAGS += -g
debug: $(BINDIR)/randomx

test: CXXFLAGS += -O0
test: $(BINDIR)/AluFpuTest

# The output directories are order-only prerequisites: they must exist, but
# their timestamps never trigger a rebuild.
$(BINDIR)/randomx: $(ROBJS) | $(BINDIR)
	$(CXX) $(ROBJS) $(LDFLAGS) -o $@

$(BINDIR)/AluFpuTest: $(TOBJS) | $(BINDIR)
	$(CXX) $(TOBJS) $(LDFLAGS) -o $@

# In every compile rule the first prerequisite is the translation unit, so the
# recipes compile $< and list the headers only as dependencies.
$(OBJDIR)/TestAluFpu.o: $(SRC1) | $(OBJDIR)
	$(CXX) $(CXXFLAGS) -c $< -o $@

$(OBJDIR)/argon2_core.o: $(addprefix $(SRCDIR)/,argon2_core.c argon2_core.h blake2/blake2.h blake2/blake2-impl.h) | $(OBJDIR)
	$(CC) $(CCFLAGS) -c $< -o $@

$(OBJDIR)/argon2_ref.o: $(addprefix $(SRCDIR)/,argon2_ref.c argon2.h argon2_core.h blake2/blake2.h blake2/blake2-impl.h blake2/blamka-round-ref.h) | $(OBJDIR)
	$(CC) $(CCFLAGS) -c $< -o $@

$(OBJDIR)/blake2b.o: $(addprefix $(SRCDIR)/blake2/,blake2b.c blake2.h blake2-impl.h) | $(OBJDIR)
	$(CC) $(CCFLAGS) -c $< -o $@

$(OBJDIR)/dataset.o: $(addprefix $(SRCDIR)/,dataset.cpp common.hpp Pcg32.hpp argon2_core.h) | $(OBJDIR)
	$(CXX) $(CXXFLAGS) -c $< -o $@

$(OBJDIR)/instructionsPortable.o: $(addprefix $(SRCDIR)/,instructionsPortable.cpp instructions.hpp intrinPortable.h) | $(OBJDIR)
	$(CXX) $(CXXFLAGS) -c $< -o $@

$(OBJDIR)/InterpretedVirtualMachine.o: $(addprefix $(SRCDIR)/,InterpretedVirtualMachine.cpp InterpretedVirtualMachine.hpp Pcg32.hpp instructions.hpp) | $(OBJDIR)
	$(CXX) $(CXXFLAGS) -c $< -o $@

$(OBJDIR)/main.o: $(addprefix $(SRCDIR)/,main.cpp InterpretedVirtualMachine.hpp Stopwatch.hpp blake2/blake2.h) | $(OBJDIR)
	$(CXX) $(CXXFLAGS) -c $< -o $@

$(OBJDIR)/Program.o: $(addprefix $(SRCDIR)/,Program.cpp Program.hpp Pcg32.hpp) | $(OBJDIR)
	$(CXX) $(CXXFLAGS) -c $< -o $@

$(OBJDIR)/softAes.o: $(addprefix $(SRCDIR)/,softAes.cpp softAes.h) | $(OBJDIR)
	$(CXX) $(CXXFLAGS) -c $< -o $@

$(OBJDIR)/VirtualMachine.o: $(addprefix $(SRCDIR)/,VirtualMachine.cpp VirtualMachine.hpp common.hpp dataset.hpp) | $(OBJDIR)
	$(CXX) $(CXXFLAGS) -c $< -o $@

# -p keeps the rule from failing when the directory already exists (for
# example when two parallel jobs race to create it).
$(OBJDIR):
	mkdir -p $@

$(BINDIR):
	mkdir -p $@

clean:
	rm -f $(BINDIR)/randomx $(BINDIR)/AluFpuTest $(OBJDIR)/*.o
							
								
								
									
										336
									
								
								src/InterpretedVirtualMachine.cpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										336
									
								
								src/InterpretedVirtualMachine.cpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,336 @@ | |||
| /*
 | ||||
| Copyright (c) 2018 tevador | ||||
| 
 | ||||
| This file is part of RandomX. | ||||
| 
 | ||||
| RandomX is free software: you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation, either version 3 of the License, or | ||||
| (at your option) any later version. | ||||
| 
 | ||||
| RandomX is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
| GNU General Public License for more details. | ||||
| 
 | ||||
| You should have received a copy of the GNU General Public License | ||||
| along with RandomX.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| */ | ||||
| //#define TRACE
 | ||||
| //#define FPUCHECK
 | ||||
| #include "InterpretedVirtualMachine.hpp" | ||||
| #include "Pcg32.hpp" | ||||
| #include "instructions.hpp" | ||||
| #include <iostream> | ||||
| #include <iomanip> | ||||
| #include <stdexcept> | ||||
| #include <sstream> | ||||
| #include <cmath> | ||||
| 
 | ||||
// Compile-time switch for the NaN/denormal result checks in FPU_RETIRE below.
// It is true only when FPUCHECK is defined (the define is commented out above).
| #ifdef FPUCHECK | ||||
| constexpr bool fpuCheck = true; | ||||
| #else | ||||
| constexpr bool fpuCheck = false; | ||||
| #endif | ||||
| 
 | ||||
| namespace RandomX { | ||||
| 
 | ||||
| 	void InterpretedVirtualMachine::initializeProgram(const void* seed) { | ||||
| 		Pcg32 gen(seed); | ||||
| 		for (unsigned i = 0; i < sizeof(reg) / sizeof(Pcg32::result_type); ++i) { | ||||
| 			*(((uint32_t*)®) + i) = gen(); | ||||
| 		} | ||||
| 		FPINIT(); | ||||
| 		for (int i = 0; i < 8; ++i) { | ||||
| 			reg.f[i].f64 = (double)reg.f[i].i64; | ||||
| 		} | ||||
| 		p.initialize(gen); | ||||
| 		mem.ma = (gen() ^ *(((uint32_t*)seed) + 4)) & ~7; | ||||
| 		mem.mx = *(((uint32_t*)seed) + 5); | ||||
| 		pc = 0; | ||||
| 		ic = InstructionCount; | ||||
| 		stack.clear(); | ||||
| 	} | ||||
| 
 | ||||
| 	void InterpretedVirtualMachine::execute() { | ||||
| 		while (ic > 0) { | ||||
| 			auto& inst = p(pc); | ||||
| 			if(trace) std::cout << p.getName(inst) << " (" << std::dec << pc << ")" << std::endl; | ||||
| 			pc = (pc + 1) % ProgramLength; | ||||
| 			auto handler = engine[inst.opcode]; | ||||
| 			(this->*handler)(inst); | ||||
| 			ic--; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	convertible_t InterpretedVirtualMachine::loada(Instruction& inst) { | ||||
| 		convertible_t& rega = reg.r[inst.rega % RegistersCount]; | ||||
| 		rega.u64 ^= inst.addr0; | ||||
| 		addr_t addr = rega.u32; | ||||
| 		switch (inst.loca & 7) | ||||
| 		{ | ||||
| 			case 0: | ||||
| 			case 1: | ||||
| 			case 2: | ||||
| 			case 3: | ||||
| 				return readDataset(addr, mem); | ||||
| 
 | ||||
| 			case 4: | ||||
| 				return scratchpad[addr % ScratchpadL2]; | ||||
| 
 | ||||
| 			case 5: | ||||
| 			case 6: | ||||
| 			case 7: | ||||
| 				return scratchpad[addr % ScratchpadL1]; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	convertible_t InterpretedVirtualMachine::loadbr1(Instruction& inst) { | ||||
| 		switch (inst.loca & 7) | ||||
| 		{ | ||||
| 		case 0: | ||||
| 		case 1: | ||||
| 		case 2: | ||||
| 		case 3: | ||||
| 		case 4: | ||||
| 		case 5: | ||||
| 			return reg.r[inst.regb % RegistersCount]; | ||||
| 		case 6: | ||||
| 		case 7: | ||||
| 			convertible_t temp; | ||||
| 			temp.i64 = inst.imm1; | ||||
| 			return temp; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	convertible_t InterpretedVirtualMachine::loadbr0(Instruction& inst) { | ||||
| 		switch (inst.locb & 7) | ||||
| 		{ | ||||
| 		case 0: | ||||
| 		case 1: | ||||
| 		case 2: | ||||
| 		case 3: | ||||
| 		case 4: | ||||
| 		case 5: | ||||
| 			return reg.r[inst.regb % RegistersCount]; | ||||
| 		case 6: | ||||
| 		case 7: | ||||
| 			convertible_t temp; | ||||
| 			temp.u64 = inst.imm0; | ||||
| 			return temp; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	double InterpretedVirtualMachine::loadbf(Instruction& inst) { | ||||
| 		switch (inst.locb & 7) | ||||
| 		{ | ||||
| 		case 0: | ||||
| 		case 1: | ||||
| 		case 2: | ||||
| 		case 3: | ||||
| 		case 4: | ||||
| 		case 5: | ||||
| 			return reg.f[inst.regb % RegistersCount].f64; | ||||
| 		case 6: | ||||
| 		case 7: | ||||
| 			return (double)inst.imm1; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	convertible_t& InterpretedVirtualMachine::getcr(Instruction& inst) { | ||||
| 		addr_t addr; | ||||
| 		switch (inst.locc & 7) | ||||
| 		{ | ||||
| 		case 0: | ||||
| 			addr = reg.r[inst.regc % RegistersCount].u32 ^ inst.addr1; | ||||
| 			return scratchpad[addr % ScratchpadL2]; | ||||
| 
 | ||||
| 		case 1: | ||||
| 		case 2: | ||||
| 		case 3: | ||||
| 			addr = reg.r[inst.regc % RegistersCount].u32 ^ inst.addr1; | ||||
| 			return scratchpad[addr % ScratchpadL1]; | ||||
| 
 | ||||
| 		case 4: | ||||
| 		case 5: | ||||
| 		case 6: | ||||
| 		case 7: | ||||
| 			return reg.r[inst.regc % RegistersCount]; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	convertible_t& InterpretedVirtualMachine::getcf(Instruction& inst) { | ||||
| 		addr_t addr; | ||||
| 		switch (inst.locc & 7) | ||||
| 		{ | ||||
| 		case 0: | ||||
| 			addr = reg.r[inst.regc % RegistersCount].u32 ^ inst.addr1; | ||||
| 			return scratchpad[addr % ScratchpadL2]; | ||||
| 
 | ||||
| 		case 1: | ||||
| 		case 2: | ||||
| 		case 3: | ||||
| 			addr = reg.r[inst.regc % RegistersCount].u32 ^ inst.addr1; | ||||
| 			return scratchpad[addr % ScratchpadL1]; | ||||
| 
 | ||||
| 		case 4: | ||||
| 		case 5: | ||||
| 		case 6: | ||||
| 		case 7: | ||||
| 			return reg.f[inst.regc % RegistersCount]; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| #define ALU_RETIRE(x) x(a, b, c); \ | ||||
| 	if(trace) std::cout << std::hex << a.u64 << " " << b.u64 << " " << c.u64 << std::endl; | ||||
| 
 | ||||
| #define FPU_RETIRE(x) x(a, b, c); \ | ||||
| 	if(trace) { \ | ||||
| 		convertible_t bc; \ | ||||
| 		bc.f64 = b; \ | ||||
| 		std::cout << std::hex << a.u64 << " " << bc.u64 << " " << c.u64 << std::endl; \ | ||||
| 	} \ | ||||
| 	if(fpuCheck) { \ | ||||
| 		convertible_t bc; \ | ||||
| 		if(c.f64 != c.f64)  { \ | ||||
| 			std::stringstream ss; \ | ||||
| 			bc.f64 = b; \ | ||||
| 			ss << "NaN result of " << #x << "(" << std::hex << a.u64 << ", " << bc.u64 << ") = " << c.u64; \ | ||||
| 			throw std::runtime_error(ss.str()); \ | ||||
| 		} else if (std::fpclassify(c.f64) == FP_SUBNORMAL) {\ | ||||
| 			std::stringstream ss; \ | ||||
| 			bc.f64 = b; \ | ||||
| 			ss << "Denormal result of " << #x << "(" << std::hex << a.u64 << ", " << bc.u64 << ") = " << c.u64; \ | ||||
| 			throw std::runtime_error(ss.str()); \ | ||||
| 		} \ | ||||
| 	} | ||||
| 
 | ||||
| #define FPU_RETIRE_NB(x) x(a, b, c); \ | ||||
| 	if(trace) std::cout << std::hex << a.u64 << " " << c.u64 << std::endl; | ||||
| 
 | ||||
| #define ALU_INST(x) void InterpretedVirtualMachine::h_##x(Instruction& inst) { \ | ||||
| 	convertible_t a = loada(inst); \ | ||||
| 	convertible_t b = loadbr1(inst); \ | ||||
| 	convertible_t& c = getcr(inst); \ | ||||
| 	ALU_RETIRE(x) \ | ||||
| 	} | ||||
| 
 | ||||
| #define ALU_INST_SR(x) void InterpretedVirtualMachine::h_##x(Instruction& inst) { \ | ||||
| 	convertible_t a = loada(inst); \ | ||||
| 	convertible_t b = loadbr0(inst); \ | ||||
| 	convertible_t& c = getcr(inst); \ | ||||
| 	ALU_RETIRE(x) \ | ||||
| 	} | ||||
| 
 | ||||
| #define FPU_INST(x) void InterpretedVirtualMachine::h_##x(Instruction& inst) { \ | ||||
| 	convertible_t a = loada(inst); \ | ||||
| 	double b = loadbf(inst); \ | ||||
| 	convertible_t& c = getcf(inst); \ | ||||
| 	FPU_RETIRE(x) \ | ||||
| 	} | ||||
| 
 | ||||
| #define FPU_INST_NB(x) void InterpretedVirtualMachine::h_##x(Instruction& inst) { \ | ||||
| 	convertible_t a = loada(inst); \ | ||||
| 	convertible_t b; \ | ||||
| 	convertible_t& c = getcf(inst); \ | ||||
| 	FPU_RETIRE_NB(x) \ | ||||
| 	} | ||||
| 
 | ||||
| 	ALU_INST(ADD_64) | ||||
| 	ALU_INST(ADD_32) | ||||
| 	ALU_INST(SUB_64) | ||||
| 	ALU_INST(SUB_32) | ||||
| 	ALU_INST(MUL_64) | ||||
| 	ALU_INST(MULH_64) | ||||
| 	ALU_INST(MUL_32) | ||||
| 	ALU_INST(IMUL_32) | ||||
| 	ALU_INST(IMULH_64) | ||||
| 	ALU_INST(DIV_64) | ||||
| 	ALU_INST(IDIV_64) | ||||
| 	ALU_INST(AND_64) | ||||
| 	ALU_INST(AND_32) | ||||
| 	ALU_INST(OR_64) | ||||
| 	ALU_INST(OR_32) | ||||
| 	ALU_INST(XOR_64) | ||||
| 	ALU_INST(XOR_32) | ||||
| 
 | ||||
| 	ALU_INST_SR(SHL_64) | ||||
| 	ALU_INST_SR(SHR_64) | ||||
| 	ALU_INST_SR(SAR_64) | ||||
| 	ALU_INST_SR(ROL_64) | ||||
| 	ALU_INST_SR(ROR_64) | ||||
| 
 | ||||
| 	FPU_INST(FPADD) | ||||
| 	FPU_INST(FPSUB) | ||||
| 	FPU_INST(FPMUL) | ||||
| 	FPU_INST(FPDIV) | ||||
| 
 | ||||
| 	FPU_INST_NB(FPSQRT) | ||||
| 	FPU_INST_NB(FPROUND) | ||||
| 
 | ||||
| 	void InterpretedVirtualMachine::h_CALL(Instruction& inst) { | ||||
| 		convertible_t a = loada(inst); | ||||
| 		convertible_t b = loadbr1(inst); | ||||
| 		convertible_t& c = getcr(inst); | ||||
| 		if (b.u32 <= (uint32_t)inst.imm1) { | ||||
| 			stackPush(a); | ||||
| 			stackPush(pc); | ||||
| 			pc += (inst.imm0 & 127) + 1; | ||||
| 			pc = pc % ProgramLength; | ||||
| 		} | ||||
| 		else { | ||||
| 			c.u64 = a.u64; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	void InterpretedVirtualMachine::h_RET(Instruction& inst) { | ||||
| 		convertible_t a = loada(inst); | ||||
| 		convertible_t b = loadbr1(inst); | ||||
| 		convertible_t& c = getcr(inst); | ||||
| 		if (stack.size() > 0 && b.u32 <= (uint32_t)inst.imm1) { | ||||
| 			auto raddr = stackPopAddress(); | ||||
| 			auto retval = stackPopValue(); | ||||
| 			c.u64 = a.u64 ^ retval.u64; | ||||
| 			pc = raddr; | ||||
| 		} | ||||
| 		else { | ||||
| 			c.u64 = a.u64; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| #include "instructionWeights.hpp" | ||||
| #define INST_HANDLE(x) REPN(&InterpretedVirtualMachine::h_##x, WT(x)) | ||||
| 
 | ||||
// Opcode dispatch table: execute() indexes it with the 8-bit opcode of the
// current instruction and calls the handler found there.
// Each INST_HANDLE(x) line expands to REPN(&InterpretedVirtualMachine::h_x, WT(x)).
// NOTE(review): REPN and WT are not defined in this file; presumably they come
// from instructionWeights.hpp and repeat the handler pointer WT(x) times so
// that the weights fill all 256 slots. Confirm against that header.
| 	InstructionHandler InterpretedVirtualMachine::engine[256] = { | ||||
| 		INST_HANDLE(ADD_64) | ||||
| 		INST_HANDLE(ADD_32) | ||||
| 		INST_HANDLE(SUB_64) | ||||
| 		INST_HANDLE(SUB_32) | ||||
| 		INST_HANDLE(MUL_64) | ||||
| 		INST_HANDLE(MULH_64) | ||||
| 		INST_HANDLE(MUL_32) | ||||
| 		INST_HANDLE(IMUL_32) | ||||
| 		INST_HANDLE(IMULH_64) | ||||
| 		INST_HANDLE(DIV_64) | ||||
| 		INST_HANDLE(IDIV_64) | ||||
| 		INST_HANDLE(AND_64) | ||||
| 		INST_HANDLE(AND_32) | ||||
| 		INST_HANDLE(OR_64) | ||||
| 		INST_HANDLE(OR_32) | ||||
| 		INST_HANDLE(XOR_64) | ||||
| 		INST_HANDLE(XOR_32) | ||||
| 		INST_HANDLE(SHL_64) | ||||
| 		INST_HANDLE(SHR_64) | ||||
| 		INST_HANDLE(SAR_64) | ||||
| 		INST_HANDLE(ROL_64) | ||||
| 		INST_HANDLE(ROR_64) | ||||
| 		INST_HANDLE(FPADD) | ||||
| 		INST_HANDLE(FPSUB) | ||||
| 		INST_HANDLE(FPMUL) | ||||
| 		INST_HANDLE(FPDIV) | ||||
| 		INST_HANDLE(FPSQRT) | ||||
| 		INST_HANDLE(FPROUND) | ||||
| 		INST_HANDLE(CALL) | ||||
| 		INST_HANDLE(RET) | ||||
| 	}; | ||||
| } | ||||
							
								
								
									
										106
									
								
								src/InterpretedVirtualMachine.hpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										106
									
								
								src/InterpretedVirtualMachine.hpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,106 @@ | |||
| /*
 | ||||
| Copyright (c) 2018 tevador | ||||
| 
 | ||||
| This file is part of RandomX. | ||||
| 
 | ||||
| RandomX is free software: you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation, either version 3 of the License, or | ||||
| (at your option) any later version. | ||||
| 
 | ||||
| RandomX is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
| GNU General Public License for more details. | ||||
| 
 | ||||
| You should have received a copy of the GNU General Public License | ||||
| along with RandomX.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| */ | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include "VirtualMachine.hpp" | ||||
| #include "Program.hpp" | ||||
| #include <vector> | ||||
| 
 | ||||
| namespace RandomX { | ||||
| 
 | ||||
| 	class InterpretedVirtualMachine; | ||||
| 
 | ||||
| 	typedef void(InterpretedVirtualMachine::*InstructionHandler)(Instruction&); | ||||
| 
 | ||||
| 	class InterpretedVirtualMachine : public VirtualMachine { | ||||
| 	public: | ||||
| 		InterpretedVirtualMachine(bool softAes) : VirtualMachine(softAes) {} | ||||
| 		virtual void initializeProgram(const void* seed) override; | ||||
| 		virtual void execute() override; | ||||
| 		const Program& getProgam() { | ||||
| 			return p; | ||||
| 		} | ||||
| 	private: | ||||
| 		static InstructionHandler engine[256]; | ||||
| 		Program p; | ||||
| 		std::vector<convertible_t> stack; | ||||
| 		uint64_t pc, ic; | ||||
| 
 | ||||
| 		convertible_t loada(Instruction&); | ||||
| 		convertible_t loadbr0(Instruction&); | ||||
| 		convertible_t loadbr1(Instruction&); | ||||
| 		double loadbf(Instruction&); | ||||
| 		convertible_t& getcr(Instruction&); | ||||
| 		convertible_t& getcf(Instruction&); | ||||
| 
 | ||||
| 		void stackPush(convertible_t& c) { | ||||
| 			stack.push_back(c); | ||||
| 		} | ||||
| 
 | ||||
| 		void stackPush(uint64_t x) { | ||||
| 			convertible_t c; | ||||
| 			c.u64 = x; | ||||
| 			stack.push_back(c); | ||||
| 		} | ||||
| 
 | ||||
| 		convertible_t stackPopValue() { | ||||
| 			convertible_t top = stack.back(); | ||||
| 			stack.pop_back(); | ||||
| 			return top; | ||||
| 		} | ||||
| 
 | ||||
| 		uint64_t stackPopAddress() { | ||||
| 			convertible_t top = stack.back(); | ||||
| 			stack.pop_back(); | ||||
| 			return top.u64; | ||||
| 		} | ||||
| 
 | ||||
| 		void h_ADD_64(Instruction&); | ||||
| 		void h_ADD_32(Instruction&); | ||||
| 		void h_SUB_64(Instruction&); | ||||
| 		void h_SUB_32(Instruction&); | ||||
| 		void h_MUL_64(Instruction&); | ||||
| 		void h_MULH_64(Instruction&); | ||||
| 		void h_MUL_32(Instruction&); | ||||
| 		void h_IMUL_32(Instruction&); | ||||
| 		void h_IMULH_64(Instruction&); | ||||
| 		void h_DIV_64(Instruction&); | ||||
| 		void h_IDIV_64(Instruction&); | ||||
| 		void h_AND_64(Instruction&); | ||||
| 		void h_AND_32(Instruction&); | ||||
| 		void h_OR_64(Instruction&); | ||||
| 		void h_OR_32(Instruction&); | ||||
| 		void h_XOR_64(Instruction&); | ||||
| 		void h_XOR_32(Instruction&); | ||||
| 		void h_SHL_64(Instruction&); | ||||
| 		void h_SHR_64(Instruction&); | ||||
| 		void h_SAR_64(Instruction&); | ||||
| 		void h_ROL_64(Instruction&); | ||||
| 		void h_ROR_64(Instruction&); | ||||
| 		void h_FPADD(Instruction&); | ||||
| 		void h_FPSUB(Instruction&); | ||||
| 		void h_FPMUL(Instruction&); | ||||
| 		void h_FPDIV(Instruction&); | ||||
| 		void h_FPSQRT(Instruction&); | ||||
| 		void h_FPROUND(Instruction&); | ||||
| 		void h_CALL(Instruction&); | ||||
| 		void h_RET(Instruction&); | ||||
| 	}; | ||||
| } | ||||
							
								
								
									
										72
									
								
								src/Pcg32.hpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										72
									
								
								src/Pcg32.hpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,72 @@ | |||
| /*
 | ||||
| Copyright (c) 2018 tevador | ||||
| 
 | ||||
| This file is part of RandomX. | ||||
| 
 | ||||
| RandomX is free software: you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation, either version 3 of the License, or | ||||
| (at your option) any later version. | ||||
| 
 | ||||
| RandomX is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
| GNU General Public License for more details. | ||||
| 
 | ||||
| You should have received a copy of the GNU General Public License | ||||
| along with RandomX.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| */ | ||||
| 
 | ||||
| // Based on:
 | ||||
| // *Really* minimal PCG32 code / (c) 2014 M.E. O'Neill / pcg-random.org
 | ||||
| // Licensed under Apache License 2.0 (NO WARRANTY, etc. see website)
 | ||||
| 
 | ||||
| #pragma once | ||||
| #include <cstdint> | ||||
| 
 | ||||
| #if defined(_MSC_VER) | ||||
| #pragma warning (disable : 4146) | ||||
| #endif | ||||
| 
 | ||||
/**
 * Minimal PCG32 pseudo-random generator (XSH RR output function) with
 * 64 bits of state and a 64-bit stream selector that is forced to be odd.
 * Exposes `result_type`, `min()`, `max()` and `operator()`.
 */
class Pcg32 {
public:
	typedef uint32_t result_type;
	static constexpr result_type min() { return 0U; }
	static constexpr result_type max() { return UINT32_MAX; }

	/**
	 * Seeds the generator from memory.
	 * @param seed  Pointer to two consecutive 64-bit words: the initial state
	 *              followed by the stream selector. The words are read through
	 *              a uint64_t pointer, so the buffer must be suitably aligned.
	 */
	Pcg32(const void* seed) {
		auto* u64seed = (const uint64_t*)seed;
		state = *(u64seed + 0);
		inc = *(u64seed + 1) | 1ull;
	}

	/** Seeds the generator from an explicit state and stream selector. */
	Pcg32(uint64_t state, uint64_t inc) : state(state), inc(inc | 1ull) {
	}

	/** Returns the next 32-bit output. */
	result_type operator()() {
		return next();
	}

	/**
	 * Returns a value uniformly distributed in [min, max] (both inclusive),
	 * using rejection sampling to avoid modulo bias.
	 *
	 * When the interval spans all 2^32 values, `range + 1` wraps to zero; the
	 * raw output is already uniform in that case and is returned directly
	 * instead of dividing by zero.
	 */
	result_type getUniform(result_type min, result_type max) {
		const result_type range = max - min;
		const result_type erange = range + 1;
		if (erange == 0) {
			return next() + min;
		}
		// Loop invariants: number of complete buckets and whether the last
		// bucket is complete as well.
		const result_type fullBuckets = UINT32_MAX / erange;
		const bool lastBucketComplete = UINT32_MAX % erange == range;
		result_type ret;

		for (;;) {
			ret = next();
			if (ret / erange < fullBuckets || lastBucketComplete) {
				ret %= erange;
				break;
			}
		}
		return ret + min;
	}
private:
	uint64_t state;
	uint64_t inc;
	result_type next() {
		uint64_t oldstate = state;
		// Advance internal state
		state = oldstate * 6364136223846793005ULL + inc;
		// Calculate output function (XSH RR), uses old state for max ILP
		uint32_t xorshifted = ((oldstate >> 18u) ^ oldstate) >> 27u;
		uint32_t rot = oldstate >> 59u;
		return (xorshifted >> rot) | (xorshifted << (-rot & 31));
	}
};
							
								
								
									
										77
									
								
								src/Program.cpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										77
									
								
								src/Program.cpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,77 @@ | |||
| /*
 | ||||
| Copyright (c) 2018 tevador | ||||
| 
 | ||||
| This file is part of RandomX. | ||||
| 
 | ||||
| RandomX is free software: you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation, either version 3 of the License, or | ||||
| (at your option) any later version. | ||||
| 
 | ||||
| RandomX is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
| GNU General Public License for more details. | ||||
| 
 | ||||
| You should have received a copy of the GNU General Public License | ||||
| along with RandomX.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| */ | ||||
| 
 | ||||
| #include "Program.hpp" | ||||
| #include "Pcg32.hpp" | ||||
| 
 | ||||
| namespace RandomX { | ||||
| 	void Program::initialize(Pcg32& gen) { | ||||
| 		for (unsigned i = 0; i < sizeof(programBuffer) / sizeof(Pcg32::result_type); ++i) { | ||||
| 			*(((uint32_t*)&programBuffer) + i) = gen(); | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	void Program::print(std::ostream& os) const { | ||||
| 		for (int i = 0; i < RandomX::ProgramLength; ++i) { | ||||
| 			auto instr = programBuffer[i]; | ||||
| 			os << std::dec << instrNames[instr.opcode] << " (" << i << "):" << std::endl; | ||||
| 			os << "  A: loc = " << (instr.loca & 7) << ", reg: " << (instr.rega & 7) << std::endl; | ||||
| 			os << "  B: loc = " << (instr.locb & 7) << ", reg: " << (instr.regb & 7) << std::endl; | ||||
| 			os << "  C: loc = " << (instr.locc & 7) << ", reg: " << (instr.regc & 7) << std::endl; | ||||
| 			os << "  imm0 = " << (int)instr.imm0 << std::endl; | ||||
| 			os << "  imm1 = " << std::hex << instr.imm1 << std::endl; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| #include "instructionWeights.hpp" | ||||
| #define INST_NAME(x) REPN(#x, WT(x)) | ||||
| 
 | ||||
// Opcode-to-name table used by Program::getName() and Program::print().
// Each INST_NAME(x) line expands to REPN(#x, WT(x)), where #x is the
// instruction name as a string literal.
// NOTE(review): REPN and WT are not defined in this file; presumably they come
// from instructionWeights.hpp and repeat the name WT(x) times, which would
// keep this table aligned with the handler table of the interpreter. Confirm
// against that header.
| 	const char* Program::instrNames[256] = { | ||||
| 		INST_NAME(ADD_64) | ||||
| 		INST_NAME(ADD_32) | ||||
| 		INST_NAME(SUB_64) | ||||
| 		INST_NAME(SUB_32) | ||||
| 		INST_NAME(MUL_64) | ||||
| 		INST_NAME(MULH_64) | ||||
| 		INST_NAME(MUL_32) | ||||
| 		INST_NAME(IMUL_32) | ||||
| 		INST_NAME(IMULH_64) | ||||
| 		INST_NAME(DIV_64) | ||||
| 		INST_NAME(IDIV_64) | ||||
| 		INST_NAME(AND_64) | ||||
| 		INST_NAME(AND_32) | ||||
| 		INST_NAME(OR_64) | ||||
| 		INST_NAME(OR_32) | ||||
| 		INST_NAME(XOR_64) | ||||
| 		INST_NAME(XOR_32) | ||||
| 		INST_NAME(SHL_64) | ||||
| 		INST_NAME(SHR_64) | ||||
| 		INST_NAME(SAR_64) | ||||
| 		INST_NAME(ROL_64) | ||||
| 		INST_NAME(ROR_64) | ||||
| 		INST_NAME(FPADD) | ||||
| 		INST_NAME(FPSUB) | ||||
| 		INST_NAME(FPMUL) | ||||
| 		INST_NAME(FPDIV) | ||||
| 		INST_NAME(FPSQRT) | ||||
| 		INST_NAME(FPROUND) | ||||
| 		INST_NAME(CALL) | ||||
| 		INST_NAME(RET) | ||||
| 	}; | ||||
| } | ||||
							
								
								
									
										66
									
								
								src/Program.hpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										66
									
								
								src/Program.hpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,66 @@ | |||
| /*
 | ||||
| Copyright (c) 2018 tevador | ||||
| 
 | ||||
| This file is part of RandomX. | ||||
| 
 | ||||
| RandomX is free software: you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation, either version 3 of the License, or | ||||
| (at your option) any later version. | ||||
| 
 | ||||
| RandomX is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
| GNU General Public License for more details. | ||||
| 
 | ||||
| You should have received a copy of the GNU General Public License | ||||
| along with RandomX.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| */ | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include <cstdint> | ||||
| #include <ostream> | ||||
| #include "common.hpp" | ||||
| 
 | ||||
| class Pcg32; | ||||
| 
 | ||||
| namespace RandomX { | ||||
| 
 | ||||
// One program instruction. The static_assert that follows this struct
// requires it to occupy exactly 16 bytes.
//   opcode            index into the 256-entry name table (Program::getName)
//   loca, locb, locc  location selectors of operands A, B and C; Program::print
//                     shows their low three bits
//   rega, regb, regc  register selectors of operands A, B and C; Program::print
//                     shows their low three bits
//   imm0              8-bit immediate, printed in decimal
//   addr0             32-bit field (not printed by Program::print)
//   addr1 / imm1      the same four bytes viewed as an unsigned value or as a
//                     signed 32-bit immediate
| 	struct Instruction { | ||||
| 		uint8_t opcode; | ||||
| 		uint8_t loca; | ||||
| 		uint8_t rega; | ||||
| 		uint8_t locb; | ||||
| 		uint8_t regb; | ||||
| 		uint8_t locc; | ||||
| 		uint8_t regc; | ||||
| 		uint8_t imm0; | ||||
| 		uint32_t addr0; | ||||
| 		union { | ||||
| 			uint32_t addr1; | ||||
| 			int32_t imm1; | ||||
| 		}; | ||||
| 	}; | ||||
| 
 | ||||
| 	static_assert(sizeof(Instruction) == 16, "Invalid alignment of struct Instruction"); | ||||
| 
 | ||||
| 	class Program { | ||||
| 	public: | ||||
| 		Instruction& operator()(uint64_t pc) { | ||||
| 			return programBuffer[pc]; | ||||
| 		} | ||||
| 		const char* getName(Instruction& instr) { | ||||
| 			return instrNames[instr.opcode]; | ||||
| 		} | ||||
| 		void initialize(Pcg32& gen); | ||||
| 		friend std::ostream& operator<<(std::ostream& os, const Program& p) { | ||||
| 			p.print(os); | ||||
| 			return os; | ||||
| 		} | ||||
| 	private: | ||||
| 		void print(std::ostream&) const; | ||||
| 		static const char* instrNames[256]; | ||||
| 		Instruction programBuffer[ProgramLength]; | ||||
| 	}; | ||||
| } | ||||
							
								
								
									
										75
									
								
								src/Stopwatch.hpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										75
									
								
								src/Stopwatch.hpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,75 @@ | |||
| /*
 | ||||
| Copyright (c) 2018 tevador | ||||
| 
 | ||||
| This file is part of RandomX. | ||||
| 
 | ||||
| RandomX is free software: you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation, either version 3 of the License, or | ||||
| (at your option) any later version. | ||||
| 
 | ||||
| RandomX is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
| GNU General Public License for more details. | ||||
| 
 | ||||
| You should have received a copy of the GNU General Public License | ||||
| along with RandomX.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| */ | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include <chrono> | ||||
| #include <cstdint> | ||||
| 
 | ||||
/**
 * Accumulating stopwatch with nanosecond resolution.
 *
 * Time is measured with std::chrono::steady_clock, which is monotonic, so
 * adjustments of the system clock cannot produce negative or inflated
 * intervals (std::chrono::high_resolution_clock gives no such guarantee).
 */
class Stopwatch {
public:
	// Creates a stopped stopwatch with zero elapsed time; starts it
	// immediately when `startNow` is true.
	Stopwatch(bool startNow = false) {
		reset();
		if (startNow) {
			start();
		}
	}
	// Stops the stopwatch and clears the accumulated time.
	void reset() {
		isRunning = false;
		elapsed = 0;
	}
	// Starts (or resumes) timing; does nothing when already running.
	void start() {
		if (!isRunning) {
			startMark = sw_clock::now();
			isRunning = true;
		}
	}
	// Clears the accumulated time and starts timing from now.
	void restart() {
		startMark = sw_clock::now();
		isRunning = true;
		elapsed = 0;
	}
	// Stops timing and adds the current interval to the accumulated time;
	// does nothing when not running.
	void stop() {
		if (isRunning) {
			elapsed += nanosecSinceStart();
			isRunning = false;
		}
	}
	// Returns the accumulated time in seconds, including the interval in
	// progress when the stopwatch is running.
	double getElapsed() const {
		return getElapsedNanosec() / 1e+9;
	}
private:
	using sw_clock = std::chrono::steady_clock;
	using chrono_t = sw_clock::time_point;
	using sw_unit = std::chrono::nanoseconds;
	chrono_t startMark;
	uint64_t elapsed;
	bool isRunning;

	// Nanoseconds between the last start mark and now.
	uint64_t nanosecSinceStart() const {
		chrono_t endMark = sw_clock::now();
		return std::chrono::duration_cast<sw_unit>(endMark - startMark).count();
	}

	// Accumulated nanoseconds plus the running interval, if any.
	uint64_t getElapsedNanosec() const {
		uint64_t elns = elapsed;
		if (isRunning) {
			elns += nanosecSinceStart();
		}
		return elns;
	}
};
							
								
								
									
										390
									
								
								src/TestAluFpu.cpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										390
									
								
								src/TestAluFpu.cpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,390 @@ | |||
| /*
 | ||||
| Copyright (c) 2018 tevador | ||||
| 
 | ||||
| This file is part of RandomX. | ||||
| 
 | ||||
| RandomX is free software: you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation, either version 3 of the License, or | ||||
| (at your option) any later version. | ||||
| 
 | ||||
| RandomX is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
| GNU General Public License for more details. | ||||
| 
 | ||||
| You should have received a copy of the GNU General Public License | ||||
| along with RandomX.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| */ | ||||
| 
 | ||||
| #include <iostream> | ||||
| #include <iomanip> | ||||
| #include <limits> | ||||
| #include "instructions.hpp" | ||||
| #include "Pcg32.hpp" | ||||
| //#define DEBUG
 | ||||
| 
 | ||||
| using namespace RandomX; | ||||
| 
 | ||||
| typedef void(*VmOperation)(convertible_t&, convertible_t&, convertible_t&); | ||||
| 
 | ||||
| uint64_t rxRound(uint32_t mode, int64_t x, int64_t y, VmOperation op) { | ||||
| 	convertible_t a, b, c; | ||||
| 	a.u64 = mode; | ||||
| 	FPROUND(a, b, c); | ||||
| #ifdef DEBUG | ||||
| 	a.f64 = convertToDouble(x); | ||||
| 	b.f64 = convertToDouble(y); | ||||
| 	std::cout << std::hex << (uint64_t)x << " -> " << a.u64 << std::endl; | ||||
| 	std::cout << std::hex << (uint64_t)y << " -> " << b.u64 << std::endl; | ||||
| 	std::cout << std::dec; | ||||
| #endif | ||||
| 	a.i64 = x; | ||||
| 	b.i64 = y; | ||||
| 	op(a, b, c); | ||||
| 	return c.u64; | ||||
| } | ||||
| 
 | ||||
| #define CATCH_CONFIG_MAIN | ||||
| #include "catch.hpp" | ||||
| 
 | ||||
/*
	Test helpers. Both macros expect three convertible_t variables named
	a, b and c in the enclosing scope: the operands are stored into a and b,
	then INST(a, b, c) is invoked so that the result can be read from c.
*/

// Loads the operands through the unsigned 64-bit member.
#define RX_EXECUTE_U64(va, vb, INST) do { \
	a.u64 = va; \
	b.u64 = vb; \
	INST(a, b, c); \
	} while(false)

// Loads the operands through the signed 64-bit member.
#define RX_EXECUTE_I64(va, vb, INST) do { \
	a.i64 = va; \
	b.i64 = vb; \
	INST(a, b, c); \
	} while(false)
| 
 | ||||
| TEST_CASE("Integer addition (64-bit)", "[ADD_64]") { | ||||
| 	convertible_t a, b, c; | ||||
| 
 | ||||
| 	RX_EXECUTE_U64(0xFFFFFFFF, 0x1, ADD_64); | ||||
| 	CHECK(c.u64 == 0x100000000); | ||||
| 
 | ||||
| 	RX_EXECUTE_U64(0x8000000000000000, 0x8000000000000000, ADD_64); | ||||
| 	CHECK(c.u64 == 0x0); | ||||
| } | ||||
| 
 | ||||
| TEST_CASE("Integer addition (32-bit)", "[ADD_32]") { | ||||
| 	convertible_t a, b, c; | ||||
| 
 | ||||
| 	RX_EXECUTE_U64(0xFFFFFFFF, 0x1, ADD_32); | ||||
| 	CHECK(c.u64 == 0); | ||||
| 
 | ||||
| 	RX_EXECUTE_U64(0xFF00000000000001, 0x0000000100000001, ADD_32); | ||||
| 	CHECK(c.u64 == 2); | ||||
| } | ||||
| 
 | ||||
| TEST_CASE("Integer subtraction (64-bit)", "[SUB_64]") { | ||||
| 	convertible_t a, b, c; | ||||
| 
 | ||||
| 	RX_EXECUTE_U64(1, 0xFFFFFFFF, SUB_64); | ||||
| 	CHECK(c.u64 == 0xFFFFFFFF00000002); | ||||
| } | ||||
| 
 | ||||
| TEST_CASE("Integer subtraction (32-bit)", "[SUB_32]") { | ||||
| 	convertible_t a, b, c; | ||||
| 
 | ||||
| 	RX_EXECUTE_U64(1, 0xFFFFFFFF, SUB_32); | ||||
| 	CHECK(c.u64 == 2); | ||||
| } | ||||
| 
 | ||||
| TEST_CASE("Unsigned multiplication (64-bit, low half)", "[MUL_64]") { | ||||
| 	convertible_t a, b, c; | ||||
| 
 | ||||
| 	RX_EXECUTE_U64(0xBC550E96BA88A72B, 0xF5391FA9F18D6273, MUL_64); | ||||
| 	CHECK(c.u64 == 0x28723424A9108E51); | ||||
| } | ||||
| 
 | ||||
| TEST_CASE("Unsigned multiplication (64-bit, high half)", "[MULH_64]") { | ||||
| 	convertible_t a, b, c; | ||||
| 
 | ||||
| 	RX_EXECUTE_U64(0xBC550E96BA88A72B, 0xF5391FA9F18D6273, MULH_64); | ||||
| 	CHECK(c.u64 == 0xB4676D31D2B34883); | ||||
| } | ||||
| 
 | ||||
| TEST_CASE("Unsigned multiplication (32-bit x 32-bit -> 64-bit)", "[MUL_32]") { | ||||
| 	convertible_t a, b, c; | ||||
| 
 | ||||
| 	RX_EXECUTE_U64(0xBC550E96BA88A72B, 0xF5391FA9F18D6273, MUL_32); | ||||
| 	CHECK(c.u64 == 0xB001AA5FA9108E51); | ||||
| } | ||||
| 
 | ||||
| TEST_CASE("Signed multiplication (32-bit x 32-bit -> 64-bit)", "[IMUL_32]") { | ||||
| 	convertible_t a, b, c; | ||||
| 
 | ||||
| 	RX_EXECUTE_U64(0xBC550E96BA88A72B, 0xF5391FA9F18D6273, IMUL_32); | ||||
| 	CHECK(c.u64 == 0x03EBA0C1A9108E51); | ||||
| } | ||||
| 
 | ||||
| TEST_CASE("Signed multiplication (64-bit, high half)", "[IMULH_64]") { | ||||
| 	convertible_t a, b, c; | ||||
| 
 | ||||
| 	RX_EXECUTE_U64(0xBC550E96BA88A72B, 0xF5391FA9F18D6273, IMULH_64); | ||||
| 	CHECK(c.u64 == 0x02D93EF1269D3EE5); | ||||
| } | ||||
| 
 | ||||
| TEST_CASE("Unsigned division (64-bit / 32-bit -> 32-bit)", "[DIV_64]") { | ||||
| 	convertible_t a, b, c; | ||||
| 
 | ||||
| 	RX_EXECUTE_U64(8774217225983458895, 3014068202, DIV_64); | ||||
| 	CHECK(c.u64 == 2911087818); | ||||
| 
 | ||||
| 	RX_EXECUTE_U64(8774217225983458895, 0, DIV_64); | ||||
| 	CHECK(c.u64 == 8774217225983458895); | ||||
| 
 | ||||
| 	RX_EXECUTE_U64(3014068202, 8774217225983458895, DIV_64); | ||||
| 	CHECK(c.u64 == 2); | ||||
| } | ||||
| 
 | ||||
| TEST_CASE("Signed division (64-bit / 32-bit -> 32-bit)", "[IDIV_64]") { | ||||
| 	convertible_t a, b, c; | ||||
| 
 | ||||
| 	RX_EXECUTE_U64(8774217225983458895, 3014068202, IDIV_64); | ||||
| 	CHECK(c.u64 == 0xFFFFFFFE67B4994E); | ||||
| 
 | ||||
| 	RX_EXECUTE_U64(5, 0xFFFFFFFFFFFFFFFF, IDIV_64); | ||||
| 	CHECK(c.u64 == 0xFFFFFFFFFFFFFFFB); | ||||
| 
 | ||||
| 	RX_EXECUTE_U64(8774217225983458895, 0, IDIV_64); | ||||
| 	CHECK(c.u64 == 8774217225983458895); | ||||
| 
 | ||||
| 	RX_EXECUTE_U64(0x8000000000000000, 0xFFFFFFFFFFFFFFFF, IDIV_64); | ||||
| 	CHECK(c.u64 == 0x8000000000000000); | ||||
| 
 | ||||
| 	RX_EXECUTE_U64(0x8000000000000000, 0x93D1FFFFFFFFFFFF, IDIV_64); | ||||
| 	CHECK(c.u64 == 0x8000000000000000); | ||||
| 
 | ||||
| 	RX_EXECUTE_U64(0xFFFFFFFFB3A707EA, 8774217225983458895, IDIV_64); | ||||
| 	CHECK(c.u64 == 0xFFFFFFFFFFFFFFFF); | ||||
| } | ||||
| 
 | ||||
| TEST_CASE("Bitwise AND (64-bit)", "[AND_64]") { | ||||
| 	convertible_t a, b, c; | ||||
| 
 | ||||
| 	RX_EXECUTE_U64(0xCCCCCCCCCCCCCCCC, 0xAAAAAAAAAAAAAAAA, AND_64); | ||||
| 	CHECK(c.u64 == 0x8888888888888888); | ||||
| } | ||||
| 
 | ||||
| TEST_CASE("Bitwise AND (32-bit)", "[AND_32]") { | ||||
| 	convertible_t a, b, c; | ||||
| 
 | ||||
| 	RX_EXECUTE_U64(0xCCCCCCCCCCCCCCCC, 0xAAAAAAAAAAAAAAAA, AND_32); | ||||
| 	CHECK(c.u64 == 0x88888888); | ||||
| } | ||||
| 
 | ||||
| TEST_CASE("Bitwise OR (64-bit)", "[OR_64]") { | ||||
| 	convertible_t a, b, c; | ||||
| 
 | ||||
| 	RX_EXECUTE_U64(0x4444444444444444, 0xAAAAAAAAAAAAAAAA, OR_64); | ||||
| 	CHECK(c.u64 == 0xEEEEEEEEEEEEEEEE); | ||||
| } | ||||
| 
 | ||||
| TEST_CASE("Bitwise OR (32-bit)", "[OR_32]") { | ||||
| 	convertible_t a, b, c; | ||||
| 
 | ||||
| 	RX_EXECUTE_U64(0x4444444444444444, 0xAAAAAAAAAAAAAAAA, OR_32); | ||||
| 	CHECK(c.u64 == 0xEEEEEEEE); | ||||
| } | ||||
| 
 | ||||
| TEST_CASE("Bitwise XOR (64-bit)", "[XOR_64]") { | ||||
| 	convertible_t a, b, c; | ||||
| 
 | ||||
| 	RX_EXECUTE_U64(0x8888888888888888, 0xAAAAAAAAAAAAAAAA, XOR_64); | ||||
| 	CHECK(c.u64 == 0x2222222222222222); | ||||
| } | ||||
| 
 | ||||
| TEST_CASE("Bitwise XOR (32-bit)", "[XOR_32]") { | ||||
| 	convertible_t a, b, c; | ||||
| 
 | ||||
| 	RX_EXECUTE_U64(0x8888888888888888, 0xAAAAAAAAAAAAAAAA, XOR_32); | ||||
| 	CHECK(c.u64 == 0x22222222); | ||||
| } | ||||
| 
 | ||||
| TEST_CASE("Logical left shift (64-bit)", "[SHL_64]") { | ||||
| 	convertible_t a, b, c; | ||||
| 
 | ||||
| 	RX_EXECUTE_U64(0x3, 52, SHL_64); | ||||
| 	CHECK(c.u64 == 0x30000000000000); | ||||
| 
 | ||||
| 	RX_EXECUTE_U64(953360005391419562, 4569451684712230561, SHL_64); | ||||
| 	CHECK(c.u64 == 6978065200108797952); | ||||
| 
 | ||||
| 	RX_EXECUTE_U64(0x8000000000000000, 1, SHL_64); | ||||
| 	CHECK(c.u64 == 0); | ||||
| } | ||||
| 
 | ||||
| TEST_CASE("Logical right shift (64-bit)", "[SHR_64]") { | ||||
| 	convertible_t a, b, c; | ||||
| 
 | ||||
| 	RX_EXECUTE_U64(0x3, 52, SHR_64); | ||||
| 	CHECK(c.u64 == 0); | ||||
| 
 | ||||
| 	RX_EXECUTE_U64(953360005391419562, 4569451684712230561, SHR_64); | ||||
| 	CHECK(c.u64 == 110985711); | ||||
| 
 | ||||
| 	RX_EXECUTE_U64(0x8000000000000000, 1, SHR_64); | ||||
| 	CHECK(c.u64 == 0x4000000000000000); | ||||
| } | ||||
| 
 | ||||
| TEST_CASE("Arithmetic right shift (64-bit)", "[SAR_64]") { | ||||
| 	convertible_t a, b, c; | ||||
| 
 | ||||
| 	RX_EXECUTE_I64(-9, 2, SAR_64); | ||||
| 	CHECK(c.i64 == -3); | ||||
| 
 | ||||
| 	RX_EXECUTE_I64(INT64_MIN, 63, SAR_64); | ||||
| 	CHECK(c.i64 == -1); | ||||
| 
 | ||||
| 	RX_EXECUTE_I64(INT64_MAX, 163768499474606398, SAR_64); | ||||
| 	CHECK(c.i64 == 1); | ||||
| } | ||||
| 
 | ||||
| TEST_CASE("Circular left shift (64-bit)", "[ROL_64]") { | ||||
| 	convertible_t a, b, c; | ||||
| 
 | ||||
| 	RX_EXECUTE_U64(0x3, 52, ROL_64); | ||||
| 	CHECK(c.u64 == 0x30000000000000); | ||||
| 
 | ||||
| 	RX_EXECUTE_U64(953360005391419562, 4569451684712230561, ROL_64); | ||||
| 	CHECK(c.u64 == 6978065200552740799); | ||||
| 
 | ||||
| 	RX_EXECUTE_U64(0x8000000000000000, 1, ROL_64); | ||||
| 	CHECK(c.u64 == 1); | ||||
| } | ||||
| 
 | ||||
| TEST_CASE("Circular right shift (64-bit)", "[ROR_64]") { | ||||
| 	convertible_t a, b, c; | ||||
| 
 | ||||
| 	RX_EXECUTE_U64(0x3, 52, ROR_64); | ||||
| 	CHECK(c.u64 == 12288); | ||||
| 
 | ||||
| 	RX_EXECUTE_U64(953360005391419562, 4569451684712230561, ROR_64); | ||||
| 	CHECK(c.u64 == 0xD835C455069D81EF); | ||||
| 
 | ||||
| 	RX_EXECUTE_U64(0x8000000000000000, 1, ROR_64); | ||||
| 	CHECK(c.u64 == 0x4000000000000000); | ||||
| } | ||||
| 
 | ||||
| TEST_CASE("Denormal results are not produced", "[FTZ]") { | ||||
| 	FPINIT(); | ||||
| 	convertible_t a, b, c; | ||||
| 	a.i64 = 2048; | ||||
| 	FPDIV(a, DBL_MAX, c); | ||||
| #ifdef DEBUG | ||||
| 	std::cout << a.i64 << " / " << DBL_MAX << " = " << std::hex << c.u64 << std::endl; | ||||
| #endif | ||||
| 	REQUIRE(std::fpclassify(c.f64) != FP_SUBNORMAL); | ||||
| 	b.f64 = c.f64; | ||||
| 	a.i64 = 0; | ||||
| 	FPSUB_64(a, b, c); | ||||
| #ifdef DEBUG | ||||
| 	std::cout << a.i64 << " - " << b.f64 << " = " << std::hex << c.u64 << std::endl; | ||||
| #endif | ||||
| 	CHECK(std::fpclassify(c.f64) != FP_SUBNORMAL); | ||||
| } | ||||
| 
 | ||||
| TEST_CASE("NaN results are not produced", "[NAN]") { | ||||
| 	FPINIT(); | ||||
| 	convertible_t a, c; | ||||
| 	a.i64 = 0; | ||||
| 	FPDIV(a, 0, c); | ||||
| 	CHECK(std::fpclassify(c.f64) != FP_NAN); | ||||
| 	FPMUL(a, std::numeric_limits<double>::infinity(), c); | ||||
| 	CHECK(std::fpclassify(c.f64) != FP_NAN); | ||||
| } | ||||
| 
 | ||||
// Integer operands fed to rxRound() by the "IEEE-754 compliance" test case.
// NOTE(review): volatile presumably keeps the compiler from folding the
// floating point operations at compile time - confirm.

// addition
volatile int64_t fpAdda = 7379480244170225589;
volatile int64_t fpAddb = -438072579179686797;
// subtraction
volatile int64_t fpSuba = 2939258788088626026;
volatile int64_t fpSubb = 4786131045320678734;
// multiplication, two operand pairs
volatile int64_t fpMula1 = 8399833736388895639;
volatile int64_t fpMulb1 = 5671608020317594922;
volatile int64_t fpMula2 = -7094299423744805450;
volatile int64_t fpMulb2 = 4982086006202596504;
// division, two operand pairs
volatile int64_t fpDiva1 = 8399833736388895639;
volatile int64_t fpDivb1 = 5671608020317594922;
volatile int64_t fpDiva2 = -7434878587645025912;
volatile int64_t fpDivb2 = 5266243837734830806;
// square root
volatile int64_t fpSqrta = -7594301562963134542;
| 
 | ||||
| TEST_CASE("IEEE-754 compliance", "[FPU]") { | ||||
| 	FPINIT(); | ||||
| 	convertible_t a, b, c; | ||||
| 
 | ||||
| 	a.i64 = 2048; | ||||
| 	FPDIV(a, 0, c); | ||||
| 	CHECK(c.f64 == std::numeric_limits<double>::infinity()); | ||||
| 
 | ||||
| 	a.i64 = -2048; | ||||
| 	FPDIV(a, 0, c); | ||||
| 	CHECK(c.f64 == -std::numeric_limits<double>::infinity()); | ||||
| 
 | ||||
| #ifdef DEBUG | ||||
| 	std::cout << "FPROUND" << std::endl; | ||||
| #endif | ||||
| 	CHECK(rxRound(RoundToNearest, fpAdda, 0, &FPROUND) == 0x43d99a4b8bc531dcU); | ||||
| 	CHECK(rxRound(RoundDown, fpAdda, 0, &FPROUND) == 0x43d99a4b8bc531dcU); | ||||
| 	CHECK(rxRound(RoundUp, fpAdda, 0, &FPROUND) == 0x43d99a4b8bc531dcU); | ||||
| 	CHECK(rxRound(RoundToZero, fpAdda, 0, &FPROUND) == 0x43d99a4b8bc531dcU); | ||||
| 
 | ||||
| 	CHECK(rxRound(RoundToNearest, fpSuba, 0, &FPROUND) == 0x43c4652c25bf7bdcU); | ||||
| 	CHECK(rxRound(RoundDown, fpSuba, 0, &FPROUND) == 0x43c4652c25bf7bdcU); | ||||
| 	CHECK(rxRound(RoundUp, fpSuba, 0, &FPROUND) == 0x43c4652c25bf7bdcU); | ||||
| 	CHECK(rxRound(RoundToZero, fpSuba, 0, &FPROUND) == 0x43c4652c25bf7bdcU); | ||||
| 
 | ||||
| #ifdef DEBUG | ||||
| 	std::cout << "FPADD" << std::endl; | ||||
| #endif | ||||
| 	CHECK(rxRound(RoundToNearest, fpAdda, fpAddb, &FPADD_64) == 0xf9eba74f6c27d473U); | ||||
| 	CHECK(rxRound(RoundDown, fpAdda, fpAddb, &FPADD_64) == 0xf9eba74f6c27d473U); | ||||
| 	CHECK(rxRound(RoundUp, fpAdda, fpAddb, &FPADD_64) == 0xf9eba74f6c27d472U); | ||||
| 	CHECK(rxRound(RoundToZero, fpAdda, fpAddb, &FPADD_64) == 0xf9eba74f6c27d472U); | ||||
| 
 | ||||
| #ifdef DEBUG | ||||
| 	std::cout << "FPSUB" << std::endl; | ||||
| #endif | ||||
| 	CHECK(rxRound(RoundToNearest, fpSuba, fpSubb, &FPSUB_64) == 0x43c4652bb6bc2c49U); | ||||
| 	CHECK(rxRound(RoundDown, fpSuba, fpSubb, &FPSUB_64) == 0x43c4652bb6bc2c48U); | ||||
| 	CHECK(rxRound(RoundUp, fpSuba, fpSubb, &FPSUB_64) == 0x43c4652bb6bc2c49U); | ||||
| 	CHECK(rxRound(RoundToZero, fpSuba, fpSubb, &FPSUB_64) == 0x43c4652bb6bc2c48U); | ||||
| 
 | ||||
| #ifdef DEBUG | ||||
| 	std::cout << "FPMUL" << std::endl; | ||||
| #endif | ||||
| 	CHECK(rxRound(RoundToNearest, fpMula1, fpMulb1, &FPMUL_64) == 0x52a3abbb1677f3e9U); | ||||
| 	CHECK(rxRound(RoundDown, fpMula1, fpMulb1, &FPMUL_64) == 0x52a3abbb1677f3e8U); | ||||
| 	CHECK(rxRound(RoundUp, fpMula1, fpMulb1, &FPMUL_64) == 0x52a3abbb1677f3e9U); | ||||
| 	CHECK(rxRound(RoundToZero, fpMula1, fpMulb1, &FPMUL_64) == 0x52a3abbb1677f3e8U); | ||||
| 
 | ||||
| 	CHECK(rxRound(RoundToNearest, fpMula2, fpMulb2, &FPMUL_64) == 0xc90ea6c25e29c583U); | ||||
| 	CHECK(rxRound(RoundDown, fpMula2, fpMulb2, &FPMUL_64) == 0xc90ea6c25e29c583U); | ||||
| 	CHECK(rxRound(RoundUp, fpMula2, fpMulb2, &FPMUL_64) == 0xc90ea6c25e29c582U); | ||||
| 	CHECK(rxRound(RoundToZero, fpMula2, fpMulb2, &FPMUL_64) == 0xc90ea6c25e29c582U); | ||||
| 
 | ||||
| #ifdef DEBUG | ||||
| 	std::cout << "FPDIV" << std::endl; | ||||
| #endif | ||||
| 	CHECK(rxRound(RoundToNearest, fpDiva1, fpDivb1, &FPDIV_64) == 0x3515967d3015e81cU); | ||||
| 	CHECK(rxRound(RoundDown, fpDiva1, fpDivb1, &FPDIV_64) == 0x3515967d3015e81bU); | ||||
| 	CHECK(rxRound(RoundUp, fpDiva1, fpDivb1, &FPDIV_64) == 0x3515967d3015e81cU); | ||||
| 	CHECK(rxRound(RoundToZero, fpDiva1, fpDivb1, &FPDIV_64) == 0x3515967d3015e81bU); | ||||
| 
 | ||||
| 	CHECK(rxRound(RoundToNearest, fpDiva2, fpDivb2, &FPDIV_64) == 0xbab33c30b92b8fccU); | ||||
| 	CHECK(rxRound(RoundDown, fpDiva2, fpDivb2, &FPDIV_64) == 0xbab33c30b92b8fccU); | ||||
| 	CHECK(rxRound(RoundUp, fpDiva2, fpDivb2, &FPDIV_64) == 0xbab33c30b92b8fcbU); | ||||
| 	CHECK(rxRound(RoundToZero, fpDiva2, fpDivb2, &FPDIV_64) == 0xbab33c30b92b8fcbU); | ||||
| 
 | ||||
| #ifdef DEBUG | ||||
| 	std::cout << "FPSQRT" << std::endl; | ||||
| #endif | ||||
| 	CHECK(rxRound(RoundToNearest, fpSqrta, 0, &FPSQRT) == 0x41d304e3fcc31a2dU); | ||||
| 	CHECK(rxRound(RoundDown, fpSqrta, 0, &FPSQRT) == 0x41d304e3fcc31a2cU); | ||||
| 	CHECK(rxRound(RoundUp, fpSqrta, 0, &FPSQRT) == 0x41d304e3fcc31a2dU); | ||||
| 	CHECK(rxRound(RoundToZero, fpSqrta, 0, &FPSQRT) == 0x41d304e3fcc31a2cU); | ||||
| } | ||||
							
								
								
									
										77
									
								
								src/VirtualMachine.cpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										77
									
								
								src/VirtualMachine.cpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,77 @@ | |||
| /*
 | ||||
| Copyright (c) 2018 tevador | ||||
| 
 | ||||
| This file is part of RandomX. | ||||
| 
 | ||||
| RandomX is free software: you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation, either version 3 of the License, or | ||||
| (at your option) any later version. | ||||
| 
 | ||||
| RandomX is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
| GNU General Public License for more details. | ||||
| 
 | ||||
| You should have received a copy of the GNU General Public License | ||||
| along with RandomX.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| */ | ||||
| 
 | ||||
| #include "VirtualMachine.hpp" | ||||
| #include "common.hpp" | ||||
| #include "dataset.hpp" | ||||
| #include <cstring> | ||||
| 
 | ||||
| namespace RandomX { | ||||
| 	VirtualMachine::VirtualMachine(bool softAes) : softAes(softAes), lightClient(false) { | ||||
| 		mem.dataset = nullptr; | ||||
| 	} | ||||
| 
 | ||||
| 	void VirtualMachine::initializeDataset(const void* seed, bool light) { | ||||
| 		if (lightClient) { | ||||
| 			_mm_free(mem.lcm->cache); | ||||
| 			_mm_free(mem.lcm->block); | ||||
| 		} | ||||
| 		_mm_free(mem.dataset); | ||||
| 		lightClient = light; | ||||
| 		if (light) { | ||||
| 			if (softAes) { | ||||
| 				datasetInitLight<true>(seed, mem.lcm); | ||||
| 				readDataset = &datasetReadLight<true>; | ||||
| 			} | ||||
| 			else { | ||||
| 				datasetInitLight<false>(seed, mem.lcm); | ||||
| 				readDataset = &datasetReadLight<false>; | ||||
| 			} | ||||
| 		} | ||||
| 		else { | ||||
| 			readDataset = &datasetRead; | ||||
| 			if (softAes) { | ||||
| 				datasetInit<true>(seed, mem.dataset); | ||||
| 			} | ||||
| 			else { | ||||
| 				datasetInit<false>(seed, mem.dataset); | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	void VirtualMachine::initializeScratchpad(uint32_t index) { | ||||
| 		if (lightClient) { | ||||
| 			if (softAes) { | ||||
| 				initBlock<true>(mem.lcm->cache, ((uint8_t*)scratchpad) + DatasetBlockSize * 0, 4 * index + 0, mem.lcm->keys); | ||||
| 				initBlock<true>(mem.lcm->cache, ((uint8_t*)scratchpad) + DatasetBlockSize * 1, 4 * index + 1, mem.lcm->keys); | ||||
| 				initBlock<true>(mem.lcm->cache, ((uint8_t*)scratchpad) + DatasetBlockSize * 2, 4 * index + 2, mem.lcm->keys); | ||||
| 				initBlock<true>(mem.lcm->cache, ((uint8_t*)scratchpad) + DatasetBlockSize * 3, 4 * index + 3, mem.lcm->keys); | ||||
| 			} | ||||
| 			else { | ||||
| 				initBlock<false>(mem.lcm->cache, ((uint8_t*)scratchpad) + DatasetBlockSize * 0, 4 * index + 0, mem.lcm->keys); | ||||
| 				initBlock<false>(mem.lcm->cache, ((uint8_t*)scratchpad) + DatasetBlockSize * 1, 4 * index + 1, mem.lcm->keys); | ||||
| 				initBlock<false>(mem.lcm->cache, ((uint8_t*)scratchpad) + DatasetBlockSize * 2, 4 * index + 2, mem.lcm->keys); | ||||
| 				initBlock<false>(mem.lcm->cache, ((uint8_t*)scratchpad) + DatasetBlockSize * 3, 4 * index + 3, mem.lcm->keys); | ||||
| 			} | ||||
| 		} | ||||
| 		else { | ||||
| 			memcpy(scratchpad, mem.dataset + ScratchpadSize * index, ScratchpadSize); | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
							
								
								
									
										61
									
								
								src/VirtualMachine.hpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										61
									
								
								src/VirtualMachine.hpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,61 @@ | |||
| /*
 | ||||
| Copyright (c) 2018 tevador | ||||
| 
 | ||||
| This file is part of RandomX. | ||||
| 
 | ||||
| RandomX is free software: you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation, either version 3 of the License, or | ||||
| (at your option) any later version. | ||||
| 
 | ||||
| RandomX is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
| GNU General Public License for more details. | ||||
| 
 | ||||
| You should have received a copy of the GNU General Public License | ||||
| along with RandomX.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| */ | ||||
| 
 | ||||
| #pragma once | ||||
| #include <cstdint> | ||||
| #include "common.hpp" | ||||
| 
 | ||||
| namespace RandomX { | ||||
| 
 | ||||
| 	typedef convertible_t(*DatasetReadFunc)(addr_t, MemoryRegisters&); | ||||
| 
 | ||||
| 	class VirtualMachine { | ||||
| 	public: | ||||
| 		VirtualMachine(bool softAes); | ||||
| 		virtual ~VirtualMachine() {} | ||||
| 		void initializeDataset(const void* seed, bool light = false); | ||||
| 		void initializeScratchpad(uint32_t index); | ||||
| 		virtual void initializeProgram(const void* seed) = 0; | ||||
| 		virtual void execute() = 0; | ||||
| 		const RegisterFile& getRegisterFile() const { | ||||
| 			return reg; | ||||
| 		} | ||||
| 		const convertible_t* getScratchpad() const { | ||||
| 			return scratchpad; | ||||
| 		} | ||||
| 		const void* getCache() { | ||||
| 			if (lightClient) { | ||||
| 				return mem.lcm->cache; | ||||
| 			} | ||||
| 			return nullptr; | ||||
| 		} | ||||
| 		const __m128i* getKeys() { | ||||
| 			if (lightClient) { | ||||
| 				return mem.lcm->keys; | ||||
| 			} | ||||
| 			return nullptr; | ||||
| 		} | ||||
| 	protected: | ||||
| 		bool softAes, lightClient; | ||||
| 		RegisterFile reg; | ||||
| 		MemoryRegisters mem; | ||||
| 		DatasetReadFunc readDataset; | ||||
| 		alignas(16) convertible_t scratchpad[ScratchpadLength]; | ||||
| 	}; | ||||
| } | ||||
							
								
								
									
										220
									
								
								src/argon2.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										220
									
								
								src/argon2.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,220 @@ | |||
| /*
 | ||||
| Copyright (c) 2018 tevador | ||||
| 
 | ||||
| This file is part of RandomX. | ||||
| 
 | ||||
| RandomX is free software: you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation, either version 3 of the License, or | ||||
| (at your option) any later version. | ||||
| 
 | ||||
| RandomX is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
| GNU General Public License for more details. | ||||
| 
 | ||||
| You should have received a copy of the GNU General Public License | ||||
| along with RandomX.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| */ | ||||
| 
 | ||||
| /* Original code from Argon2 reference source code package used under CC0 Licence
 | ||||
|  * https://github.com/P-H-C/phc-winner-argon2
 | ||||
|  * Copyright 2015 | ||||
|  * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves | ||||
| */ | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include <stdint.h> | ||||
| #include <stddef.h> | ||||
| #include <limits.h> | ||||
| 
 | ||||
| /*
 | ||||
|  * Argon2 input parameter restrictions | ||||
|  */ | ||||
| 
 | ||||
/* Lanes (degree of parallelism): lower and upper bound */
#define ARGON2_MIN_LANES UINT32_C(1)
#define ARGON2_MAX_LANES UINT32_C(0xFFFFFF)

/* Threads: lower and upper bound */
#define ARGON2_MIN_THREADS UINT32_C(1)
#define ARGON2_MAX_THREADS UINT32_C(0xFFFFFF)

/* Synchronization points between lanes in each pass */
#define ARGON2_SYNC_POINTS UINT32_C(4)

/* Digest size in bytes: lower and upper bound */
#define ARGON2_MIN_OUTLEN UINT32_C(4)
#define ARGON2_MAX_OUTLEN UINT32_C(0xFFFFFFFF)

/* Smallest number of memory blocks (each of BLOCK_SIZE bytes) */
#define ARGON2_MIN_MEMORY (2 * ARGON2_SYNC_POINTS) /* 2 blocks per slice */

/* Smaller of two values; both arguments may be evaluated more than once */
#define ARGON2_MIN(a, b) ((a) < (b) ? (a) : (b))
/* Largest memory size: half the address space, capped at 2^32 blocks (4 TB) */
#define ARGON2_MAX_MEMORY_BITS                                                 \
    ARGON2_MIN(UINT32_C(32), (sizeof(void *) * CHAR_BIT - 10 - 1))
#define ARGON2_MAX_MEMORY                                                      \
    ARGON2_MIN(UINT32_C(0xFFFFFFFF), UINT64_C(1) << ARGON2_MAX_MEMORY_BITS)

/* Number of passes: lower and upper bound */
#define ARGON2_MIN_TIME UINT32_C(1)
#define ARGON2_MAX_TIME UINT32_C(0xFFFFFFFF)

/* Password length in bytes: lower and upper bound */
#define ARGON2_MIN_PWD_LENGTH UINT32_C(0)
#define ARGON2_MAX_PWD_LENGTH UINT32_C(0xFFFFFFFF)

/* Associated data length in bytes: lower and upper bound */
#define ARGON2_MIN_AD_LENGTH UINT32_C(0)
#define ARGON2_MAX_AD_LENGTH UINT32_C(0xFFFFFFFF)

/* Salt length in bytes: lower and upper bound */
#define ARGON2_MIN_SALT_LENGTH UINT32_C(8)
#define ARGON2_MAX_SALT_LENGTH UINT32_C(0xFFFFFFFF)

/* Key (secret) length in bytes: lower and upper bound */
#define ARGON2_MIN_SECRET UINT32_C(0)
#define ARGON2_MAX_SECRET UINT32_C(0xFFFFFFFF)

/* Bits of the context `flags` field selecting which inputs are securely
 * wiped (default = no wipe). */
#define ARGON2_DEFAULT_FLAGS UINT32_C(0)
#define ARGON2_FLAG_CLEAR_PASSWORD (UINT32_C(1) << 0)
#define ARGON2_FLAG_CLEAR_SECRET (UINT32_C(1) << 1)
| 
 | ||||
| 
 | ||||
/* Error codes: ARGON2_OK is zero, every failure is a distinct negative value */
typedef enum Argon2_ErrorCodes {
	ARGON2_OK = 0,

	/* output buffer */
	ARGON2_OUTPUT_PTR_NULL = -1,
	ARGON2_OUTPUT_TOO_SHORT = -2,
	ARGON2_OUTPUT_TOO_LONG = -3,

	/* input lengths */
	ARGON2_PWD_TOO_SHORT = -4,
	ARGON2_PWD_TOO_LONG = -5,
	ARGON2_SALT_TOO_SHORT = -6,
	ARGON2_SALT_TOO_LONG = -7,
	ARGON2_AD_TOO_SHORT = -8,
	ARGON2_AD_TOO_LONG = -9,
	ARGON2_SECRET_TOO_SHORT = -10,
	ARGON2_SECRET_TOO_LONG = -11,

	/* cost parameters */
	ARGON2_TIME_TOO_SMALL = -12,
	ARGON2_TIME_TOO_LARGE = -13,
	ARGON2_MEMORY_TOO_LITTLE = -14,
	ARGON2_MEMORY_TOO_MUCH = -15,
	ARGON2_LANES_TOO_FEW = -16,
	ARGON2_LANES_TOO_MANY = -17,

	/* NULL pointer combined with a non-zero length */
	ARGON2_PWD_PTR_MISMATCH = -18,
	ARGON2_SALT_PTR_MISMATCH = -19,
	ARGON2_SECRET_PTR_MISMATCH = -20,
	ARGON2_AD_PTR_MISMATCH = -21,

	/* memory management */
	ARGON2_MEMORY_ALLOCATION_ERROR = -22,
	ARGON2_FREE_MEMORY_CBK_NULL = -23,
	ARGON2_ALLOCATE_MEMORY_CBK_NULL = -24,

	ARGON2_INCORRECT_PARAMETER = -25,
	ARGON2_INCORRECT_TYPE = -26,

	ARGON2_OUT_PTR_MISMATCH = -27,

	ARGON2_THREADS_TOO_FEW = -28,
	ARGON2_THREADS_TOO_MANY = -29,

	ARGON2_MISSING_ARGS = -30,

	ARGON2_ENCODING_FAIL = -31,
	ARGON2_DECODING_FAIL = -32,

	ARGON2_THREAD_FAIL = -33,

	ARGON2_DECODING_LENGTH_FAIL = -34,

	ARGON2_VERIFY_MISMATCH = -35
} argon2_error_codes;
| 
 | ||||
/* Callback types for externally supplied memory management.
 * allocate_fptr receives the address of the pointer to fill in and the
 * number of bytes to allocate, and returns an int status.
 * deallocate_fptr receives the block and the same byte count. */
typedef int(*allocate_fptr)(uint8_t **memory, size_t bytes_to_allocate);
typedef void(*deallocate_fptr)(uint8_t *memory, size_t bytes_to_allocate);
| 
 | ||||
| /* Argon2 external data structures */ | ||||
| 
 | ||||
| /*
 | ||||
| 	***** | ||||
| 	* Context: structure to hold Argon2 inputs: | ||||
| 	*  output array and its length, | ||||
| 	*  password and its length, | ||||
| 	*  salt and its length, | ||||
| 	*  secret and its length, | ||||
| 	*  associated data and its length, | ||||
| 	*  number of passes, amount of used memory (in KBytes, can be rounded up a bit) | ||||
| 	*  number of parallel threads that will be run. | ||||
| 	* All the parameters above affect the output hash value. | ||||
| 	* Additionally, two function pointers can be provided to allocate and | ||||
| 	* deallocate the memory (if NULL, memory will be allocated internally). | ||||
| 	* Also, three flags indicate whether to erase password, secret as soon as they | ||||
| 	* are pre-hashed (and thus not needed anymore), and the entire memory | ||||
| 	***** | ||||
| 	* Simplest situation: you have output array out[8], password is stored in | ||||
| 	* pwd[32], salt is stored in salt[16], you do not have keys nor associated | ||||
| 	* data. You need to spend 1 GB of RAM and you run 5 passes of Argon2d with | ||||
| 	* 4 parallel lanes. | ||||
| 	* You want to erase the password, but you're OK with last pass not being | ||||
| 	* erased. You want to use the default memory allocator. | ||||
| 	* Then you initialize: | ||||
| 	Argon2_Context(out,8,pwd,32,salt,16,NULL,0,NULL,0,5,1<<20,4,4,NULL,NULL,true,false,false,false) | ||||
| 	*/ | ||||
| typedef struct Argon2_Context { | ||||
| 	uint8_t *out;    /* output array */ | ||||
| 	uint32_t outlen; /* digest length */ | ||||
| 
 | ||||
| 	uint8_t *pwd;    /* password array */ | ||||
| 	uint32_t pwdlen; /* password length */ | ||||
| 
 | ||||
| 	uint8_t *salt;    /* salt array */ | ||||
| 	uint32_t saltlen; /* salt length */ | ||||
| 
 | ||||
| 	uint8_t *secret;    /* key array */ | ||||
| 	uint32_t secretlen; /* key length */ | ||||
| 
 | ||||
| 	uint8_t *ad;    /* associated data array */ | ||||
| 	uint32_t adlen; /* associated data length */ | ||||
| 
 | ||||
| 	uint32_t t_cost;  /* number of passes */ | ||||
| 	uint32_t m_cost;  /* amount of memory requested (KB) */ | ||||
| 	uint32_t lanes;   /* number of lanes */ | ||||
| 	uint32_t threads; /* maximum number of threads */ | ||||
| 
 | ||||
| 	uint32_t version; /* version number */ | ||||
| 
 | ||||
| 	allocate_fptr allocate_cbk; /* pointer to memory allocator */ | ||||
| 	deallocate_fptr free_cbk;   /* pointer to memory deallocator */ | ||||
| 
 | ||||
| 	uint32_t flags; /* array of bool options */ | ||||
| } argon2_context; | ||||
| 
 | ||||
/* Argon2 primitive type: selects the d, i or id variant */
typedef enum Argon2_type {
	Argon2_d = 0,
	Argon2_i = 1,
	Argon2_id = 2
} argon2_type;
| 
 | ||||
/* Version of the algorithm; ARGON2_VERSION_NUMBER names the current one */
typedef enum Argon2_version {
	ARGON2_VERSION_10 = 0x10,
	ARGON2_VERSION_13 = 0x13,
	ARGON2_VERSION_NUMBER = ARGON2_VERSION_13
} argon2_version;
							
								
								
									
										507
									
								
								src/argon2_core.c
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										507
									
								
								src/argon2_core.c
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,507 @@ | |||
| /*
 | ||||
| Copyright (c) 2018 tevador | ||||
| 
 | ||||
| This file is part of RandomX. | ||||
| 
 | ||||
| RandomX is free software: you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation, either version 3 of the License, or | ||||
| (at your option) any later version. | ||||
| 
 | ||||
| RandomX is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
| GNU General Public License for more details. | ||||
| 
 | ||||
| You should have received a copy of the GNU General Public License | ||||
| along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 | ||||
| */ | ||||
| 
 | ||||
| /* Original code from Argon2 reference source code package used under CC0 Licence
 | ||||
|  * https://github.com/P-H-C/phc-winner-argon2
 | ||||
|  * Copyright 2015 | ||||
|  * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves | ||||
| */ | ||||
| 
 | ||||
|  /*For memory wiping*/ | ||||
| #ifdef _MSC_VER | ||||
| #include <windows.h> | ||||
| #include <winbase.h> /* For SecureZeroMemory */ | ||||
| #endif | ||||
| #if defined __STDC_LIB_EXT1__ | ||||
| #define __STDC_WANT_LIB_EXT1__ 1 | ||||
| #endif | ||||
| #define VC_GE_2005(version) (version >= 1400) | ||||
| 
 | ||||
| #include <stdio.h> | ||||
| #include <stdlib.h> | ||||
| #include <string.h> | ||||
| 
 | ||||
| #include "argon2_core.h" | ||||
| #include "blake2/blake2.h" | ||||
| #include "blake2/blake2-impl.h" | ||||
| 
 | ||||
| #ifdef GENKAT | ||||
| #include "genkat.h" | ||||
| #endif | ||||
| 
 | ||||
| #if defined(__clang__) | ||||
| #if __has_attribute(optnone) | ||||
| #define NOT_OPTIMIZED __attribute__((optnone)) | ||||
| #endif | ||||
| #elif defined(__GNUC__) | ||||
| #define GCC_VERSION                                                            \ | ||||
|     (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) | ||||
| #if GCC_VERSION >= 40400 | ||||
| #define NOT_OPTIMIZED __attribute__((optimize("O0"))) | ||||
| #endif | ||||
| #endif | ||||
| #ifndef NOT_OPTIMIZED | ||||
| #define NOT_OPTIMIZED | ||||
| #endif | ||||
| 
 | ||||
| /***************Instance and Position constructors**********/ | ||||
| void init_block_value(block *b, uint8_t in) { memset(b->v, in, sizeof(b->v)); } | ||||
| 
 | ||||
| void copy_block(block *dst, const block *src) { | ||||
| 	memcpy(dst->v, src->v, sizeof(uint64_t) * ARGON2_QWORDS_IN_BLOCK); | ||||
| } | ||||
| 
 | ||||
| void xor_block(block *dst, const block *src) { | ||||
| 	int i; | ||||
| 	for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) { | ||||
| 		dst->v[i] ^= src->v[i]; | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| static void load_block(block *dst, const void *input) { | ||||
| 	unsigned i; | ||||
| 	for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) { | ||||
| 		dst->v[i] = load64((const uint8_t *)input + i * sizeof(dst->v[i])); | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| static void store_block(void *output, const block *src) { | ||||
| 	unsigned i; | ||||
| 	for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) { | ||||
| 		store64((uint8_t *)output + i * sizeof(src->v[i]), src->v[i]); | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| /***************Memory functions*****************/ | ||||
| 
 | ||||
| int allocate_memory(const argon2_context *context, uint8_t **memory, | ||||
| 	size_t num, size_t size) { | ||||
| 	size_t memory_size = num * size; | ||||
| 	if (memory == NULL) { | ||||
| 		return ARGON2_MEMORY_ALLOCATION_ERROR; | ||||
| 	} | ||||
| 
 | ||||
| 	/* 1. Check for multiplication overflow */ | ||||
| 	if (size != 0 && memory_size / size != num) { | ||||
| 		return ARGON2_MEMORY_ALLOCATION_ERROR; | ||||
| 	} | ||||
| 
 | ||||
| 	/* 2. Try to allocate with appropriate allocator */ | ||||
| 	if (context->allocate_cbk) { | ||||
| 		(context->allocate_cbk)(memory, memory_size); | ||||
| 	} | ||||
| 	else { | ||||
| 		*memory = (uint8_t*)malloc(memory_size); | ||||
| 	} | ||||
| 
 | ||||
| 	if (*memory == NULL) { | ||||
| 		return ARGON2_MEMORY_ALLOCATION_ERROR; | ||||
| 	} | ||||
| 
 | ||||
| 	return ARGON2_OK; | ||||
| } | ||||
| 
 | ||||
| void free_memory(const argon2_context *context, uint8_t *memory, | ||||
| 	size_t num, size_t size) { | ||||
| 	size_t memory_size = num * size; | ||||
| 	clear_internal_memory(memory, memory_size); | ||||
| 	if (context->free_cbk) { | ||||
| 		(context->free_cbk)(memory, memory_size); | ||||
| 	} | ||||
| 	else { | ||||
| 		free(memory); | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| void NOT_OPTIMIZED secure_wipe_memory(void *v, size_t n) { | ||||
| #if defined(_MSC_VER) && VC_GE_2005(_MSC_VER) | ||||
| 	SecureZeroMemory(v, n); | ||||
| #elif defined memset_s | ||||
| 	memset_s(v, n, 0, n); | ||||
| #elif defined(__OpenBSD__) | ||||
| 	explicit_bzero(v, n); | ||||
| #else | ||||
| 	static void *(*const volatile memset_sec)(void *, int, size_t) = &memset; | ||||
| 	memset_sec(v, 0, n); | ||||
| #endif | ||||
| } | ||||
| 
 | ||||
/*
 * Compile-time switch for wiping internal buffers. It is set to 0 here, so
 * clear_internal_memory below never wipes anything; set it to a non-zero
 * value to have internal buffers passed through secure_wipe_memory.
 */
#define FLAG_clear_internal_memory 0

/* Wipe @n bytes at @v, but only when the flag is enabled and @v is not NULL. */
void clear_internal_memory(void *v, size_t n) {
	if (!FLAG_clear_internal_memory || v == NULL) {
		return;
	}
	secure_wipe_memory(v, n);
}
| 
 | ||||
| uint32_t index_alpha(const argon2_instance_t *instance, | ||||
| 	const argon2_position_t *position, uint32_t pseudo_rand, | ||||
| 	int same_lane) { | ||||
| 	/*
 | ||||
| 	 * Pass 0: | ||||
| 	 *      This lane : all already finished segments plus already constructed | ||||
| 	 * blocks in this segment | ||||
| 	 *      Other lanes : all already finished segments | ||||
| 	 * Pass 1+: | ||||
| 	 *      This lane : (SYNC_POINTS - 1) last segments plus already constructed | ||||
| 	 * blocks in this segment | ||||
| 	 *      Other lanes : (SYNC_POINTS - 1) last segments | ||||
| 	 */ | ||||
| 	uint32_t reference_area_size; | ||||
| 	uint64_t relative_position; | ||||
| 	uint32_t start_position, absolute_position; | ||||
| 
 | ||||
| 	if (0 == position->pass) { | ||||
| 		/* First pass */ | ||||
| 		if (0 == position->slice) { | ||||
| 			/* First slice */ | ||||
| 			reference_area_size = | ||||
| 				position->index - 1; /* all but the previous */ | ||||
| 		} | ||||
| 		else { | ||||
| 			if (same_lane) { | ||||
| 				/* The same lane => add current segment */ | ||||
| 				reference_area_size = | ||||
| 					position->slice * instance->segment_length + | ||||
| 					position->index - 1; | ||||
| 			} | ||||
| 			else { | ||||
| 				reference_area_size = | ||||
| 					position->slice * instance->segment_length + | ||||
| 					((position->index == 0) ? (-1) : 0); | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	else { | ||||
| 		/* Second pass */ | ||||
| 		if (same_lane) { | ||||
| 			reference_area_size = instance->lane_length - | ||||
| 				instance->segment_length + position->index - | ||||
| 				1; | ||||
| 		} | ||||
| 		else { | ||||
| 			reference_area_size = instance->lane_length - | ||||
| 				instance->segment_length + | ||||
| 				((position->index == 0) ? (-1) : 0); | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	/* 1.2.4. Mapping pseudo_rand to 0..<reference_area_size-1> and produce
 | ||||
| 	 * relative position */ | ||||
| 	relative_position = pseudo_rand; | ||||
| 	relative_position = relative_position * relative_position >> 32; | ||||
| 	relative_position = reference_area_size - 1 - | ||||
| 		(reference_area_size * relative_position >> 32); | ||||
| 
 | ||||
| 	/* 1.2.5 Computing starting position */ | ||||
| 	start_position = 0; | ||||
| 
 | ||||
| 	if (0 != position->pass) { | ||||
| 		start_position = (position->slice == ARGON2_SYNC_POINTS - 1) | ||||
| 			? 0 | ||||
| 			: (position->slice + 1) * instance->segment_length; | ||||
| 	} | ||||
| 
 | ||||
| 	/* 1.2.6. Computing absolute position */ | ||||
| 	absolute_position = (start_position + relative_position) % | ||||
| 		instance->lane_length; /* absolute position */ | ||||
| 	return absolute_position; | ||||
| } | ||||
| 
 | ||||
| /* Single-threaded version for p=1 case */ | ||||
| static int fill_memory_blocks_st(argon2_instance_t *instance) { | ||||
| 	uint32_t r, s, l; | ||||
| 
 | ||||
| 	for (r = 0; r < instance->passes; ++r) { | ||||
| 		for (s = 0; s < ARGON2_SYNC_POINTS; ++s) { | ||||
| 			for (l = 0; l < instance->lanes; ++l) { | ||||
| 				argon2_position_t position = { r, l, (uint8_t)s, 0 }; | ||||
| 				fill_segment(instance, position); | ||||
| 			} | ||||
| 		} | ||||
| #ifdef GENKAT | ||||
| 		internal_kat(instance, r); /* Print all memory blocks */ | ||||
| #endif | ||||
| 	} | ||||
| 	return ARGON2_OK; | ||||
| } | ||||
| 
 | ||||
| int fill_memory_blocks(argon2_instance_t *instance) { | ||||
| 	if (instance == NULL || instance->lanes == 0) { | ||||
| 		return ARGON2_INCORRECT_PARAMETER; | ||||
| 	} | ||||
| 	return fill_memory_blocks_st(instance); | ||||
| } | ||||
| 
 | ||||
| int validate_inputs(const argon2_context *context) { | ||||
| 	if (NULL == context) { | ||||
| 		return ARGON2_INCORRECT_PARAMETER; | ||||
| 	} | ||||
| 
 | ||||
| 	if (NULL == context->out) { | ||||
| 		return ARGON2_OUTPUT_PTR_NULL; | ||||
| 	} | ||||
| 
 | ||||
| 	/* Validate output length */ | ||||
| 	if (ARGON2_MIN_OUTLEN > context->outlen) { | ||||
| 		return ARGON2_OUTPUT_TOO_SHORT; | ||||
| 	} | ||||
| 
 | ||||
| 	if (ARGON2_MAX_OUTLEN < context->outlen) { | ||||
| 		return ARGON2_OUTPUT_TOO_LONG; | ||||
| 	} | ||||
| 
 | ||||
| 	/* Validate password (required param) */ | ||||
| 	if (NULL == context->pwd) { | ||||
| 		if (0 != context->pwdlen) { | ||||
| 			return ARGON2_PWD_PTR_MISMATCH; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	if (ARGON2_MIN_PWD_LENGTH > context->pwdlen) { | ||||
| 		return ARGON2_PWD_TOO_SHORT; | ||||
| 	} | ||||
| 
 | ||||
| 	if (ARGON2_MAX_PWD_LENGTH < context->pwdlen) { | ||||
| 		return ARGON2_PWD_TOO_LONG; | ||||
| 	} | ||||
| 
 | ||||
| 	/* Validate salt (required param) */ | ||||
| 	if (NULL == context->salt) { | ||||
| 		if (0 != context->saltlen) { | ||||
| 			return ARGON2_SALT_PTR_MISMATCH; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	if (ARGON2_MIN_SALT_LENGTH > context->saltlen) { | ||||
| 		return ARGON2_SALT_TOO_SHORT; | ||||
| 	} | ||||
| 
 | ||||
| 	if (ARGON2_MAX_SALT_LENGTH < context->saltlen) { | ||||
| 		return ARGON2_SALT_TOO_LONG; | ||||
| 	} | ||||
| 
 | ||||
| 	/* Validate secret (optional param) */ | ||||
| 	if (NULL == context->secret) { | ||||
| 		if (0 != context->secretlen) { | ||||
| 			return ARGON2_SECRET_PTR_MISMATCH; | ||||
| 		} | ||||
| 	} | ||||
| 	else { | ||||
| 		if (ARGON2_MIN_SECRET > context->secretlen) { | ||||
| 			return ARGON2_SECRET_TOO_SHORT; | ||||
| 		} | ||||
| 		if (ARGON2_MAX_SECRET < context->secretlen) { | ||||
| 			return ARGON2_SECRET_TOO_LONG; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	/* Validate associated data (optional param) */ | ||||
| 	if (NULL == context->ad) { | ||||
| 		if (0 != context->adlen) { | ||||
| 			return ARGON2_AD_PTR_MISMATCH; | ||||
| 		} | ||||
| 	} | ||||
| 	else { | ||||
| 		if (ARGON2_MIN_AD_LENGTH > context->adlen) { | ||||
| 			return ARGON2_AD_TOO_SHORT; | ||||
| 		} | ||||
| 		if (ARGON2_MAX_AD_LENGTH < context->adlen) { | ||||
| 			return ARGON2_AD_TOO_LONG; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	/* Validate memory cost */ | ||||
| 	if (ARGON2_MIN_MEMORY > context->m_cost) { | ||||
| 		return ARGON2_MEMORY_TOO_LITTLE; | ||||
| 	} | ||||
| 
 | ||||
| 	if (ARGON2_MAX_MEMORY < context->m_cost) { | ||||
| 		return ARGON2_MEMORY_TOO_MUCH; | ||||
| 	} | ||||
| 
 | ||||
| 	if (context->m_cost < 8 * context->lanes) { | ||||
| 		return ARGON2_MEMORY_TOO_LITTLE; | ||||
| 	} | ||||
| 
 | ||||
| 	/* Validate time cost */ | ||||
| 	if (ARGON2_MIN_TIME > context->t_cost) { | ||||
| 		return ARGON2_TIME_TOO_SMALL; | ||||
| 	} | ||||
| 
 | ||||
| 	if (ARGON2_MAX_TIME < context->t_cost) { | ||||
| 		return ARGON2_TIME_TOO_LARGE; | ||||
| 	} | ||||
| 
 | ||||
| 	/* Validate lanes */ | ||||
| 	if (ARGON2_MIN_LANES > context->lanes) { | ||||
| 		return ARGON2_LANES_TOO_FEW; | ||||
| 	} | ||||
| 
 | ||||
| 	if (ARGON2_MAX_LANES < context->lanes) { | ||||
| 		return ARGON2_LANES_TOO_MANY; | ||||
| 	} | ||||
| 
 | ||||
| 	/* Validate threads */ | ||||
| 	if (ARGON2_MIN_THREADS > context->threads) { | ||||
| 		return ARGON2_THREADS_TOO_FEW; | ||||
| 	} | ||||
| 
 | ||||
| 	if (ARGON2_MAX_THREADS < context->threads) { | ||||
| 		return ARGON2_THREADS_TOO_MANY; | ||||
| 	} | ||||
| 
 | ||||
| 	if (NULL != context->allocate_cbk && NULL == context->free_cbk) { | ||||
| 		return ARGON2_FREE_MEMORY_CBK_NULL; | ||||
| 	} | ||||
| 
 | ||||
| 	if (NULL == context->allocate_cbk && NULL != context->free_cbk) { | ||||
| 		return ARGON2_ALLOCATE_MEMORY_CBK_NULL; | ||||
| 	} | ||||
| 
 | ||||
| 	return ARGON2_OK; | ||||
| } | ||||
| 
 | ||||
| void fill_first_blocks(uint8_t *blockhash, const argon2_instance_t *instance) { | ||||
| 	uint32_t l; | ||||
| 	/* Make the first and second block in each lane as G(H0||0||i) or
 | ||||
| 	   G(H0||1||i) */ | ||||
| 	uint8_t blockhash_bytes[ARGON2_BLOCK_SIZE]; | ||||
| 	for (l = 0; l < instance->lanes; ++l) { | ||||
| 
 | ||||
| 		store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 0); | ||||
| 		store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH + 4, l); | ||||
| 		blake2b_long(blockhash_bytes, ARGON2_BLOCK_SIZE, blockhash, | ||||
| 			ARGON2_PREHASH_SEED_LENGTH); | ||||
| 		load_block(&instance->memory[l * instance->lane_length + 0], | ||||
| 			blockhash_bytes); | ||||
| 
 | ||||
| 		store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 1); | ||||
| 		blake2b_long(blockhash_bytes, ARGON2_BLOCK_SIZE, blockhash, | ||||
| 			ARGON2_PREHASH_SEED_LENGTH); | ||||
| 		load_block(&instance->memory[l * instance->lane_length + 1], | ||||
| 			blockhash_bytes); | ||||
| 	} | ||||
| 	clear_internal_memory(blockhash_bytes, ARGON2_BLOCK_SIZE); | ||||
| } | ||||
| 
 | ||||
| void initial_hash(uint8_t *blockhash, argon2_context *context, argon2_type type) { | ||||
| 	blake2b_state BlakeHash; | ||||
| 	uint8_t value[sizeof(uint32_t)]; | ||||
| 
 | ||||
| 	if (NULL == context || NULL == blockhash) { | ||||
| 		return; | ||||
| 	} | ||||
| 
 | ||||
| 	blake2b_init(&BlakeHash, ARGON2_PREHASH_DIGEST_LENGTH); | ||||
| 
 | ||||
| 	store32(&value, context->lanes); | ||||
| 	blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); | ||||
| 
 | ||||
| 	store32(&value, context->outlen); | ||||
| 	blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); | ||||
| 
 | ||||
| 	store32(&value, context->m_cost); | ||||
| 	blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); | ||||
| 
 | ||||
| 	store32(&value, context->t_cost); | ||||
| 	blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); | ||||
| 
 | ||||
| 	store32(&value, context->version); | ||||
| 	blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); | ||||
| 
 | ||||
| 	store32(&value, (uint32_t)type); | ||||
| 	blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); | ||||
| 
 | ||||
| 	store32(&value, context->pwdlen); | ||||
| 	blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); | ||||
| 
 | ||||
| 	if (context->pwd != NULL) { | ||||
| 		blake2b_update(&BlakeHash, (const uint8_t *)context->pwd, | ||||
| 			context->pwdlen); | ||||
| 
 | ||||
| 		if (context->flags & ARGON2_FLAG_CLEAR_PASSWORD) { | ||||
| 			secure_wipe_memory(context->pwd, context->pwdlen); | ||||
| 			context->pwdlen = 0; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	store32(&value, context->saltlen); | ||||
| 	blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); | ||||
| 
 | ||||
| 	if (context->salt != NULL) { | ||||
| 		blake2b_update(&BlakeHash, (const uint8_t *)context->salt, context->saltlen); | ||||
| 	} | ||||
| 
 | ||||
| 	store32(&value, context->secretlen); | ||||
| 	blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); | ||||
| 
 | ||||
| 	if (context->secret != NULL) { | ||||
| 		blake2b_update(&BlakeHash, (const uint8_t *)context->secret, | ||||
| 			context->secretlen); | ||||
| 
 | ||||
| 		if (context->flags & ARGON2_FLAG_CLEAR_SECRET) { | ||||
| 			secure_wipe_memory(context->secret, context->secretlen); | ||||
| 			context->secretlen = 0; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	store32(&value, context->adlen); | ||||
| 	blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value)); | ||||
| 
 | ||||
| 	if (context->ad != NULL) { | ||||
| 		blake2b_update(&BlakeHash, (const uint8_t *)context->ad, | ||||
| 			context->adlen); | ||||
| 	} | ||||
| 
 | ||||
| 	blake2b_final(&BlakeHash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH); | ||||
| } | ||||
| 
 | ||||
| int initialize(argon2_instance_t *instance, argon2_context *context) { | ||||
| 	uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH]; | ||||
| 	int result = ARGON2_OK; | ||||
| 
 | ||||
| 	if (instance == NULL || context == NULL) | ||||
| 		return ARGON2_INCORRECT_PARAMETER; | ||||
| 	instance->context_ptr = context; | ||||
| 
 | ||||
| 	/* 1. Memory allocation */ | ||||
| 	/*result = allocate_memory(context, (uint8_t **)&(instance->memory), instance->memory_blocks, sizeof(block));
 | ||||
| 	if (result != ARGON2_OK) { | ||||
| 		return result; | ||||
| 	}*/ | ||||
| 
 | ||||
| 	/* 2. Initial hashing */ | ||||
| 	/* H_0 + 8 extra bytes to produce the first blocks */ | ||||
| 	/* uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH]; */ | ||||
| 	/* Hashing all inputs */ | ||||
| 	initial_hash(blockhash, context, instance->type); | ||||
| 	/* Zeroing 8 extra bytes */ | ||||
| 	clear_internal_memory(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, | ||||
| 		ARGON2_PREHASH_SEED_LENGTH - | ||||
| 		ARGON2_PREHASH_DIGEST_LENGTH); | ||||
| 
 | ||||
| 	/* 3. Creating first blocks, we always have at least two blocks in a slice
 | ||||
| 	 */ | ||||
| 	fill_first_blocks(blockhash, instance); | ||||
| 	/* Clearing the hash */ | ||||
| 	clear_internal_memory(blockhash, ARGON2_PREHASH_SEED_LENGTH); | ||||
| 
 | ||||
| 	return ARGON2_OK; | ||||
| } | ||||
							
								
								
									
										245
									
								
								src/argon2_core.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										245
									
								
								src/argon2_core.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,245 @@ | |||
| /*
 | ||||
| Copyright (c) 2018 tevador | ||||
| 
 | ||||
| This file is part of RandomX. | ||||
| 
 | ||||
| RandomX is free software: you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation, either version 3 of the License, or | ||||
| (at your option) any later version. | ||||
| 
 | ||||
| RandomX is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
| GNU General Public License for more details. | ||||
| 
 | ||||
| You should have received a copy of the GNU General Public License | ||||
| along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 | ||||
| */ | ||||
| 
 | ||||
| /* Original code from Argon2 reference source code package used under CC0 Licence
 | ||||
|  * https://github.com/P-H-C/phc-winner-argon2
 | ||||
|  * Copyright 2015 | ||||
|  * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves | ||||
| */ | ||||
| 
 | ||||
| #ifndef ARGON2_CORE_H | ||||
| #define ARGON2_CORE_H | ||||
| 
 | ||||
| #include <stdint.h> | ||||
| #include "argon2.h" | ||||
| 
 | ||||
| #if defined(__cplusplus) | ||||
| extern "C" { | ||||
| #endif | ||||
| 
 | ||||
| #define CONST_CAST(x) (x)(uintptr_t) | ||||
| 
 | ||||
|  /**********************Argon2 internal constants*******************************/ | ||||
| 
 | ||||
| enum argon2_core_constants { | ||||
| 	/* Memory block size in bytes */ | ||||
| 	ARGON2_BLOCK_SIZE = 1024, | ||||
| 	ARGON2_QWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 8, | ||||
| 	ARGON2_OWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 16, | ||||
| 	ARGON2_HWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 32, | ||||
| 	ARGON2_512BIT_WORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 64, | ||||
| 
 | ||||
| 	/* Number of pseudo-random values generated by one call to Blake in Argon2i
 | ||||
| 	   to | ||||
| 	   generate reference block positions */ | ||||
| 	ARGON2_ADDRESSES_IN_BLOCK = 128, | ||||
| 
 | ||||
| 	/* Pre-hashing digest length (H0) and its extension by 8 bytes used when creating the first blocks */ | ||||
| 	ARGON2_PREHASH_DIGEST_LENGTH = 64, | ||||
| 	ARGON2_PREHASH_SEED_LENGTH = 72 | ||||
| }; | ||||
| 
 | ||||
| /*************************Argon2 internal data types***********************/ | ||||
| 
 | ||||
| /*
 | ||||
|  * Structure for the (1KB) memory block implemented as 128 64-bit words. | ||||
|  * Memory blocks can be copied and XORed (copy_block, xor_block). Individual | ||||
|  * words are accessed through v[] (no bounds checking). | ||||
|  */ | ||||
| typedef struct block_ { uint64_t v[ARGON2_QWORDS_IN_BLOCK]; } block; | ||||
| 
 | ||||
| /*****************Functions that work with the block******************/ | ||||
| 
 | ||||
| /* Initialize each byte of the block with @in */ | ||||
| void init_block_value(block *b, uint8_t in); | ||||
| 
 | ||||
| /* Copy block @src to block @dst */ | ||||
| void copy_block(block *dst, const block *src); | ||||
| 
 | ||||
| /* XOR @src onto @dst bytewise */ | ||||
| void xor_block(block *dst, const block *src); | ||||
| 
 | ||||
| /*
 | ||||
|  * Argon2 instance: memory pointer, number of passes, amount of memory, type, | ||||
|  * and derived values. | ||||
|  * Used to evaluate the number and location of blocks to construct in each | ||||
|  * thread | ||||
|  */ | ||||
| typedef struct Argon2_instance_t { | ||||
| 	block *memory;          /* Memory pointer; indexed as lane * lane_length + offset */ | ||||
| 	uint32_t version;       /* presumably an Argon2_version value - TODO confirm */ | ||||
| 	uint32_t passes;        /* Number of passes */ | ||||
| 	uint32_t memory_blocks; /* Number of blocks in memory */ | ||||
| 	uint32_t segment_length; /* Number of blocks in one segment (one slice of a lane) */ | ||||
| 	uint32_t lane_length;    /* Number of blocks in one lane */ | ||||
| 	uint32_t lanes;          /* Number of lanes */ | ||||
| 	uint32_t threads;        /* not read by argon2_core.c, which fills memory single-threaded */ | ||||
| 	argon2_type type;        /* Argon2 variant; hashed into H0 by initialize */ | ||||
| 	int print_internals; /* whether to print the memory blocks */ | ||||
| 	argon2_context *context_ptr; /* points back to original context */ | ||||
| } argon2_instance_t; | ||||
| 
 | ||||
| /*
 | ||||
|  * Argon2 position: where we construct the block right now. Used to distribute | ||||
|  * work between threads. | ||||
|  */ | ||||
| typedef struct Argon2_position_t { | ||||
| 	uint32_t pass;  /* pass number, starting at 0 */ | ||||
| 	uint32_t lane;  /* lane number */ | ||||
| 	uint8_t slice;  /* slice number within the pass, below ARGON2_SYNC_POINTS */ | ||||
| 	uint32_t index; /* block index within the current segment */ | ||||
| } argon2_position_t; | ||||
| 
 | ||||
| /* Struct that holds the inputs for a thread running fill_segment (not referenced by argon2_core.c) */ | ||||
| typedef struct Argon2_thread_data { | ||||
| 	argon2_instance_t *instance_ptr; | ||||
| 	argon2_position_t pos; | ||||
| } argon2_thread_data; | ||||
| 
 | ||||
| /*************************Argon2 core functions********************************/ | ||||
| 
 | ||||
| /* Allocates memory to the given pointer, uses the appropriate allocator as
 | ||||
|  * specified in the context. Total allocated memory is num*size. | ||||
|  * @param context argon2_context which specifies the allocator | ||||
|  * @param memory pointer to the pointer to the memory | ||||
|  * @param size the size in bytes for each element to be allocated | ||||
|  * @param num the number of elements to be allocated | ||||
|  * @return ARGON2_OK if @memory is a valid pointer and memory is allocated | ||||
|  */ | ||||
| int allocate_memory(const argon2_context *context, uint8_t **memory, | ||||
| 	size_t num, size_t size); | ||||
| 
 | ||||
| /*
 | ||||
|  * Frees memory at the given pointer, uses the appropriate deallocator as | ||||
|  * specified in the context. Also cleans the memory using clear_internal_memory. | ||||
|  * @param context argon2_context which specifies the deallocator | ||||
|  * @param memory pointer to buffer to be freed | ||||
|  * @param size the size in bytes for each element to be deallocated | ||||
|  * @param num the number of elements to be deallocated | ||||
|  */ | ||||
| void free_memory(const argon2_context *context, uint8_t *memory, | ||||
| 	size_t num, size_t size); | ||||
| 
 | ||||
| /* Function that securely cleans the memory. This ignores any flags set
 | ||||
|  * regarding clearing memory. Usually one just calls clear_internal_memory. | ||||
|  * @param v Pointer to the memory | ||||
|  * @param n Memory size in bytes | ||||
|  */ | ||||
| void secure_wipe_memory(void *v, size_t n); | ||||
| 
 | ||||
| /* Function that securely clears the memory if FLAG_clear_internal_memory is
 | ||||
|  * set. If the flag isn't set, this function does nothing. | ||||
|  * @param v Pointer to the memory | ||||
|  * @param n Memory size in bytes | ||||
|  */ | ||||
| void clear_internal_memory(void *v, size_t n); | ||||
| 
 | ||||
| /*
 | ||||
|  * Computes absolute position of reference block in the lane following a skewed | ||||
|  * distribution and using a pseudo-random value as input | ||||
|  * @param instance Pointer to the current instance | ||||
|  * @param position Pointer to the current position | ||||
|  * @param pseudo_rand 32-bit pseudo-random value used to determine the position | ||||
|  * @param same_lane Indicates if the block will be taken from the current lane. | ||||
|  * If so we can reference the current segment | ||||
|  * @pre All pointers must be valid | ||||
|  */ | ||||
| uint32_t index_alpha(const argon2_instance_t *instance, | ||||
| 	const argon2_position_t *position, uint32_t pseudo_rand, | ||||
| 	int same_lane); | ||||
| 
 | ||||
| /*
 | ||||
|  * Function that validates all inputs against predefined restrictions and return | ||||
|  * an error code | ||||
|  * @param context Pointer to current Argon2 context | ||||
|  * @return ARGON2_OK if everything is all right, otherwise one of error codes | ||||
|  * (all defined in <argon2.h>) | ||||
|  */ | ||||
| int validate_inputs(const argon2_context *context); | ||||
| 
 | ||||
| /*
 | ||||
|  * Hashes all the inputs into @a blockhash[PREHASH_DIGEST_LENGTH], clears | ||||
|  * password and secret if needed | ||||
|  * @param  context  Pointer to the Argon2 internal structure containing memory | ||||
|  * pointer, and parameters for time and space requirements. | ||||
|  * @param  blockhash Buffer for pre-hashing digest | ||||
|  * @param  type Argon2 type | ||||
|  * @pre    @a blockhash must have at least @a PREHASH_DIGEST_LENGTH bytes | ||||
|  * allocated | ||||
|  */ | ||||
| void initial_hash(uint8_t *blockhash, argon2_context *context, | ||||
| 	argon2_type type); | ||||
| 
 | ||||
| /*
 | ||||
|  * Function creates first 2 blocks per lane | ||||
|  * @param instance Pointer to the current instance | ||||
|  * @param blockhash Pointer to the pre-hashing digest | ||||
|  * @pre blockhash must point to @a PREHASH_SEED_LENGTH allocated values | ||||
|  */ | ||||
| void fill_first_blocks(uint8_t *blockhash, const argon2_instance_t *instance); | ||||
| 
 | ||||
| /*
 | ||||
|  * Function hashes the inputs with Blake and creates the first two blocks of | ||||
|  * each lane in @instance->memory. The allocation step is commented out in | ||||
|  * argon2_core.c, so the caller must provide the memory beforehand. | ||||
|  * @param  context  Pointer to the Argon2 context holding the inputs to hash | ||||
|  * (password, salt, secret, associated data and cost parameters). | ||||
|  * @param  instance Current Argon2 instance | ||||
|  * @return ARGON2_OK if successful, otherwise an Argon2 error code | ||||
|  * (ARGON2_INCORRECT_PARAMETER if @instance or @context is NULL). | ||||
|  */ | ||||
| int initialize(argon2_instance_t *instance, argon2_context *context); | ||||
| 
 | ||||
| /*
 | ||||
|  * XORing the last block of each lane, hashing it, making the tag. Deallocates | ||||
|  * the memory. | ||||
|  * @param context Pointer to current Argon2 context (use only the out parameters | ||||
|  * from it) | ||||
|  * @param instance Pointer to current instance of Argon2 | ||||
|  * @pre instance->memory must point to necessary amount of memory | ||||
|  * @pre context->out must point to outlen bytes of memory | ||||
|  * @pre if context->free_cbk is not NULL, it should point to a function that | ||||
|  * deallocates memory | ||||
|  */ | ||||
| void finalize(const argon2_context *context, argon2_instance_t *instance); | ||||
| 
 | ||||
| /*
 | ||||
|  * Function that fills the segment using previous segments also from other | ||||
|  * threads | ||||
|  * @param instance Pointer to the current instance | ||||
|  * @param position Current position | ||||
|  * @pre all block pointers must be valid | ||||
|  */ | ||||
| void fill_segment(const argon2_instance_t *instance, | ||||
| 	argon2_position_t position); | ||||
| 
 | ||||
| /*
 | ||||
|  * Function that fills the entire memory t_cost times based on the first two | ||||
|  * blocks in each lane | ||||
|  * @param instance Pointer to the current instance | ||||
|  * @return ARGON2_OK if successful, ARGON2_INCORRECT_PARAMETER if @instance is | ||||
|  * NULL or has zero lanes | ||||
|  */ | ||||
| int fill_memory_blocks(argon2_instance_t *instance); | ||||
| 
 | ||||
| #if defined(__cplusplus) | ||||
| } | ||||
| #endif | ||||
| 
 | ||||
| #endif | ||||
							
								
								
									
										205
									
								
								src/argon2_ref.c
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										205
									
								
								src/argon2_ref.c
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,205 @@ | |||
| /*
 | ||||
| Copyright (c) 2018 tevador | ||||
| 
 | ||||
| This file is part of RandomX. | ||||
| 
 | ||||
| RandomX is free software: you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation, either version 3 of the License, or | ||||
| (at your option) any later version. | ||||
| 
 | ||||
| RandomX is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
| GNU General Public License for more details. | ||||
| 
 | ||||
| You should have received a copy of the GNU General Public License | ||||
| along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 | ||||
| */ | ||||
| 
 | ||||
| /* Original code from Argon2 reference source code package used under CC0 Licence
 | ||||
|  * https://github.com/P-H-C/phc-winner-argon2
 | ||||
|  * Copyright 2015 | ||||
|  * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves | ||||
| */ | ||||
| 
 | ||||
| #include <stdint.h> | ||||
| #include <string.h> | ||||
| #include <stdlib.h> | ||||
| 
 | ||||
| #include "argon2.h" | ||||
| #include "argon2_core.h" | ||||
| 
 | ||||
| #include "blake2/blamka-round-ref.h" | ||||
| #include "blake2/blake2-impl.h" | ||||
| #include "blake2/blake2.h" | ||||
| 
 | ||||
|  /*
 | ||||
|   * Function fills a new memory block and optionally XORs the old block over the new one. | ||||
|   * @next_block must be initialized. | ||||
|   * @param prev_block Pointer to the previous block | ||||
|   * @param ref_block Pointer to the reference block | ||||
|   * @param next_block Pointer to the block to be constructed | ||||
|   * @param with_xor Whether to XOR into the new block (1) or just overwrite (0) | ||||
|   * @pre all block pointers must be valid | ||||
|   */ | ||||
| static void fill_block(const block *prev_block, const block *ref_block, | ||||
| 	block *next_block, int with_xor) { | ||||
| 	block blockR, block_tmp; | ||||
| 	unsigned i; | ||||
| 
 | ||||
| 	copy_block(&blockR, ref_block); | ||||
| 	xor_block(&blockR, prev_block); | ||||
| 	copy_block(&block_tmp, &blockR); | ||||
| 	/* Now blockR = ref_block + prev_block and block_tmp = ref_block + prev_block */ | ||||
| 	if (with_xor) { | ||||
| 		/* Saving the next block contents for XOR over: */ | ||||
| 		xor_block(&block_tmp, next_block); | ||||
| 		/* Now blockR = ref_block + prev_block and
 | ||||
| 		   block_tmp = ref_block + prev_block + next_block */ | ||||
| 	} | ||||
| 
 | ||||
| 	/* Apply Blake2 on columns of 64-bit words: (0,1,...,15) , then
 | ||||
| 	   (16,17,..31)... finally (112,113,...127) */ | ||||
| 	for (i = 0; i < 8; ++i) { | ||||
| 		BLAKE2_ROUND_NOMSG( | ||||
| 			blockR.v[16 * i], blockR.v[16 * i + 1], blockR.v[16 * i + 2], | ||||
| 			blockR.v[16 * i + 3], blockR.v[16 * i + 4], blockR.v[16 * i + 5], | ||||
| 			blockR.v[16 * i + 6], blockR.v[16 * i + 7], blockR.v[16 * i + 8], | ||||
| 			blockR.v[16 * i + 9], blockR.v[16 * i + 10], blockR.v[16 * i + 11], | ||||
| 			blockR.v[16 * i + 12], blockR.v[16 * i + 13], blockR.v[16 * i + 14], | ||||
| 			blockR.v[16 * i + 15]); | ||||
| 	} | ||||
| 
 | ||||
| 	/* Apply Blake2 on rows of 64-bit words: (0,1,16,17,...112,113), then
 | ||||
| 	   (2,3,18,19,...,114,115).. finally (14,15,30,31,...,126,127) */ | ||||
| 	for (i = 0; i < 8; i++) { | ||||
| 		BLAKE2_ROUND_NOMSG( | ||||
| 			blockR.v[2 * i], blockR.v[2 * i + 1], blockR.v[2 * i + 16], | ||||
| 			blockR.v[2 * i + 17], blockR.v[2 * i + 32], blockR.v[2 * i + 33], | ||||
| 			blockR.v[2 * i + 48], blockR.v[2 * i + 49], blockR.v[2 * i + 64], | ||||
| 			blockR.v[2 * i + 65], blockR.v[2 * i + 80], blockR.v[2 * i + 81], | ||||
| 			blockR.v[2 * i + 96], blockR.v[2 * i + 97], blockR.v[2 * i + 112], | ||||
| 			blockR.v[2 * i + 113]); | ||||
| 	} | ||||
| 
 | ||||
| 	copy_block(next_block, &block_tmp); | ||||
| 	xor_block(next_block, &blockR); | ||||
| } | ||||
| 
 | ||||
| static void next_addresses(block *address_block, block *input_block, | ||||
| 	const block *zero_block) { | ||||
| 	input_block->v[6]++; | ||||
| 	fill_block(zero_block, input_block, address_block, 0); | ||||
| 	fill_block(zero_block, address_block, address_block, 0); | ||||
| } | ||||
| 
 | ||||
| void fill_segment(const argon2_instance_t *instance, | ||||
| 	argon2_position_t position) { | ||||
| 	block *ref_block = NULL, *curr_block = NULL; | ||||
| 	block address_block, input_block, zero_block; | ||||
| 	uint64_t pseudo_rand, ref_index, ref_lane; | ||||
| 	uint32_t prev_offset, curr_offset; | ||||
| 	uint32_t starting_index; | ||||
| 	uint32_t i; | ||||
| 	int data_independent_addressing; | ||||
| 
 | ||||
| 	if (instance == NULL) { | ||||
| 		return; | ||||
| 	} | ||||
| 
 | ||||
| 	data_independent_addressing = | ||||
| 		(instance->type == Argon2_i) || | ||||
| 		(instance->type == Argon2_id && (position.pass == 0) && | ||||
| 		(position.slice < ARGON2_SYNC_POINTS / 2)); | ||||
| 
 | ||||
| 	if (data_independent_addressing) { | ||||
| 		init_block_value(&zero_block, 0); | ||||
| 		init_block_value(&input_block, 0); | ||||
| 
 | ||||
| 		input_block.v[0] = position.pass; | ||||
| 		input_block.v[1] = position.lane; | ||||
| 		input_block.v[2] = position.slice; | ||||
| 		input_block.v[3] = instance->memory_blocks; | ||||
| 		input_block.v[4] = instance->passes; | ||||
| 		input_block.v[5] = instance->type; | ||||
| 	} | ||||
| 
 | ||||
| 	starting_index = 0; | ||||
| 
 | ||||
| 	if ((0 == position.pass) && (0 == position.slice)) { | ||||
| 		starting_index = 2; /* we have already generated the first two blocks */ | ||||
| 
 | ||||
| 		/* Don't forget to generate the first block of addresses: */ | ||||
| 		if (data_independent_addressing) { | ||||
| 			next_addresses(&address_block, &input_block, &zero_block); | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	/* Offset of the current block */ | ||||
| 	curr_offset = position.lane * instance->lane_length + | ||||
| 		position.slice * instance->segment_length + starting_index; | ||||
| 
 | ||||
| 	if (0 == curr_offset % instance->lane_length) { | ||||
| 		/* Last block in this lane */ | ||||
| 		prev_offset = curr_offset + instance->lane_length - 1; | ||||
| 	} | ||||
| 	else { | ||||
| 		/* Previous block */ | ||||
| 		prev_offset = curr_offset - 1; | ||||
| 	} | ||||
| 
 | ||||
| 	for (i = starting_index; i < instance->segment_length; | ||||
| 		++i, ++curr_offset, ++prev_offset) { | ||||
| 		/*1.1 Rotating prev_offset if needed */ | ||||
| 		if (curr_offset % instance->lane_length == 1) { | ||||
| 			prev_offset = curr_offset - 1; | ||||
| 		} | ||||
| 
 | ||||
| 		/* 1.2 Computing the index of the reference block */ | ||||
| 		/* 1.2.1 Taking pseudo-random value from the previous block */ | ||||
| 		if (data_independent_addressing) { | ||||
| 			if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) { | ||||
| 				next_addresses(&address_block, &input_block, &zero_block); | ||||
| 			} | ||||
| 			pseudo_rand = address_block.v[i % ARGON2_ADDRESSES_IN_BLOCK]; | ||||
| 		} | ||||
| 		else { | ||||
| 			pseudo_rand = instance->memory[prev_offset].v[0]; | ||||
| 		} | ||||
| 
 | ||||
| 		/* 1.2.2 Computing the lane of the reference block */ | ||||
| 		ref_lane = ((pseudo_rand >> 32)) % instance->lanes; | ||||
| 
 | ||||
| 		if ((position.pass == 0) && (position.slice == 0)) { | ||||
| 			/* Can not reference other lanes yet */ | ||||
| 			ref_lane = position.lane; | ||||
| 		} | ||||
| 
 | ||||
| 		/* 1.2.3 Computing the number of possible reference block within the
 | ||||
| 		 * lane. | ||||
| 		 */ | ||||
| 		position.index = i; | ||||
| 		ref_index = index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF, | ||||
| 			ref_lane == position.lane); | ||||
| 
 | ||||
| 		/* 2 Creating a new block */ | ||||
| 		ref_block = | ||||
| 			instance->memory + instance->lane_length * ref_lane + ref_index; | ||||
| 		curr_block = instance->memory + curr_offset; | ||||
| 		if (ARGON2_VERSION_10 == instance->version) { | ||||
| 			/* version 1.2.1 and earlier: overwrite, not XOR */ | ||||
| 			fill_block(instance->memory + prev_offset, ref_block, curr_block, 0); | ||||
| 		} | ||||
| 		else { | ||||
| 			if (0 == position.pass) { | ||||
| 				fill_block(instance->memory + prev_offset, ref_block, | ||||
| 					curr_block, 0); | ||||
| 			} | ||||
| 			else { | ||||
| 				fill_block(instance->memory + prev_offset, ref_block, | ||||
| 					curr_block, 1); | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
							
								
								
									
										162
									
								
								src/blake2/blake2-impl.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										162
									
								
								src/blake2/blake2-impl.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,162 @@ | |||
| /*
 | ||||
| Copyright (c) 2018 tevador | ||||
| 
 | ||||
| This file is part of RandomX. | ||||
| 
 | ||||
| RandomX is free software: you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation, either version 3 of the License, or | ||||
| (at your option) any later version. | ||||
| 
 | ||||
| RandomX is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
| GNU General Public License for more details. | ||||
| 
 | ||||
| You should have received a copy of the GNU General Public License | ||||
| along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 | ||||
| */ | ||||
| 
 | ||||
| /* Original code from Argon2 reference source code package used under CC0 Licence
 | ||||
|  * https://github.com/P-H-C/phc-winner-argon2
 | ||||
|  * Copyright 2015 | ||||
|  * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves | ||||
| */ | ||||
| 
 | ||||
| #ifndef PORTABLE_BLAKE2_IMPL_H | ||||
| #define PORTABLE_BLAKE2_IMPL_H | ||||
| 
 | ||||
| #include <stdint.h> | ||||
| #include <string.h> | ||||
| 
 | ||||
| #if defined(_MSC_VER) | ||||
| #define BLAKE2_INLINE __inline | ||||
| #elif defined(__GNUC__) || defined(__clang__) | ||||
| #define BLAKE2_INLINE __inline__ | ||||
| #else | ||||
| #define BLAKE2_INLINE | ||||
| #endif | ||||
| 
 | ||||
|  /* Argon2 Team - Begin Code */ | ||||
|  /*
 | ||||
| 	Not an exhaustive list, but should cover the majority of modern platforms | ||||
| 	Additionally, the code will always be correct---this is only a performance | ||||
| 	tweak. | ||||
|  */ | ||||
| #if (defined(__BYTE_ORDER__) &&                                                \ | ||||
|      (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) ||                           \ | ||||
|     defined(__LITTLE_ENDIAN__) || defined(__ARMEL__) || defined(__MIPSEL__) || \ | ||||
|     defined(__AARCH64EL__) || defined(__amd64__) || defined(__i386__) ||       \ | ||||
|     defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64) ||                \ | ||||
|     defined(_M_ARM) | ||||
| #define NATIVE_LITTLE_ENDIAN | ||||
| #endif | ||||
|  /* Argon2 Team - End Code */ | ||||
| 
 | ||||
| static BLAKE2_INLINE uint32_t load32(const void *src) { | ||||
| #if defined(NATIVE_LITTLE_ENDIAN) | ||||
| 	uint32_t w; | ||||
| 	memcpy(&w, src, sizeof w); | ||||
| 	return w; | ||||
| #else | ||||
| 	const uint8_t *p = (const uint8_t *)src; | ||||
| 	uint32_t w = *p++; | ||||
| 	w |= (uint32_t)(*p++) << 8; | ||||
| 	w |= (uint32_t)(*p++) << 16; | ||||
| 	w |= (uint32_t)(*p++) << 24; | ||||
| 	return w; | ||||
| #endif | ||||
| } | ||||
| 
 | ||||
| static BLAKE2_INLINE uint64_t load64(const void *src) { | ||||
| #if defined(NATIVE_LITTLE_ENDIAN) | ||||
| 	uint64_t w; | ||||
| 	memcpy(&w, src, sizeof w); | ||||
| 	return w; | ||||
| #else | ||||
| 	const uint8_t *p = (const uint8_t *)src; | ||||
| 	uint64_t w = *p++; | ||||
| 	w |= (uint64_t)(*p++) << 8; | ||||
| 	w |= (uint64_t)(*p++) << 16; | ||||
| 	w |= (uint64_t)(*p++) << 24; | ||||
| 	w |= (uint64_t)(*p++) << 32; | ||||
| 	w |= (uint64_t)(*p++) << 40; | ||||
| 	w |= (uint64_t)(*p++) << 48; | ||||
| 	w |= (uint64_t)(*p++) << 56; | ||||
| 	return w; | ||||
| #endif | ||||
| } | ||||
| 
 | ||||
| static BLAKE2_INLINE void store32(void *dst, uint32_t w) { | ||||
| #if defined(NATIVE_LITTLE_ENDIAN) | ||||
| 	memcpy(dst, &w, sizeof w); | ||||
| #else | ||||
| 	uint8_t *p = (uint8_t *)dst; | ||||
| 	*p++ = (uint8_t)w; | ||||
| 	w >>= 8; | ||||
| 	*p++ = (uint8_t)w; | ||||
| 	w >>= 8; | ||||
| 	*p++ = (uint8_t)w; | ||||
| 	w >>= 8; | ||||
| 	*p++ = (uint8_t)w; | ||||
| #endif | ||||
| } | ||||
| 
 | ||||
| static BLAKE2_INLINE void store64(void *dst, uint64_t w) { | ||||
| #if defined(NATIVE_LITTLE_ENDIAN) | ||||
| 	memcpy(dst, &w, sizeof w); | ||||
| #else | ||||
| 	uint8_t *p = (uint8_t *)dst; | ||||
| 	*p++ = (uint8_t)w; | ||||
| 	w >>= 8; | ||||
| 	*p++ = (uint8_t)w; | ||||
| 	w >>= 8; | ||||
| 	*p++ = (uint8_t)w; | ||||
| 	w >>= 8; | ||||
| 	*p++ = (uint8_t)w; | ||||
| 	w >>= 8; | ||||
| 	*p++ = (uint8_t)w; | ||||
| 	w >>= 8; | ||||
| 	*p++ = (uint8_t)w; | ||||
| 	w >>= 8; | ||||
| 	*p++ = (uint8_t)w; | ||||
| 	w >>= 8; | ||||
| 	*p++ = (uint8_t)w; | ||||
| #endif | ||||
| } | ||||
| 
 | ||||
| static BLAKE2_INLINE uint64_t load48(const void *src) { | ||||
| 	const uint8_t *p = (const uint8_t *)src; | ||||
| 	uint64_t w = *p++; | ||||
| 	w |= (uint64_t)(*p++) << 8; | ||||
| 	w |= (uint64_t)(*p++) << 16; | ||||
| 	w |= (uint64_t)(*p++) << 24; | ||||
| 	w |= (uint64_t)(*p++) << 32; | ||||
| 	w |= (uint64_t)(*p++) << 40; | ||||
| 	return w; | ||||
| } | ||||
| 
 | ||||
| static BLAKE2_INLINE void store48(void *dst, uint64_t w) { | ||||
| 	uint8_t *p = (uint8_t *)dst; | ||||
| 	*p++ = (uint8_t)w; | ||||
| 	w >>= 8; | ||||
| 	*p++ = (uint8_t)w; | ||||
| 	w >>= 8; | ||||
| 	*p++ = (uint8_t)w; | ||||
| 	w >>= 8; | ||||
| 	*p++ = (uint8_t)w; | ||||
| 	w >>= 8; | ||||
| 	*p++ = (uint8_t)w; | ||||
| 	w >>= 8; | ||||
| 	*p++ = (uint8_t)w; | ||||
| } | ||||
| 
 | ||||
| static BLAKE2_INLINE uint32_t rotr32(const uint32_t w, const unsigned c) { | ||||
| 	return (w >> c) | (w << (32 - c)); | ||||
| } | ||||
| 
 | ||||
| static BLAKE2_INLINE uint64_t rotr64(const uint64_t w, const unsigned c) { | ||||
| 	return (w >> c) | (w << (64 - c)); | ||||
| } | ||||
| 
 | ||||
| #endif | ||||
							
								
								
									
										98
									
								
								src/blake2/blake2.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										98
									
								
								src/blake2/blake2.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,98 @@ | |||
| /*
 | ||||
| Copyright (c) 2018 tevador | ||||
| 
 | ||||
| This file is part of RandomX. | ||||
| 
 | ||||
| RandomX is free software: you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation, either version 3 of the License, or | ||||
| (at your option) any later version. | ||||
| 
 | ||||
| RandomX is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
| GNU General Public License for more details. | ||||
| 
 | ||||
| You should have received a copy of the GNU General Public License | ||||
| along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 | ||||
| */ | ||||
| 
 | ||||
| /* Original code from Argon2 reference source code package used under CC0 Licence
 | ||||
|  * https://github.com/P-H-C/phc-winner-argon2
 | ||||
|  * Copyright 2015 | ||||
|  * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves | ||||
| */ | ||||
| 
 | ||||
| #ifndef PORTABLE_BLAKE2_H | ||||
| #define PORTABLE_BLAKE2_H | ||||
| 
 | ||||
#include <limits.h>
#include <stddef.h>
#include <stdint.h>
| 
 | ||||
| #if defined(__cplusplus) | ||||
| extern "C" { | ||||
| #endif | ||||
| 
 | ||||
| 	enum blake2b_constant { | ||||
| 		BLAKE2B_BLOCKBYTES = 128, | ||||
| 		BLAKE2B_OUTBYTES = 64, | ||||
| 		BLAKE2B_KEYBYTES = 64, | ||||
| 		BLAKE2B_SALTBYTES = 16, | ||||
| 		BLAKE2B_PERSONALBYTES = 16 | ||||
| 	}; | ||||
| 
 | ||||
| #pragma pack(push, 1) | ||||
| 	typedef struct __blake2b_param { | ||||
| 		uint8_t digest_length;                   /* 1 */ | ||||
| 		uint8_t key_length;                      /* 2 */ | ||||
| 		uint8_t fanout;                          /* 3 */ | ||||
| 		uint8_t depth;                           /* 4 */ | ||||
| 		uint32_t leaf_length;                    /* 8 */ | ||||
| 		uint64_t node_offset;                    /* 16 */ | ||||
| 		uint8_t node_depth;                      /* 17 */ | ||||
| 		uint8_t inner_length;                    /* 18 */ | ||||
| 		uint8_t reserved[14];                    /* 32 */ | ||||
| 		uint8_t salt[BLAKE2B_SALTBYTES];         /* 48 */ | ||||
| 		uint8_t personal[BLAKE2B_PERSONALBYTES]; /* 64 */ | ||||
| 	} blake2b_param; | ||||
| #pragma pack(pop) | ||||
| 
 | ||||
| 	typedef struct __blake2b_state { | ||||
| 		uint64_t h[8]; | ||||
| 		uint64_t t[2]; | ||||
| 		uint64_t f[2]; | ||||
| 		uint8_t buf[BLAKE2B_BLOCKBYTES]; | ||||
| 		unsigned buflen; | ||||
| 		unsigned outlen; | ||||
| 		uint8_t last_node; | ||||
| 	} blake2b_state; | ||||
| 
 | ||||
| 	/* Ensure param structs have not been wrongly padded */ | ||||
| 	/* Poor man's static_assert */ | ||||
| 	enum { | ||||
| 		blake2_size_check_0 = 1 / !!(CHAR_BIT == 8), | ||||
| 		blake2_size_check_2 = | ||||
| 		1 / !!(sizeof(blake2b_param) == sizeof(uint64_t) * CHAR_BIT) | ||||
| 	}; | ||||
| 
 | ||||
| 	/* Streaming API */ | ||||
| 	int blake2b_init(blake2b_state *S, size_t outlen); | ||||
| 	int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key, | ||||
| 		size_t keylen); | ||||
| 	int blake2b_init_param(blake2b_state *S, const blake2b_param *P); | ||||
| 	int blake2b_update(blake2b_state *S, const void *in, size_t inlen); | ||||
| 	int blake2b_final(blake2b_state *S, void *out, size_t outlen); | ||||
| 
 | ||||
| 	/* Simple API */ | ||||
| 	int blake2b(void *out, size_t outlen, const void *in, size_t inlen, | ||||
| 		const void *key, size_t keylen); | ||||
| 
 | ||||
| 	/* Argon2 Team - Begin Code */ | ||||
| 	int blake2b_long(void *out, size_t outlen, const void *in, size_t inlen); | ||||
| 	/* Argon2 Team - End Code */ | ||||
| 
 | ||||
| #if defined(__cplusplus) | ||||
| } | ||||
| #endif | ||||
| 
 | ||||
| #endif | ||||
							
								
								
									
										400
									
								
								src/blake2/blake2b.c
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										400
									
								
								src/blake2/blake2b.c
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,400 @@ | |||
| /*
 | ||||
| Copyright (c) 2018 tevador | ||||
| 
 | ||||
| This file is part of RandomX. | ||||
| 
 | ||||
| RandomX is free software: you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation, either version 3 of the License, or | ||||
| (at your option) any later version. | ||||
| 
 | ||||
| RandomX is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
| GNU General Public License for more details. | ||||
| 
 | ||||
| You should have received a copy of the GNU General Public License | ||||
| along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
 | ||||
| */ | ||||
| 
 | ||||
| /* Original code from Argon2 reference source code package used under CC0 Licence
 | ||||
|  * https://github.com/P-H-C/phc-winner-argon2
 | ||||
|  * Copyright 2015 | ||||
|  * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves | ||||
| */ | ||||
| 
 | ||||
| #include <stdint.h> | ||||
| #include <string.h> | ||||
| #include <stdio.h> | ||||
| 
 | ||||
| #include "blake2.h" | ||||
| #include "blake2-impl.h" | ||||
| 
 | ||||
/* Initialization vector: seeds h[0..7] in blake2b_init0 and fills the upper
   half of the working vector in blake2b_compress. */
static const uint64_t blake2b_IV[8] = {
	UINT64_C(0x6a09e667f3bcc908),
	UINT64_C(0xbb67ae8584caa73b),
	UINT64_C(0x3c6ef372fe94f82b),
	UINT64_C(0xa54ff53a5f1d36f1),
	UINT64_C(0x510e527fade682d1),
	UINT64_C(0x9b05688c2b3e6c1f),
	UINT64_C(0x1f83d9abfb41bd6b),
	UINT64_C(0x5be0cd19137e2179)
};
| 
 | ||||
/* Message word schedule: row r lists the order in which the 16 message words
   are consumed in round r. Rows 10 and 11 repeat rows 0 and 1. */
static const unsigned int blake2b_sigma[12][16] = {
	/* round  0 */ { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15},
	/* round  1 */ {14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3},
	/* round  2 */ {11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4},
	/* round  3 */ { 7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8},
	/* round  4 */ { 9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13},
	/* round  5 */ { 2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9},
	/* round  6 */ {12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11},
	/* round  7 */ {13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10},
	/* round  8 */ { 6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5},
	/* round  9 */ {10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13,  0},
	/* round 10 */ { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15},
	/* round 11 */ {14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3},
};
| 
 | ||||
| static BLAKE2_INLINE void blake2b_set_lastnode(blake2b_state *S) { | ||||
| 	S->f[1] = (uint64_t)-1; | ||||
| } | ||||
| 
 | ||||
| static BLAKE2_INLINE void blake2b_set_lastblock(blake2b_state *S) { | ||||
| 	if (S->last_node) { | ||||
| 		blake2b_set_lastnode(S); | ||||
| 	} | ||||
| 	S->f[0] = (uint64_t)-1; | ||||
| } | ||||
| 
 | ||||
| static BLAKE2_INLINE void blake2b_increment_counter(blake2b_state *S, | ||||
| 	uint64_t inc) { | ||||
| 	S->t[0] += inc; | ||||
| 	S->t[1] += (S->t[0] < inc); | ||||
| } | ||||
| 
 | ||||
| static BLAKE2_INLINE void blake2b_invalidate_state(blake2b_state *S) { | ||||
| 	//clear_internal_memory(S, sizeof(*S));      /* wipe */
 | ||||
| 	blake2b_set_lastblock(S); /* invalidate for further use */ | ||||
| } | ||||
| 
 | ||||
| static BLAKE2_INLINE void blake2b_init0(blake2b_state *S) { | ||||
| 	memset(S, 0, sizeof(*S)); | ||||
| 	memcpy(S->h, blake2b_IV, sizeof(S->h)); | ||||
| } | ||||
| 
 | ||||
| int blake2b_init_param(blake2b_state *S, const blake2b_param *P) { | ||||
| 	const unsigned char *p = (const unsigned char *)P; | ||||
| 	unsigned int i; | ||||
| 
 | ||||
| 	if (NULL == P || NULL == S) { | ||||
| 		return -1; | ||||
| 	} | ||||
| 
 | ||||
| 	blake2b_init0(S); | ||||
| 	/* IV XOR Parameter Block */ | ||||
| 	for (i = 0; i < 8; ++i) { | ||||
| 		S->h[i] ^= load64(&p[i * sizeof(S->h[i])]); | ||||
| 	} | ||||
| 	S->outlen = P->digest_length; | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| /* Sequential blake2b initialization */ | ||||
| int blake2b_init(blake2b_state *S, size_t outlen) { | ||||
| 	blake2b_param P; | ||||
| 
 | ||||
| 	if (S == NULL) { | ||||
| 		return -1; | ||||
| 	} | ||||
| 
 | ||||
| 	if ((outlen == 0) || (outlen > BLAKE2B_OUTBYTES)) { | ||||
| 		blake2b_invalidate_state(S); | ||||
| 		return -1; | ||||
| 	} | ||||
| 
 | ||||
| 	/* Setup Parameter Block for unkeyed BLAKE2 */ | ||||
| 	P.digest_length = (uint8_t)outlen; | ||||
| 	P.key_length = 0; | ||||
| 	P.fanout = 1; | ||||
| 	P.depth = 1; | ||||
| 	P.leaf_length = 0; | ||||
| 	P.node_offset = 0; | ||||
| 	P.node_depth = 0; | ||||
| 	P.inner_length = 0; | ||||
| 	memset(P.reserved, 0, sizeof(P.reserved)); | ||||
| 	memset(P.salt, 0, sizeof(P.salt)); | ||||
| 	memset(P.personal, 0, sizeof(P.personal)); | ||||
| 
 | ||||
| 	return blake2b_init_param(S, &P); | ||||
| } | ||||
| 
 | ||||
| int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key, size_t keylen) { | ||||
| 	blake2b_param P; | ||||
| 
 | ||||
| 	if (S == NULL) { | ||||
| 		return -1; | ||||
| 	} | ||||
| 
 | ||||
| 	if ((outlen == 0) || (outlen > BLAKE2B_OUTBYTES)) { | ||||
| 		blake2b_invalidate_state(S); | ||||
| 		return -1; | ||||
| 	} | ||||
| 
 | ||||
| 	if ((key == 0) || (keylen == 0) || (keylen > BLAKE2B_KEYBYTES)) { | ||||
| 		blake2b_invalidate_state(S); | ||||
| 		return -1; | ||||
| 	} | ||||
| 
 | ||||
| 	/* Setup Parameter Block for keyed BLAKE2 */ | ||||
| 	P.digest_length = (uint8_t)outlen; | ||||
| 	P.key_length = (uint8_t)keylen; | ||||
| 	P.fanout = 1; | ||||
| 	P.depth = 1; | ||||
| 	P.leaf_length = 0; | ||||
| 	P.node_offset = 0; | ||||
| 	P.node_depth = 0; | ||||
| 	P.inner_length = 0; | ||||
| 	memset(P.reserved, 0, sizeof(P.reserved)); | ||||
| 	memset(P.salt, 0, sizeof(P.salt)); | ||||
| 	memset(P.personal, 0, sizeof(P.personal)); | ||||
| 
 | ||||
| 	if (blake2b_init_param(S, &P) < 0) { | ||||
| 		blake2b_invalidate_state(S); | ||||
| 		return -1; | ||||
| 	} | ||||
| 
 | ||||
| 	{ | ||||
| 		uint8_t block[BLAKE2B_BLOCKBYTES]; | ||||
| 		memset(block, 0, BLAKE2B_BLOCKBYTES); | ||||
| 		memcpy(block, key, keylen); | ||||
| 		blake2b_update(S, block, BLAKE2B_BLOCKBYTES); | ||||
| 		/* Burn the key from stack */ | ||||
| 		//clear_internal_memory(block, BLAKE2B_BLOCKBYTES);
 | ||||
| 	} | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| static void blake2b_compress(blake2b_state *S, const uint8_t *block) { | ||||
| 	uint64_t m[16]; | ||||
| 	uint64_t v[16]; | ||||
| 	unsigned int i, r; | ||||
| 
 | ||||
| 	for (i = 0; i < 16; ++i) { | ||||
| 		m[i] = load64(block + i * sizeof(m[i])); | ||||
| 	} | ||||
| 
 | ||||
| 	for (i = 0; i < 8; ++i) { | ||||
| 		v[i] = S->h[i]; | ||||
| 	} | ||||
| 
 | ||||
| 	v[8] = blake2b_IV[0]; | ||||
| 	v[9] = blake2b_IV[1]; | ||||
| 	v[10] = blake2b_IV[2]; | ||||
| 	v[11] = blake2b_IV[3]; | ||||
| 	v[12] = blake2b_IV[4] ^ S->t[0]; | ||||
| 	v[13] = blake2b_IV[5] ^ S->t[1]; | ||||
| 	v[14] = blake2b_IV[6] ^ S->f[0]; | ||||
| 	v[15] = blake2b_IV[7] ^ S->f[1]; | ||||
| 
 | ||||
| #define G(r, i, a, b, c, d)                                                    \ | ||||
|     do {                                                                       \ | ||||
|         a = a + b + m[blake2b_sigma[r][2 * i + 0]];                            \ | ||||
|         d = rotr64(d ^ a, 32);                                                 \ | ||||
|         c = c + d;                                                             \ | ||||
|         b = rotr64(b ^ c, 24);                                                 \ | ||||
|         a = a + b + m[blake2b_sigma[r][2 * i + 1]];                            \ | ||||
|         d = rotr64(d ^ a, 16);                                                 \ | ||||
|         c = c + d;                                                             \ | ||||
|         b = rotr64(b ^ c, 63);                                                 \ | ||||
|     } while ((void)0, 0) | ||||
| 
 | ||||
| #define ROUND(r)                                                               \ | ||||
|     do {                                                                       \ | ||||
|         G(r, 0, v[0], v[4], v[8], v[12]);                                      \ | ||||
|         G(r, 1, v[1], v[5], v[9], v[13]);                                      \ | ||||
|         G(r, 2, v[2], v[6], v[10], v[14]);                                     \ | ||||
|         G(r, 3, v[3], v[7], v[11], v[15]);                                     \ | ||||
|         G(r, 4, v[0], v[5], v[10], v[15]);                                     \ | ||||
|         G(r, 5, v[1], v[6], v[11], v[12]);                                     \ | ||||
|         G(r, 6, v[2], v[7], v[8], v[13]);                                      \ | ||||
|         G(r, 7, v[3], v[4], v[9], v[14]);                                      \ | ||||
|     } while ((void)0, 0) | ||||
| 
 | ||||
| 	for (r = 0; r < 12; ++r) { | ||||
| 		ROUND(r); | ||||
| 	} | ||||
| 
 | ||||
| 	for (i = 0; i < 8; ++i) { | ||||
| 		S->h[i] = S->h[i] ^ v[i] ^ v[i + 8]; | ||||
| 	} | ||||
| 
 | ||||
| #undef G | ||||
| #undef ROUND | ||||
| } | ||||
| 
 | ||||
| int blake2b_update(blake2b_state *S, const void *in, size_t inlen) { | ||||
| 	const uint8_t *pin = (const uint8_t *)in; | ||||
| 
 | ||||
| 	if (inlen == 0) { | ||||
| 		return 0; | ||||
| 	} | ||||
| 
 | ||||
| 	/* Sanity check */ | ||||
| 	if (S == NULL || in == NULL) { | ||||
| 		return -1; | ||||
| 	} | ||||
| 
 | ||||
| 	/* Is this a reused state? */ | ||||
| 	if (S->f[0] != 0) { | ||||
| 		return -1; | ||||
| 	} | ||||
| 
 | ||||
| 	if (S->buflen + inlen > BLAKE2B_BLOCKBYTES) { | ||||
| 		/* Complete current block */ | ||||
| 		size_t left = S->buflen; | ||||
| 		size_t fill = BLAKE2B_BLOCKBYTES - left; | ||||
| 		memcpy(&S->buf[left], pin, fill); | ||||
| 		blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES); | ||||
| 		blake2b_compress(S, S->buf); | ||||
| 		S->buflen = 0; | ||||
| 		inlen -= fill; | ||||
| 		pin += fill; | ||||
| 		/* Avoid buffer copies when possible */ | ||||
| 		while (inlen > BLAKE2B_BLOCKBYTES) { | ||||
| 			blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES); | ||||
| 			blake2b_compress(S, pin); | ||||
| 			inlen -= BLAKE2B_BLOCKBYTES; | ||||
| 			pin += BLAKE2B_BLOCKBYTES; | ||||
| 		} | ||||
| 	} | ||||
| 	memcpy(&S->buf[S->buflen], pin, inlen); | ||||
| 	S->buflen += (unsigned int)inlen; | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| int blake2b_final(blake2b_state *S, void *out, size_t outlen) { | ||||
| 	uint8_t buffer[BLAKE2B_OUTBYTES] = { 0 }; | ||||
| 	unsigned int i; | ||||
| 
 | ||||
| 	/* Sanity checks */ | ||||
| 	if (S == NULL || out == NULL || outlen < S->outlen) { | ||||
| 		return -1; | ||||
| 	} | ||||
| 
 | ||||
| 	/* Is this a reused state? */ | ||||
| 	if (S->f[0] != 0) { | ||||
| 		return -1; | ||||
| 	} | ||||
| 
 | ||||
| 	blake2b_increment_counter(S, S->buflen); | ||||
| 	blake2b_set_lastblock(S); | ||||
| 	memset(&S->buf[S->buflen], 0, BLAKE2B_BLOCKBYTES - S->buflen); /* Padding */ | ||||
| 	blake2b_compress(S, S->buf); | ||||
| 
 | ||||
| 	for (i = 0; i < 8; ++i) { /* Output full hash to temp buffer */ | ||||
| 		store64(buffer + sizeof(S->h[i]) * i, S->h[i]); | ||||
| 	} | ||||
| 
 | ||||
| 	memcpy(out, buffer, S->outlen); | ||||
| 	//clear_internal_memory(buffer, sizeof(buffer));
 | ||||
| 	//clear_internal_memory(S->buf, sizeof(S->buf));
 | ||||
| 	//clear_internal_memory(S->h, sizeof(S->h));
 | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| int blake2b(void *out, size_t outlen, const void *in, size_t inlen, | ||||
| 	const void *key, size_t keylen) { | ||||
| 	blake2b_state S; | ||||
| 	int ret = -1; | ||||
| 
 | ||||
| 	/* Verify parameters */ | ||||
| 	if (NULL == in && inlen > 0) { | ||||
| 		goto fail; | ||||
| 	} | ||||
| 
 | ||||
| 	if (NULL == out || outlen == 0 || outlen > BLAKE2B_OUTBYTES) { | ||||
| 		goto fail; | ||||
| 	} | ||||
| 
 | ||||
| 	if ((NULL == key && keylen > 0) || keylen > BLAKE2B_KEYBYTES) { | ||||
| 		goto fail; | ||||
| 	} | ||||
| 
 | ||||
| 	if (keylen > 0) { | ||||
| 		if (blake2b_init_key(&S, outlen, key, keylen) < 0) { | ||||
| 			goto fail; | ||||
| 		} | ||||
| 	} | ||||
| 	else { | ||||
| 		if (blake2b_init(&S, outlen) < 0) { | ||||
| 			goto fail; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	if (blake2b_update(&S, in, inlen) < 0) { | ||||
| 		goto fail; | ||||
| 	} | ||||
| 	ret = blake2b_final(&S, out, outlen); | ||||
| 
 | ||||
| fail: | ||||
| 	//clear_internal_memory(&S, sizeof(S));
 | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| /* Argon2 Team - Begin Code */ | ||||
| int blake2b_long(void *pout, size_t outlen, const void *in, size_t inlen) { | ||||
| 	uint8_t *out = (uint8_t *)pout; | ||||
| 	blake2b_state blake_state; | ||||
| 	uint8_t outlen_bytes[sizeof(uint32_t)] = { 0 }; | ||||
| 	int ret = -1; | ||||
| 
 | ||||
| 	if (outlen > UINT32_MAX) { | ||||
| 		goto fail; | ||||
| 	} | ||||
| 
 | ||||
| 	/* Ensure little-endian byte order! */ | ||||
| 	store32(outlen_bytes, (uint32_t)outlen); | ||||
| 
 | ||||
| #define TRY(statement)                                                         \ | ||||
|     do {                                                                       \ | ||||
|         ret = statement;                                                       \ | ||||
|         if (ret < 0) {                                                         \ | ||||
|             goto fail;                                                         \ | ||||
|         }                                                                      \ | ||||
|     } while ((void)0, 0) | ||||
| 
 | ||||
| 	if (outlen <= BLAKE2B_OUTBYTES) { | ||||
| 		TRY(blake2b_init(&blake_state, outlen)); | ||||
| 		TRY(blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes))); | ||||
| 		TRY(blake2b_update(&blake_state, in, inlen)); | ||||
| 		TRY(blake2b_final(&blake_state, out, outlen)); | ||||
| 	} | ||||
| 	else { | ||||
| 		uint32_t toproduce; | ||||
| 		uint8_t out_buffer[BLAKE2B_OUTBYTES]; | ||||
| 		uint8_t in_buffer[BLAKE2B_OUTBYTES]; | ||||
| 		TRY(blake2b_init(&blake_state, BLAKE2B_OUTBYTES)); | ||||
| 		TRY(blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes))); | ||||
| 		TRY(blake2b_update(&blake_state, in, inlen)); | ||||
| 		TRY(blake2b_final(&blake_state, out_buffer, BLAKE2B_OUTBYTES)); | ||||
| 		memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2); | ||||
| 		out += BLAKE2B_OUTBYTES / 2; | ||||
| 		toproduce = (uint32_t)outlen - BLAKE2B_OUTBYTES / 2; | ||||
| 
 | ||||
| 		while (toproduce > BLAKE2B_OUTBYTES) { | ||||
| 			memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES); | ||||
| 			TRY(blake2b(out_buffer, BLAKE2B_OUTBYTES, in_buffer, | ||||
| 				BLAKE2B_OUTBYTES, NULL, 0)); | ||||
| 			memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2); | ||||
| 			out += BLAKE2B_OUTBYTES / 2; | ||||
| 			toproduce -= BLAKE2B_OUTBYTES / 2; | ||||
| 		} | ||||
| 
 | ||||
| 		memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES); | ||||
| 		TRY(blake2b(out_buffer, toproduce, in_buffer, BLAKE2B_OUTBYTES, NULL, | ||||
| 			0)); | ||||
| 		memcpy(out, out_buffer, toproduce); | ||||
| 	} | ||||
| fail: | ||||
| 	//clear_internal_memory(&blake_state, sizeof(blake_state));
 | ||||
| 	return ret; | ||||
| #undef TRY | ||||
| } | ||||
| /* Argon2 Team - End Code */ | ||||
| 
 | ||||
							
								
								
									
										64
									
								
								src/blake2/blamka-round-ref.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										64
									
								
								src/blake2/blamka-round-ref.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,64 @@ | |||
| /*
 | ||||
| Copyright (c) 2018 tevador | ||||
| 
 | ||||
| This file is part of RandomX. | ||||
| 
 | ||||
| RandomX is free software: you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation, either version 3 of the License, or | ||||
| (at your option) any later version. | ||||
| 
 | ||||
| RandomX is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
| GNU General Public License for more details. | ||||
| 
 | ||||
| You should have received a copy of the GNU General Public License | ||||
| along with RandomX.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| */ | ||||
| 
 | ||||
| /* Original code from Argon2 reference source code package used under CC0 Licence
 | ||||
|  * https://github.com/P-H-C/phc-winner-argon2
 | ||||
|  * Copyright 2015 | ||||
|  * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves | ||||
| */ | ||||
| 
 | ||||
| #ifndef BLAKE_ROUND_MKA_H | ||||
| #define BLAKE_ROUND_MKA_H | ||||
| 
 | ||||
| #include "blake2.h" | ||||
| #include "blake2-impl.h" | ||||
| 
 | ||||
|  /* designed by the Lyra PHC team */ | ||||
| static BLAKE2_INLINE uint64_t fBlaMka(uint64_t x, uint64_t y) { | ||||
| 	const uint64_t m = UINT64_C(0xFFFFFFFF); | ||||
| 	const uint64_t xy = (x & m) * (y & m); | ||||
| 	return x + y + 2 * xy; | ||||
| } | ||||
| 
 | ||||
/*
 * Mixing step over four 64-bit words, all updated in place: two passes of
 * fBlaMka-add followed by xor-and-rotate, with rotation amounts 32, 24, 16
 * and 63. Every argument is expanded several times and assigned to, so pass
 * plain lvalues without side effects.
 */
#define G(a, b, c, d)                                                          \
    do {                                                                       \
        a = fBlaMka(a, b); d = rotr64(d ^ a, 32);                              \
        c = fBlaMka(c, d); b = rotr64(b ^ c, 24);                              \
        a = fBlaMka(a, b); d = rotr64(d ^ a, 16);                              \
        c = fBlaMka(c, d); b = rotr64(b ^ c, 63);                              \
    } while ((void)0, 0)
| 
 | ||||
/*
 * One message-free round over sixteen 64-bit words. Viewing v0..v15 as a
 * row-major 4x4 matrix, G is applied to the four columns first and then to
 * the four diagonals. All sixteen arguments are modified in place.
 */
#define BLAKE2_ROUND_NOMSG(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11,   \
                           v12, v13, v14, v15)                                 \
    do {                                                                       \
        /* columns */                                                          \
        G(v0, v4, v8, v12);                                                    \
        G(v1, v5, v9, v13);                                                    \
        G(v2, v6, v10, v14);                                                   \
        G(v3, v7, v11, v15);                                                   \
        /* diagonals */                                                        \
        G(v0, v5, v10, v15);                                                   \
        G(v1, v6, v11, v12);                                                   \
        G(v2, v7, v8, v13);                                                    \
        G(v3, v4, v9, v14);                                                    \
    } while ((void)0, 0)
| 
 | ||||
| #endif | ||||
							
								
								
									
										14075
									
								
								src/catch.hpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										14075
									
								
								src/catch.hpp
									
										
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							
							
								
								
									
										110
									
								
								src/common.hpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										110
									
								
								src/common.hpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,110 @@ | |||
| /*
 | ||||
| Copyright (c) 2018 tevador | ||||
| 
 | ||||
| This file is part of RandomX. | ||||
| 
 | ||||
| RandomX is free software: you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation, either version 3 of the License, or | ||||
| (at your option) any later version. | ||||
| 
 | ||||
| RandomX is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
| GNU General Public License for more details. | ||||
| 
 | ||||
| You should have received a copy of the GNU General Public License | ||||
| along with RandomX.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| */ | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include <cstdint> | ||||
| #include <new> | ||||
| #include "intrinPortable.h" | ||||
| 
 | ||||
| namespace RandomX { | ||||
| 
 | ||||
| 	using addr_t = uint32_t; | ||||
| 
 | ||||
| 	constexpr int RoundToNearest = 0; | ||||
| 	constexpr int RoundDown = 1; | ||||
| 	constexpr int RoundUp = 2; | ||||
| 	constexpr int RoundToZero = 3; | ||||
| 
 | ||||
| 	constexpr int SeedSize = 32; | ||||
| 
 | ||||
| 	constexpr int CacheBlockSize = 1024; | ||||
| 	constexpr int BlockExpansionRatio = 64; | ||||
| 	constexpr uint32_t DatasetBlockSize = BlockExpansionRatio * CacheBlockSize; | ||||
| 	constexpr uint32_t DatasetBlockCount = 65536; | ||||
| 	constexpr uint32_t CacheSize = DatasetBlockCount * CacheBlockSize; | ||||
| 	constexpr uint64_t DatasetSize = (uint64_t)DatasetBlockCount * DatasetBlockSize; | ||||
| 
 | ||||
| 	constexpr int ArgonIterations = 12; | ||||
| 	constexpr uint32_t ArgonMemorySize = 65536; //KiB
 | ||||
| 	constexpr int ArgonLanes = 1; | ||||
| 	const char ArgonSalt[] = "Monero\x1A$"; | ||||
| 	constexpr int ArgonSaltSize = sizeof(ArgonSalt) - 1; | ||||
| 
 | ||||
| #ifdef TRACE | ||||
| 	constexpr bool trace = true; | ||||
| #else | ||||
| 	constexpr bool trace = false; | ||||
| #endif | ||||
| 
 | ||||
| 	typedef union { | ||||
| 		double f64; | ||||
| 		int64_t i64; | ||||
| 		uint64_t u64; | ||||
| 		int32_t i32; | ||||
| 		uint32_t u32; | ||||
| 	} convertible_t; | ||||
| 
 | ||||
| 	constexpr int ProgramLength = 512; | ||||
| 	constexpr int InstructionCount = 1024 * 1024; | ||||
| 	constexpr uint32_t ScratchpadSize = 256 * 1024; | ||||
| 	constexpr uint32_t ScratchpadLength = ScratchpadSize / sizeof(convertible_t); | ||||
| 	constexpr uint32_t ScratchpadL1 = ScratchpadSize / 16 / sizeof(convertible_t); | ||||
| 	constexpr uint32_t ScratchpadL2 = ScratchpadSize / sizeof(convertible_t); | ||||
| 	constexpr int RegistersCount = 8; | ||||
| 
 | ||||
| 	struct LightClientMemory { | ||||
| 		uint8_t* cache; | ||||
| 		uint8_t* block; | ||||
| 		uint32_t blockNumber; | ||||
| 		alignas(16) __m128i keys[10]; | ||||
| 
 | ||||
| 		void* operator new(size_t size) { | ||||
| 			void* ptr = _mm_malloc(size, sizeof(__m128i)); | ||||
| 			if (ptr == nullptr) | ||||
| 				throw std::bad_alloc(); | ||||
| 			return ptr; | ||||
| 		} | ||||
| 
 | ||||
| 		void operator delete(void* ptr) { | ||||
| 			_mm_free(ptr); | ||||
| 		} | ||||
| 	}; | ||||
| 
 | ||||
| 	struct MemoryRegisters { | ||||
| 		addr_t ma, mx; | ||||
| 		union { | ||||
| 			uint8_t* dataset; | ||||
| 			LightClientMemory* lcm; | ||||
| 		}; | ||||
| 	}; | ||||
| 
 | ||||
| 	static_assert(sizeof(MemoryRegisters) == 2 * sizeof(addr_t) + sizeof(uintptr_t), "Invalid alignment of struct RandomX::MemoryRegisters"); | ||||
| 
 | ||||
| 	struct RegisterFile { | ||||
| 		convertible_t r[RegistersCount]; | ||||
| 		convertible_t f[RegistersCount]; | ||||
| 	}; | ||||
| 
 | ||||
| 	static_assert(sizeof(RegisterFile) == 2 * RegistersCount * sizeof(convertible_t), "Invalid alignment of struct RandomX::RegisterFile"); | ||||
| 
 | ||||
| 	extern "C" { | ||||
| 		void executeProgram(RegisterFile& registerFile, convertible_t& scratchpad, MemoryRegisters& memory); | ||||
| 	} | ||||
| } | ||||
							
								
								
									
										306
									
								
								src/dataset.cpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										306
									
								
								src/dataset.cpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,306 @@ | |||
| /*
 | ||||
| Copyright (c) 2018 tevador | ||||
| 
 | ||||
| This file is part of RandomX. | ||||
| 
 | ||||
| RandomX is free software: you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation, either version 3 of the License, or | ||||
| (at your option) any later version. | ||||
| 
 | ||||
| RandomX is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
| GNU General Public License for more details. | ||||
| 
 | ||||
| You should have received a copy of the GNU General Public License | ||||
| along with RandomX.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| */ | ||||
| 
 | ||||
| // Parts of this file are originally copyright (c) xmr-stak
 | ||||
| 
 | ||||
| #include "common.hpp" | ||||
| #include "dataset.hpp" | ||||
| #include "Pcg32.hpp" | ||||
| #include "argon2_core.h" | ||||
| #include <new> | ||||
| #include <algorithm> | ||||
| #include <stdexcept> | ||||
| 
 | ||||
| #if defined(_MSC_VER) | ||||
| #if defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP == 2) | ||||
| #define __SSE2__ 1 | ||||
| #endif | ||||
| #endif | ||||
| 
 | ||||
| #if defined(__SSE2__) | ||||
| #include <wmmintrin.h> | ||||
| #define PREFETCH(memory) _mm_prefetch((const char *)((memory).dataset + (memory).ma), _MM_HINT_T0) | ||||
| #else | ||||
| #define PREFETCH(memory) | ||||
| #endif | ||||
| 
 | ||||
| namespace RandomX { | ||||
| 
 | ||||
| 	void initializeCache(const void* input, size_t inputLength, void* memory) { | ||||
| 		uint32_t memory_blocks, segment_length; | ||||
| 		argon2_instance_t instance; | ||||
| 		argon2_context context; | ||||
| 
 | ||||
| 		context.out = nullptr; | ||||
| 		context.outlen = 0; | ||||
| 		context.pwd = CONST_CAST(uint8_t *)input; | ||||
| 		context.pwdlen = (uint32_t)inputLength; | ||||
| 		context.salt = CONST_CAST(uint8_t *)ArgonSalt; | ||||
| 		context.saltlen = (uint32_t)ArgonSaltSize; | ||||
| 		context.secret = NULL; | ||||
| 		context.secretlen = 0; | ||||
| 		context.ad = NULL; | ||||
| 		context.adlen = 0; | ||||
| 		context.t_cost = ArgonIterations; | ||||
| 		context.m_cost = ArgonMemorySize; | ||||
| 		context.lanes = ArgonLanes; | ||||
| 		context.threads = 1; | ||||
| 		context.allocate_cbk = NULL; | ||||
| 		context.free_cbk = NULL; | ||||
| 		context.flags = ARGON2_DEFAULT_FLAGS; | ||||
| 		context.version = ARGON2_VERSION_NUMBER; | ||||
| 
 | ||||
| 		/* 2. Align memory size */ | ||||
| 		/* Minimum memory_blocks = 8L blocks, where L is the number of lanes */ | ||||
| 		memory_blocks = context.m_cost; | ||||
| 
 | ||||
| 		segment_length = memory_blocks / (context.lanes * ARGON2_SYNC_POINTS); | ||||
| 
 | ||||
| 		instance.version = context.version; | ||||
| 		instance.memory = NULL; | ||||
| 		instance.passes = context.t_cost; | ||||
| 		instance.memory_blocks = memory_blocks; | ||||
| 		instance.segment_length = segment_length; | ||||
| 		instance.lane_length = segment_length * ARGON2_SYNC_POINTS; | ||||
| 		instance.lanes = context.lanes; | ||||
| 		instance.threads = context.threads; | ||||
| 		instance.type = Argon2_d; | ||||
| 		instance.memory = (block*)memory; | ||||
| 
 | ||||
| 		if (instance.threads > instance.lanes) { | ||||
| 			instance.threads = instance.lanes; | ||||
| 		} | ||||
| 
 | ||||
| 		/* 3. Initialization: Hashing inputs, allocating memory, filling first
 | ||||
| 		 * blocks | ||||
| 		 */ | ||||
| 		initialize(&instance, &context); | ||||
| 
 | ||||
| 		fill_memory_blocks(&instance); | ||||
| 	} | ||||
| 
 | ||||
| 	// This will shift and xor tmp1 into itself as 4 32-bit vals such as
 | ||||
| 	// sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1)
 | ||||
| 	static inline __m128i sl_xor(__m128i tmp1) { | ||||
| 		__m128i tmp4; | ||||
| 		tmp4 = _mm_slli_si128(tmp1, 0x04); | ||||
| 		tmp1 = _mm_xor_si128(tmp1, tmp4); | ||||
| 		tmp4 = _mm_slli_si128(tmp4, 0x04); | ||||
| 		tmp1 = _mm_xor_si128(tmp1, tmp4); | ||||
| 		tmp4 = _mm_slli_si128(tmp4, 0x04); | ||||
| 		tmp1 = _mm_xor_si128(tmp1, tmp4); | ||||
| 		return tmp1; | ||||
| 	} | ||||
| 
 | ||||
| 	template<uint8_t rcon, bool soft> | ||||
| 	static inline void aes_genkey_sub(__m128i* xout0, __m128i* xout2) { | ||||
| 		__m128i xout1 = soft ? soft_aeskeygenassist(*xout2, rcon) : _mm_aeskeygenassist_si128(*xout2, rcon); | ||||
| 		xout1 = _mm_shuffle_epi32(xout1, 0xFF); | ||||
| 		*xout0 = sl_xor(*xout0); | ||||
| 		*xout0 = _mm_xor_si128(*xout0, xout1); | ||||
| 		xout1 = soft ? soft_aeskeygenassist(*xout0, 0x00) : _mm_aeskeygenassist_si128(*xout0, 0x00); | ||||
| 		xout1 = _mm_shuffle_epi32(xout1, 0xAA); | ||||
| 		*xout2 = sl_xor(*xout2); | ||||
| 		*xout2 = _mm_xor_si128(*xout2, xout1); | ||||
| 	} | ||||
| 
 | ||||
| 	template<bool soft> | ||||
| 	void expandAesKeys(const __m128i* seed, __m128i* keys) { | ||||
| 		__m128i xout0, xout2; | ||||
| 		xout0 = _mm_load_si128(seed); | ||||
| 		xout2 = _mm_load_si128(seed + 1); | ||||
| 		*keys++ = xout0; | ||||
| 		*keys++ = xout2; | ||||
| 		aes_genkey_sub<0x01, soft>(&xout0, &xout2); | ||||
| 		*keys++ = xout0; | ||||
| 		*keys++ = xout2; | ||||
| 		aes_genkey_sub<0x02, soft>(&xout0, &xout2); | ||||
| 		*keys++ = xout0; | ||||
| 		*keys++ = xout2; | ||||
| 		aes_genkey_sub<0x04, soft>(&xout0, &xout2); | ||||
| 		*keys++ = xout0; | ||||
| 		*keys++ = xout2; | ||||
| 		aes_genkey_sub<0x08, soft>(&xout0, &xout2); | ||||
| 		*keys++ = xout0; | ||||
| 		*keys++ = xout2; | ||||
| 	} | ||||
| 
 | ||||
| 	template | ||||
| 		void expandAesKeys<true>(const __m128i* seed, __m128i* keys); | ||||
| 
 | ||||
| 	template | ||||
| 		void expandAesKeys<false>(const __m128i* seed, __m128i* keys); | ||||
| 
 | ||||
| 	template<typename T> | ||||
| 	static inline void shuffle(T* buffer, size_t bytes, Pcg32& gen) { | ||||
| 		auto count = bytes / sizeof(T); | ||||
| 		for (auto i = count - 1; i >= 1; --i) { | ||||
| 			int j = gen.getUniform(0, i); | ||||
| 			std::swap(buffer[j], buffer[i]); | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	template<bool soft, bool enc> | ||||
| 	void initBlock(uint8_t* in, uint8_t* out, uint32_t blockNumber, const __m128i keys[10]) { | ||||
| 		__m128i xin, xout; | ||||
| 		//Initialization vector = block number extended to 128 bits
 | ||||
| 		xout = _mm_cvtsi32_si128(blockNumber); | ||||
| 		//Expand + AES
 | ||||
| 		for (uint32_t i = 0; i < DatasetBlockSize / sizeof(__m128i); ++i) { | ||||
| 			if ((i % 32) == 0) { | ||||
| 				xin = _mm_set_epi64x(*(uint64_t*)(in + i / 4), 0); | ||||
| 				xout = _mm_xor_si128(xin, xout); | ||||
| 			} | ||||
| 			if (enc) { | ||||
| 				xout = aesenc<soft>(xout, keys[0]); | ||||
| 				xout = aesenc<soft>(xout, keys[1]); | ||||
| 				xout = aesenc<soft>(xout, keys[2]); | ||||
| 				xout = aesenc<soft>(xout, keys[3]); | ||||
| 				xout = aesenc<soft>(xout, keys[4]); | ||||
| 				xout = aesenc<soft>(xout, keys[5]); | ||||
| 				xout = aesenc<soft>(xout, keys[6]); | ||||
| 				xout = aesenc<soft>(xout, keys[7]); | ||||
| 				xout = aesenc<soft>(xout, keys[8]); | ||||
| 				xout = aesenc<soft>(xout, keys[9]); | ||||
| 			} | ||||
| 			else { | ||||
| 				xout = aesdec<soft>(xout, keys[0]); | ||||
| 				xout = aesdec<soft>(xout, keys[1]); | ||||
| 				xout = aesdec<soft>(xout, keys[2]); | ||||
| 				xout = aesdec<soft>(xout, keys[3]); | ||||
| 				xout = aesdec<soft>(xout, keys[4]); | ||||
| 				xout = aesdec<soft>(xout, keys[5]); | ||||
| 				xout = aesdec<soft>(xout, keys[6]); | ||||
| 				xout = aesdec<soft>(xout, keys[7]); | ||||
| 				xout = aesdec<soft>(xout, keys[8]); | ||||
| 				xout = aesdec<soft>(xout, keys[9]); | ||||
| 			} | ||||
| 			_mm_store_si128((__m128i*)(out + i * sizeof(__m128i)), xout); | ||||
| 		} | ||||
| 		//Shuffle
 | ||||
| 		Pcg32 gen(&xout); | ||||
| 		shuffle<uint32_t>((uint32_t*)out, DatasetBlockSize, gen); | ||||
| 	} | ||||
| 
 | ||||
| 	template | ||||
| 		void initBlock<true, true>(uint8_t* in, uint8_t* out, uint32_t blockNumber, const __m128i keys[10]); | ||||
| 
 | ||||
| 	template | ||||
| 		void initBlock<true, false>(uint8_t* in, uint8_t* out, uint32_t blockNumber, const __m128i keys[10]); | ||||
| 
 | ||||
| 	template | ||||
| 		void initBlock<false, true>(uint8_t* in, uint8_t* out, uint32_t blockNumber, const __m128i keys[10]); | ||||
| 
 | ||||
| 	template | ||||
| 		void initBlock<false, false>(uint8_t* in, uint8_t* out, uint32_t blockNumber, const __m128i keys[10]); | ||||
| 
 | ||||
| 	convertible_t datasetRead(addr_t addr, MemoryRegisters& memory) { | ||||
| 		convertible_t data; | ||||
| 		data.u64 = *(uint64_t*)(memory.dataset + memory.ma); | ||||
| 		memory.ma += 8; | ||||
| 		memory.mx ^= addr; | ||||
| 		if ((memory.mx & 0xFFF8) == 0) { | ||||
| 			memory.ma = memory.mx & ~7; | ||||
| 			PREFETCH(memory); | ||||
| 		} | ||||
| 		return data; | ||||
| 	} | ||||
| 
 | ||||
| 	template<bool softAes> | ||||
| 	void initBlock(uint8_t* cache, uint8_t* block, uint32_t blockNumber, const __m128i k[10]) { | ||||
| 		if (blockNumber % 2 == 1) { | ||||
| 			initBlock<softAes, true>(cache + blockNumber * CacheBlockSize, block, blockNumber, k); | ||||
| 		} | ||||
| 		else { | ||||
| 			initBlock<softAes, false>(cache + blockNumber * CacheBlockSize, block, blockNumber, k); | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	template<bool softAes> | ||||
| 	convertible_t datasetReadLight(addr_t addr, MemoryRegisters& memory) { | ||||
| 		convertible_t data; | ||||
| 		auto blockNumber = memory.ma / DatasetBlockSize; | ||||
| 		if (memory.lcm->blockNumber != blockNumber) { | ||||
| 			initBlock<softAes>(memory.lcm->cache, (uint8_t*)memory.lcm->block, blockNumber, memory.lcm->keys); | ||||
| 			memory.lcm->blockNumber = blockNumber; | ||||
| 		} | ||||
| 		data.u64 = *(uint64_t*)(memory.lcm->block + (memory.ma % DatasetBlockSize)); | ||||
| 		memory.ma += 8; | ||||
| 		memory.mx ^= addr; | ||||
| 		if ((memory.mx & 0xFFF8) == 0) { | ||||
| 			memory.ma = memory.mx & ~7; | ||||
| 		} | ||||
| 		return data; | ||||
| 	} | ||||
| 
 | ||||
| 	template | ||||
| 		convertible_t datasetReadLight<false>(addr_t addr, MemoryRegisters& memory); | ||||
| 
 | ||||
| 	template | ||||
| 		convertible_t datasetReadLight<true>(addr_t addr, MemoryRegisters& memory); | ||||
| 
 | ||||
| 	template<bool softAes> | ||||
| 	void datasetInit(const void* seed, uint8_t*& dataset) { | ||||
| 		if (sizeof(size_t) <= 4) | ||||
| 			throw std::runtime_error("Platform doesn't support enough memory for the dataset"); | ||||
| 		dataset = (uint8_t*)_mm_malloc(DatasetSize, sizeof(__m128i)); | ||||
| 		if (dataset == nullptr) { | ||||
| 			throw std::runtime_error("Dataset memory allocation failed. >4 GiB of virtual memory is needed."); | ||||
| 		} | ||||
| 		uint8_t* cache = (uint8_t*)_mm_malloc(CacheSize, sizeof(__m128i)); | ||||
| 		if (dataset == nullptr) { | ||||
| 			throw std::bad_alloc(); | ||||
| 		} | ||||
| 		initializeCache(seed, SeedSize, cache); | ||||
| 		alignas(16) __m128i keys[10]; | ||||
| 		expandAesKeys<softAes>((const __m128i*)seed, keys); | ||||
| 		for (uint32_t i = 0; i < DatasetBlockCount; ++i) { | ||||
| 			initBlock<softAes>(cache, dataset + i * DatasetBlockSize, i, keys); | ||||
| 		} | ||||
| 		_mm_free(cache); | ||||
| 	} | ||||
| 
 | ||||
| 	template | ||||
| 		void datasetInit<false>(const void*, uint8_t*&); | ||||
| 
 | ||||
| 	template | ||||
| 		void datasetInit<true>(const void*, uint8_t*&); | ||||
| 
 | ||||
| 	template<bool softAes> | ||||
| 	void datasetInitLight(const void* seed, LightClientMemory*& lcm) { | ||||
| 		lcm = new LightClientMemory(); | ||||
| 		lcm->cache = (uint8_t*)_mm_malloc(CacheSize, sizeof(__m128i)); | ||||
| 		if (lcm->cache == nullptr) { | ||||
| 			throw std::bad_alloc(); | ||||
| 		} | ||||
| 		initializeCache(seed, SeedSize, lcm->cache); | ||||
| 		expandAesKeys<softAes>((__m128i*)seed, lcm->keys); | ||||
| 		lcm->block = (uint8_t*)_mm_malloc(DatasetBlockSize, sizeof(__m128i)); | ||||
| 		if (lcm->block == nullptr) { | ||||
| 			throw std::bad_alloc(); | ||||
| 		} | ||||
| 		lcm->blockNumber = -1; | ||||
| 	} | ||||
| 
 | ||||
| 	template | ||||
| 		void datasetInitLight<false>(const void*, LightClientMemory*&); | ||||
| 
 | ||||
| 	template | ||||
| 		void datasetInitLight<true>(const void*, LightClientMemory*&); | ||||
| } | ||||
							
								
								
									
										64
									
								
								src/dataset.hpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										64
									
								
								src/dataset.hpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,64 @@ | |||
| /*
 | ||||
| Copyright (c) 2018 tevador | ||||
| 
 | ||||
| This file is part of RandomX. | ||||
| 
 | ||||
| RandomX is free software: you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation, either version 3 of the License, or | ||||
| (at your option) any later version. | ||||
| 
 | ||||
| RandomX is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
| GNU General Public License for more details. | ||||
| 
 | ||||
| You should have received a copy of the GNU General Public License | ||||
| along with RandomX.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| */ | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include <cstdint> | ||||
| #include "intrinPortable.h" | ||||
| #include "argon2.h" | ||||
| #include "common.hpp" | ||||
| #include "softAes.h" | ||||
| 
 | ||||
| namespace RandomX { | ||||
| 
 | ||||
| 	static_assert(ArgonMemorySize % (ArgonLanes * ARGON2_SYNC_POINTS) == 0, "ArgonMemorySize - invalid value"); | ||||
| 
 | ||||
| 	void initializeCache(const void* input, size_t inputLength, void* memory); | ||||
| 
 | ||||
| 	template<bool soft> | ||||
| 	void expandAesKeys(const __m128i* seed, __m128i* keys); | ||||
| 
 | ||||
| 	template<bool soft> | ||||
| 	inline __m128i aesenc(__m128i in, __m128i key) { | ||||
| 		return soft ? soft_aesenc(in, key) : _mm_aesenc_si128(in, key); | ||||
| 	} | ||||
| 
 | ||||
| 	template<bool soft> | ||||
| 	inline __m128i aesdec(__m128i in, __m128i key) { | ||||
| 		return soft ? soft_aesdec(in, key) : _mm_aesdec_si128(in, key); | ||||
| 	} | ||||
| 
 | ||||
| 	template<bool soft, bool enc> | ||||
| 	void initBlock(uint8_t* in, uint8_t* out, uint32_t blockNumber, const __m128i keys[10]); | ||||
| 
 | ||||
| 	template<bool softAes> | ||||
| 	void initBlock(uint8_t* cache, uint8_t* block, uint32_t blockNumber, const __m128i keys[10]); | ||||
| 
 | ||||
| 	template<bool softAes> | ||||
| 	void datasetInit(const void* seed, uint8_t*& dataset); | ||||
| 
 | ||||
| 	convertible_t datasetRead(addr_t addr, MemoryRegisters& memory); | ||||
| 
 | ||||
| 	template<bool softAes> | ||||
| 	void datasetInitLight(const void* seed, LightClientMemory*& lcm); | ||||
| 
 | ||||
| 	template<bool softAes> | ||||
| 	convertible_t datasetReadLight(addr_t addr, MemoryRegisters& memory); | ||||
| } | ||||
| 
 | ||||
							
								
								
									
										92
									
								
								src/instructionWeights.hpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										92
									
								
								src/instructionWeights.hpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,92 @@ | |||
| /*
 | ||||
| Copyright (c) 2018 tevador | ||||
| 
 | ||||
| This file is part of RandomX. | ||||
| 
 | ||||
| RandomX is free software: you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation, either version 3 of the License, or | ||||
| (at your option) any later version. | ||||
| 
 | ||||
| RandomX is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
| GNU General Public License for more details. | ||||
| 
 | ||||
| You should have received a copy of the GNU General Public License | ||||
| along with RandomX.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| */ | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
// Relative weight of every instruction. The weights must add up to exactly
// 256 (enforced by the static_assert below).

// Integer arithmetic
#define WT_ADD_64   16
#define WT_ADD_32   4
#define WT_SUB_64   16
#define WT_SUB_32   4
#define WT_MUL_64   15
#define WT_MULH_64  11
#define WT_MUL_32   11
#define WT_IMUL_32  11
#define WT_IMULH_64 11
#define WT_DIV_64   1
#define WT_IDIV_64  1

// Bitwise logic
#define WT_AND_64   4
#define WT_AND_32   2
#define WT_OR_64    4
#define WT_OR_32    2
#define WT_XOR_64   4
#define WT_XOR_32   2

// Shifts and rotations
#define WT_SHL_64   3
#define WT_SHR_64   3
#define WT_SAR_64   3
#define WT_ROL_64   9
#define WT_ROR_64   9

// Floating point
#define WT_FPADD    20
#define WT_FPSUB    20
#define WT_FPMUL    22
#define WT_FPDIV    8
#define WT_FPSQRT   6
#define WT_FPROUND  2

// Control flow
#define WT_CALL     17
#define WT_RET      15

constexpr int wtSum =
	WT_ADD_64 + WT_ADD_32 + WT_SUB_64 + WT_SUB_32 +
	WT_MUL_64 + WT_MULH_64 + WT_MUL_32 + WT_IMUL_32 + WT_IMULH_64 +
	WT_DIV_64 + WT_IDIV_64 +
	WT_AND_64 + WT_AND_32 + WT_OR_64 + WT_OR_32 + WT_XOR_64 + WT_XOR_32 +
	WT_SHL_64 + WT_SHR_64 + WT_SAR_64 + WT_ROL_64 + WT_ROR_64 +
	WT_FPADD + WT_FPSUB + WT_FPMUL + WT_FPDIV + WT_FPSQRT + WT_FPROUND +
	WT_CALL + WT_RET;

static_assert(wtSum == 256,
	"Sum of instruction weights must be 256");

// REPk(x) expands to k copies of `x,` (each copy followed by a comma).
// Repetition counts from 1 to 26 are available.
#define REP1(x) x,
#define REP2(x) REP1(x) x,
#define REP3(x) REP2(x) x,
#define REP4(x) REP3(x) x,
#define REP5(x) REP4(x) x,
#define REP6(x) REP5(x) x,
#define REP7(x) REP6(x) x,
#define REP8(x) REP7(x) x,
#define REP9(x) REP8(x) x,
#define REP10(x) REP9(x) x,
#define REP11(x) REP10(x) x,
#define REP12(x) REP11(x) x,
#define REP13(x) REP12(x) x,
#define REP14(x) REP13(x) x,
#define REP15(x) REP14(x) x,
#define REP16(x) REP15(x) x,
#define REP17(x) REP16(x) x,
#define REP18(x) REP17(x) x,
#define REP19(x) REP18(x) x,
#define REP20(x) REP19(x) x,
#define REP21(x) REP20(x) x,
#define REP22(x) REP21(x) x,
#define REP23(x) REP22(x) x,
#define REP24(x) REP23(x) x,
#define REP25(x) REP24(x) x,
#define REP26(x) REP25(x) x,

// REPN(x, N) repeats `x,` N times. The extra REPNX level lets N be a macro
// (such as WT(...)) that is expanded before the REP name is pasted together.
#define REPNX(x,N) REP##N(x)
#define REPN(x,N) REPNX(x,N)

// WT(NAME) yields the weight macro WT_NAME.
#define NUM(x) x
#define WT(x) NUM(WT_##x)
							
								
								
									
										84
									
								
								src/instructions.hpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										84
									
								
								src/instructions.hpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,84 @@ | |||
| /*
 | ||||
| Copyright (c) 2018 tevador | ||||
| 
 | ||||
| This file is part of RandomX. | ||||
| 
 | ||||
| RandomX is free software: you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation, either version 3 of the License, or | ||||
| (at your option) any later version. | ||||
| 
 | ||||
| RandomX is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
| GNU General Public License for more details. | ||||
| 
 | ||||
| You should have received a copy of the GNU General Public License | ||||
| along with RandomX.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| */ | ||||
| 
 | ||||
| #include <cstdint> | ||||
| #include "common.hpp" | ||||
| 
 | ||||
| namespace RandomX { | ||||
| 
 | ||||
| 	// Integer-to-double conversions used by the floating point instructions.
 | ||||
| 	// Each one clears the 11 low bits of its input first, so the remaining value
 | ||||
| 	// is a multiple of 2048 and converts to double without rounding.
 | ||||
| 
 | ||||
| 	// Clears bits 0-10 of x and converts the result to double.
 | ||||
| 	inline double convertToDouble(int64_t x) { | ||||
| 		const int64_t lowBitsCleared = x & -2048LL; | ||||
| 		return static_cast<double>(lowBitsCleared); | ||||
| 	} | ||||
| 
 | ||||
| 	// Like convertToDouble, but bit 11 is forced on so the result is never zero.
 | ||||
| 	inline double convertToDoubleNonZero(int64_t x) { | ||||
| 		const int64_t forcedBit = 2048; | ||||
| 		const int64_t lowBitsCleared = x & -2048LL; | ||||
| 		return static_cast<double>(lowBitsCleared | forcedBit); | ||||
| 	} | ||||
| 
 | ||||
| 	// Like convertToDouble, but the sign bit is cleared as well, so the result
 | ||||
| 	// is never negative.
 | ||||
| 	inline double convertToDoubleNonNegative(int64_t x) { | ||||
| 		const int64_t keepMask = 0x7FFFFFFFFFFFF800LL; | ||||
| 		return static_cast<double>(x & keepMask); | ||||
| 	} | ||||
| 
 | ||||
| 	extern "C" { | ||||
| 		// Integer instructions: operands are passed in a and b, the result is
 | ||||
| 		// written to c.
 | ||||
| 		void ADD_64(convertible_t& a, convertible_t& b, convertible_t& c); | ||||
| 		void ADD_32(convertible_t& a, convertible_t& b, convertible_t& c); | ||||
| 		void SUB_64(convertible_t& a, convertible_t& b, convertible_t& c); | ||||
| 		void SUB_32(convertible_t& a, convertible_t& b, convertible_t& c); | ||||
| 		void MUL_64(convertible_t& a, convertible_t& b, convertible_t& c); | ||||
| 		void MULH_64(convertible_t& a, convertible_t& b, convertible_t& c); | ||||
| 		void MUL_32(convertible_t& a, convertible_t& b, convertible_t& c); | ||||
| 		void IMUL_32(convertible_t& a, convertible_t& b, convertible_t& c); | ||||
| 		void IMULH_64(convertible_t& a, convertible_t& b, convertible_t& c); | ||||
| 		void DIV_64(convertible_t& a, convertible_t& b, convertible_t& c); | ||||
| 		void IDIV_64(convertible_t& a, convertible_t& b, convertible_t& c); | ||||
| 		void AND_64(convertible_t& a, convertible_t& b, convertible_t& c); | ||||
| 		void AND_32(convertible_t& a, convertible_t& b, convertible_t& c); | ||||
| 		void OR_64(convertible_t& a, convertible_t& b, convertible_t& c); | ||||
| 		void OR_32(convertible_t& a, convertible_t& b, convertible_t& c); | ||||
| 		void XOR_64(convertible_t& a, convertible_t& b, convertible_t& c); | ||||
| 		void XOR_32(convertible_t& a, convertible_t& b, convertible_t& c); | ||||
| 		void SHL_64(convertible_t& a, convertible_t& b, convertible_t& c); | ||||
| 		void SHR_64(convertible_t& a, convertible_t& b, convertible_t& c); | ||||
| 		void SAR_64(convertible_t& a, convertible_t& b, convertible_t& c); | ||||
| 		void ROL_64(convertible_t& a, convertible_t& b, convertible_t& c); | ||||
| 		void ROR_64(convertible_t& a, convertible_t& b, convertible_t& c); | ||||
| 
 | ||||
| 		// Floating point instructions. The four arithmetic ones take the second
 | ||||
| 		// operand by value as a double.
 | ||||
| 		void FPINIT(); | ||||
| 		void FPADD(convertible_t& a, double b, convertible_t& c); | ||||
| 		void FPSUB(convertible_t& a, double b, convertible_t& c); | ||||
| 		void FPMUL(convertible_t& a, double b, convertible_t& c); | ||||
| 		void FPDIV(convertible_t& a, double b, convertible_t& c); | ||||
| 		void FPSQRT(convertible_t& a, convertible_t& b, convertible_t& c); | ||||
| 		void FPROUND(convertible_t& a, convertible_t& b, convertible_t& c); | ||||
| 
 | ||||
| 		// Adapters giving the arithmetic floating point instructions the same
 | ||||
| 		// three-reference signature as the integer ones; b is read through its
 | ||||
| 		// f64 member.
 | ||||
| 		inline void FPADD_64(convertible_t& a, convertible_t& b, convertible_t& c) { | ||||
| 			FPADD(a, b.f64, c); | ||||
| 		} | ||||
| 
 | ||||
| 		inline void FPSUB_64(convertible_t& a, convertible_t& b, convertible_t& c) { | ||||
| 			FPSUB(a, b.f64, c); | ||||
| 		} | ||||
| 
 | ||||
| 		inline void FPMUL_64(convertible_t& a, convertible_t& b, convertible_t& c) { | ||||
| 			FPMUL(a, b.f64, c); | ||||
| 		} | ||||
| 
 | ||||
| 		inline void FPDIV_64(convertible_t& a, convertible_t& b, convertible_t& c) { | ||||
| 			FPDIV(a, b.f64, c); | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
							
								
								
									
										310
									
								
								src/instructionsPortable.cpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										310
									
								
								src/instructionsPortable.cpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,310 @@ | |||
| /*
 | ||||
| Copyright (c) 2018 tevador | ||||
| 
 | ||||
| This file is part of RandomX. | ||||
| 
 | ||||
| RandomX is free software: you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation, either version 3 of the License, or | ||||
| (at your option) any later version. | ||||
| 
 | ||||
| RandomX is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
| GNU General Public License for more details. | ||||
| 
 | ||||
| You should have received a copy of the GNU General Public License | ||||
| along with RandomX.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| */ | ||||
| //#define DEBUG
 | ||||
| //#define FTZ
 | ||||
| #include "instructions.hpp" | ||||
| #include "intrinPortable.h" | ||||
| #pragma STDC FENV_ACCESS on | ||||
| #include <cfenv> | ||||
| #include <cmath> | ||||
| #ifdef DEBUG | ||||
| #include <iostream> | ||||
| #endif | ||||
| 
 | ||||
| #if defined(__SIZEOF_INT128__) | ||||
| 	// The compiler offers a 128-bit integer type: widen one operand, multiply,
 | ||||
| 	// and take the upper 64 bits of the product.
 | ||||
| 	typedef unsigned __int128 uint128_t; | ||||
| 	typedef __int128 int128_t; | ||||
| 	// Upper 64 bits of the unsigned 64x64-bit product.
 | ||||
| 	static inline uint64_t __umulhi64(uint64_t a, uint64_t b) { | ||||
| 		return ((uint128_t)a * b) >> 64; | ||||
| 	} | ||||
| 	// Upper 64 bits of the signed 64x64-bit product (returned as uint64_t).
 | ||||
| 	static inline uint64_t __imulhi64(int64_t a, int64_t b) { | ||||
| 		return ((int128_t)a * b) >> 64; | ||||
| 	} | ||||
| 	// Defining these macros disables the portable fallbacks further down.
 | ||||
| 	#define umulhi64 __umulhi64 | ||||
| 	#define imulhi64 __imulhi64 | ||||
| #endif | ||||
| 
 | ||||
| #if defined(_MSC_VER) | ||||
| 	// MSVC: map the helper names onto compiler intrinsics where available.
 | ||||
| 	// EVAL_DEFINE expands its argument and then appends a 0 token to it.
 | ||||
| 	// NOTE(review): presumably the __MACHINE* macros from intrin.h expand to
 | ||||
| 	// their argument on matching targets (giving 10) and to nothing elsewhere
 | ||||
| 	// (giving 0) - confirm against the installed intrin.h.
 | ||||
| 	#define HAS_VALUE(X) X ## 0 | ||||
| 	#define EVAL_DEFINE(X) HAS_VALUE(X) | ||||
| 	#include <intrin.h> | ||||
| 	#include <stdlib.h> | ||||
| 	#define ror64 _rotr64 | ||||
| 	#define rol64 _rotl64 | ||||
| 	#if EVAL_DEFINE(__MACHINEARM64_X64(1)) | ||||
| 		#define umulhi64 __umulh | ||||
| 	#endif | ||||
| 	#if EVAL_DEFINE(__MACHINEX64(1)) | ||||
| 		// Upper 64 bits of the signed product, obtained from _mul128's
 | ||||
| 		// high-part output parameter.
 | ||||
| 		static inline uint64_t __imulhi64(int64_t a, int64_t b) { | ||||
| 			int64_t hi; | ||||
| 			_mul128(a, b, &hi); | ||||
| 			return hi; | ||||
| 		} | ||||
| 		#define imulhi64 __imulhi64 | ||||
| 	#endif | ||||
| 	// Sets the rounding-control field through _controlfp; only the bits
 | ||||
| 	// selected by _MCW_RC are changed.
 | ||||
| 	static inline uint32_t _setRoundMode(uint32_t mode) { | ||||
| 		return _controlfp(mode, _MCW_RC); | ||||
| 	} | ||||
| 	#define setRoundMode _setRoundMode | ||||
| #endif | ||||
| 
 | ||||
| // No compiler-specific rounding helper was selected above: use fesetround from <cfenv>.
 | ||||
| #ifndef setRoundMode | ||||
| 	#define setRoundMode fesetround | ||||
| #endif | ||||
| 
 | ||||
| #ifndef ror64 | ||||
| 	// Portable fallback: rotates a right by b bits, with b expected in [0, 63].
 | ||||
| 	// The complementary shift count is reduced modulo 64 so that b == 0 does not
 | ||||
| 	// shift by the full width of the type, which is undefined behavior in C++.
 | ||||
| 	static inline uint64_t __ror64(uint64_t a, int b) { | ||||
| 		return (a >> b) | (a << ((64 - b) & 63)); | ||||
| 	} | ||||
| 	#define ror64 __ror64 | ||||
| #endif | ||||
| 
 | ||||
| #ifndef rol64 | ||||
| 	// Portable fallback: rotates a left by b bits, with b expected in [0, 63].
 | ||||
| 	// The complementary shift count is reduced modulo 64 so that b == 0 does not
 | ||||
| 	// shift by the full width of the type, which is undefined behavior in C++.
 | ||||
| 	static inline uint64_t __rol64(uint64_t a, int b) { | ||||
| 		return (a << b) | (a >> ((64 - b) & 63)); | ||||
| 	} | ||||
| 	#define rol64 __rol64 | ||||
| #endif | ||||
| 
 | ||||
| #ifndef sar64 | ||||
| 	#include <type_traits> | ||||
| 	// Plain >> on a signed value; constexpr so the compiler's behavior for
 | ||||
| 	// negative operands can be probed at compile time.
 | ||||
| 	constexpr int64_t builtintShr64(int64_t value, int shift) noexcept { | ||||
| 		return value >> shift; | ||||
| 	} | ||||
| 
 | ||||
| 	// True when >> on a negative int64_t replicates the sign bit.
 | ||||
| 	struct UsesArithmeticShift : std::integral_constant<bool, builtintShr64(-1LL, 1) == -1LL> { | ||||
| 	}; | ||||
| 
 | ||||
| 	// Arithmetic shift right of a by b bits. When the built-in operator is not
 | ||||
| 	// arithmetic, negative values are shifted in complemented form instead.
 | ||||
| 	static inline int64_t __sar64(int64_t a, int b) { | ||||
| 		if (UsesArithmeticShift::value) { | ||||
| 			return builtintShr64(a, b); | ||||
| 		} | ||||
| 		if (a < 0) { | ||||
| 			return ~(~a >> b); | ||||
| 		} | ||||
| 		return a >> b; | ||||
| 	} | ||||
| 	#define sar64 __sar64 | ||||
| #endif | ||||
| 
 | ||||
| #ifndef umulhi64 | ||||
| 	// Portable fallback: upper 64 bits of the unsigned 64x64-bit product, built
 | ||||
| 	// from four 32x32-bit partial products.
 | ||||
| 	#define LO(x) ((x)&0xffffffff) | ||||
| 	#define HI(x) ((x)>>32) | ||||
| 	static inline uint64_t __umulhi64(uint64_t a, uint64_t b) { | ||||
| 		uint64_t ah = HI(a), al = LO(a); | ||||
| 		uint64_t bh = HI(b), bl = LO(b); | ||||
| 		uint64_t x00 = al * bl; | ||||
| 		uint64_t x01 = al * bh; | ||||
| 		uint64_t x10 = ah * bl; | ||||
| 		uint64_t x11 = ah * bh; | ||||
| 		// m1 sums the terms of bits 32-63 of the product; only its carry
 | ||||
| 		// (HI(m1)) contributes to the upper half.
 | ||||
| 		uint64_t m1 = LO(x10) + LO(x01) + HI(x00); | ||||
| 		// m2 holds bits 64-95 in its low half and the carry into bit 96 above.
 | ||||
| 		uint64_t m2 = HI(x10) + HI(x01) + LO(x11) + HI(m1); | ||||
| 		// m3 holds bits 96-127.
 | ||||
| 		uint64_t m3 = HI(x11) + HI(m2); | ||||
| 
 | ||||
| 		return (m3 << 32) + LO(m2); | ||||
| 	} | ||||
| 	#define umulhi64 __umulhi64 | ||||
| #endif | ||||
| 
 | ||||
| #ifndef imulhi64 | ||||
| 	// Portable fallback: upper 64 bits of the signed 64x64-bit product.
 | ||||
| 	// Starts from the unsigned high part of the operands' bit patterns and
 | ||||
| 	// subtracts the other operand once for each negative operand. The
 | ||||
| 	// corrections are done in unsigned arithmetic, where wrap-around is well
 | ||||
| 	// defined; the signed form could overflow (e.g. a = -1, b = INT64_MIN).
 | ||||
| 	static inline int64_t __imulhi64(int64_t a, int64_t b) { | ||||
| 		uint64_t hi = umulhi64((uint64_t)a, (uint64_t)b); | ||||
| 		if (a < 0LL) hi -= (uint64_t)b; | ||||
| 		if (b < 0LL) hi -= (uint64_t)a; | ||||
| 		return (int64_t)hi; | ||||
| 	} | ||||
| 	#define imulhi64 __imulhi64 | ||||
| #endif | ||||
| 
 | ||||
| // Returns 0 for subnormal inputs; any other value is passed through unchanged.
 | ||||
| static double FlushDenormal(double x) { | ||||
| 	const bool isSubnormal = (std::fpclassify(x) == FP_SUBNORMAL); | ||||
| 	return isSubnormal ? 0.0 : x; | ||||
| } | ||||
| 
 | ||||
| // FTZ(x) wraps a floating point result. If FTZ was defined before this point
 | ||||
| // (see the commented-out define at the top of the file), results are passed
 | ||||
| // through FlushDenormal; otherwise the macro expands to its argument.
 | ||||
| #ifdef FTZ | ||||
| #undef FTZ | ||||
| #define FTZ(x) FlushDenormal(x) | ||||
| #else | ||||
| #define FTZ(x) x | ||||
| #endif | ||||
| 
 | ||||
| namespace RandomX { | ||||
| 
 | ||||
| 	// Portable C++ implementations of the instruction set declared in
 | ||||
| 	// instructions.hpp. Every instruction reads its operands from a and b and
 | ||||
| 	// writes its result to c.
 | ||||
| 	extern "C" { | ||||
| 
 | ||||
| 		// c = a + b (64-bit, wraps on overflow).
 | ||||
| 		void ADD_64(convertible_t& a, convertible_t& b, convertible_t& c) { | ||||
| 			c.u64 = a.u64 + b.u64; | ||||
| 		} | ||||
| 
 | ||||
| 		// 32-bit addition of the u32 members; the result is widened to 64 bits.
 | ||||
| 		void ADD_32(convertible_t& a, convertible_t& b, convertible_t& c) { | ||||
| 			c.u64 = a.u32 + b.u32; | ||||
| 		} | ||||
| 
 | ||||
| 		// c = a - b (64-bit, wraps on underflow).
 | ||||
| 		void SUB_64(convertible_t& a, convertible_t& b, convertible_t& c) { | ||||
| 			c.u64 = a.u64 - b.u64; | ||||
| 		} | ||||
| 
 | ||||
| 		// 32-bit subtraction of the u32 members; the result is widened to 64 bits.
 | ||||
| 		void SUB_32(convertible_t& a, convertible_t& b, convertible_t& c) { | ||||
| 			c.u64 = a.u32 - b.u32; | ||||
| 		} | ||||
| 
 | ||||
| 		// Low 64 bits of the 64x64-bit product.
 | ||||
| 		void MUL_64(convertible_t& a, convertible_t& b, convertible_t& c) { | ||||
| 			c.u64 = a.u64 * b.u64; | ||||
| 		} | ||||
| 
 | ||||
| 		// High 64 bits of the unsigned 64x64-bit product.
 | ||||
| 		void MULH_64(convertible_t& a, convertible_t& b, convertible_t& c) { | ||||
| 			c.u64 = umulhi64(a.u64, b.u64); | ||||
| 		} | ||||
| 
 | ||||
| 		// Full 64-bit product of the unsigned 32-bit operands.
 | ||||
| 		void MUL_32(convertible_t& a, convertible_t& b, convertible_t& c) { | ||||
| 			c.u64 = (uint64_t)a.u32 * b.u32; | ||||
| 		} | ||||
| 
 | ||||
| 		// Full 64-bit product of the signed 32-bit operands.
 | ||||
| 		void IMUL_32(convertible_t& a, convertible_t& b, convertible_t& c) { | ||||
| 			c.i64 = (int64_t)a.i32 * b.i32; | ||||
| 		} | ||||
| 
 | ||||
| 		// High 64 bits of the signed 64x64-bit product.
 | ||||
| 		void IMULH_64(convertible_t& a, convertible_t& b, convertible_t& c) { | ||||
| 			c.i64 = imulhi64(a.i64, b.i64); | ||||
| 		} | ||||
| 
 | ||||
| 		// Unsigned division of a (64-bit) by b (32-bit); a zero divisor is
 | ||||
| 		// replaced by 1.
 | ||||
| 		void DIV_64(convertible_t& a, convertible_t& b, convertible_t& c) { | ||||
| 			c.u64 = a.u64 / (b.u32 != 0 ? b.u32 : 1U); | ||||
| 		} | ||||
| 
 | ||||
| 		// Signed division of a (64-bit) by b (32-bit); a zero divisor is
 | ||||
| 		// replaced by 1. INT64_MIN / -1 is not representable and is defined
 | ||||
| 		// here to yield INT64_MIN.
 | ||||
| 		void IDIV_64(convertible_t& a, convertible_t& b, convertible_t& c) { | ||||
| 			if (a.i64 == INT64_MIN && b.i32 == -1) | ||||
| 				c.i64 = INT64_MIN; | ||||
| 			else | ||||
| 				c.i64 = a.i64 / (b.i32 != 0 ? b.i32 : 1); | ||||
| 		} | ||||
| 
 | ||||
| 		// Bitwise AND, OR and XOR. The _32 variants combine the u32 members and
 | ||||
| 		// widen the result to 64 bits.
 | ||||
| 		void AND_64(convertible_t& a, convertible_t& b, convertible_t& c) { | ||||
| 			c.u64 = a.u64 & b.u64; | ||||
| 		} | ||||
| 
 | ||||
| 		void AND_32(convertible_t& a, convertible_t& b, convertible_t& c) { | ||||
| 			c.u64 = a.u32 & b.u32; | ||||
| 		} | ||||
| 
 | ||||
| 		void OR_64(convertible_t& a, convertible_t& b, convertible_t& c) { | ||||
| 			c.u64 = a.u64 | b.u64; | ||||
| 		} | ||||
| 
 | ||||
| 		void OR_32(convertible_t& a, convertible_t& b, convertible_t& c) { | ||||
| 			c.u64 = a.u32 | b.u32; | ||||
| 		} | ||||
| 
 | ||||
| 		void XOR_64(convertible_t& a, convertible_t& b, convertible_t& c) { | ||||
| 			c.u64 = a.u64 ^ b.u64; | ||||
| 		} | ||||
| 
 | ||||
| 		void XOR_32(convertible_t& a, convertible_t& b, convertible_t& c) { | ||||
| 			c.u64 = a.u32 ^ b.u32; | ||||
| 		} | ||||
| 
 | ||||
| 		// Shifts and rotates. Only the low 6 bits of b are used as the count.
 | ||||
| 		void SHL_64(convertible_t& a, convertible_t& b, convertible_t& c) { | ||||
| 			c.u64 = a.u64 << (b.u64 & 63); | ||||
| 		} | ||||
| 
 | ||||
| 		void SHR_64(convertible_t& a, convertible_t& b, convertible_t& c) { | ||||
| 			c.u64 = a.u64 >> (b.u64 & 63); | ||||
| 		} | ||||
| 
 | ||||
| 		void SAR_64(convertible_t& a, convertible_t& b, convertible_t& c) { | ||||
| 			c.u64 = sar64(a.i64, b.u64 & 63); | ||||
| 		} | ||||
| 
 | ||||
| 		void ROL_64(convertible_t& a, convertible_t& b, convertible_t& c) { | ||||
| 			c.u64 = rol64(a.u64, (b.u64 & 63)); | ||||
| 		} | ||||
| 
 | ||||
| 		void ROR_64(convertible_t& a, convertible_t& b, convertible_t& c) { | ||||
| 			c.u64 = ror64(a.u64, (b.u64 & 63)); | ||||
| 		} | ||||
| 
 | ||||
| 		// Selects round-to-nearest.
 | ||||
| 		void FPINIT() { | ||||
| 			setRoundMode(FE_TONEAREST); | ||||
| 		} | ||||
| 
 | ||||
| 		// Floating point arithmetic: a is converted from its integer value to
 | ||||
| 		// double and combined with b; the result goes through FTZ().
 | ||||
| 		void FPADD(convertible_t& a, double b, convertible_t& c) { | ||||
| 			c.f64 = FTZ(convertToDouble(a.i64) + b); | ||||
| 		} | ||||
| 
 | ||||
| 		void FPSUB(convertible_t& a, double b, convertible_t& c) { | ||||
| 			c.f64 = FTZ(convertToDouble(a.i64) - b); | ||||
| 		} | ||||
| 
 | ||||
| 		// Multiplication and division use the non-zero conversion of a.
 | ||||
| 		void FPMUL(convertible_t& a, double b, convertible_t& c) { | ||||
| 			c.f64 = FTZ(convertToDoubleNonZero(a.i64) * b); | ||||
| 		} | ||||
| 
 | ||||
| 		void FPDIV(convertible_t& a, double b, convertible_t& c) { | ||||
| 			c.f64 = FTZ(convertToDoubleNonZero(a.i64) / b); | ||||
| 		} | ||||
| 
 | ||||
| 		// Square root of the non-negative conversion of a; b is not used.
 | ||||
| 		void FPSQRT(convertible_t& a, convertible_t& b, convertible_t& c) { | ||||
| #ifdef __SSE2__ | ||||
| 			double d = convertToDoubleNonNegative(a.i64); | ||||
| 			// _mm_set_sd places d in the low lane. The previous _mm_load_pd(&d)
 | ||||
| 			// performed a 16-byte aligned load of two doubles from a single
 | ||||
| 			// 8-byte local, reading past the object and requiring an alignment
 | ||||
| 			// the local is not guaranteed to have.
 | ||||
| 			c.f64 = _mm_cvtsd_f64(_mm_sqrt_sd(_mm_setzero_pd(), _mm_set_sd(d))); | ||||
| #else | ||||
| 			c.f64 = FTZ(sqrt(convertToDoubleNonNegative(a.i64))); | ||||
| #endif | ||||
| 
 | ||||
| 		} | ||||
| 
 | ||||
| 		// Stores a converted to double in c and switches the rounding mode
 | ||||
| 		// according to the two low bits of a; b is not used. With DEBUG
 | ||||
| 		// defined, the mode constant and the value returned by setRoundMode
 | ||||
| 		// are printed.
 | ||||
| 		void FPROUND(convertible_t& a, convertible_t& b, convertible_t& c) { | ||||
| 			c.f64 = convertToDouble(a.i64); | ||||
| 			switch (a.u64 & 3) { | ||||
| 				case RoundDown: | ||||
| #ifdef DEBUG | ||||
| 					std::cout << "Round FE_DOWNWARD (" << FE_DOWNWARD << ") = " << | ||||
| #endif | ||||
| 					setRoundMode(FE_DOWNWARD); | ||||
| #ifdef DEBUG | ||||
| 					std::cout << std::endl; | ||||
| #endif | ||||
| 					break; | ||||
| 				case RoundUp: | ||||
| #ifdef DEBUG | ||||
| 					std::cout << "Round FE_UPWARD (" << FE_UPWARD << ") = " << | ||||
| #endif | ||||
| 					setRoundMode(FE_UPWARD); | ||||
| #ifdef DEBUG | ||||
| 					std::cout << std::endl; | ||||
| #endif | ||||
| 					break; | ||||
| 				case RoundToZero: | ||||
| #ifdef DEBUG | ||||
| 					std::cout << "Round FE_TOWARDZERO (" << FE_TOWARDZERO << ") = " << | ||||
| #endif | ||||
| 					setRoundMode(FE_TOWARDZERO); | ||||
| #ifdef DEBUG | ||||
| 					std::cout << std::endl; | ||||
| #endif | ||||
| 					break; | ||||
| 				default: | ||||
| #ifdef DEBUG | ||||
| 					std::cout << "Round FE_TONEAREST (" << FE_TONEAREST << ") = " << | ||||
| #endif | ||||
| 					setRoundMode(FE_TONEAREST); | ||||
| #ifdef DEBUG | ||||
| 					std::cout << std::endl; | ||||
| #endif | ||||
| 					break; | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
							
								
								
									
										302
									
								
								src/instructionsX64.asm
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										302
									
								
								src/instructionsX64.asm
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,302 @@ | |||
| ; Copyright (c) 2018 tevador | ||||
| ; | ||||
| ; This file is part of RandomX. | ||||
| ; | ||||
| ; RandomX is free software: you can redistribute it and/or modify | ||||
| ; it under the terms of the GNU General Public License as published by | ||||
| ; the Free Software Foundation, either version 3 of the License, or | ||||
| ; (at your option) any later version. | ||||
| ; | ||||
| ; RandomX is distributed in the hope that it will be useful, | ||||
| ; but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| ; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
| ; GNU General Public License for more details. | ||||
| ; | ||||
| ; You should have received a copy of the GNU General Public License | ||||
| ; along with RandomX.  If not, see <http://www.gnu.org/licenses/>. | ||||
| 
 | ||||
| PUBLIC ADD_64 | ||||
| PUBLIC ADD_32 | ||||
| PUBLIC SUB_64 | ||||
| PUBLIC SUB_32 | ||||
| PUBLIC MUL_64 | ||||
| PUBLIC MULH_64 | ||||
| PUBLIC MUL_32 | ||||
| PUBLIC IMUL_32 | ||||
| PUBLIC IMULH_64 | ||||
| PUBLIC DIV_64 | ||||
| PUBLIC IDIV_64 | ||||
| PUBLIC AND_64 | ||||
| PUBLIC AND_32 | ||||
| PUBLIC OR_64 | ||||
| PUBLIC OR_32 | ||||
| PUBLIC XOR_64 | ||||
| PUBLIC XOR_32 | ||||
| PUBLIC SHL_64 | ||||
| PUBLIC SHR_64 | ||||
| PUBLIC SAR_64 | ||||
| PUBLIC ROL_64 | ||||
| PUBLIC ROR_64 | ||||
| PUBLIC FPINIT | ||||
| PUBLIC FPADD | ||||
| PUBLIC FPSUB | ||||
| PUBLIC FPMUL | ||||
| PUBLIC FPDIV | ||||
| PUBLIC FPSQRT | ||||
| PUBLIC FPROUND | ||||
| 
 | ||||
| .code | ||||
| 
 | ||||
| ; Integer arithmetic. Each procedure loads its operands through the pointers | ||||
| ; in rcx and rdx and stores a 64-bit result through the pointer in r8. | ||||
| ; NOTE(review): this matches the (a, b, c) reference parameters of the C++ | ||||
| ; declarations under the Microsoft x64 calling convention - assumed, confirm. | ||||
| 
 | ||||
| ; [r8] = [rcx] + [rdx], 64-bit. | ||||
| ADD_64 PROC | ||||
| 	mov	rax, QWORD PTR [rcx] | ||||
| 	add	rax, QWORD PTR [rdx] | ||||
| 	mov	QWORD PTR [r8], rax | ||||
| 	ret	0 | ||||
| ADD_64 ENDP | ||||
| 
 | ||||
| ; 32-bit add; writing eax zero-extends into rax, so the stored qword is the | ||||
| ; zero-extended 32-bit sum. | ||||
| ADD_32 PROC | ||||
| 	mov	eax, DWORD PTR [rcx] | ||||
| 	add	eax, DWORD PTR [rdx] | ||||
| 	mov	QWORD PTR [r8], rax | ||||
| 	ret	0 | ||||
| ADD_32 ENDP | ||||
| 
 | ||||
| ; [r8] = [rcx] - [rdx], 64-bit. | ||||
| SUB_64 PROC | ||||
| 	mov	rax, QWORD PTR [rcx] | ||||
| 	sub	rax, QWORD PTR [rdx] | ||||
| 	mov	QWORD PTR [r8], rax | ||||
| 	ret	0 | ||||
| SUB_64 ENDP | ||||
| 
 | ||||
| ; 32-bit subtract, stored zero-extended to 64 bits. | ||||
| SUB_32 PROC | ||||
| 	mov	eax, DWORD PTR [rcx] | ||||
| 	sub	eax, DWORD PTR [rdx] | ||||
| 	mov	QWORD PTR [r8], rax | ||||
| 	ret	0 | ||||
| SUB_32 ENDP | ||||
| 
 | ||||
| ; Low 64 bits of the 64x64-bit product. | ||||
| MUL_64 PROC | ||||
| 	mov	rax, QWORD PTR [rcx] | ||||
| 	imul	rax, QWORD PTR [rdx] | ||||
| 	mov	QWORD PTR [r8], rax | ||||
| 	ret	0 | ||||
| MUL_64 ENDP | ||||
| 
 | ||||
| ; Unsigned multiply; the high 64 bits of the product (rdx) are stored. | ||||
| MULH_64 PROC | ||||
| 	mov	rax, QWORD PTR [rdx] | ||||
| 	mul	QWORD PTR [rcx] | ||||
| 	mov	QWORD PTR [r8], rdx | ||||
| 	ret	0 | ||||
| MULH_64 ENDP | ||||
| 
 | ||||
| ; Both 32-bit operands are zero-extended, then multiplied to a 64-bit product. | ||||
| MUL_32 PROC | ||||
| 	mov	r9d, DWORD PTR [rcx] | ||||
| 	mov	eax, DWORD PTR [rdx] | ||||
| 	imul	r9, rax | ||||
| 	mov	QWORD PTR [r8], r9 | ||||
| 	ret	0 | ||||
| MUL_32 ENDP | ||||
| 
 | ||||
| ; Both 32-bit operands are sign-extended, then multiplied to a 64-bit product. | ||||
| IMUL_32 PROC | ||||
| 	movsxd	r9, DWORD PTR [rcx] | ||||
| 	movsxd	rax, DWORD PTR [rdx] | ||||
| 	imul	r9, rax | ||||
| 	mov	QWORD PTR [r8], r9 | ||||
| 	ret	0 | ||||
| IMUL_32 ENDP | ||||
| 
 | ||||
| ; Signed multiply; the high 64 bits of the product (rdx) are stored. | ||||
| IMULH_64 PROC | ||||
| 	mov	rax, QWORD PTR [rdx] | ||||
| 	imul	QWORD PTR [rcx] | ||||
| 	mov	QWORD PTR [r8], rdx | ||||
| 	ret	0 | ||||
| IMULH_64 ENDP | ||||
| 
 | ||||
| ; Unsigned divide of the 64-bit value at [rcx] by the 32-bit value at [rdx]. | ||||
| ; A zero divisor is replaced by 1 (cmovne keeps the preloaded 1). | ||||
| DIV_64 PROC | ||||
| 	mov	r9d, DWORD PTR [rdx] | ||||
| 	mov	eax, 1 | ||||
| 	test	r9d, r9d | ||||
| 	cmovne	eax, r9d | ||||
| 	xor	edx, edx | ||||
| 	mov	r9d, eax | ||||
| 	mov	rax, QWORD PTR [rcx] | ||||
| 	div	r9 | ||||
| 	mov	QWORD PTR [r8], rax | ||||
| 	ret 0 | ||||
| DIV_64 ENDP | ||||
| 
 | ||||
| ; Signed divide of the 64-bit value at [rcx] by the 32-bit value at [rdx]. | ||||
| ; When the divisor is -1 and the dividend has only its top bit set (rol by 1 | ||||
| ; then dec gives zero), the dividend is stored unchanged instead of dividing. | ||||
| ; Otherwise a zero divisor is replaced by 1 before idiv. | ||||
| IDIV_64 PROC | ||||
| 	mov	rax, QWORD PTR [rcx] | ||||
| 	cmp	DWORD PTR [rdx], -1 | ||||
| 	jne	SHORT SAFE_IDIV_64 | ||||
| ;	mov	rcx, -9223372036854775808 | ||||
| ;	cmp	rax, rcx | ||||
| 	mov rcx, rax | ||||
| 	rol	rcx, 1 | ||||
| 	dec	rcx | ||||
| 	jnz	SHORT SAFE_IDIV_64 | ||||
| 	mov	QWORD PTR [r8], rax | ||||
| 	ret	0 | ||||
| SAFE_IDIV_64: | ||||
| 	mov	ecx, DWORD PTR [rdx] | ||||
| 	test	ecx, ecx | ||||
| 	mov	edx, 1 | ||||
| 	cmovne	edx, ecx | ||||
| 	movsxd	rcx, edx | ||||
| 	cqo | ||||
| 	idiv	rcx | ||||
| 	mov	QWORD PTR [r8], rax | ||||
| 	ret 0 | ||||
| IDIV_64 ENDP | ||||
| 
 | ||||
| ; Bitwise operations. The _32 variants operate on the low dwords; writing eax | ||||
| ; zero-extends into rax, so the stored qword has its upper half cleared. | ||||
| AND_64 PROC | ||||
| 	mov	rax, QWORD PTR [rcx] | ||||
| 	and	rax, QWORD PTR [rdx] | ||||
| 	mov	QWORD PTR [r8], rax | ||||
| 	ret	0 | ||||
| AND_64 ENDP | ||||
| 
 | ||||
| AND_32 PROC | ||||
| 	mov	eax, DWORD PTR [rcx] | ||||
| 	and	eax, DWORD PTR [rdx] | ||||
| 	mov	QWORD PTR [r8], rax | ||||
| 	ret	0 | ||||
| AND_32 ENDP | ||||
| 
 | ||||
| OR_64 PROC | ||||
| 	mov	rax, QWORD PTR [rcx] | ||||
| 	or	rax, QWORD PTR [rdx] | ||||
| 	mov	QWORD PTR [r8], rax | ||||
| 	ret	0 | ||||
| OR_64 ENDP | ||||
| 
 | ||||
| OR_32 PROC | ||||
| 	mov	eax, DWORD PTR [rcx] | ||||
| 	or	eax, DWORD PTR [rdx] | ||||
| 	mov	QWORD PTR [r8], rax | ||||
| 	ret	0 | ||||
| OR_32 ENDP | ||||
| 
 | ||||
| XOR_64 PROC | ||||
| 	mov	rax, QWORD PTR [rcx] | ||||
| 	xor	rax, QWORD PTR [rdx] | ||||
| 	mov	QWORD PTR [r8], rax | ||||
| 	ret	0 | ||||
| XOR_64 ENDP | ||||
| 
 | ||||
| XOR_32 PROC | ||||
| 	mov	eax, DWORD PTR [rcx] | ||||
| 	xor	eax, DWORD PTR [rdx] | ||||
| 	mov	QWORD PTR [r8], rax | ||||
| 	ret	0 | ||||
| XOR_32 ENDP | ||||
| 
 | ||||
| ; Shifts and rotates. The count is taken from cl after loading the second | ||||
| ; operand into rcx; for 64-bit operands the CPU uses only the low 6 bits of cl. | ||||
| SHL_64 PROC | ||||
| 	mov	rax, QWORD PTR [rcx] | ||||
| 	mov	rcx, QWORD PTR [rdx] | ||||
| 	shl	rax, cl | ||||
| 	mov	QWORD PTR [r8], rax | ||||
| 	ret	0 | ||||
| SHL_64 ENDP | ||||
| 
 | ||||
| SHR_64 PROC | ||||
| 	mov	rax, QWORD PTR [rcx] | ||||
| 	mov	rcx, QWORD PTR [rdx] | ||||
| 	shr	rax, cl | ||||
| 	mov	QWORD PTR [r8], rax | ||||
| 	ret	0 | ||||
| SHR_64 ENDP | ||||
| 
 | ||||
| SAR_64 PROC | ||||
| 	mov	rax, QWORD PTR [rcx] | ||||
| 	mov	rcx, QWORD PTR [rdx] | ||||
| 	sar	rax, cl | ||||
| 	mov	QWORD PTR [r8], rax | ||||
| 	ret	0 | ||||
| SAR_64 ENDP | ||||
| 
 | ||||
| ROL_64 PROC | ||||
| 	mov	rax, QWORD PTR [rcx] | ||||
| 	mov	rcx, QWORD PTR [rdx] | ||||
| 	rol	rax, cl | ||||
| 	mov	QWORD PTR [r8], rax | ||||
| 	ret	0 | ||||
| ROL_64 ENDP | ||||
| 
 | ||||
| ROR_64 PROC | ||||
| 	mov	rax, QWORD PTR [rcx] | ||||
| 	mov	rcx, QWORD PTR [rdx] | ||||
| 	ror	rax, cl | ||||
| 	mov	QWORD PTR [r8], rax | ||||
| 	ret	0 | ||||
| ROR_64 ENDP | ||||
| 
 | ||||
| ; Loads MXCSR with 40896 (9FC0h) through the stack slot at [rsp+8]. | ||||
| ; In the x86 MXCSR layout this sets flush-to-zero (bit 15), the six exception | ||||
| ; masks (bits 7-12) and denormals-are-zero (bit 6), and leaves the rounding | ||||
| ; control (bits 13-14) at 00 = round to nearest. | ||||
| FPINIT PROC | ||||
| 	mov	DWORD PTR [rsp+8], 40896 | ||||
| 	ldmxcsr	DWORD PTR [rsp+8] | ||||
| 	ret	0 | ||||
| FPINIT ENDP | ||||
| 
 | ||||
| ; FPADD/FPSUB: the integer at [rcx] has its 11 low bits cleared (and -2048), | ||||
| ; is converted to double, combined with the double in xmm1, and stored at [r8]. | ||||
| FPADD PROC | ||||
| 	mov	rax, QWORD PTR [rcx] | ||||
| 	and rax, -2048 | ||||
| 	cvtsi2sd xmm0, rax | ||||
| 	addsd	xmm0, xmm1 | ||||
| 	movsd	QWORD PTR [r8], xmm0 | ||||
| 	ret	0 | ||||
| FPADD ENDP | ||||
| 
 | ||||
| FPSUB PROC | ||||
| 	mov	rax, QWORD PTR [rcx] | ||||
| 	and rax, -2048 | ||||
| 	cvtsi2sd xmm0, rax | ||||
| 	subsd	xmm0, xmm1 | ||||
| 	movsd	QWORD PTR [r8], xmm0 | ||||
| 	ret	0 | ||||
| FPSUB ENDP | ||||
| 
 | ||||
| ; FPMUL/FPDIV: as above, but bit 11 is also forced on (or 2048) so the | ||||
| ; converted operand is never zero. | ||||
| FPMUL PROC | ||||
| 	mov	rax, QWORD PTR [rcx] | ||||
| 	and rax, -2048 | ||||
| 	or	rax, 2048 | ||||
| 	cvtsi2sd xmm0, rax | ||||
| 	mulsd	xmm0, xmm1 | ||||
| 	movsd	QWORD PTR [r8], xmm0 | ||||
| 	ret	0 | ||||
| FPMUL ENDP | ||||
| 
 | ||||
| FPDIV PROC | ||||
| 	mov	rax, QWORD PTR [rcx] | ||||
| 	and rax, -2048 | ||||
| 	or	rax, 2048 | ||||
| 	cvtsi2sd xmm0, rax | ||||
| 	divsd	xmm0, xmm1 | ||||
| 	movsd	QWORD PTR [r8], xmm0 | ||||
| 	ret	0 | ||||
| FPDIV ENDP | ||||
| 
 | ||||
| ; The mask 9223372036854773760 (7FFFFFFFFFFFF800h) clears the sign bit and the | ||||
| ; 11 low bits, so the converted operand is never negative. The second operand | ||||
| ; is not read. | ||||
| FPSQRT PROC | ||||
| 	mov	rax, QWORD PTR [rcx] | ||||
| 	mov rcx, 9223372036854773760 | ||||
| 	and rax, rcx | ||||
| 	cvtsi2sd xmm0, rax | ||||
| 	sqrtsd	xmm1, xmm0 | ||||
| 	movsd	QWORD PTR [r8], xmm1 | ||||
| 	ret	0 | ||||
| FPSQRT ENDP | ||||
| 
 | ||||
| ; Stores the operand (11 low bits cleared) converted to double at [r8], then | ||||
| ; reloads MXCSR: the two low bits of the operand are shifted into bits 13-14 | ||||
| ; (24576 = 6000h, the rounding-control field) and combined with the 40896 | ||||
| ; base value used by FPINIT. | ||||
| FPROUND PROC | ||||
| 	mov	rax, QWORD PTR [rcx] | ||||
| 	mov rcx, rax | ||||
| 	shl	rax, 13 | ||||
| 	and rcx, -2048 | ||||
| 	and	eax, 24576 | ||||
| 	cvtsi2sd xmm0, rcx	 | ||||
| 	movsd	QWORD PTR [r8], xmm0 | ||||
| 	or	eax, 40896 | ||||
| 	mov	DWORD PTR [rsp+8], eax | ||||
| 	ldmxcsr	DWORD PTR [rsp+8] | ||||
| 	ret	0 | ||||
| FPROUND ENDP | ||||
| 
 | ||||
| END | ||||
							
								
								
									
										134
									
								
								src/intrinPortable.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										134
									
								
								src/intrinPortable.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,134 @@ | |||
| /*
 | ||||
| Copyright (c) 2018 tevador | ||||
| 
 | ||||
| This file is part of RandomX. | ||||
| 
 | ||||
| RandomX is free software: you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation, either version 3 of the License, or | ||||
| (at your option) any later version. | ||||
| 
 | ||||
| RandomX is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
| GNU General Public License for more details. | ||||
| 
 | ||||
| You should have received a copy of the GNU General Public License | ||||
| along with RandomX.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| */ | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| // This block defines __SSE2__ for MSVC builds that target x64, or x86 with
 | ||||
| // _M_IX86_FP == 2, so the rest of the code can test a single macro.
 | ||||
| #if defined(_MSC_VER) | ||||
| #if defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP == 2) | ||||
| #define __SSE2__ 1 | ||||
| #endif | ||||
| #endif | ||||
| 
 | ||||
| #ifdef __SSE2__ | ||||
| #ifdef __GNUC__ | ||||
| #include <x86intrin.h> | ||||
| #else | ||||
| #include <intrin.h> | ||||
| #endif | ||||
| #else | ||||
| #include <cstdint> | ||||
| #include <cstdlib> | ||||
| #include <stdexcept> | ||||
| 
 | ||||
| // Portable stand-ins for the SSE2/AES intrinsics, used when __SSE2__ is not
 | ||||
| // defined.
 | ||||
| 
 | ||||
| // Allocation falls back to the C allocator (declared in <cstdlib>).
 | ||||
| // NOTE: the alignment argument b is ignored.
 | ||||
| #define _mm_malloc(a,b) malloc(a) | ||||
| #define _mm_free(a) free(a) | ||||
| 
 | ||||
| // 128-bit value viewed as 2, 4, 8 or 16 unsigned lanes.
 | ||||
| typedef union { | ||||
| 	uint64_t u64[2]; | ||||
| 	uint32_t u32[4]; | ||||
| 	uint16_t u16[8]; | ||||
| 	uint8_t u8[16]; | ||||
| } __m128i; | ||||
| 
 | ||||
| static const char* platformError = "Platform doesn't support hardware AES"; | ||||
| 
 | ||||
| // The three AES intrinsics have no portable implementation here; calling
 | ||||
| // them always throws std::runtime_error.
 | ||||
| inline __m128i _mm_aeskeygenassist_si128(__m128i key, uint8_t rcon) { | ||||
| 	throw std::runtime_error(platformError); | ||||
| } | ||||
| 
 | ||||
| inline __m128i _mm_aesenc_si128(__m128i v, __m128i rkey) { | ||||
| 	throw std::runtime_error(platformError); | ||||
| } | ||||
| 
 | ||||
| inline __m128i _mm_aesdec_si128(__m128i v, __m128i rkey) { | ||||
| 	throw std::runtime_error(platformError); | ||||
| } | ||||
| 
 | ||||
| // Returns 32-bit lane 0.
 | ||||
| inline int _mm_cvtsi128_si32(__m128i v) { | ||||
| 	return v.u32[0]; | ||||
| } | ||||
| 
 | ||||
| // Places si32 in 32-bit lane 0 and zeroes the other three lanes.
 | ||||
| inline __m128i _mm_cvtsi32_si128(int si32) { | ||||
| 	__m128i v; | ||||
| 	v.u32[0] = si32; | ||||
| 	v.u32[1] = 0; | ||||
| 	v.u32[2] = 0; | ||||
| 	v.u32[3] = 0; | ||||
| 	return v; | ||||
| } | ||||
| 
 | ||||
| // Builds a value from two 64-bit lanes; the last argument is lane 0.
 | ||||
| inline  __m128i _mm_set_epi64x(int64_t _I1, int64_t _I0) { | ||||
| 	__m128i v; | ||||
| 	v.u64[0] = _I0; | ||||
| 	v.u64[1] = _I1; | ||||
| 	return v; | ||||
| } | ||||
| 
 | ||||
| // Builds a value from four 32-bit lanes; the last argument is lane 0.
 | ||||
| inline __m128i _mm_set_epi32(int _I3, int _I2, int _I1, int _I0) { | ||||
| 	__m128i v; | ||||
| 	v.u32[0] = _I0; | ||||
| 	v.u32[1] = _I1; | ||||
| 	v.u32[2] = _I2; | ||||
| 	v.u32[3] = _I3; | ||||
| 	return v; | ||||
| } | ||||
| 
 | ||||
| // Lane-wise XOR of the two operands.
 | ||||
| inline __m128i _mm_xor_si128(__m128i _A, __m128i _B) { | ||||
| 	__m128i c; | ||||
| 	c.u32[0] = _A.u32[0] ^ _B.u32[0]; | ||||
| 	c.u32[1] = _A.u32[1] ^ _B.u32[1]; | ||||
| 	c.u32[2] = _A.u32[2] ^ _B.u32[2]; | ||||
| 	c.u32[3] = _A.u32[3] ^ _B.u32[3]; | ||||
| 	return c; | ||||
| } | ||||
| 
 | ||||
| // Output lane k takes the input lane selected by bits 2k and 2k+1 of _Imm.
 | ||||
| inline __m128i _mm_shuffle_epi32(__m128i _A, int _Imm) { | ||||
| 	__m128i c; | ||||
| 	c.u32[0] = _A.u32[_Imm & 3]; | ||||
| 	c.u32[1] = _A.u32[(_Imm >> 2) & 3]; | ||||
| 	c.u32[2] = _A.u32[(_Imm >> 4) & 3]; | ||||
| 	c.u32[3] = _A.u32[(_Imm >> 6) & 3]; | ||||
| 	return c; | ||||
| } | ||||
| 
 | ||||
| inline __m128i _mm_load_si128(__m128i const*_P) { | ||||
| 	return *_P; | ||||
| } | ||||
| 
 | ||||
| inline void _mm_store_si128(__m128i *_P, __m128i _B) { | ||||
| 	*_P = _B; | ||||
| } | ||||
| 
 | ||||
| // Moves the bytes of _A up by _Imm positions (towards higher indices) and
 | ||||
| // fills the vacated low bytes with zero. Counts above 15 clear the value.
 | ||||
| inline __m128i _mm_slli_si128(__m128i _A, int _Imm) { | ||||
| 	_Imm &= 255; | ||||
| 	if (_Imm > 15) { | ||||
| 		_A.u64[0] = 0; | ||||
| 		_A.u64[1] = 0; | ||||
| 	} | ||||
| 	else { | ||||
| 		// Copy from the top down so no source byte is overwritten before it is read.
 | ||||
| 		for (int i = 15; i >= _Imm; --i) { | ||||
| 			_A.u8[i] = _A.u8[i - _Imm]; | ||||
| 		} | ||||
| 		for (int i = 0; i < _Imm; ++i) { | ||||
| 			_A.u8[i] = 0; | ||||
| 		} | ||||
| 	} | ||||
| 	return _A; | ||||
| } | ||||
 | ||||
| #endif | ||||
							
								
								
									
										136
									
								
								src/main.cpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										136
									
								
								src/main.cpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,136 @@ | |||
| /*
 | ||||
| Copyright (c) 2018 tevador | ||||
| 
 | ||||
| This file is part of RandomX. | ||||
| 
 | ||||
| RandomX is free software: you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation, either version 3 of the License, or | ||||
| (at your option) any later version. | ||||
| 
 | ||||
| RandomX is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
| GNU General Public License for more details. | ||||
| 
 | ||||
| You should have received a copy of the GNU General Public License | ||||
| along with RandomX.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| */ | ||||
| //#define TRACE
 | ||||
| #include "InterpretedVirtualMachine.hpp" | ||||
| #include "Stopwatch.hpp" | ||||
| #include "blake2/blake2.h" | ||||
| #include <fstream> | ||||
| #include <iostream> | ||||
| #include <iomanip> | ||||
| #include <exception> | ||||
| #include <cstring> | ||||
| #include "Program.hpp" | ||||
| #include <string> | ||||
| 
 | ||||
| // Fixed 32-byte seed passed to initializeDataset() in main().
 | ||||
| const uint8_t seed[32] = { 191, 182, 222, 175, 249, 89, 134, 104, 241, 68, 191, 62, 162, 166, 61, 64, 123, 191, 227, 193, 118, 60, 188, 53, 223, 133, 175, 24, 123, 230, 55, 74 }; | ||||
| 
 | ||||
| // Writes count bytes from buffer to the file called name, in binary mode.
 | ||||
| void dump(const char* buffer, uint64_t count, const char* name) { | ||||
| 	std::ofstream fout; | ||||
| 	fout.open(name, std::ios::out | std::ios::binary); | ||||
| 	fout.write(buffer, count); | ||||
| 	fout.close(); | ||||
| } | ||||
| 
 | ||||
| constexpr char hexmap[] = "0123456789abcdef"; | ||||
| // Writes length bytes of data to os as lowercase hex digits (two per byte,
 | ||||
| // high nibble first), followed by std::endl.
 | ||||
| void outputHex(std::ostream& os, const char* data, int length) { | ||||
| 	int index = 0; | ||||
| 	while (index < length) { | ||||
| 		const int byte = data[index] & 0xFF; | ||||
| 		os << hexmap[byte >> 4] << hexmap[byte & 0x0F]; | ||||
| 		++index; | ||||
| 	} | ||||
| 	os << std::endl; | ||||
| } | ||||
| 
 | ||||
| // Sets out to true when one of the argc entries of argv equals option, and to
 | ||||
| // false otherwise.
 | ||||
| void readOption(const char* option, int argc, char** argv, bool& out) { | ||||
| 	bool found = false; | ||||
| 	for (int i = 0; i < argc && !found; ++i) { | ||||
| 		found = (strcmp(argv[i], option) == 0); | ||||
| 	} | ||||
| 	out = found; | ||||
| } | ||||
| 
 | ||||
| // Stores in out the first command line argument that does not start with '-'
 | ||||
| // and parses (via atoi) to a positive integer; stores defaultValue when there
 | ||||
| // is none. argv[0] is skipped: it is the program name, and a name or path
 | ||||
| // beginning with digits would otherwise be taken for the value.
 | ||||
| void readInt(int argc, char** argv, int& out, int defaultValue) { | ||||
| 	for (int i = 1; i < argc; ++i) { | ||||
| 		if (*argv[i] == '-') { | ||||
| 			continue; | ||||
| 		} | ||||
| 		const int parsed = atoi(argv[i]); | ||||
| 		if (parsed > 0) { | ||||
| 			out = parsed; | ||||
| 			return; | ||||
| 		} | ||||
| 	} | ||||
| 	out = defaultValue; | ||||
| } | ||||
| 
 | ||||
| // Prints every r register as hex, then every f register as hex followed by
 | ||||
| // its f64 value in parentheses, one register per line. The register index is
 | ||||
| // written while std::hex is active; the stream is switched back to std::dec
 | ||||
| // after each line.
 | ||||
| std::ostream& operator<<(std::ostream& os, const RandomX::RegisterFile& rf) { | ||||
| 	for (int i = 0; i < RandomX::RegistersCount; ++i) { | ||||
| 		os << std::hex << "r" << i << " = "; | ||||
| 		os << rf.r[i].u64 << std::endl; | ||||
| 		os << std::dec; | ||||
| 	} | ||||
| 	for (int i = 0; i < RandomX::RegistersCount; ++i) { | ||||
| 		os << std::hex << "f" << i << " = "; | ||||
| 		os << rf.f[i].u64 << " (" << rf.f[i].f64 << ")" << std::endl; | ||||
| 		os << std::dec; | ||||
| 	} | ||||
| 	return os; | ||||
| } | ||||
| 
 | ||||
| // Benchmark driver. Initializes the dataset (or the cache with --lightClient),
 | ||||
| // then for each nonce hashes the block template, runs the generated program
 | ||||
| // and hashes the resulting register file. The per-nonce hashes are XOR-ed
 | ||||
| // together and printed along with the throughput.
 | ||||
| // Arguments: --softAes, --lightClient, and an optional positive program count
 | ||||
| // (default 1000). Returns 1 if an exception is caught, 0 otherwise.
 | ||||
| int main(int argc, char** argv) { | ||||
| 	bool softAes, lightClient; | ||||
| 	int programCount; | ||||
| 	readOption("--softAes", argc, argv, softAes); | ||||
| 	readOption("--lightClient", argc, argv, lightClient); | ||||
| 	readInt(argc, argv, programCount, 1000); | ||||
| 	if (softAes) | ||||
| 		std::cout << "Using software AES." << std::endl; | ||||
| 
 | ||||
| 	uint8_t hash[32]; | ||||
| 	uint8_t cumulative[32] = { 0 }; | ||||
| 	unsigned char blockTemplate[] = { | ||||
| 		0x07, 0x07, 0xf7, 0xa4, 0xf0, 0xd6, 0x05, 0xb3, 0x03, 0x26, 0x08, 0x16, 0xba, 0x3f, 0x10, 0x90, 0x2e, 0x1a, 0x14, | ||||
| 		0x5a, 0xc5, 0xfa, 0xd3, 0xaa, 0x3a, 0xf6, 0xea, 0x44, 0xc1, 0x18, 0x69, 0xdc, 0x4f, 0x85, 0x3f, 0x00, 0x2b, 0x2e, | ||||
| 		0xea, 0x00, 0x00, 0x00, 0x00, 0x77, 0xb2, 0x06, 0xa0, 0x2c, 0xa5, 0xb1, 0xd4, 0xce, 0x6b, 0xbf, 0xdf, 0x0a, 0xca, | ||||
| 		0xc3, 0x8b, 0xde, 0xd3, 0x4d, 0x2d, 0xcd, 0xee, 0xf9, 0x5c, 0xd2, 0x0c, 0xef, 0xc1, 0x2f, 0x61, 0xd5, 0x61, 0x09 | ||||
| 	}; | ||||
| 	// The nonce occupies sizeof(int) bytes starting at offset 39. That address
 | ||||
| 	// is not suitably aligned for an int, so the value is copied in with
 | ||||
| 	// memcpy instead of being stored through a casted pointer.
 | ||||
| 	unsigned char* const nonce = blockTemplate + 39; | ||||
| 	RandomX::InterpretedVirtualMachine vm(softAes); | ||||
| 	try { | ||||
| 		std::cout << "Initializing..." << std::endl; | ||||
| 		Stopwatch sw(true); | ||||
| 		vm.initializeDataset(seed, lightClient); | ||||
| 		if(lightClient) | ||||
| 			std::cout << "Cache (64 MiB) initialized in " << sw.getElapsed() << " s" << std::endl; | ||||
| 		else | ||||
| 			std::cout << "Dataset (4 GiB) initialized in " << sw.getElapsed() << " s" << std::endl; | ||||
| 		std::cout << "Running benchmark (" << programCount << " programs) ..." << std::endl; | ||||
| 		sw.restart(); | ||||
| 		for (int i = 0; i < programCount; ++i) { | ||||
| 			std::memcpy(nonce, &i, sizeof(i)); | ||||
| 			if (RandomX::trace) std::cout << "Nonce: " << i << " "; | ||||
| 			blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0); | ||||
| 			// Scratchpad index: hash byte 24 plus the low 6 bits of byte 25.
 | ||||
| 			int spIndex = hash[24] | ((hash[25] & 63) << 8); | ||||
| 			vm.initializeScratchpad(spIndex); | ||||
| 			//dump((const char *)vm.getScratchpad(), RandomX::ScratchpadSize, "scratchpad-before.txt");
 | ||||
| 			//return 0;
 | ||||
| 			vm.initializeProgram(hash); | ||||
| 			vm.execute(); | ||||
| 			/*std::string fileName("scratchpad-after-");
 | ||||
| 			fileName = fileName + std::to_string(i) + ".txt"; | ||||
| 			dump((const char *)vm.getScratchpad(), RandomX::ScratchpadSize, fileName.c_str());*/ | ||||
| 			blake2b((void*)hash, sizeof(hash), &vm.getRegisterFile(), sizeof(RandomX::RegisterFile), nullptr, 0); | ||||
| 			//std::cout << vm.getRegisterFile();
 | ||||
| 			if (RandomX::trace) { | ||||
| 				outputHex(std::cout, (char*)hash, sizeof(hash)); | ||||
| 			} | ||||
| 			// Byte-wise XOR gives the same bytes as XOR-ing 64-bit words, without
 | ||||
| 			// reinterpreting the byte arrays as uint64_t.
 | ||||
| 			for (size_t k = 0; k < sizeof(cumulative); ++k) { | ||||
| 				cumulative[k] = static_cast<uint8_t>(cumulative[k] ^ hash[k]); | ||||
| 			} | ||||
| 		} | ||||
| 		std::cout << "Cumulative output hash: "; | ||||
| 		outputHex(std::cout, (const char*)cumulative, sizeof(cumulative)); | ||||
| 		std::cout << "Performance: " << programCount / sw.getElapsed() << " programs per second" << std::endl; | ||||
| 	} | ||||
| 	catch (std::exception& e) { | ||||
| 		std::cout << "ERROR: " << e.what() << std::endl; | ||||
| 		return 1; | ||||
| 	} | ||||
| 	return 0; | ||||
| } | ||||
							
								
								
									
										377
									
								
								src/softAes.cpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										377
									
								
								src/softAes.cpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,377 @@ | |||
| /*
 | ||||
| Copyright (c) 2018 tevador | ||||
| 
 | ||||
| This file is part of RandomX. | ||||
| 
 | ||||
| RandomX is free software: you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation, either version 3 of the License, or | ||||
| (at your option) any later version. | ||||
| 
 | ||||
| RandomX is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
| GNU General Public License for more details. | ||||
| 
 | ||||
| You should have received a copy of the GNU General Public License | ||||
| along with RandomX.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| */ | ||||
| 
 | ||||
| // Parts of this file are originally copyright (c) xmr-stak
 | ||||
| // Parts of this file are originally copyright (c) 2014-2017, The Monero Project
 | ||||
| 
 | ||||
| #include "softAes.h" | ||||
| 
 | ||||
// AES forward substitution box: sbox[b] is the substituted value of byte b
// (256 entries, one per possible byte). Read by subw() in this file.
// The table is declared with 16-byte alignment.
| alignas(16) const uint8_t sbox[256] = { | ||||
| 	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, | ||||
| 	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, | ||||
| 	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, | ||||
| 	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, | ||||
| 	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, | ||||
| 	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, | ||||
| 	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, | ||||
| 	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, | ||||
| 	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, | ||||
| 	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, | ||||
| 	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, | ||||
| 	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, | ||||
| 	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, | ||||
| 	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, | ||||
| 	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, | ||||
| 	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16, | ||||
| }; | ||||
| 
 | ||||
// Encryption round lookup table 0 (256 x 32-bit), indexed in soft_aesenc()
// by the low byte (bits 0..7) of a state column.
// Layout visible in the leading entries, with S = sbox[i]:
//   bits 0..7 = 2*S, bits 8..15 = S, bits 16..23 = S, bits 24..31 = 3*S
// (products in GF(2^8); e.g. i = 0: S = 0x63 -> 0xa56363c6), i.e. the S-box
// lookup and the column-mixing multipliers are folded into one word.
| alignas(16) const uint32_t lutEnc0[256] = { | ||||
| 	0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6, 0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591, | ||||
| 	0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56, 0x19fefee7, 0x62d7d7b5, 0xe6abab4d, 0x9a7676ec, | ||||
| 	0x45caca8f, 0x9d82821f, 0x40c9c989, 0x877d7dfa, 0x15fafaef, 0xeb5959b2, 0xc947478e, 0x0bf0f0fb, | ||||
| 	0xecadad41, 0x67d4d4b3, 0xfda2a25f, 0xeaafaf45, 0xbf9c9c23, 0xf7a4a453, 0x967272e4, 0x5bc0c09b, | ||||
| 	0xc2b7b775, 0x1cfdfde1, 0xae93933d, 0x6a26264c, 0x5a36366c, 0x413f3f7e, 0x02f7f7f5, 0x4fcccc83, | ||||
| 	0x5c343468, 0xf4a5a551, 0x34e5e5d1, 0x08f1f1f9, 0x937171e2, 0x73d8d8ab, 0x53313162, 0x3f15152a, | ||||
| 	0x0c040408, 0x52c7c795, 0x65232346, 0x5ec3c39d, 0x28181830, 0xa1969637, 0x0f05050a, 0xb59a9a2f, | ||||
| 	0x0907070e, 0x36121224, 0x9b80801b, 0x3de2e2df, 0x26ebebcd, 0x6927274e, 0xcdb2b27f, 0x9f7575ea, | ||||
| 	0x1b090912, 0x9e83831d, 0x742c2c58, 0x2e1a1a34, 0x2d1b1b36, 0xb26e6edc, 0xee5a5ab4, 0xfba0a05b, | ||||
| 	0xf65252a4, 0x4d3b3b76, 0x61d6d6b7, 0xceb3b37d, 0x7b292952, 0x3ee3e3dd, 0x712f2f5e, 0x97848413, | ||||
| 	0xf55353a6, 0x68d1d1b9, 0x00000000, 0x2cededc1, 0x60202040, 0x1ffcfce3, 0xc8b1b179, 0xed5b5bb6, | ||||
| 	0xbe6a6ad4, 0x46cbcb8d, 0xd9bebe67, 0x4b393972, 0xde4a4a94, 0xd44c4c98, 0xe85858b0, 0x4acfcf85, | ||||
| 	0x6bd0d0bb, 0x2aefefc5, 0xe5aaaa4f, 0x16fbfbed, 0xc5434386, 0xd74d4d9a, 0x55333366, 0x94858511, | ||||
| 	0xcf45458a, 0x10f9f9e9, 0x06020204, 0x817f7ffe, 0xf05050a0, 0x443c3c78, 0xba9f9f25, 0xe3a8a84b, | ||||
| 	0xf35151a2, 0xfea3a35d, 0xc0404080, 0x8a8f8f05, 0xad92923f, 0xbc9d9d21, 0x48383870, 0x04f5f5f1, | ||||
| 	0xdfbcbc63, 0xc1b6b677, 0x75dadaaf, 0x63212142, 0x30101020, 0x1affffe5, 0x0ef3f3fd, 0x6dd2d2bf, | ||||
| 	0x4ccdcd81, 0x140c0c18, 0x35131326, 0x2fececc3, 0xe15f5fbe, 0xa2979735, 0xcc444488, 0x3917172e, | ||||
| 	0x57c4c493, 0xf2a7a755, 0x827e7efc, 0x473d3d7a, 0xac6464c8, 0xe75d5dba, 0x2b191932, 0x957373e6, | ||||
| 	0xa06060c0, 0x98818119, 0xd14f4f9e, 0x7fdcdca3, 0x66222244, 0x7e2a2a54, 0xab90903b, 0x8388880b, | ||||
| 	0xca46468c, 0x29eeeec7, 0xd3b8b86b, 0x3c141428, 0x79dedea7, 0xe25e5ebc, 0x1d0b0b16, 0x76dbdbad, | ||||
| 	0x3be0e0db, 0x56323264, 0x4e3a3a74, 0x1e0a0a14, 0xdb494992, 0x0a06060c, 0x6c242448, 0xe45c5cb8, | ||||
| 	0x5dc2c29f, 0x6ed3d3bd, 0xefacac43, 0xa66262c4, 0xa8919139, 0xa4959531, 0x37e4e4d3, 0x8b7979f2, | ||||
| 	0x32e7e7d5, 0x43c8c88b, 0x5937376e, 0xb76d6dda, 0x8c8d8d01, 0x64d5d5b1, 0xd24e4e9c, 0xe0a9a949, | ||||
| 	0xb46c6cd8, 0xfa5656ac, 0x07f4f4f3, 0x25eaeacf, 0xaf6565ca, 0x8e7a7af4, 0xe9aeae47, 0x18080810, | ||||
| 	0xd5baba6f, 0x887878f0, 0x6f25254a, 0x722e2e5c, 0x241c1c38, 0xf1a6a657, 0xc7b4b473, 0x51c6c697, | ||||
| 	0x23e8e8cb, 0x7cdddda1, 0x9c7474e8, 0x211f1f3e, 0xdd4b4b96, 0xdcbdbd61, 0x868b8b0d, 0x858a8a0f, | ||||
| 	0x907070e0, 0x423e3e7c, 0xc4b5b571, 0xaa6666cc, 0xd8484890, 0x05030306, 0x01f6f6f7, 0x120e0e1c, | ||||
| 	0xa36161c2, 0x5f35356a, 0xf95757ae, 0xd0b9b969, 0x91868617, 0x58c1c199, 0x271d1d3a, 0xb99e9e27, | ||||
| 	0x38e1e1d9, 0x13f8f8eb, 0xb398982b, 0x33111122, 0xbb6969d2, 0x70d9d9a9, 0x898e8e07, 0xa7949433, | ||||
| 	0xb69b9b2d, 0x221e1e3c, 0x92878715, 0x20e9e9c9, 0x49cece87, 0xff5555aa, 0x78282850, 0x7adfdfa5, | ||||
| 	0x8f8c8c03, 0xf8a1a159, 0x80898909, 0x170d0d1a, 0xdabfbf65, 0x31e6e6d7, 0xc6424284, 0xb86868d0, | ||||
| 	0xc3414182, 0xb0999929, 0x772d2d5a, 0x110f0f1e, 0xcbb0b07b, 0xfc5454a8, 0xd6bbbb6d, 0x3a16162c, | ||||
| }; | ||||
| 
 | ||||
// Encryption round lookup table 1 (256 x 32-bit), indexed in soft_aesenc()
// by bits 8..15 of a state column.
// The leading entries are the corresponding lutEnc0 entries rotated left by
// 8 bits (e.g. 0xa56363c6 -> 0x6363c6a5).
| alignas(16) const uint32_t lutEnc1[256] = { | ||||
| 	0x6363c6a5, 0x7c7cf884, 0x7777ee99, 0x7b7bf68d, 0xf2f2ff0d, 0x6b6bd6bd, 0x6f6fdeb1, 0xc5c59154, | ||||
| 	0x30306050, 0x01010203, 0x6767cea9, 0x2b2b567d, 0xfefee719, 0xd7d7b562, 0xabab4de6, 0x7676ec9a, | ||||
| 	0xcaca8f45, 0x82821f9d, 0xc9c98940, 0x7d7dfa87, 0xfafaef15, 0x5959b2eb, 0x47478ec9, 0xf0f0fb0b, | ||||
| 	0xadad41ec, 0xd4d4b367, 0xa2a25ffd, 0xafaf45ea, 0x9c9c23bf, 0xa4a453f7, 0x7272e496, 0xc0c09b5b, | ||||
| 	0xb7b775c2, 0xfdfde11c, 0x93933dae, 0x26264c6a, 0x36366c5a, 0x3f3f7e41, 0xf7f7f502, 0xcccc834f, | ||||
| 	0x3434685c, 0xa5a551f4, 0xe5e5d134, 0xf1f1f908, 0x7171e293, 0xd8d8ab73, 0x31316253, 0x15152a3f, | ||||
| 	0x0404080c, 0xc7c79552, 0x23234665, 0xc3c39d5e, 0x18183028, 0x969637a1, 0x05050a0f, 0x9a9a2fb5, | ||||
| 	0x07070e09, 0x12122436, 0x80801b9b, 0xe2e2df3d, 0xebebcd26, 0x27274e69, 0xb2b27fcd, 0x7575ea9f, | ||||
| 	0x0909121b, 0x83831d9e, 0x2c2c5874, 0x1a1a342e, 0x1b1b362d, 0x6e6edcb2, 0x5a5ab4ee, 0xa0a05bfb, | ||||
| 	0x5252a4f6, 0x3b3b764d, 0xd6d6b761, 0xb3b37dce, 0x2929527b, 0xe3e3dd3e, 0x2f2f5e71, 0x84841397, | ||||
| 	0x5353a6f5, 0xd1d1b968, 0x00000000, 0xededc12c, 0x20204060, 0xfcfce31f, 0xb1b179c8, 0x5b5bb6ed, | ||||
| 	0x6a6ad4be, 0xcbcb8d46, 0xbebe67d9, 0x3939724b, 0x4a4a94de, 0x4c4c98d4, 0x5858b0e8, 0xcfcf854a, | ||||
| 	0xd0d0bb6b, 0xefefc52a, 0xaaaa4fe5, 0xfbfbed16, 0x434386c5, 0x4d4d9ad7, 0x33336655, 0x85851194, | ||||
| 	0x45458acf, 0xf9f9e910, 0x02020406, 0x7f7ffe81, 0x5050a0f0, 0x3c3c7844, 0x9f9f25ba, 0xa8a84be3, | ||||
| 	0x5151a2f3, 0xa3a35dfe, 0x404080c0, 0x8f8f058a, 0x92923fad, 0x9d9d21bc, 0x38387048, 0xf5f5f104, | ||||
| 	0xbcbc63df, 0xb6b677c1, 0xdadaaf75, 0x21214263, 0x10102030, 0xffffe51a, 0xf3f3fd0e, 0xd2d2bf6d, | ||||
| 	0xcdcd814c, 0x0c0c1814, 0x13132635, 0xececc32f, 0x5f5fbee1, 0x979735a2, 0x444488cc, 0x17172e39, | ||||
| 	0xc4c49357, 0xa7a755f2, 0x7e7efc82, 0x3d3d7a47, 0x6464c8ac, 0x5d5dbae7, 0x1919322b, 0x7373e695, | ||||
| 	0x6060c0a0, 0x81811998, 0x4f4f9ed1, 0xdcdca37f, 0x22224466, 0x2a2a547e, 0x90903bab, 0x88880b83, | ||||
| 	0x46468cca, 0xeeeec729, 0xb8b86bd3, 0x1414283c, 0xdedea779, 0x5e5ebce2, 0x0b0b161d, 0xdbdbad76, | ||||
| 	0xe0e0db3b, 0x32326456, 0x3a3a744e, 0x0a0a141e, 0x494992db, 0x06060c0a, 0x2424486c, 0x5c5cb8e4, | ||||
| 	0xc2c29f5d, 0xd3d3bd6e, 0xacac43ef, 0x6262c4a6, 0x919139a8, 0x959531a4, 0xe4e4d337, 0x7979f28b, | ||||
| 	0xe7e7d532, 0xc8c88b43, 0x37376e59, 0x6d6ddab7, 0x8d8d018c, 0xd5d5b164, 0x4e4e9cd2, 0xa9a949e0, | ||||
| 	0x6c6cd8b4, 0x5656acfa, 0xf4f4f307, 0xeaeacf25, 0x6565caaf, 0x7a7af48e, 0xaeae47e9, 0x08081018, | ||||
| 	0xbaba6fd5, 0x7878f088, 0x25254a6f, 0x2e2e5c72, 0x1c1c3824, 0xa6a657f1, 0xb4b473c7, 0xc6c69751, | ||||
| 	0xe8e8cb23, 0xdddda17c, 0x7474e89c, 0x1f1f3e21, 0x4b4b96dd, 0xbdbd61dc, 0x8b8b0d86, 0x8a8a0f85, | ||||
| 	0x7070e090, 0x3e3e7c42, 0xb5b571c4, 0x6666ccaa, 0x484890d8, 0x03030605, 0xf6f6f701, 0x0e0e1c12, | ||||
| 	0x6161c2a3, 0x35356a5f, 0x5757aef9, 0xb9b969d0, 0x86861791, 0xc1c19958, 0x1d1d3a27, 0x9e9e27b9, | ||||
| 	0xe1e1d938, 0xf8f8eb13, 0x98982bb3, 0x11112233, 0x6969d2bb, 0xd9d9a970, 0x8e8e0789, 0x949433a7, | ||||
| 	0x9b9b2db6, 0x1e1e3c22, 0x87871592, 0xe9e9c920, 0xcece8749, 0x5555aaff, 0x28285078, 0xdfdfa57a, | ||||
| 	0x8c8c038f, 0xa1a159f8, 0x89890980, 0x0d0d1a17, 0xbfbf65da, 0xe6e6d731, 0x424284c6, 0x6868d0b8, | ||||
| 	0x414182c3, 0x999929b0, 0x2d2d5a77, 0x0f0f1e11, 0xb0b07bcb, 0x5454a8fc, 0xbbbb6dd6, 0x16162c3a, | ||||
| }; | ||||
| 
 | ||||
// Encryption round lookup table 2 (256 x 32-bit), indexed in soft_aesenc()
// by bits 16..23 of a state column.
// The leading entries are the corresponding lutEnc0 entries rotated left by
// 16 bits (e.g. 0xa56363c6 -> 0x63c6a563).
| alignas(16) const uint32_t lutEnc2[256] = { | ||||
| 	0x63c6a563, 0x7cf8847c, 0x77ee9977, 0x7bf68d7b, 0xf2ff0df2, 0x6bd6bd6b, 0x6fdeb16f, 0xc59154c5, | ||||
| 	0x30605030, 0x01020301, 0x67cea967, 0x2b567d2b, 0xfee719fe, 0xd7b562d7, 0xab4de6ab, 0x76ec9a76, | ||||
| 	0xca8f45ca, 0x821f9d82, 0xc98940c9, 0x7dfa877d, 0xfaef15fa, 0x59b2eb59, 0x478ec947, 0xf0fb0bf0, | ||||
| 	0xad41ecad, 0xd4b367d4, 0xa25ffda2, 0xaf45eaaf, 0x9c23bf9c, 0xa453f7a4, 0x72e49672, 0xc09b5bc0, | ||||
| 	0xb775c2b7, 0xfde11cfd, 0x933dae93, 0x264c6a26, 0x366c5a36, 0x3f7e413f, 0xf7f502f7, 0xcc834fcc, | ||||
| 	0x34685c34, 0xa551f4a5, 0xe5d134e5, 0xf1f908f1, 0x71e29371, 0xd8ab73d8, 0x31625331, 0x152a3f15, | ||||
| 	0x04080c04, 0xc79552c7, 0x23466523, 0xc39d5ec3, 0x18302818, 0x9637a196, 0x050a0f05, 0x9a2fb59a, | ||||
| 	0x070e0907, 0x12243612, 0x801b9b80, 0xe2df3de2, 0xebcd26eb, 0x274e6927, 0xb27fcdb2, 0x75ea9f75, | ||||
| 	0x09121b09, 0x831d9e83, 0x2c58742c, 0x1a342e1a, 0x1b362d1b, 0x6edcb26e, 0x5ab4ee5a, 0xa05bfba0, | ||||
| 	0x52a4f652, 0x3b764d3b, 0xd6b761d6, 0xb37dceb3, 0x29527b29, 0xe3dd3ee3, 0x2f5e712f, 0x84139784, | ||||
| 	0x53a6f553, 0xd1b968d1, 0x00000000, 0xedc12ced, 0x20406020, 0xfce31ffc, 0xb179c8b1, 0x5bb6ed5b, | ||||
| 	0x6ad4be6a, 0xcb8d46cb, 0xbe67d9be, 0x39724b39, 0x4a94de4a, 0x4c98d44c, 0x58b0e858, 0xcf854acf, | ||||
| 	0xd0bb6bd0, 0xefc52aef, 0xaa4fe5aa, 0xfbed16fb, 0x4386c543, 0x4d9ad74d, 0x33665533, 0x85119485, | ||||
| 	0x458acf45, 0xf9e910f9, 0x02040602, 0x7ffe817f, 0x50a0f050, 0x3c78443c, 0x9f25ba9f, 0xa84be3a8, | ||||
| 	0x51a2f351, 0xa35dfea3, 0x4080c040, 0x8f058a8f, 0x923fad92, 0x9d21bc9d, 0x38704838, 0xf5f104f5, | ||||
| 	0xbc63dfbc, 0xb677c1b6, 0xdaaf75da, 0x21426321, 0x10203010, 0xffe51aff, 0xf3fd0ef3, 0xd2bf6dd2, | ||||
| 	0xcd814ccd, 0x0c18140c, 0x13263513, 0xecc32fec, 0x5fbee15f, 0x9735a297, 0x4488cc44, 0x172e3917, | ||||
| 	0xc49357c4, 0xa755f2a7, 0x7efc827e, 0x3d7a473d, 0x64c8ac64, 0x5dbae75d, 0x19322b19, 0x73e69573, | ||||
| 	0x60c0a060, 0x81199881, 0x4f9ed14f, 0xdca37fdc, 0x22446622, 0x2a547e2a, 0x903bab90, 0x880b8388, | ||||
| 	0x468cca46, 0xeec729ee, 0xb86bd3b8, 0x14283c14, 0xdea779de, 0x5ebce25e, 0x0b161d0b, 0xdbad76db, | ||||
| 	0xe0db3be0, 0x32645632, 0x3a744e3a, 0x0a141e0a, 0x4992db49, 0x060c0a06, 0x24486c24, 0x5cb8e45c, | ||||
| 	0xc29f5dc2, 0xd3bd6ed3, 0xac43efac, 0x62c4a662, 0x9139a891, 0x9531a495, 0xe4d337e4, 0x79f28b79, | ||||
| 	0xe7d532e7, 0xc88b43c8, 0x376e5937, 0x6ddab76d, 0x8d018c8d, 0xd5b164d5, 0x4e9cd24e, 0xa949e0a9, | ||||
| 	0x6cd8b46c, 0x56acfa56, 0xf4f307f4, 0xeacf25ea, 0x65caaf65, 0x7af48e7a, 0xae47e9ae, 0x08101808, | ||||
| 	0xba6fd5ba, 0x78f08878, 0x254a6f25, 0x2e5c722e, 0x1c38241c, 0xa657f1a6, 0xb473c7b4, 0xc69751c6, | ||||
| 	0xe8cb23e8, 0xdda17cdd, 0x74e89c74, 0x1f3e211f, 0x4b96dd4b, 0xbd61dcbd, 0x8b0d868b, 0x8a0f858a, | ||||
| 	0x70e09070, 0x3e7c423e, 0xb571c4b5, 0x66ccaa66, 0x4890d848, 0x03060503, 0xf6f701f6, 0x0e1c120e, | ||||
| 	0x61c2a361, 0x356a5f35, 0x57aef957, 0xb969d0b9, 0x86179186, 0xc19958c1, 0x1d3a271d, 0x9e27b99e, | ||||
| 	0xe1d938e1, 0xf8eb13f8, 0x982bb398, 0x11223311, 0x69d2bb69, 0xd9a970d9, 0x8e07898e, 0x9433a794, | ||||
| 	0x9b2db69b, 0x1e3c221e, 0x87159287, 0xe9c920e9, 0xce8749ce, 0x55aaff55, 0x28507828, 0xdfa57adf, | ||||
| 	0x8c038f8c, 0xa159f8a1, 0x89098089, 0x0d1a170d, 0xbf65dabf, 0xe6d731e6, 0x4284c642, 0x68d0b868, | ||||
| 	0x4182c341, 0x9929b099, 0x2d5a772d, 0x0f1e110f, 0xb07bcbb0, 0x54a8fc54, 0xbb6dd6bb, 0x162c3a16, | ||||
| }; | ||||
| 
 | ||||
// Encryption round lookup table 3 (256 x 32-bit), indexed in soft_aesenc()
// by the top byte (bits 24..31) of a state column.
// The leading entries are the corresponding lutEnc0 entries rotated left by
// 24 bits (e.g. 0xa56363c6 -> 0xc6a56363).
| alignas(16) const uint32_t lutEnc3[256] = { | ||||
| 	0xc6a56363, 0xf8847c7c, 0xee997777, 0xf68d7b7b, 0xff0df2f2, 0xd6bd6b6b, 0xdeb16f6f, 0x9154c5c5, | ||||
| 	0x60503030, 0x02030101, 0xcea96767, 0x567d2b2b, 0xe719fefe, 0xb562d7d7, 0x4de6abab, 0xec9a7676, | ||||
| 	0x8f45caca, 0x1f9d8282, 0x8940c9c9, 0xfa877d7d, 0xef15fafa, 0xb2eb5959, 0x8ec94747, 0xfb0bf0f0, | ||||
| 	0x41ecadad, 0xb367d4d4, 0x5ffda2a2, 0x45eaafaf, 0x23bf9c9c, 0x53f7a4a4, 0xe4967272, 0x9b5bc0c0, | ||||
| 	0x75c2b7b7, 0xe11cfdfd, 0x3dae9393, 0x4c6a2626, 0x6c5a3636, 0x7e413f3f, 0xf502f7f7, 0x834fcccc, | ||||
| 	0x685c3434, 0x51f4a5a5, 0xd134e5e5, 0xf908f1f1, 0xe2937171, 0xab73d8d8, 0x62533131, 0x2a3f1515, | ||||
| 	0x080c0404, 0x9552c7c7, 0x46652323, 0x9d5ec3c3, 0x30281818, 0x37a19696, 0x0a0f0505, 0x2fb59a9a, | ||||
| 	0x0e090707, 0x24361212, 0x1b9b8080, 0xdf3de2e2, 0xcd26ebeb, 0x4e692727, 0x7fcdb2b2, 0xea9f7575, | ||||
| 	0x121b0909, 0x1d9e8383, 0x58742c2c, 0x342e1a1a, 0x362d1b1b, 0xdcb26e6e, 0xb4ee5a5a, 0x5bfba0a0, | ||||
| 	0xa4f65252, 0x764d3b3b, 0xb761d6d6, 0x7dceb3b3, 0x527b2929, 0xdd3ee3e3, 0x5e712f2f, 0x13978484, | ||||
| 	0xa6f55353, 0xb968d1d1, 0x00000000, 0xc12ceded, 0x40602020, 0xe31ffcfc, 0x79c8b1b1, 0xb6ed5b5b, | ||||
| 	0xd4be6a6a, 0x8d46cbcb, 0x67d9bebe, 0x724b3939, 0x94de4a4a, 0x98d44c4c, 0xb0e85858, 0x854acfcf, | ||||
| 	0xbb6bd0d0, 0xc52aefef, 0x4fe5aaaa, 0xed16fbfb, 0x86c54343, 0x9ad74d4d, 0x66553333, 0x11948585, | ||||
| 	0x8acf4545, 0xe910f9f9, 0x04060202, 0xfe817f7f, 0xa0f05050, 0x78443c3c, 0x25ba9f9f, 0x4be3a8a8, | ||||
| 	0xa2f35151, 0x5dfea3a3, 0x80c04040, 0x058a8f8f, 0x3fad9292, 0x21bc9d9d, 0x70483838, 0xf104f5f5, | ||||
| 	0x63dfbcbc, 0x77c1b6b6, 0xaf75dada, 0x42632121, 0x20301010, 0xe51affff, 0xfd0ef3f3, 0xbf6dd2d2, | ||||
| 	0x814ccdcd, 0x18140c0c, 0x26351313, 0xc32fecec, 0xbee15f5f, 0x35a29797, 0x88cc4444, 0x2e391717, | ||||
| 	0x9357c4c4, 0x55f2a7a7, 0xfc827e7e, 0x7a473d3d, 0xc8ac6464, 0xbae75d5d, 0x322b1919, 0xe6957373, | ||||
| 	0xc0a06060, 0x19988181, 0x9ed14f4f, 0xa37fdcdc, 0x44662222, 0x547e2a2a, 0x3bab9090, 0x0b838888, | ||||
| 	0x8cca4646, 0xc729eeee, 0x6bd3b8b8, 0x283c1414, 0xa779dede, 0xbce25e5e, 0x161d0b0b, 0xad76dbdb, | ||||
| 	0xdb3be0e0, 0x64563232, 0x744e3a3a, 0x141e0a0a, 0x92db4949, 0x0c0a0606, 0x486c2424, 0xb8e45c5c, | ||||
| 	0x9f5dc2c2, 0xbd6ed3d3, 0x43efacac, 0xc4a66262, 0x39a89191, 0x31a49595, 0xd337e4e4, 0xf28b7979, | ||||
| 	0xd532e7e7, 0x8b43c8c8, 0x6e593737, 0xdab76d6d, 0x018c8d8d, 0xb164d5d5, 0x9cd24e4e, 0x49e0a9a9, | ||||
| 	0xd8b46c6c, 0xacfa5656, 0xf307f4f4, 0xcf25eaea, 0xcaaf6565, 0xf48e7a7a, 0x47e9aeae, 0x10180808, | ||||
| 	0x6fd5baba, 0xf0887878, 0x4a6f2525, 0x5c722e2e, 0x38241c1c, 0x57f1a6a6, 0x73c7b4b4, 0x9751c6c6, | ||||
| 	0xcb23e8e8, 0xa17cdddd, 0xe89c7474, 0x3e211f1f, 0x96dd4b4b, 0x61dcbdbd, 0x0d868b8b, 0x0f858a8a, | ||||
| 	0xe0907070, 0x7c423e3e, 0x71c4b5b5, 0xccaa6666, 0x90d84848, 0x06050303, 0xf701f6f6, 0x1c120e0e, | ||||
| 	0xc2a36161, 0x6a5f3535, 0xaef95757, 0x69d0b9b9, 0x17918686, 0x9958c1c1, 0x3a271d1d, 0x27b99e9e, | ||||
| 	0xd938e1e1, 0xeb13f8f8, 0x2bb39898, 0x22331111, 0xd2bb6969, 0xa970d9d9, 0x07898e8e, 0x33a79494, | ||||
| 	0x2db69b9b, 0x3c221e1e, 0x15928787, 0xc920e9e9, 0x8749cece, 0xaaff5555, 0x50782828, 0xa57adfdf, | ||||
| 	0x038f8c8c, 0x59f8a1a1, 0x09808989, 0x1a170d0d, 0x65dabfbf, 0xd731e6e6, 0x84c64242, 0xd0b86868, | ||||
| 	0x82c34141, 0x29b09999, 0x5a772d2d, 0x1e110f0f, 0x7bcbb0b0, 0xa8fc5454, 0x6dd6bbbb, 0x2c3a1616, | ||||
| }; | ||||
| 
 | ||||
// Decryption round lookup table 0 (256 x 32-bit), indexed in soft_aesdec()
// by the low byte (bits 0..7) of a state column.
// NOTE(review): presumably the inverse S-box combined with the inverse
// column-mixing multipliers, mirroring lutEnc0 — confirm against the
// table generator; nothing in this file derives the values.
| alignas(16) const uint32_t lutDec0[256] = { | ||||
| 	0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a, 0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b, | ||||
| 	0x55fa3020, 0xf66d76ad, 0x9176cc88, 0x254c02f5, 0xfcd7e54f, 0xd7cb2ac5, 0x80443526, 0x8fa362b5, | ||||
| 	0x495ab1de, 0x671bba25, 0x980eea45, 0xe1c0fe5d, 0x02752fc3, 0x12f04c81, 0xa397468d, 0xc6f9d36b, | ||||
| 	0xe75f8f03, 0x959c9215, 0xeb7a6dbf, 0xda595295, 0x2d83bed4, 0xd3217458, 0x2969e049, 0x44c8c98e, | ||||
| 	0x6a89c275, 0x78798ef4, 0x6b3e5899, 0xdd71b927, 0xb64fe1be, 0x17ad88f0, 0x66ac20c9, 0xb43ace7d, | ||||
| 	0x184adf63, 0x82311ae5, 0x60335197, 0x457f5362, 0xe07764b1, 0x84ae6bbb, 0x1ca081fe, 0x942b08f9, | ||||
| 	0x58684870, 0x19fd458f, 0x876cde94, 0xb7f87b52, 0x23d373ab, 0xe2024b72, 0x578f1fe3, 0x2aab5566, | ||||
| 	0x0728ebb2, 0x03c2b52f, 0x9a7bc586, 0xa50837d3, 0xf2872830, 0xb2a5bf23, 0xba6a0302, 0x5c8216ed, | ||||
| 	0x2b1ccf8a, 0x92b479a7, 0xf0f207f3, 0xa1e2694e, 0xcdf4da65, 0xd5be0506, 0x1f6234d1, 0x8afea6c4, | ||||
| 	0x9d532e34, 0xa055f3a2, 0x32e18a05, 0x75ebf6a4, 0x39ec830b, 0xaaef6040, 0x069f715e, 0x51106ebd, | ||||
| 	0xf98a213e, 0x3d06dd96, 0xae053edd, 0x46bde64d, 0xb58d5491, 0x055dc471, 0x6fd40604, 0xff155060, | ||||
| 	0x24fb9819, 0x97e9bdd6, 0xcc434089, 0x779ed967, 0xbd42e8b0, 0x888b8907, 0x385b19e7, 0xdbeec879, | ||||
| 	0x470a7ca1, 0xe90f427c, 0xc91e84f8, 0x00000000, 0x83868009, 0x48ed2b32, 0xac70111e, 0x4e725a6c, | ||||
| 	0xfbff0efd, 0x5638850f, 0x1ed5ae3d, 0x27392d36, 0x64d90f0a, 0x21a65c68, 0xd1545b9b, 0x3a2e3624, | ||||
| 	0xb1670a0c, 0x0fe75793, 0xd296eeb4, 0x9e919b1b, 0x4fc5c080, 0xa220dc61, 0x694b775a, 0x161a121c, | ||||
| 	0x0aba93e2, 0xe52aa0c0, 0x43e0223c, 0x1d171b12, 0x0b0d090e, 0xadc78bf2, 0xb9a8b62d, 0xc8a91e14, | ||||
| 	0x8519f157, 0x4c0775af, 0xbbdd99ee, 0xfd607fa3, 0x9f2601f7, 0xbcf5725c, 0xc53b6644, 0x347efb5b, | ||||
| 	0x7629438b, 0xdcc623cb, 0x68fcedb6, 0x63f1e4b8, 0xcadc31d7, 0x10856342, 0x40229713, 0x2011c684, | ||||
| 	0x7d244a85, 0xf83dbbd2, 0x1132f9ae, 0x6da129c7, 0x4b2f9e1d, 0xf330b2dc, 0xec52860d, 0xd0e3c177, | ||||
| 	0x6c16b32b, 0x99b970a9, 0xfa489411, 0x2264e947, 0xc48cfca8, 0x1a3ff0a0, 0xd82c7d56, 0xef903322, | ||||
| 	0xc74e4987, 0xc1d138d9, 0xfea2ca8c, 0x360bd498, 0xcf81f5a6, 0x28de7aa5, 0x268eb7da, 0xa4bfad3f, | ||||
| 	0xe49d3a2c, 0x0d927850, 0x9bcc5f6a, 0x62467e54, 0xc2138df6, 0xe8b8d890, 0x5ef7392e, 0xf5afc382, | ||||
| 	0xbe805d9f, 0x7c93d069, 0xa92dd56f, 0xb31225cf, 0x3b99acc8, 0xa77d1810, 0x6e639ce8, 0x7bbb3bdb, | ||||
| 	0x097826cd, 0xf418596e, 0x01b79aec, 0xa89a4f83, 0x656e95e6, 0x7ee6ffaa, 0x08cfbc21, 0xe6e815ef, | ||||
| 	0xd99be7ba, 0xce366f4a, 0xd4099fea, 0xd67cb029, 0xafb2a431, 0x31233f2a, 0x3094a5c6, 0xc066a235, | ||||
| 	0x37bc4e74, 0xa6ca82fc, 0xb0d090e0, 0x15d8a733, 0x4a9804f1, 0xf7daec41, 0x0e50cd7f, 0x2ff69117, | ||||
| 	0x8dd64d76, 0x4db0ef43, 0x544daacc, 0xdf0496e4, 0xe3b5d19e, 0x1b886a4c, 0xb81f2cc1, 0x7f516546, | ||||
| 	0x04ea5e9d, 0x5d358c01, 0x737487fa, 0x2e410bfb, 0x5a1d67b3, 0x52d2db92, 0x335610e9, 0x1347d66d, | ||||
| 	0x8c61d79a, 0x7a0ca137, 0x8e14f859, 0x893c13eb, 0xee27a9ce, 0x35c961b7, 0xede51ce1, 0x3cb1477a, | ||||
| 	0x59dfd29c, 0x3f73f255, 0x79ce1418, 0xbf37c773, 0xeacdf753, 0x5baafd5f, 0x146f3ddf, 0x86db4478, | ||||
| 	0x81f3afca, 0x3ec468b9, 0x2c342438, 0x5f40a3c2, 0x72c31d16, 0x0c25e2bc, 0x8b493c28, 0x41950dff, | ||||
| 	0x7101a839, 0xdeb30c08, 0x9ce4b4d8, 0x90c15664, 0x6184cb7b, 0x70b632d5, 0x745c6c48, 0x4257b8d0, | ||||
| }; | ||||
| 
 | ||||
// Decryption round lookup table 1 (256 x 32-bit), indexed in soft_aesdec()
// by bits 8..15 of a state column.
// The leading entries are the corresponding lutDec0 entries rotated left by
// 8 bits (e.g. 0x50a7f451 -> 0xa7f45150).
| alignas(16) const uint32_t lutDec1[256] = { | ||||
| 	0xa7f45150, 0x65417e53, 0xa4171ac3, 0x5e273a96, 0x6bab3bcb, 0x459d1ff1, 0x58faacab, 0x03e34b93, | ||||
| 	0xfa302055, 0x6d76adf6, 0x76cc8891, 0x4c02f525, 0xd7e54ffc, 0xcb2ac5d7, 0x44352680, 0xa362b58f, | ||||
| 	0x5ab1de49, 0x1bba2567, 0x0eea4598, 0xc0fe5de1, 0x752fc302, 0xf04c8112, 0x97468da3, 0xf9d36bc6, | ||||
| 	0x5f8f03e7, 0x9c921595, 0x7a6dbfeb, 0x595295da, 0x83bed42d, 0x217458d3, 0x69e04929, 0xc8c98e44, | ||||
| 	0x89c2756a, 0x798ef478, 0x3e58996b, 0x71b927dd, 0x4fe1beb6, 0xad88f017, 0xac20c966, 0x3ace7db4, | ||||
| 	0x4adf6318, 0x311ae582, 0x33519760, 0x7f536245, 0x7764b1e0, 0xae6bbb84, 0xa081fe1c, 0x2b08f994, | ||||
| 	0x68487058, 0xfd458f19, 0x6cde9487, 0xf87b52b7, 0xd373ab23, 0x024b72e2, 0x8f1fe357, 0xab55662a, | ||||
| 	0x28ebb207, 0xc2b52f03, 0x7bc5869a, 0x0837d3a5, 0x872830f2, 0xa5bf23b2, 0x6a0302ba, 0x8216ed5c, | ||||
| 	0x1ccf8a2b, 0xb479a792, 0xf207f3f0, 0xe2694ea1, 0xf4da65cd, 0xbe0506d5, 0x6234d11f, 0xfea6c48a, | ||||
| 	0x532e349d, 0x55f3a2a0, 0xe18a0532, 0xebf6a475, 0xec830b39, 0xef6040aa, 0x9f715e06, 0x106ebd51, | ||||
| 	0x8a213ef9, 0x06dd963d, 0x053eddae, 0xbde64d46, 0x8d5491b5, 0x5dc47105, 0xd406046f, 0x155060ff, | ||||
| 	0xfb981924, 0xe9bdd697, 0x434089cc, 0x9ed96777, 0x42e8b0bd, 0x8b890788, 0x5b19e738, 0xeec879db, | ||||
| 	0x0a7ca147, 0x0f427ce9, 0x1e84f8c9, 0x00000000, 0x86800983, 0xed2b3248, 0x70111eac, 0x725a6c4e, | ||||
| 	0xff0efdfb, 0x38850f56, 0xd5ae3d1e, 0x392d3627, 0xd90f0a64, 0xa65c6821, 0x545b9bd1, 0x2e36243a, | ||||
| 	0x670a0cb1, 0xe757930f, 0x96eeb4d2, 0x919b1b9e, 0xc5c0804f, 0x20dc61a2, 0x4b775a69, 0x1a121c16, | ||||
| 	0xba93e20a, 0x2aa0c0e5, 0xe0223c43, 0x171b121d, 0x0d090e0b, 0xc78bf2ad, 0xa8b62db9, 0xa91e14c8, | ||||
| 	0x19f15785, 0x0775af4c, 0xdd99eebb, 0x607fa3fd, 0x2601f79f, 0xf5725cbc, 0x3b6644c5, 0x7efb5b34, | ||||
| 	0x29438b76, 0xc623cbdc, 0xfcedb668, 0xf1e4b863, 0xdc31d7ca, 0x85634210, 0x22971340, 0x11c68420, | ||||
| 	0x244a857d, 0x3dbbd2f8, 0x32f9ae11, 0xa129c76d, 0x2f9e1d4b, 0x30b2dcf3, 0x52860dec, 0xe3c177d0, | ||||
| 	0x16b32b6c, 0xb970a999, 0x489411fa, 0x64e94722, 0x8cfca8c4, 0x3ff0a01a, 0x2c7d56d8, 0x903322ef, | ||||
| 	0x4e4987c7, 0xd138d9c1, 0xa2ca8cfe, 0x0bd49836, 0x81f5a6cf, 0xde7aa528, 0x8eb7da26, 0xbfad3fa4, | ||||
| 	0x9d3a2ce4, 0x9278500d, 0xcc5f6a9b, 0x467e5462, 0x138df6c2, 0xb8d890e8, 0xf7392e5e, 0xafc382f5, | ||||
| 	0x805d9fbe, 0x93d0697c, 0x2dd56fa9, 0x1225cfb3, 0x99acc83b, 0x7d1810a7, 0x639ce86e, 0xbb3bdb7b, | ||||
| 	0x7826cd09, 0x18596ef4, 0xb79aec01, 0x9a4f83a8, 0x6e95e665, 0xe6ffaa7e, 0xcfbc2108, 0xe815efe6, | ||||
| 	0x9be7bad9, 0x366f4ace, 0x099fead4, 0x7cb029d6, 0xb2a431af, 0x233f2a31, 0x94a5c630, 0x66a235c0, | ||||
| 	0xbc4e7437, 0xca82fca6, 0xd090e0b0, 0xd8a73315, 0x9804f14a, 0xdaec41f7, 0x50cd7f0e, 0xf691172f, | ||||
| 	0xd64d768d, 0xb0ef434d, 0x4daacc54, 0x0496e4df, 0xb5d19ee3, 0x886a4c1b, 0x1f2cc1b8, 0x5165467f, | ||||
| 	0xea5e9d04, 0x358c015d, 0x7487fa73, 0x410bfb2e, 0x1d67b35a, 0xd2db9252, 0x5610e933, 0x47d66d13, | ||||
| 	0x61d79a8c, 0x0ca1377a, 0x14f8598e, 0x3c13eb89, 0x27a9ceee, 0xc961b735, 0xe51ce1ed, 0xb1477a3c, | ||||
| 	0xdfd29c59, 0x73f2553f, 0xce141879, 0x37c773bf, 0xcdf753ea, 0xaafd5f5b, 0x6f3ddf14, 0xdb447886, | ||||
| 	0xf3afca81, 0xc468b93e, 0x3424382c, 0x40a3c25f, 0xc31d1672, 0x25e2bc0c, 0x493c288b, 0x950dff41, | ||||
| 	0x01a83971, 0xb30c08de, 0xe4b4d89c, 0xc1566490, 0x84cb7b61, 0xb632d570, 0x5c6c4874, 0x57b8d042, | ||||
| }; | ||||
| 
 | ||||
// Decryption round lookup table 2 (256 x 32-bit), indexed in soft_aesdec()
// by bits 16..23 of a state column.
// The leading entries are the corresponding lutDec0 entries rotated left by
// 16 bits (e.g. 0x50a7f451 -> 0xf45150a7).
| alignas(16) const uint32_t lutDec2[256] = { | ||||
| 	0xf45150a7, 0x417e5365, 0x171ac3a4, 0x273a965e, 0xab3bcb6b, 0x9d1ff145, 0xfaacab58, 0xe34b9303, | ||||
| 	0x302055fa, 0x76adf66d, 0xcc889176, 0x02f5254c, 0xe54ffcd7, 0x2ac5d7cb, 0x35268044, 0x62b58fa3, | ||||
| 	0xb1de495a, 0xba25671b, 0xea45980e, 0xfe5de1c0, 0x2fc30275, 0x4c8112f0, 0x468da397, 0xd36bc6f9, | ||||
| 	0x8f03e75f, 0x9215959c, 0x6dbfeb7a, 0x5295da59, 0xbed42d83, 0x7458d321, 0xe0492969, 0xc98e44c8, | ||||
| 	0xc2756a89, 0x8ef47879, 0x58996b3e, 0xb927dd71, 0xe1beb64f, 0x88f017ad, 0x20c966ac, 0xce7db43a, | ||||
| 	0xdf63184a, 0x1ae58231, 0x51976033, 0x5362457f, 0x64b1e077, 0x6bbb84ae, 0x81fe1ca0, 0x08f9942b, | ||||
| 	0x48705868, 0x458f19fd, 0xde94876c, 0x7b52b7f8, 0x73ab23d3, 0x4b72e202, 0x1fe3578f, 0x55662aab, | ||||
| 	0xebb20728, 0xb52f03c2, 0xc5869a7b, 0x37d3a508, 0x2830f287, 0xbf23b2a5, 0x0302ba6a, 0x16ed5c82, | ||||
| 	0xcf8a2b1c, 0x79a792b4, 0x07f3f0f2, 0x694ea1e2, 0xda65cdf4, 0x0506d5be, 0x34d11f62, 0xa6c48afe, | ||||
| 	0x2e349d53, 0xf3a2a055, 0x8a0532e1, 0xf6a475eb, 0x830b39ec, 0x6040aaef, 0x715e069f, 0x6ebd5110, | ||||
| 	0x213ef98a, 0xdd963d06, 0x3eddae05, 0xe64d46bd, 0x5491b58d, 0xc471055d, 0x06046fd4, 0x5060ff15, | ||||
| 	0x981924fb, 0xbdd697e9, 0x4089cc43, 0xd967779e, 0xe8b0bd42, 0x8907888b, 0x19e7385b, 0xc879dbee, | ||||
| 	0x7ca1470a, 0x427ce90f, 0x84f8c91e, 0x00000000, 0x80098386, 0x2b3248ed, 0x111eac70, 0x5a6c4e72, | ||||
| 	0x0efdfbff, 0x850f5638, 0xae3d1ed5, 0x2d362739, 0x0f0a64d9, 0x5c6821a6, 0x5b9bd154, 0x36243a2e, | ||||
| 	0x0a0cb167, 0x57930fe7, 0xeeb4d296, 0x9b1b9e91, 0xc0804fc5, 0xdc61a220, 0x775a694b, 0x121c161a, | ||||
| 	0x93e20aba, 0xa0c0e52a, 0x223c43e0, 0x1b121d17, 0x090e0b0d, 0x8bf2adc7, 0xb62db9a8, 0x1e14c8a9, | ||||
| 	0xf1578519, 0x75af4c07, 0x99eebbdd, 0x7fa3fd60, 0x01f79f26, 0x725cbcf5, 0x6644c53b, 0xfb5b347e, | ||||
| 	0x438b7629, 0x23cbdcc6, 0xedb668fc, 0xe4b863f1, 0x31d7cadc, 0x63421085, 0x97134022, 0xc6842011, | ||||
| 	0x4a857d24, 0xbbd2f83d, 0xf9ae1132, 0x29c76da1, 0x9e1d4b2f, 0xb2dcf330, 0x860dec52, 0xc177d0e3, | ||||
| 	0xb32b6c16, 0x70a999b9, 0x9411fa48, 0xe9472264, 0xfca8c48c, 0xf0a01a3f, 0x7d56d82c, 0x3322ef90, | ||||
| 	0x4987c74e, 0x38d9c1d1, 0xca8cfea2, 0xd498360b, 0xf5a6cf81, 0x7aa528de, 0xb7da268e, 0xad3fa4bf, | ||||
| 	0x3a2ce49d, 0x78500d92, 0x5f6a9bcc, 0x7e546246, 0x8df6c213, 0xd890e8b8, 0x392e5ef7, 0xc382f5af, | ||||
| 	0x5d9fbe80, 0xd0697c93, 0xd56fa92d, 0x25cfb312, 0xacc83b99, 0x1810a77d, 0x9ce86e63, 0x3bdb7bbb, | ||||
| 	0x26cd0978, 0x596ef418, 0x9aec01b7, 0x4f83a89a, 0x95e6656e, 0xffaa7ee6, 0xbc2108cf, 0x15efe6e8, | ||||
| 	0xe7bad99b, 0x6f4ace36, 0x9fead409, 0xb029d67c, 0xa431afb2, 0x3f2a3123, 0xa5c63094, 0xa235c066, | ||||
| 	0x4e7437bc, 0x82fca6ca, 0x90e0b0d0, 0xa73315d8, 0x04f14a98, 0xec41f7da, 0xcd7f0e50, 0x91172ff6, | ||||
| 	0x4d768dd6, 0xef434db0, 0xaacc544d, 0x96e4df04, 0xd19ee3b5, 0x6a4c1b88, 0x2cc1b81f, 0x65467f51, | ||||
| 	0x5e9d04ea, 0x8c015d35, 0x87fa7374, 0x0bfb2e41, 0x67b35a1d, 0xdb9252d2, 0x10e93356, 0xd66d1347, | ||||
| 	0xd79a8c61, 0xa1377a0c, 0xf8598e14, 0x13eb893c, 0xa9ceee27, 0x61b735c9, 0x1ce1ede5, 0x477a3cb1, | ||||
| 	0xd29c59df, 0xf2553f73, 0x141879ce, 0xc773bf37, 0xf753eacd, 0xfd5f5baa, 0x3ddf146f, 0x447886db, | ||||
| 	0xafca81f3, 0x68b93ec4, 0x24382c34, 0xa3c25f40, 0x1d1672c3, 0xe2bc0c25, 0x3c288b49, 0x0dff4195, | ||||
| 	0xa8397101, 0x0c08deb3, 0xb4d89ce4, 0x566490c1, 0xcb7b6184, 0x32d570b6, 0x6c48745c, 0xb8d04257, | ||||
| }; | ||||
| 
 | ||||
// Decryption round lookup table 3 (256 x 32-bit), indexed in soft_aesdec()
// by the top byte (bits 24..31) of a state column.
// The leading entries are the corresponding lutDec0 entries rotated left by
// 24 bits (e.g. 0x50a7f451 -> 0x5150a7f4).
| alignas(16) const uint32_t lutDec3[256] = { | ||||
| 	0x5150a7f4, 0x7e536541, 0x1ac3a417, 0x3a965e27, 0x3bcb6bab, 0x1ff1459d, 0xacab58fa, 0x4b9303e3, | ||||
| 	0x2055fa30, 0xadf66d76, 0x889176cc, 0xf5254c02, 0x4ffcd7e5, 0xc5d7cb2a, 0x26804435, 0xb58fa362, | ||||
| 	0xde495ab1, 0x25671bba, 0x45980eea, 0x5de1c0fe, 0xc302752f, 0x8112f04c, 0x8da39746, 0x6bc6f9d3, | ||||
| 	0x03e75f8f, 0x15959c92, 0xbfeb7a6d, 0x95da5952, 0xd42d83be, 0x58d32174, 0x492969e0, 0x8e44c8c9, | ||||
| 	0x756a89c2, 0xf478798e, 0x996b3e58, 0x27dd71b9, 0xbeb64fe1, 0xf017ad88, 0xc966ac20, 0x7db43ace, | ||||
| 	0x63184adf, 0xe582311a, 0x97603351, 0x62457f53, 0xb1e07764, 0xbb84ae6b, 0xfe1ca081, 0xf9942b08, | ||||
| 	0x70586848, 0x8f19fd45, 0x94876cde, 0x52b7f87b, 0xab23d373, 0x72e2024b, 0xe3578f1f, 0x662aab55, | ||||
| 	0xb20728eb, 0x2f03c2b5, 0x869a7bc5, 0xd3a50837, 0x30f28728, 0x23b2a5bf, 0x02ba6a03, 0xed5c8216, | ||||
| 	0x8a2b1ccf, 0xa792b479, 0xf3f0f207, 0x4ea1e269, 0x65cdf4da, 0x06d5be05, 0xd11f6234, 0xc48afea6, | ||||
| 	0x349d532e, 0xa2a055f3, 0x0532e18a, 0xa475ebf6, 0x0b39ec83, 0x40aaef60, 0x5e069f71, 0xbd51106e, | ||||
| 	0x3ef98a21, 0x963d06dd, 0xddae053e, 0x4d46bde6, 0x91b58d54, 0x71055dc4, 0x046fd406, 0x60ff1550, | ||||
| 	0x1924fb98, 0xd697e9bd, 0x89cc4340, 0x67779ed9, 0xb0bd42e8, 0x07888b89, 0xe7385b19, 0x79dbeec8, | ||||
| 	0xa1470a7c, 0x7ce90f42, 0xf8c91e84, 0x00000000, 0x09838680, 0x3248ed2b, 0x1eac7011, 0x6c4e725a, | ||||
| 	0xfdfbff0e, 0x0f563885, 0x3d1ed5ae, 0x3627392d, 0x0a64d90f, 0x6821a65c, 0x9bd1545b, 0x243a2e36, | ||||
| 	0x0cb1670a, 0x930fe757, 0xb4d296ee, 0x1b9e919b, 0x804fc5c0, 0x61a220dc, 0x5a694b77, 0x1c161a12, | ||||
| 	0xe20aba93, 0xc0e52aa0, 0x3c43e022, 0x121d171b, 0x0e0b0d09, 0xf2adc78b, 0x2db9a8b6, 0x14c8a91e, | ||||
| 	0x578519f1, 0xaf4c0775, 0xeebbdd99, 0xa3fd607f, 0xf79f2601, 0x5cbcf572, 0x44c53b66, 0x5b347efb, | ||||
| 	0x8b762943, 0xcbdcc623, 0xb668fced, 0xb863f1e4, 0xd7cadc31, 0x42108563, 0x13402297, 0x842011c6, | ||||
| 	0x857d244a, 0xd2f83dbb, 0xae1132f9, 0xc76da129, 0x1d4b2f9e, 0xdcf330b2, 0x0dec5286, 0x77d0e3c1, | ||||
| 	0x2b6c16b3, 0xa999b970, 0x11fa4894, 0x472264e9, 0xa8c48cfc, 0xa01a3ff0, 0x56d82c7d, 0x22ef9033, | ||||
| 	0x87c74e49, 0xd9c1d138, 0x8cfea2ca, 0x98360bd4, 0xa6cf81f5, 0xa528de7a, 0xda268eb7, 0x3fa4bfad, | ||||
| 	0x2ce49d3a, 0x500d9278, 0x6a9bcc5f, 0x5462467e, 0xf6c2138d, 0x90e8b8d8, 0x2e5ef739, 0x82f5afc3, | ||||
| 	0x9fbe805d, 0x697c93d0, 0x6fa92dd5, 0xcfb31225, 0xc83b99ac, 0x10a77d18, 0xe86e639c, 0xdb7bbb3b, | ||||
| 	0xcd097826, 0x6ef41859, 0xec01b79a, 0x83a89a4f, 0xe6656e95, 0xaa7ee6ff, 0x2108cfbc, 0xefe6e815, | ||||
| 	0xbad99be7, 0x4ace366f, 0xead4099f, 0x29d67cb0, 0x31afb2a4, 0x2a31233f, 0xc63094a5, 0x35c066a2, | ||||
| 	0x7437bc4e, 0xfca6ca82, 0xe0b0d090, 0x3315d8a7, 0xf14a9804, 0x41f7daec, 0x7f0e50cd, 0x172ff691, | ||||
| 	0x768dd64d, 0x434db0ef, 0xcc544daa, 0xe4df0496, 0x9ee3b5d1, 0x4c1b886a, 0xc1b81f2c, 0x467f5165, | ||||
| 	0x9d04ea5e, 0x015d358c, 0xfa737487, 0xfb2e410b, 0xb35a1d67, 0x9252d2db, 0xe9335610, 0x6d1347d6, | ||||
| 	0x9a8c61d7, 0x377a0ca1, 0x598e14f8, 0xeb893c13, 0xceee27a9, 0xb735c961, 0xe1ede51c, 0x7a3cb147, | ||||
| 	0x9c59dfd2, 0x553f73f2, 0x1879ce14, 0x73bf37c7, 0x53eacdf7, 0x5f5baafd, 0xdf146f3d, 0x7886db44, | ||||
| 	0xca81f3af, 0xb93ec468, 0x382c3424, 0xc25f40a3, 0x1672c31d, 0xbc0c25e2, 0x288b493c, 0xff41950d, | ||||
| 	0x397101a8, 0x08deb30c, 0xd89ce4b4, 0x6490c156, 0x7b6184cb, 0xd570b632, 0x48745c6c, 0xd04257b8, | ||||
| }; | ||||
| 
 | ||||
| static inline uint32_t subw(uint32_t w) { | ||||
| 	return (sbox[w >> 24] << 24) | | ||||
| 		(sbox[(w >> 16) & 0xff] << 16) | | ||||
| 		(sbox[(w >> 8) & 0xff] << 8) | | ||||
| 		sbox[w & 0xff]; | ||||
| } | ||||
| 
 | ||||
#if defined(__clang__) || defined(__arm__)
// Fallback 32-bit rotate-right, compiled only when __clang__ or __arm__ is
// defined (other toolchains are expected to supply _rotr themselves).
//
// value:  word to rotate.
// amount: rotation count; only the low 5 bits are used, so 0 and multiples
//         of 32 return value unchanged instead of performing an
//         out-of-range (undefined) shift.
static inline uint32_t _rotr(uint32_t value, uint32_t amount) {
	const uint32_t right = amount & 31;   // bits moved off the low end
	const uint32_t left = -amount & 31;   // (32 - amount) mod 32
	return (value >> right) | (value << left);
}
#endif
| 
 | ||||
| __m128i soft_aeskeygenassist(__m128i key, uint8_t rcon) { | ||||
| 	uint32_t X1 = subw(_mm_cvtsi128_si32(_mm_shuffle_epi32(key, 0x55))); | ||||
| 	uint32_t X3 = subw(_mm_cvtsi128_si32(_mm_shuffle_epi32(key, 0xFF))); | ||||
| 	return _mm_set_epi32(_rotr(X3, 8) ^ rcon, X3, _rotr(X1, 8) ^ rcon, X1); | ||||
| } | ||||
| 
 | ||||
| __m128i soft_aesenc(__m128i in, __m128i key) { | ||||
| 	uint32_t s0, s1, s2, s3; | ||||
| 
 | ||||
| 	s0 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0xFF)); | ||||
| 	s1 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0xAA)); | ||||
| 	s2 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0x55)); | ||||
| 	s3 = _mm_cvtsi128_si32(in); | ||||
| 
 | ||||
| 	__m128i out = _mm_set_epi32( | ||||
| 		(lutEnc0[s0 & 0xff] ^ lutEnc1[(s3 >> 8) & 0xff] ^ lutEnc2[(s2 >> 16) & 0xff] ^ lutEnc3[s1 >> 24]), | ||||
| 		(lutEnc0[s1 & 0xff] ^ lutEnc1[(s0 >> 8) & 0xff] ^ lutEnc2[(s3 >> 16) & 0xff] ^ lutEnc3[s2 >> 24]), | ||||
| 		(lutEnc0[s2 & 0xff] ^ lutEnc1[(s1 >> 8) & 0xff] ^ lutEnc2[(s0 >> 16) & 0xff] ^ lutEnc3[s3 >> 24]), | ||||
| 		(lutEnc0[s3 & 0xff] ^ lutEnc1[(s2 >> 8) & 0xff] ^ lutEnc2[(s1 >> 16) & 0xff] ^ lutEnc3[s0 >> 24]) | ||||
| 	); | ||||
| 
 | ||||
| 	return _mm_xor_si128(out, key); | ||||
| } | ||||
| 
 | ||||
| __m128i soft_aesdec(__m128i in, __m128i key) { | ||||
| 	uint32_t s0, s1, s2, s3; | ||||
| 
 | ||||
| 	s0 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0xFF)); | ||||
| 	s1 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0xAA)); | ||||
| 	s2 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0x55)); | ||||
| 	s3 = _mm_cvtsi128_si32(in); | ||||
| 
 | ||||
| 	__m128i out = _mm_set_epi32( | ||||
| 		(lutDec0[s0 & 0xff] ^ lutDec1[(s1 >> 8) & 0xff] ^ lutDec2[(s2 >> 16) & 0xff] ^ lutDec3[s3 >> 24]), | ||||
| 		(lutDec0[s1 & 0xff] ^ lutDec1[(s2 >> 8) & 0xff] ^ lutDec2[(s3 >> 16) & 0xff] ^ lutDec3[s0 >> 24]), | ||||
| 		(lutDec0[s2 & 0xff] ^ lutDec1[(s3 >> 8) & 0xff] ^ lutDec2[(s0 >> 16) & 0xff] ^ lutDec3[s1 >> 24]), | ||||
| 		(lutDec0[s3 & 0xff] ^ lutDec1[(s0 >> 8) & 0xff] ^ lutDec2[(s1 >> 16) & 0xff] ^ lutDec3[s2 >> 24]) | ||||
| 	); | ||||
| 
 | ||||
| 	return _mm_xor_si128(out, key); | ||||
| } | ||||
							
								
								
									
										28
									
								
								src/softAes.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										28
									
								
								src/softAes.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,28 @@ | |||
| /*
 | ||||
| Copyright (c) 2018 tevador | ||||
| 
 | ||||
| This file is part of RandomX. | ||||
| 
 | ||||
| RandomX is free software: you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation, either version 3 of the License, or | ||||
| (at your option) any later version. | ||||
| 
 | ||||
| RandomX is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
| GNU General Public License for more details. | ||||
| 
 | ||||
| You should have received a copy of the GNU General Public License | ||||
| along with RandomX.  If not, see <http://www.gnu.org/licenses/>.
 | ||||
| */ | ||||
| 
 | ||||
| #pragma once | ||||
| #include <stdint.h> | ||||
| #include "intrinPortable.h" | ||||
| 
 | ||||
// Software key-generation assist: byte-substitutes 32-bit lanes 1 and 3 of
// `key` and returns them together with copies rotated right by 8 bits and
// XORed with `rcon`.
// NOTE(review): presumably mirrors the x86 AESKEYGENASSIST instruction - confirm.
| __m128i soft_aeskeygenassist(__m128i key, uint8_t rcon); | ||||
| 
 | ||||
// Software encryption round: transforms `in` through lookup tables and XORs
// the result with `key`.
// NOTE(review): presumably mirrors the x86 AESENC instruction - confirm.
| __m128i soft_aesenc(__m128i in, __m128i key); | ||||
| 
 | ||||
// Software decryption round: transforms `in` through lookup tables and XORs
// the result with `key`.
// NOTE(review): presumably mirrors the x86 AESDEC instruction - confirm.
| __m128i soft_aesdec(__m128i in, __m128i key); | ||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue