mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2024-08-15 00:23:14 +00:00
Added ISWAP instruction
Scratchpad -> 2 MiB
New scratchpad initialization
New dataset initialization
This commit is contained in:
parent
20eb549725
commit
1ee94bef2a
23 changed files with 528 additions and 290 deletions
5
makefile
5
makefile
|
@ -13,7 +13,7 @@ LDFLAGS=-lpthread
|
||||||
TOBJS=$(addprefix $(OBJDIR)/,instructionsPortable.o TestAluFpu.o)
|
TOBJS=$(addprefix $(OBJDIR)/,instructionsPortable.o TestAluFpu.o)
|
||||||
ROBJS=$(addprefix $(OBJDIR)/,argon2_core.o argon2_ref.o AssemblyGeneratorX86.o blake2b.o CompiledVirtualMachine.o dataset.o JitCompilerX86.o instructionsPortable.o Instruction.o InterpretedVirtualMachine.o main.o Program.o softAes.o VirtualMachine.o t1ha2.o Cache.o virtualMemory.o divideByConstantCodegen.o LightClientAsyncWorker.o AddressTransform.o hashAes1Rx4.o)
|
ROBJS=$(addprefix $(OBJDIR)/,argon2_core.o argon2_ref.o AssemblyGeneratorX86.o blake2b.o CompiledVirtualMachine.o dataset.o JitCompilerX86.o instructionsPortable.o Instruction.o InterpretedVirtualMachine.o main.o Program.o softAes.o VirtualMachine.o t1ha2.o Cache.o virtualMemory.o divideByConstantCodegen.o LightClientAsyncWorker.o AddressTransform.o hashAes1Rx4.o)
|
||||||
ifeq ($(PLATFORM),x86_64)
|
ifeq ($(PLATFORM),x86_64)
|
||||||
ROBJS += $(OBJDIR)/JitCompilerX86-static.o
|
ROBJS += $(OBJDIR)/JitCompilerX86-static.o $(OBJDIR)/squareHash.o
|
||||||
endif
|
endif
|
||||||
|
|
||||||
all: release test
|
all: release test
|
||||||
|
@ -77,6 +77,9 @@ $(OBJDIR)/JitCompilerX86.o: $(addprefix $(SRCDIR)/,JitCompilerX86.cpp JitCompile
|
||||||
$(OBJDIR)/JitCompilerX86-static.o: $(addprefix $(SRCDIR)/,JitCompilerX86-static.S $(addprefix asm/program_, prologue_linux.inc prologue_load.inc epilogue_linux.inc epilogue_store.inc read.inc)) | $(OBJDIR)
|
$(OBJDIR)/JitCompilerX86-static.o: $(addprefix $(SRCDIR)/,JitCompilerX86-static.S $(addprefix asm/program_, prologue_linux.inc prologue_load.inc epilogue_linux.inc epilogue_store.inc read.inc)) | $(OBJDIR)
|
||||||
$(CXX) -x assembler-with-cpp -c $(SRCDIR)/JitCompilerX86-static.S -o $@
|
$(CXX) -x assembler-with-cpp -c $(SRCDIR)/JitCompilerX86-static.S -o $@
|
||||||
|
|
||||||
|
$(OBJDIR)/squareHash.o: $(addprefix $(SRCDIR)/,squareHash.S $(addprefix asm/, squareHash.inc)) | $(OBJDIR)
|
||||||
|
$(CXX) -x assembler-with-cpp -c $(SRCDIR)/squareHash.S -o $@
|
||||||
|
|
||||||
$(OBJDIR)/instructionsPortable.o: $(addprefix $(SRCDIR)/,instructionsPortable.cpp instructions.hpp intrinPortable.h) | $(OBJDIR)
|
$(OBJDIR)/instructionsPortable.o: $(addprefix $(SRCDIR)/,instructionsPortable.cpp instructions.hpp intrinPortable.h) | $(OBJDIR)
|
||||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/instructionsPortable.cpp -o $@
|
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/instructionsPortable.cpp -o $@
|
||||||
|
|
||||||
|
|
|
@ -72,16 +72,16 @@ namespace RandomX {
|
||||||
|
|
||||||
void AssemblyGeneratorX86::genAddressReg(Instruction& instr, const char* reg = "eax") {
|
void AssemblyGeneratorX86::genAddressReg(Instruction& instr, const char* reg = "eax") {
|
||||||
asmCode << "\tmov " << reg << ", " << regR32[instr.src] << std::endl;
|
asmCode << "\tmov " << reg << ", " << regR32[instr.src] << std::endl;
|
||||||
asmCode << "\tand " << reg << ", " << ((instr.alt % 4) ? ScratchpadL1Mask : ScratchpadL2Mask) << std::endl;
|
asmCode << "\tand " << reg << ", " << ((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask) << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::genAddressRegDst(Instruction& instr, int maskAlign = 8) {
|
void AssemblyGeneratorX86::genAddressRegDst(Instruction& instr, int maskAlign = 8) {
|
||||||
asmCode << "\tmov eax" << ", " << regR32[instr.dst] << std::endl;
|
asmCode << "\tmov eax" << ", " << regR32[instr.dst] << std::endl;
|
||||||
asmCode << "\tand eax" << ", " << ((instr.alt % 4) ? (ScratchpadL1Mask & (-maskAlign)) : (ScratchpadL2Mask & (-maskAlign))) << std::endl;
|
asmCode << "\tand eax" << ", " << ((instr.mod % 4) ? (ScratchpadL1Mask & (-maskAlign)) : (ScratchpadL2Mask & (-maskAlign))) << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t AssemblyGeneratorX86::genAddressImm(Instruction& instr) {
|
int32_t AssemblyGeneratorX86::genAddressImm(Instruction& instr) {
|
||||||
return instr.imm32 & ((instr.alt % 4) ? ScratchpadL1Mask : ScratchpadL2Mask);
|
return instr.imm32 & ((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
//1 uOP
|
//1 uOP
|
||||||
|
@ -348,6 +348,13 @@ namespace RandomX {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//2 uOPs
|
||||||
|
void AssemblyGeneratorX86::h_ISWAP_R(Instruction& instr, int i) {
|
||||||
|
if (instr.src != instr.dst) {
|
||||||
|
asmCode << "\txchg " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
//1 uOPs
|
//1 uOPs
|
||||||
void AssemblyGeneratorX86::h_FPSWAP_R(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_FPSWAP_R(Instruction& instr, int i) {
|
||||||
asmCode << "\tshufpd " << regFE[instr.dst] << ", " << regFE[instr.dst] << ", 1" << std::endl;
|
asmCode << "\tshufpd " << regFE[instr.dst] << ", " << regFE[instr.dst] << ", 1" << std::endl;
|
||||||
|
@ -431,7 +438,7 @@ namespace RandomX {
|
||||||
//6 uOPs
|
//6 uOPs
|
||||||
void AssemblyGeneratorX86::h_CFROUND(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_CFROUND(Instruction& instr, int i) {
|
||||||
asmCode << "\tmov rax, " << regR[instr.src] << std::endl;
|
asmCode << "\tmov rax, " << regR[instr.src] << std::endl;
|
||||||
int rotate = (13 - (instr.alt & 63)) & 63;
|
int rotate = (13 - (instr.imm32 & 63)) & 63;
|
||||||
if (rotate != 0)
|
if (rotate != 0)
|
||||||
asmCode << "\trol rax, " << rotate << std::endl;
|
asmCode << "\trol rax, " << rotate << std::endl;
|
||||||
asmCode << "\tand eax, 24576" << std::endl;
|
asmCode << "\tand eax, 24576" << std::endl;
|
||||||
|
@ -441,7 +448,7 @@ namespace RandomX {
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline const char* condition(Instruction& instr, bool invert = false) {
|
static inline const char* condition(Instruction& instr, bool invert = false) {
|
||||||
switch (((instr.alt >> 2) & 7) ^ invert)
|
switch (((instr.mod >> 2) & 7) ^ invert)
|
||||||
{
|
{
|
||||||
case 0:
|
case 0:
|
||||||
return "be";
|
return "be";
|
||||||
|
@ -519,6 +526,7 @@ namespace RandomX {
|
||||||
INST_HANDLE(IXOR_M)
|
INST_HANDLE(IXOR_M)
|
||||||
INST_HANDLE(IROR_R)
|
INST_HANDLE(IROR_R)
|
||||||
INST_HANDLE(IROL_R)
|
INST_HANDLE(IROL_R)
|
||||||
|
INST_HANDLE(ISWAP_R)
|
||||||
|
|
||||||
//Common floating point
|
//Common floating point
|
||||||
INST_HANDLE(FPSWAP_R)
|
INST_HANDLE(FPSWAP_R)
|
||||||
|
|
|
@ -63,6 +63,7 @@ namespace RandomX {
|
||||||
void h_IXOR_M(Instruction&, int);
|
void h_IXOR_M(Instruction&, int);
|
||||||
void h_IROR_R(Instruction&, int);
|
void h_IROR_R(Instruction&, int);
|
||||||
void h_IROL_R(Instruction&, int);
|
void h_IROL_R(Instruction&, int);
|
||||||
|
void h_ISWAP_R(Instruction&, int);
|
||||||
void h_FPSWAP_R(Instruction&, int);
|
void h_FPSWAP_R(Instruction&, int);
|
||||||
void h_FPADD_R(Instruction&, int);
|
void h_FPADD_R(Instruction&, int);
|
||||||
void h_FPADD_M(Instruction&, int);
|
void h_FPADD_M(Instruction&, int);
|
||||||
|
|
|
@ -57,7 +57,7 @@ namespace RandomX {
|
||||||
for (unsigned i = 0; i < sizeof(reg) / sizeof(Pcg32::result_type); ++i) {
|
for (unsigned i = 0; i < sizeof(reg) / sizeof(Pcg32::result_type); ++i) {
|
||||||
*(((uint32_t*)&reg) + i) = gen();
|
*(((uint32_t*)&reg) + i) = gen();
|
||||||
}
|
}
|
||||||
FPINIT();
|
initFpu();
|
||||||
/*for (int i = 0; i < RegistersCount / 2; ++i) {
|
/*for (int i = 0; i < RegistersCount / 2; ++i) {
|
||||||
reg.f[i].lo.f64 = (double)reg.f[i].lo.i64;
|
reg.f[i].lo.f64 = (double)reg.f[i].lo.i64;
|
||||||
reg.f[i].hi.f64 = (double)reg.f[i].hi.i64;
|
reg.f[i].hi.f64 = (double)reg.f[i].hi.i64;
|
||||||
|
|
|
@ -29,15 +29,15 @@ namespace RandomX {
|
||||||
}
|
}
|
||||||
|
|
||||||
void Instruction::genAddressReg(std::ostream& os) const {
|
void Instruction::genAddressReg(std::ostream& os) const {
|
||||||
os << ((alt % 4) ? "L1" : "L2") << "[r" << (int)src << "]";
|
os << ((mod % 4) ? "L1" : "L2") << "[r" << (int)src << "]";
|
||||||
}
|
}
|
||||||
|
|
||||||
void Instruction::genAddressRegDst(std::ostream& os) const {
|
void Instruction::genAddressRegDst(std::ostream& os) const {
|
||||||
os << ((alt % 4) ? "L1" : "L2") << "[r" << (int)dst << "]";
|
os << ((mod % 4) ? "L1" : "L2") << "[r" << (int)dst << "]";
|
||||||
}
|
}
|
||||||
|
|
||||||
void Instruction::genAddressImm(std::ostream& os) const {
|
void Instruction::genAddressImm(std::ostream& os) const {
|
||||||
os << ((alt % 4) ? "L1" : "L2") << "[" << (imm32 & ((alt % 4) ? ScratchpadL1Mask : ScratchpadL2Mask)) << "]";
|
os << ((mod % 4) ? "L1" : "L2") << "[" << (imm32 & ((mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask)) << "]";
|
||||||
}
|
}
|
||||||
|
|
||||||
void Instruction::h_IADD_R(std::ostream& os) const {
|
void Instruction::h_IADD_R(std::ostream& os) const {
|
||||||
|
@ -211,6 +211,10 @@ namespace RandomX {
|
||||||
os << "r" << (int)dst << ", " << imm32 << std::endl;
|
os << "r" << (int)dst << ", " << imm32 << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Instruction::h_ISWAP_R(std::ostream& os) const {
|
||||||
|
os << "r" << (int)dst << ", r" << (int)src << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
void Instruction::h_FPSWAP_R(std::ostream& os) const {
|
void Instruction::h_FPSWAP_R(std::ostream& os) const {
|
||||||
const char reg = (dst >= 4) ? 'e' : 'f';
|
const char reg = (dst >= 4) ? 'e' : 'f';
|
||||||
auto dstIndex = dst % 4;
|
auto dstIndex = dst % 4;
|
||||||
|
@ -280,7 +284,7 @@ namespace RandomX {
|
||||||
}
|
}
|
||||||
|
|
||||||
void Instruction::h_CFROUND(std::ostream& os) const {
|
void Instruction::h_CFROUND(std::ostream& os) const {
|
||||||
os << "r" << (int)src << ", " << (alt & 63) << std::endl;
|
os << "r" << (int)src << ", " << (imm32 & 63) << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline const char* condition(int index) {
|
static inline const char* condition(int index) {
|
||||||
|
@ -306,11 +310,11 @@ namespace RandomX {
|
||||||
}
|
}
|
||||||
|
|
||||||
void Instruction::h_COND_R(std::ostream& os) const {
|
void Instruction::h_COND_R(std::ostream& os) const {
|
||||||
os << "r" << (int)dst << ", " << condition((alt >> 2) & 7) << "(r" << (int)src << ", " << imm32 << ")" << std::endl;
|
os << "r" << (int)dst << ", " << condition((mod >> 2) & 7) << "(r" << (int)src << ", " << imm32 << ")" << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Instruction::h_COND_M(std::ostream& os) const {
|
void Instruction::h_COND_M(std::ostream& os) const {
|
||||||
os << "r" << (int)dst << ", " << condition((alt >> 2) & 7) << "(";
|
os << "r" << (int)dst << ", " << condition((mod >> 2) & 7) << "(";
|
||||||
genAddressReg(os);
|
genAddressReg(os);
|
||||||
os << ", " << imm32 << ")" << std::endl;
|
os << ", " << imm32 << ")" << std::endl;
|
||||||
}
|
}
|
||||||
|
@ -356,6 +360,7 @@ namespace RandomX {
|
||||||
INST_NAME(IXOR_M)
|
INST_NAME(IXOR_M)
|
||||||
INST_NAME(IROR_R)
|
INST_NAME(IROR_R)
|
||||||
INST_NAME(IROL_R)
|
INST_NAME(IROL_R)
|
||||||
|
INST_NAME(ISWAP_R)
|
||||||
|
|
||||||
//Common floating point
|
//Common floating point
|
||||||
INST_NAME(FPSWAP_R)
|
INST_NAME(FPSWAP_R)
|
||||||
|
@ -406,6 +411,7 @@ namespace RandomX {
|
||||||
INST_HANDLE(IXOR_M)
|
INST_HANDLE(IXOR_M)
|
||||||
INST_HANDLE(IROR_R)
|
INST_HANDLE(IROR_R)
|
||||||
INST_HANDLE(IROL_R)
|
INST_HANDLE(IROL_R)
|
||||||
|
INST_HANDLE(ISWAP_R)
|
||||||
|
|
||||||
//Common floating point
|
//Common floating point
|
||||||
INST_HANDLE(FPSWAP_R)
|
INST_HANDLE(FPSWAP_R)
|
||||||
|
|
|
@ -28,12 +28,52 @@ namespace RandomX {
|
||||||
|
|
||||||
typedef void(Instruction::*InstructionVisualizer)(std::ostream&) const;
|
typedef void(Instruction::*InstructionVisualizer)(std::ostream&) const;
|
||||||
|
|
||||||
|
namespace InstructionType {
|
||||||
|
constexpr int IADD_R = 0;
|
||||||
|
constexpr int IADD_M = 1;
|
||||||
|
constexpr int IADD_RC = 2;
|
||||||
|
constexpr int ISUB_R = 3;
|
||||||
|
constexpr int ISUB_M = 4;
|
||||||
|
constexpr int IMUL_9C = 5;
|
||||||
|
constexpr int IMUL_R = 6;
|
||||||
|
constexpr int IMUL_M = 7;
|
||||||
|
constexpr int IMULH_R = 8;
|
||||||
|
constexpr int IMULH_M = 9;
|
||||||
|
constexpr int ISMULH_R = 10;
|
||||||
|
constexpr int ISMULH_M = 11;
|
||||||
|
constexpr int IDIV_C = 12;
|
||||||
|
constexpr int ISDIV_C = 13;
|
||||||
|
constexpr int INEG_R = 14;
|
||||||
|
constexpr int IXOR_R = 15;
|
||||||
|
constexpr int IXOR_M = 16;
|
||||||
|
constexpr int IROR_R = 17;
|
||||||
|
constexpr int IROL_R = 18;
|
||||||
|
constexpr int ISWAP_R = 19;
|
||||||
|
constexpr int FPSWAP_R = 20;
|
||||||
|
constexpr int FPADD_R = 21;
|
||||||
|
constexpr int FPADD_M = 22;
|
||||||
|
constexpr int FPSUB_R = 23;
|
||||||
|
constexpr int FPSUB_M = 24;
|
||||||
|
constexpr int FPNEG_R = 25;
|
||||||
|
constexpr int FPMUL_R = 26;
|
||||||
|
constexpr int FPMUL_M = 27;
|
||||||
|
constexpr int FPDIV_R = 28;
|
||||||
|
constexpr int FPDIV_M = 29;
|
||||||
|
constexpr int FPSQRT_R = 30;
|
||||||
|
constexpr int COND_R = 31;
|
||||||
|
constexpr int COND_M = 32;
|
||||||
|
constexpr int CFROUND = 33;
|
||||||
|
constexpr int ISTORE = 34;
|
||||||
|
constexpr int FSTORE = 35;
|
||||||
|
constexpr int NOP = 36;
|
||||||
|
}
|
||||||
|
|
||||||
class Instruction {
|
class Instruction {
|
||||||
public:
|
public:
|
||||||
uint8_t opcode;
|
uint8_t opcode;
|
||||||
uint8_t dst;
|
uint8_t dst;
|
||||||
uint8_t src;
|
uint8_t src;
|
||||||
uint8_t alt;
|
uint8_t mod;
|
||||||
int32_t imm32;
|
int32_t imm32;
|
||||||
const char* getName() const {
|
const char* getName() const {
|
||||||
return names[opcode];
|
return names[opcode];
|
||||||
|
@ -70,6 +110,7 @@ namespace RandomX {
|
||||||
void h_IXOR_M(std::ostream&) const;
|
void h_IXOR_M(std::ostream&) const;
|
||||||
void h_IROR_R(std::ostream&) const;
|
void h_IROR_R(std::ostream&) const;
|
||||||
void h_IROL_R(std::ostream&) const;
|
void h_IROL_R(std::ostream&) const;
|
||||||
|
void h_ISWAP_R(std::ostream&) const;
|
||||||
void h_FPSWAP_R(std::ostream&) const;
|
void h_FPSWAP_R(std::ostream&) const;
|
||||||
void h_FPADD_R(std::ostream&) const;
|
void h_FPADD_R(std::ostream&) const;
|
||||||
void h_FPADD_M(std::ostream&) const;
|
void h_FPADD_M(std::ostream&) const;
|
||||||
|
|
|
@ -30,6 +30,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <thread>
|
#include <thread>
|
||||||
|
#include "intrinPortable.h"
|
||||||
#ifdef STATS
|
#ifdef STATS
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#endif
|
#endif
|
||||||
|
@ -98,7 +99,7 @@ namespace RandomX {
|
||||||
for (unsigned i = 0; i < sizeof(reg) / sizeof(Pcg32::result_type); ++i) {
|
for (unsigned i = 0; i < sizeof(reg) / sizeof(Pcg32::result_type); ++i) {
|
||||||
*(((uint32_t*)&reg) + i) = gen();
|
*(((uint32_t*)&reg) + i) = gen();
|
||||||
}
|
}
|
||||||
FPINIT();
|
initFpu();
|
||||||
for (int i = 0; i < RegistersCount; ++i) {
|
for (int i = 0; i < RegistersCount; ++i) {
|
||||||
reg.f[i].lo.f64 = (double)reg.f[i].lo.i64;
|
reg.f[i].lo.f64 = (double)reg.f[i].lo.i64;
|
||||||
reg.f[i].hi.f64 = (double)reg.f[i].hi.i64;
|
reg.f[i].hi.f64 = (double)reg.f[i].hi.i64;
|
||||||
|
@ -114,24 +115,32 @@ namespace RandomX {
|
||||||
}
|
}
|
||||||
|
|
||||||
void InterpretedVirtualMachine::execute() {
|
void InterpretedVirtualMachine::execute() {
|
||||||
while (ic > 0) {
|
for(int i = 0; i < InstructionCount; ++i) {
|
||||||
#ifdef STATS
|
for (int j = 0; j < ProgramLength; ++j) {
|
||||||
count_instructions[pc]++;
|
auto& ibc = byteCode[j];
|
||||||
#endif
|
switch (ibc.type)
|
||||||
auto& inst = p(pc);
|
{
|
||||||
if(trace) std::cout << inst.getName() << " (" << std::dec << pc << ")" << std::endl;
|
case InstructionType::CFROUND: {
|
||||||
pc = (pc + 1) % ProgramLength;
|
uint64_t rcFlag = rotr(ibc.isrc->u64, ibc.imm.i32);
|
||||||
auto handler = engine[inst.opcode];
|
setRoundMode(rcFlag);
|
||||||
(this->*handler)(inst);
|
|
||||||
ic--;
|
|
||||||
}
|
}
|
||||||
#ifdef STATS
|
break;
|
||||||
count_endstack += stack.size();
|
}
|
||||||
#endif
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#include "instructionWeights.hpp"
|
#include "instructionWeights.hpp"
|
||||||
#define INST_HANDLE(x) REPN(&InterpretedVirtualMachine::h_##x, WT(x))
|
|
||||||
|
void InterpretedVirtualMachine::executeInstruction(Instruction& instr) {
|
||||||
|
switch (instr.opcode)
|
||||||
|
{
|
||||||
|
CASE_REP(IADD_R)
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
InstructionHandler InterpretedVirtualMachine::engine[256] = {
|
InstructionHandler InterpretedVirtualMachine::engine[256] = {
|
||||||
|
|
||||||
|
|
|
@ -33,10 +33,24 @@ namespace RandomX {
|
||||||
virtual std::ostream& printCxx(std::ostream&) const = 0;
|
virtual std::ostream& printCxx(std::ostream&) const = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct InstructionByteCode;
|
||||||
class InterpretedVirtualMachine;
|
class InterpretedVirtualMachine;
|
||||||
|
|
||||||
typedef void(InterpretedVirtualMachine::*InstructionHandler)(Instruction&);
|
typedef void(InterpretedVirtualMachine::*InstructionHandler)(Instruction&);
|
||||||
|
|
||||||
|
struct alignas(64) InstructionByteCode {
|
||||||
|
convertible_t* idst;
|
||||||
|
convertible_t* isrc;
|
||||||
|
convertible_t imm;
|
||||||
|
fpu_reg_t* fdst;
|
||||||
|
fpu_reg_t* fsrc;
|
||||||
|
uint32_t condition;
|
||||||
|
uint32_t memMask;
|
||||||
|
uint32_t type;
|
||||||
|
};
|
||||||
|
|
||||||
|
constexpr int asedwfagdewsa = sizeof(InstructionByteCode);
|
||||||
|
|
||||||
class InterpretedVirtualMachine : public VirtualMachine {
|
class InterpretedVirtualMachine : public VirtualMachine {
|
||||||
public:
|
public:
|
||||||
InterpretedVirtualMachine(bool soft, bool async) : softAes(soft), asyncWorker(async) {}
|
InterpretedVirtualMachine(bool soft, bool async) : softAes(soft), asyncWorker(async) {}
|
||||||
|
@ -53,6 +67,7 @@ namespace RandomX {
|
||||||
static const ITransform* addressTransformations[TransformationCount];
|
static const ITransform* addressTransformations[TransformationCount];
|
||||||
bool softAes, asyncWorker;
|
bool softAes, asyncWorker;
|
||||||
Program p;
|
Program p;
|
||||||
|
InstructionByteCode byteCode[ProgramLength];
|
||||||
std::vector<convertible_t> stack;
|
std::vector<convertible_t> stack;
|
||||||
uint64_t pc, ic;
|
uint64_t pc, ic;
|
||||||
const ITransform* currentTransform;
|
const ITransform* currentTransform;
|
||||||
|
@ -106,7 +121,7 @@ namespace RandomX {
|
||||||
int count_FPMUL_nop2 = 0;
|
int count_FPMUL_nop2 = 0;
|
||||||
int datasetAccess[256] = { 0 };
|
int datasetAccess[256] = { 0 };
|
||||||
#endif
|
#endif
|
||||||
|
void executeInstruction(Instruction&);
|
||||||
convertible_t loada(Instruction&);
|
convertible_t loada(Instruction&);
|
||||||
convertible_t loadbiashift(Instruction&);
|
convertible_t loadbiashift(Instruction&);
|
||||||
convertible_t loadbiadiv(Instruction&);
|
convertible_t loadbiadiv(Instruction&);
|
||||||
|
|
|
@ -176,6 +176,7 @@ namespace RandomX {
|
||||||
static const uint8_t JNZ[] = { 0x0f, 0x85 };
|
static const uint8_t JNZ[] = { 0x0f, 0x85 };
|
||||||
static const uint8_t JMP = 0xe9;
|
static const uint8_t JMP = 0xe9;
|
||||||
static const uint8_t REX_XOR_RAX_R64[] = { 0x49, 0x33 };
|
static const uint8_t REX_XOR_RAX_R64[] = { 0x49, 0x33 };
|
||||||
|
static const uint8_t REX_XCHG[] = { 0x4d, 0x87 };
|
||||||
|
|
||||||
size_t JitCompilerX86::getCodeSize() {
|
size_t JitCompilerX86::getCodeSize() {
|
||||||
return codePos - prologueSize;
|
return codePos - prologueSize;
|
||||||
|
@ -248,7 +249,7 @@ namespace RandomX {
|
||||||
emitByte(AND_EAX_I);
|
emitByte(AND_EAX_I);
|
||||||
else
|
else
|
||||||
emit(AND_ECX_I);
|
emit(AND_ECX_I);
|
||||||
emit32((instr.alt % 4) ? ScratchpadL1Mask : ScratchpadL2Mask);
|
emit32((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::genAddressRegDst(Instruction& instr, bool align16 = false) {
|
void JitCompilerX86::genAddressRegDst(Instruction& instr, bool align16 = false) {
|
||||||
|
@ -257,11 +258,11 @@ namespace RandomX {
|
||||||
emitByte(AND_EAX_I);
|
emitByte(AND_EAX_I);
|
||||||
int32_t maskL1 = align16 ? ScratchpadL1Mask16 : ScratchpadL1Mask;
|
int32_t maskL1 = align16 ? ScratchpadL1Mask16 : ScratchpadL1Mask;
|
||||||
int32_t maskL2 = align16 ? ScratchpadL2Mask16 : ScratchpadL2Mask;
|
int32_t maskL2 = align16 ? ScratchpadL2Mask16 : ScratchpadL2Mask;
|
||||||
emit32((instr.alt % 4) ? maskL1 : maskL2);
|
emit32((instr.mod % 4) ? maskL1 : maskL2);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::genAddressImm(Instruction& instr) {
|
void JitCompilerX86::genAddressImm(Instruction& instr) {
|
||||||
emit32(instr.imm32 & ((instr.alt % 4) ? ScratchpadL1Mask : ScratchpadL2Mask));
|
emit32(instr.imm32 & ((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask));
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_IADD_R(Instruction& instr) {
|
void JitCompilerX86::h_IADD_R(Instruction& instr) {
|
||||||
|
@ -595,6 +596,13 @@ namespace RandomX {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void JitCompilerX86::h_ISWAP_R(Instruction& instr) {
|
||||||
|
if (instr.src != instr.dst) {
|
||||||
|
emit(REX_XCHG);
|
||||||
|
emitByte(0xc0 + instr.dst + 8 * instr.src);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_FPSWAP_R(Instruction& instr) {
|
void JitCompilerX86::h_FPSWAP_R(Instruction& instr) {
|
||||||
emit(SHUFPD);
|
emit(SHUFPD);
|
||||||
emitByte(0xc0 + 9 * instr.dst);
|
emitByte(0xc0 + 9 * instr.dst);
|
||||||
|
@ -682,7 +690,7 @@ namespace RandomX {
|
||||||
void JitCompilerX86::h_CFROUND(Instruction& instr) {
|
void JitCompilerX86::h_CFROUND(Instruction& instr) {
|
||||||
emit(REX_MOV_RR64);
|
emit(REX_MOV_RR64);
|
||||||
emitByte(0xc0 + instr.src);
|
emitByte(0xc0 + instr.src);
|
||||||
int rotate = (13 - (instr.alt & 63)) & 63;
|
int rotate = (13 - (instr.imm32 & 63)) & 63;
|
||||||
if (rotate != 0) {
|
if (rotate != 0) {
|
||||||
emit(ROL_RAX);
|
emit(ROL_RAX);
|
||||||
emitByte(rotate);
|
emitByte(rotate);
|
||||||
|
@ -691,7 +699,7 @@ namespace RandomX {
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline uint8_t condition(Instruction& instr, bool invert = false) {
|
static inline uint8_t condition(Instruction& instr, bool invert = false) {
|
||||||
switch ((instr.alt & 7) ^ invert)
|
switch ((instr.mod & 7) ^ invert)
|
||||||
{
|
{
|
||||||
case 0:
|
case 0:
|
||||||
return 0x96; //setbe
|
return 0x96; //setbe
|
||||||
|
@ -777,6 +785,7 @@ namespace RandomX {
|
||||||
INST_HANDLE(IXOR_M)
|
INST_HANDLE(IXOR_M)
|
||||||
INST_HANDLE(IROR_R)
|
INST_HANDLE(IROR_R)
|
||||||
INST_HANDLE(IROL_R)
|
INST_HANDLE(IROL_R)
|
||||||
|
INST_HANDLE(ISWAP_R)
|
||||||
INST_HANDLE(FPSWAP_R)
|
INST_HANDLE(FPSWAP_R)
|
||||||
INST_HANDLE(FPADD_R)
|
INST_HANDLE(FPADD_R)
|
||||||
INST_HANDLE(FPADD_M)
|
INST_HANDLE(FPADD_M)
|
||||||
|
|
|
@ -109,6 +109,7 @@ namespace RandomX {
|
||||||
void h_IXOR_M(Instruction&);
|
void h_IXOR_M(Instruction&);
|
||||||
void h_IROR_R(Instruction&);
|
void h_IROR_R(Instruction&);
|
||||||
void h_IROL_R(Instruction&);
|
void h_IROL_R(Instruction&);
|
||||||
|
void h_ISWAP_R(Instruction&);
|
||||||
void h_FPSWAP_R(Instruction&);
|
void h_FPSWAP_R(Instruction&);
|
||||||
void h_FPADD_R(Instruction&);
|
void h_FPADD_R(Instruction&);
|
||||||
void h_FPADD_M(Instruction&);
|
void h_FPADD_M(Instruction&);
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
mov rdx, rax
|
mov rdx, rax
|
||||||
and eax, 1048512
|
and eax, 2097088
|
||||||
lea rcx, [rsi+rax]
|
lea rcx, [rsi+rax]
|
||||||
push rcx
|
push rcx
|
||||||
xor r8, qword ptr [rcx+0]
|
xor r8, qword ptr [rcx+0]
|
||||||
|
@ -11,7 +11,7 @@
|
||||||
xor r14, qword ptr [rcx+48]
|
xor r14, qword ptr [rcx+48]
|
||||||
xor r15, qword ptr [rcx+56]
|
xor r15, qword ptr [rcx+56]
|
||||||
ror rdx, 32
|
ror rdx, 32
|
||||||
and edx, 1048512
|
and edx, 2097088
|
||||||
lea rcx, [rsi+rdx]
|
lea rcx, [rsi+rdx]
|
||||||
push rcx
|
push rcx
|
||||||
cvtdq2pd xmm0, qword ptr [rcx+0]
|
cvtdq2pd xmm0, qword ptr [rcx+0]
|
||||||
|
|
87
src/asm/squareHash.inc
Normal file
87
src/asm/squareHash.inc
Normal file
|
@ -0,0 +1,87 @@
|
||||||
|
mov rax, 1613783669344650115
|
||||||
|
add rax, rcx
|
||||||
|
mul rax
|
||||||
|
sub rax, rdx ;# 1
|
||||||
|
mul rax
|
||||||
|
sub rax, rdx ;# 2
|
||||||
|
mul rax
|
||||||
|
sub rax, rdx ;# 3
|
||||||
|
mul rax
|
||||||
|
sub rax, rdx ;# 4
|
||||||
|
mul rax
|
||||||
|
sub rax, rdx ;# 5
|
||||||
|
mul rax
|
||||||
|
sub rax, rdx ;# 6
|
||||||
|
mul rax
|
||||||
|
sub rax, rdx ;# 7
|
||||||
|
mul rax
|
||||||
|
sub rax, rdx ;# 8
|
||||||
|
mul rax
|
||||||
|
sub rax, rdx ;# 9
|
||||||
|
mul rax
|
||||||
|
sub rax, rdx ;# 10
|
||||||
|
mul rax
|
||||||
|
sub rax, rdx ;# 11
|
||||||
|
mul rax
|
||||||
|
sub rax, rdx ;# 12
|
||||||
|
mul rax
|
||||||
|
sub rax, rdx ;# 13
|
||||||
|
mul rax
|
||||||
|
sub rax, rdx ;# 14
|
||||||
|
mul rax
|
||||||
|
sub rax, rdx ;# 15
|
||||||
|
mul rax
|
||||||
|
sub rax, rdx ;# 16
|
||||||
|
mul rax
|
||||||
|
sub rax, rdx ;# 17
|
||||||
|
mul rax
|
||||||
|
sub rax, rdx ;# 18
|
||||||
|
mul rax
|
||||||
|
sub rax, rdx ;# 19
|
||||||
|
mul rax
|
||||||
|
sub rax, rdx ;# 20
|
||||||
|
mul rax
|
||||||
|
sub rax, rdx ;# 21
|
||||||
|
mul rax
|
||||||
|
sub rax, rdx ;# 22
|
||||||
|
mul rax
|
||||||
|
sub rax, rdx ;# 23
|
||||||
|
mul rax
|
||||||
|
sub rax, rdx ;# 24
|
||||||
|
mul rax
|
||||||
|
sub rax, rdx ;# 25
|
||||||
|
mul rax
|
||||||
|
sub rax, rdx ;# 26
|
||||||
|
mul rax
|
||||||
|
sub rax, rdx ;# 27
|
||||||
|
mul rax
|
||||||
|
sub rax, rdx ;# 28
|
||||||
|
mul rax
|
||||||
|
sub rax, rdx ;# 29
|
||||||
|
mul rax
|
||||||
|
sub rax, rdx ;# 30
|
||||||
|
mul rax
|
||||||
|
sub rax, rdx ;# 31
|
||||||
|
mul rax
|
||||||
|
sub rax, rdx ;# 32
|
||||||
|
mul rax
|
||||||
|
sub rax, rdx ;# 33
|
||||||
|
mul rax
|
||||||
|
sub rax, rdx ;# 34
|
||||||
|
mul rax
|
||||||
|
sub rax, rdx ;# 35
|
||||||
|
mul rax
|
||||||
|
sub rax, rdx ;# 36
|
||||||
|
mul rax
|
||||||
|
sub rax, rdx ;# 37
|
||||||
|
mul rax
|
||||||
|
sub rax, rdx ;# 38
|
||||||
|
mul rax
|
||||||
|
sub rax, rdx ;# 39
|
||||||
|
mul rax
|
||||||
|
sub rax, rdx ;# 40
|
||||||
|
mul rax
|
||||||
|
sub rax, rdx ;# 41
|
||||||
|
mul rax
|
||||||
|
sub rax, rdx ;# 42
|
||||||
|
ret
|
|
@ -26,11 +26,6 @@ namespace RandomX {
|
||||||
|
|
||||||
using addr_t = uint32_t;
|
using addr_t = uint32_t;
|
||||||
|
|
||||||
constexpr int RoundToNearest = 0;
|
|
||||||
constexpr int RoundDown = 1;
|
|
||||||
constexpr int RoundUp = 2;
|
|
||||||
constexpr int RoundToZero = 3;
|
|
||||||
|
|
||||||
constexpr int SeedSize = 32;
|
constexpr int SeedSize = 32;
|
||||||
constexpr int ResultSize = 32;
|
constexpr int ResultSize = 32;
|
||||||
|
|
||||||
|
@ -46,7 +41,7 @@ namespace RandomX {
|
||||||
constexpr int CacheBlockCount = CacheSize / CacheLineSize;
|
constexpr int CacheBlockCount = CacheSize / CacheLineSize;
|
||||||
constexpr int BlockExpansionRatio = DatasetSize / CacheSize;
|
constexpr int BlockExpansionRatio = DatasetSize / CacheSize;
|
||||||
constexpr int DatasetBlockCount = BlockExpansionRatio * CacheBlockCount;
|
constexpr int DatasetBlockCount = BlockExpansionRatio * CacheBlockCount;
|
||||||
constexpr int DatasetIterations = 3;
|
constexpr int DatasetIterations = 10;
|
||||||
|
|
||||||
|
|
||||||
#ifdef TRACE
|
#ifdef TRACE
|
||||||
|
@ -72,12 +67,12 @@ namespace RandomX {
|
||||||
convertible_t hi;
|
convertible_t hi;
|
||||||
};
|
};
|
||||||
|
|
||||||
constexpr int ProgramLength = 128;
|
constexpr int ProgramLength = 256;
|
||||||
constexpr uint32_t InstructionCount = 1024;
|
constexpr uint32_t InstructionCount = 1024;
|
||||||
constexpr uint32_t ScratchpadSize = 1024 * 1024;
|
constexpr uint32_t ScratchpadSize = 2 * 1024 * 1024;
|
||||||
constexpr uint32_t ScratchpadLength = ScratchpadSize / sizeof(convertible_t);
|
constexpr uint32_t ScratchpadLength = ScratchpadSize / sizeof(convertible_t);
|
||||||
constexpr uint32_t ScratchpadL1 = ScratchpadSize / 64 / sizeof(convertible_t);
|
constexpr uint32_t ScratchpadL1 = ScratchpadSize / 128 / sizeof(convertible_t);
|
||||||
constexpr uint32_t ScratchpadL2 = ScratchpadSize / 4 / sizeof(convertible_t);
|
constexpr uint32_t ScratchpadL2 = ScratchpadSize / 8 / sizeof(convertible_t);
|
||||||
constexpr uint32_t ScratchpadL3 = ScratchpadSize / sizeof(convertible_t);
|
constexpr uint32_t ScratchpadL3 = ScratchpadSize / sizeof(convertible_t);
|
||||||
constexpr int ScratchpadL1Mask = (ScratchpadL1 - 1) * 8;
|
constexpr int ScratchpadL1Mask = (ScratchpadL1 - 1) * 8;
|
||||||
constexpr int ScratchpadL2Mask = (ScratchpadL2 - 1) * 8;
|
constexpr int ScratchpadL2Mask = (ScratchpadL2 - 1) * 8;
|
||||||
|
@ -133,6 +128,8 @@ namespace RandomX {
|
||||||
|
|
||||||
typedef void(*ProgramFunc)(RegisterFile&, MemoryRegisters&, convertible_t*, uint64_t);
|
typedef void(*ProgramFunc)(RegisterFile&, MemoryRegisters&, convertible_t*, uint64_t);
|
||||||
|
|
||||||
|
typedef bool(*Condition)(convertible_t&, convertible_t&);
|
||||||
|
|
||||||
extern "C" {
|
extern "C" {
|
||||||
void executeProgram(RegisterFile&, MemoryRegisters&, convertible_t*, uint64_t);
|
void executeProgram(RegisterFile&, MemoryRegisters&, convertible_t*, uint64_t);
|
||||||
}
|
}
|
||||||
|
|
|
@ -28,10 +28,11 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||||
#include "Cache.hpp"
|
#include "Cache.hpp"
|
||||||
#include "virtualMemory.hpp"
|
#include "virtualMemory.hpp"
|
||||||
#include "softAes.h"
|
#include "softAes.h"
|
||||||
|
#include "squareHash.h"
|
||||||
|
|
||||||
#if defined(__SSE2__)
|
#if defined(__SSE2__)
|
||||||
#include <wmmintrin.h>
|
#include <wmmintrin.h>
|
||||||
#define PREFETCH(memory) _mm_prefetch((const char *)((memory).ds.dataset + (memory).ma), _MM_HINT_NTA)
|
#define PREFETCHNTA(x) _mm_prefetch((const char *)(x), _MM_HINT_NTA)
|
||||||
#else
|
#else
|
||||||
#define PREFETCH(memory)
|
#define PREFETCH(memory)
|
||||||
#endif
|
#endif
|
||||||
|
@ -49,42 +50,37 @@ namespace RandomX {
|
||||||
|
|
||||||
template<bool soft>
|
template<bool soft>
|
||||||
void initBlock(const uint8_t* intermediate, uint8_t* out, uint32_t blockNumber, const KeysContainer& keys) {
|
void initBlock(const uint8_t* intermediate, uint8_t* out, uint32_t blockNumber, const KeysContainer& keys) {
|
||||||
__m128i x0, x1, x2, x3;
|
uint64_t r0, r1, r2, r3, r4, r5, r6, r7;
|
||||||
|
|
||||||
__m128i* xit = (__m128i*)intermediate;
|
r0 = 4ULL * blockNumber;
|
||||||
__m128i* xout = (__m128i*)out;
|
r1 = r2 = r3 = r4 = r5 = r6 = r7 = 0;
|
||||||
|
|
||||||
x0 = _mm_cvtsi32_si128(blockNumber);
|
constexpr int mask = (CacheSize - 1) & -64;
|
||||||
constexpr int mask = (CacheSize / CacheLineSize) - 1;
|
|
||||||
|
|
||||||
for (auto i = 0; i < DatasetIterations; ++i) {
|
for (auto i = 0; i < DatasetIterations; ++i) {
|
||||||
x0 = aesenc<soft>(x0, keys[0]);
|
uint64_t* mix = (uint64_t*)(intermediate + (r0 & mask));
|
||||||
//x0 = aesenc<soft>(x0, keys[1]);
|
PREFETCHNTA(mix);
|
||||||
x1 = aesenc<soft>(x0, keys[2]);
|
r0 = squareHash(r0);
|
||||||
//x1 = aesenc<soft>(x1, keys[3]);
|
r0 ^= mix[0];
|
||||||
x2 = aesenc<soft>(x1, keys[4]);
|
r1 ^= mix[1];
|
||||||
//x2 = aesenc<soft>(x2, keys[5]);
|
r2 ^= mix[2];
|
||||||
x3 = aesenc<soft>(x2, keys[6]);
|
r3 ^= mix[3];
|
||||||
//x3 = aesenc<soft>(x3, keys[7]);
|
r4 ^= mix[4];
|
||||||
|
r5 ^= mix[5];
|
||||||
int index = _mm_cvtsi128_si32(x3);
|
r6 ^= mix[6];
|
||||||
index &= mask;
|
r7 ^= mix[7];
|
||||||
|
|
||||||
__m128i t0 = _mm_load_si128(xit + 4 * index + 0);
|
|
||||||
__m128i t1 = _mm_load_si128(xit + 4 * index + 1);
|
|
||||||
__m128i t2 = _mm_load_si128(xit + 4 * index + 2);
|
|
||||||
__m128i t3 = _mm_load_si128(xit + 4 * index + 3);
|
|
||||||
|
|
||||||
x0 = _mm_xor_si128(x0, t0);
|
|
||||||
x1 = _mm_xor_si128(x1, t1);
|
|
||||||
x2 = _mm_xor_si128(x2, t2);
|
|
||||||
x3 = _mm_xor_si128(x3, t3);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
_mm_store_si128(xout + 0, x0);
|
uint64_t* out64 = (uint64_t*)out;
|
||||||
_mm_store_si128(xout + 1, x1);
|
|
||||||
_mm_store_si128(xout + 2, x2);
|
out64[0] = r0;
|
||||||
_mm_store_si128(xout + 3, x3);
|
out64[1] = r1;
|
||||||
|
out64[2] = r2;
|
||||||
|
out64[3] = r3;
|
||||||
|
out64[4] = r4;
|
||||||
|
out64[5] = r5;
|
||||||
|
out64[6] = r6;
|
||||||
|
out64[7] = r7;
|
||||||
}
|
}
|
||||||
|
|
||||||
template
|
template
|
||||||
|
@ -98,7 +94,7 @@ namespace RandomX {
|
||||||
memory.mx ^= addr;
|
memory.mx ^= addr;
|
||||||
memory.mx &= -64; //align to cache line
|
memory.mx &= -64; //align to cache line
|
||||||
std::swap(memory.mx, memory.ma);
|
std::swap(memory.mx, memory.ma);
|
||||||
PREFETCH(memory);
|
PREFETCHNTA(memory.ds.dataset + memory.ma);
|
||||||
for (int i = 0; i < RegistersCount; ++i)
|
for (int i = 0; i < RegistersCount; ++i)
|
||||||
reg.r[i].u64 ^= datasetLine[i];
|
reg.r[i].u64 ^= datasetLine[i];
|
||||||
}
|
}
|
||||||
|
|
|
@ -71,3 +71,44 @@ void hashAes1Rx4(const void *input, size_t inputSize, void *hash) {
|
||||||
|
|
||||||
template void hashAes1Rx4<false>(const void *input, size_t inputSize, void *hash);
|
template void hashAes1Rx4<false>(const void *input, size_t inputSize, void *hash);
|
||||||
template void hashAes1Rx4<true>(const void *input, size_t inputSize, void *hash);
|
template void hashAes1Rx4<true>(const void *input, size_t inputSize, void *hash);
|
||||||
|
|
||||||
|
template<bool softAes>
|
||||||
|
void fillAes1Rx4(void *state, size_t outputSize, void *buffer) {
|
||||||
|
const uint8_t* outptr = (uint8_t*)buffer;
|
||||||
|
const uint8_t* outputEnd = outptr + outputSize;
|
||||||
|
|
||||||
|
__m128i state0, state1, state2, state3;
|
||||||
|
__m128i key0, key1, key2, key3;
|
||||||
|
|
||||||
|
key0 = _mm_set_epi32(0x9274f206, 0x79498d2f, 0x7d2de6ab, 0x67a04d26);
|
||||||
|
key1 = _mm_set_epi32(0xe1f7af05, 0x2a3a6f1d, 0x86658a15, 0x4f719812);
|
||||||
|
key2 = _mm_set_epi32(0xd1b1f791, 0x9e2ec914, 0x14c77bce, 0xba90750e);
|
||||||
|
key3 = _mm_set_epi32(0x179d0fd9, 0x6e57883c, 0xa53bbe4f, 0xaa07621f);
|
||||||
|
|
||||||
|
state0 = _mm_load_si128((__m128i*)state + 0);
|
||||||
|
state1 = _mm_load_si128((__m128i*)state + 1);
|
||||||
|
state2 = _mm_load_si128((__m128i*)state + 2);
|
||||||
|
state3 = _mm_load_si128((__m128i*)state + 3);
|
||||||
|
|
||||||
|
while (outptr < outputEnd) {
|
||||||
|
state0 = aesdec<softAes>(state0, key0);
|
||||||
|
state1 = aesenc<softAes>(state1, key1);
|
||||||
|
state2 = aesdec<softAes>(state2, key2);
|
||||||
|
state3 = aesenc<softAes>(state3, key3);
|
||||||
|
|
||||||
|
_mm_store_si128((__m128i*)outptr + 0, state0);
|
||||||
|
_mm_store_si128((__m128i*)outptr + 1, state1);
|
||||||
|
_mm_store_si128((__m128i*)outptr + 2, state2);
|
||||||
|
_mm_store_si128((__m128i*)outptr + 3, state3);
|
||||||
|
|
||||||
|
outptr += 64;
|
||||||
|
}
|
||||||
|
|
||||||
|
_mm_store_si128((__m128i*)state + 0, state0);
|
||||||
|
_mm_store_si128((__m128i*)state + 1, state1);
|
||||||
|
_mm_store_si128((__m128i*)state + 2, state2);
|
||||||
|
_mm_store_si128((__m128i*)state + 3, state3);
|
||||||
|
}
|
||||||
|
|
||||||
|
template void fillAes1Rx4<true>(void *state, size_t outputSize, void *buffer);
|
||||||
|
template void fillAes1Rx4<false>(void *state, size_t outputSize, void *buffer);
|
||||||
|
|
|
@ -21,3 +21,6 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
template<bool softAes>
|
template<bool softAes>
|
||||||
void hashAes1Rx4(const void *input, size_t inputSize, void *hash);
|
void hashAes1Rx4(const void *input, size_t inputSize, void *hash);
|
||||||
|
|
||||||
|
template<bool softAes>
|
||||||
|
void fillAes1Rx4(void *state, size_t outputSize, void *buffer);
|
||||||
|
|
|
@ -37,8 +37,9 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||||
#define WT_INEG_R 2
|
#define WT_INEG_R 2
|
||||||
#define WT_IXOR_R 12
|
#define WT_IXOR_R 12
|
||||||
#define WT_IXOR_M 3
|
#define WT_IXOR_M 3
|
||||||
#define WT_IROR_R 12
|
#define WT_IROR_R 10
|
||||||
#define WT_IROL_R 12
|
#define WT_IROL_R 10
|
||||||
|
#define WT_ISWAP_R 4
|
||||||
|
|
||||||
//Common floating point
|
//Common floating point
|
||||||
#define WT_FPSWAP_R 8
|
#define WT_FPSWAP_R 8
|
||||||
|
@ -72,7 +73,7 @@ constexpr int wtSum = WT_IADD_R + WT_IADD_M + WT_IADD_RC + WT_ISUB_R + \
|
||||||
WT_ISUB_M + WT_IMUL_9C + WT_IMUL_R + WT_IMUL_M + WT_IMULH_R + \
|
WT_ISUB_M + WT_IMUL_9C + WT_IMUL_R + WT_IMUL_M + WT_IMULH_R + \
|
||||||
WT_IMULH_M + WT_ISMULH_R + WT_ISMULH_M + WT_IDIV_C + WT_ISDIV_C + \
|
WT_IMULH_M + WT_ISMULH_R + WT_ISMULH_M + WT_IDIV_C + WT_ISDIV_C + \
|
||||||
WT_INEG_R + WT_IXOR_R + WT_IXOR_M + WT_IROR_R + WT_IROL_R + \
|
WT_INEG_R + WT_IXOR_R + WT_IXOR_M + WT_IROR_R + WT_IROL_R + \
|
||||||
WT_FPSWAP_R + WT_FPADD_R + WT_FPADD_M + WT_FPSUB_R + WT_FPSUB_M + \
|
WT_ISWAP_R + WT_FPSWAP_R + WT_FPADD_R + WT_FPADD_M + WT_FPSUB_R + WT_FPSUB_M + \
|
||||||
WT_FPNEG_R + WT_FPMUL_R + WT_FPMUL_M + WT_FPDIV_R + WT_FPDIV_M + \
|
WT_FPNEG_R + WT_FPMUL_R + WT_FPMUL_M + WT_FPDIV_R + WT_FPDIV_M + \
|
||||||
WT_FPSQRT_R + WT_COND_R + WT_COND_M + WT_CFROUND + WT_ISTORE + WT_FSTORE + WT_NOP;
|
WT_FPSQRT_R + WT_COND_R + WT_COND_M + WT_CFROUND + WT_ISTORE + WT_FSTORE + WT_NOP;
|
||||||
|
|
||||||
|
|
|
@ -17,7 +17,6 @@ You should have received a copy of the GNU General Public License
|
||||||
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
//#define DEBUG
|
//#define DEBUG
|
||||||
#include "instructions.hpp"
|
|
||||||
#include "intrinPortable.h"
|
#include "intrinPortable.h"
|
||||||
#pragma STDC FENV_ACCESS on
|
#pragma STDC FENV_ACCESS on
|
||||||
#include <cfenv>
|
#include <cfenv>
|
||||||
|
@ -29,14 +28,14 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||||
#if defined(__SIZEOF_INT128__)
|
#if defined(__SIZEOF_INT128__)
|
||||||
typedef unsigned __int128 uint128_t;
|
typedef unsigned __int128 uint128_t;
|
||||||
typedef __int128 int128_t;
|
typedef __int128 int128_t;
|
||||||
static inline uint64_t __umulhi64(uint64_t a, uint64_t b) {
|
uint64_t mulh(uint64_t a, uint64_t b) {
|
||||||
return ((uint128_t)a * b) >> 64;
|
return ((uint128_t)a * b) >> 64;
|
||||||
}
|
}
|
||||||
static inline uint64_t __imulhi64(int64_t a, int64_t b) {
|
int64_t smulh(int64_t a, int64_t b) {
|
||||||
return ((int128_t)a * b) >> 64;
|
return ((int128_t)a * b) >> 64;
|
||||||
}
|
}
|
||||||
#define umulhi64 __umulhi64
|
#define HAVE_MULH
|
||||||
#define imulhi64 __imulhi64
|
#define HAVE_SMULH
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(_MSC_VER)
|
#if defined(_MSC_VER)
|
||||||
|
@ -44,62 +43,62 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||||
#define EVAL_DEFINE(X) HAS_VALUE(X)
|
#define EVAL_DEFINE(X) HAS_VALUE(X)
|
||||||
#include <intrin.h>
|
#include <intrin.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#define ror64 _rotr64
|
|
||||||
#define rol64 _rotl64
|
uint64_t rotl(uint64_t x, int c) {
|
||||||
|
return _rotl64(x, c);
|
||||||
|
}
|
||||||
|
uint64_t rotr(uint64_t x , int c) {
|
||||||
|
return _rotr64(x, c);
|
||||||
|
}
|
||||||
|
#define HAVE_ROTL
|
||||||
|
#define HAVE_ROTR
|
||||||
|
|
||||||
#if EVAL_DEFINE(__MACHINEARM64_X64(1))
|
#if EVAL_DEFINE(__MACHINEARM64_X64(1))
|
||||||
#define umulhi64 __umulh
|
uint64_t mulh(uint64_t a, uint64_t b) {
|
||||||
|
return __umulh(a, b);
|
||||||
|
}
|
||||||
|
#define HAVE_MULH
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if EVAL_DEFINE(__MACHINEX64(1))
|
#if EVAL_DEFINE(__MACHINEX64(1))
|
||||||
static inline uint64_t __imulhi64(int64_t a, int64_t b) {
|
int64_t smulh(int64_t a, int64_t b) {
|
||||||
int64_t hi;
|
int64_t hi;
|
||||||
_mul128(a, b, &hi);
|
_mul128(a, b, &hi);
|
||||||
return hi;
|
return hi;
|
||||||
}
|
}
|
||||||
#define imulhi64 __imulhi64
|
#define HAVE_SMULH
|
||||||
#endif
|
#endif
|
||||||
static inline uint32_t _setRoundMode(uint32_t mode) {
|
|
||||||
return _controlfp(mode, _MCW_RC);
|
static void setRoundMode__(uint32_t mode) {
|
||||||
|
_controlfp(mode, _MCW_RC);
|
||||||
}
|
}
|
||||||
#define setRoundMode _setRoundMode
|
#define HAVE_SETROUNDMODE_IMPL
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef setRoundMode
|
#ifndef HAVE_SETROUNDMODE_IMPL
|
||||||
#define setRoundMode fesetround
|
static void setRoundMode__(uint32_t mode) {
|
||||||
|
fesetround(mode);
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef HAVE_ROTR
// Portable 64-bit rotate-right, used when no compiler intrinsic was selected.
// The count is reduced modulo 64 and the complementary shift is masked as
// well, so a count of 0 (or any multiple of 64) never produces a shift by the
// full width of the type, which would be undefined behaviour.
uint64_t rotr(uint64_t a, int b) {
	const unsigned shift = (unsigned)b & 63;
	return (a >> shift) | (a << ((64 - shift) & 63));
}
// Same macro name that the guard above tests.
#define HAVE_ROTR
#endif
|
||||||
|
|
||||||
#ifndef HAVE_ROTL
// Portable 64-bit rotate-left, used when no compiler intrinsic was selected.
// The count is reduced modulo 64 and the complementary shift is masked as
// well, so a count of 0 (or any multiple of 64) never produces a shift by the
// full width of the type, which would be undefined behaviour.
uint64_t rotl(uint64_t a, int b) {
	const unsigned shift = (unsigned)b & 63;
	return (a << shift) | (a >> ((64 - shift) & 63));
}
// Same macro name that the guard above tests.
#define HAVE_ROTL
#endif
|
||||||
|
|
||||||
#ifndef sar64
|
#ifndef HAVE_MULH
|
||||||
#include <type_traits>
|
|
||||||
constexpr int64_t builtintShr64(int64_t value, int shift) noexcept {
|
|
||||||
return value >> shift;
|
|
||||||
}
|
|
||||||
|
|
||||||
struct UsesArithmeticShift : std::integral_constant<bool, builtintShr64(-1LL, 1) == -1LL> {
|
|
||||||
};
|
|
||||||
|
|
||||||
static inline int64_t __sar64(int64_t a, int b) {
|
|
||||||
return UsesArithmeticShift::value ? builtintShr64(a, b) : (a < 0 ? ~(~a >> b) : a >> b);
|
|
||||||
}
|
|
||||||
#define sar64 __sar64
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef umulhi64
|
|
||||||
#define LO(x) ((x)&0xffffffff)
|
#define LO(x) ((x)&0xffffffff)
|
||||||
#define HI(x) ((x)>>32)
|
#define HI(x) ((x)>>32)
|
||||||
static inline uint64_t __umulhi64(uint64_t a, uint64_t b) {
|
uint64_t mulh(uint64_t a, uint64_t b) {
|
||||||
uint64_t ah = HI(a), al = LO(a);
|
uint64_t ah = HI(a), al = LO(a);
|
||||||
uint64_t bh = HI(b), bl = LO(b);
|
uint64_t bh = HI(b), bl = LO(b);
|
||||||
uint64_t x00 = al * bl;
|
uint64_t x00 = al * bl;
|
||||||
|
@ -112,17 +111,17 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
return (m3 << 32) + LO(m2);
|
return (m3 << 32) + LO(m2);
|
||||||
}
|
}
|
||||||
#define umulhi64 __umulhi64
|
#define HAVE_MULH
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef imulhi64
|
#ifndef HAVE_SMULH
|
||||||
static inline int64_t __imulhi64(int64_t a, int64_t b) {
|
int64_t smulh(int64_t a, int64_t b) {
|
||||||
int64_t hi = umulhi64(a, b);
|
int64_t hi = mulh(a, b);
|
||||||
if (a < 0LL) hi -= b;
|
if (a < 0LL) hi -= b;
|
||||||
if (b < 0LL) hi -= a;
|
if (b < 0LL) hi -= a;
|
||||||
return hi;
|
return hi;
|
||||||
}
|
}
|
||||||
#define imulhi64 __imulhi64
|
#define HAVE_SMULH
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// avoid undefined behavior of signed overflow
|
// avoid undefined behavior of signed overflow
|
||||||
|
@ -137,20 +136,20 @@ static inline int32_t safeSub(int32_t a, int32_t b) {
|
||||||
|
|
||||||
#if defined(__has_builtin)
|
#if defined(__has_builtin)
|
||||||
#if __has_builtin(__builtin_sub_overflow)
|
#if __has_builtin(__builtin_sub_overflow)
|
||||||
static inline bool __subOverflow(int32_t a, int32_t b) {
|
static inline bool subOverflow__(int32_t a, int32_t b) {
|
||||||
int32_t temp;
|
int32_t temp;
|
||||||
return __builtin_sub_overflow(a, b, &temp);
|
return __builtin_sub_overflow(a, b, &temp);
|
||||||
}
|
}
|
||||||
#define subOverflow __subOverflow
|
#define HAVE_SUB_OVERFLOW
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef subOverflow
|
#ifndef HAVE_SUB_OVERFLOW
|
||||||
static inline bool __subOverflow(int32_t a, int32_t b) {
|
static inline bool subOverflow__(int32_t a, int32_t b) {
|
||||||
auto c = safeSub(a, b);
|
auto c = safeSub(a, b);
|
||||||
return (c < a) != (b > 0);
|
return (c < a) != (b > 0);
|
||||||
}
|
}
|
||||||
#define subOverflow __subOverflow
|
#define HAVE_SUB_OVERFLOW
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static inline double FlushDenormalNaN(double x) {
|
static inline double FlushDenormalNaN(double x) {
|
||||||
|
@ -165,47 +164,57 @@ static inline double FlushNaN(double x) {
|
||||||
return x != x ? 0.0 : x;
|
return x != x ? 0.0 : x;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void setRoundMode(uint32_t rcflag) {
|
||||||
|
switch (rcflag & 3) {
|
||||||
|
case RoundDown:
|
||||||
|
setRoundMode__(FE_DOWNWARD);
|
||||||
|
break;
|
||||||
|
case RoundUp:
|
||||||
|
setRoundMode__(FE_UPWARD);
|
||||||
|
break;
|
||||||
|
case RoundToZero:
|
||||||
|
setRoundMode__(FE_TOWARDZERO);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
setRoundMode__(FE_TONEAREST);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool condition(uint32_t type, int32_t value, int32_t imm32) {
|
||||||
|
switch (type & 7)
|
||||||
|
{
|
||||||
|
case 0:
|
||||||
|
return (uint32_t)value <= (uint32_t)imm32;
|
||||||
|
case 1:
|
||||||
|
return (uint32_t)value > (uint32_t)imm32;
|
||||||
|
case 2:
|
||||||
|
return safeSub(value, imm32) < 0;
|
||||||
|
case 3:
|
||||||
|
return safeSub(value, imm32) >= 0;
|
||||||
|
case 4:
|
||||||
|
return subOverflow__(value, imm32);
|
||||||
|
case 5:
|
||||||
|
return !subOverflow__(value, imm32);
|
||||||
|
case 6:
|
||||||
|
return value < imm32;
|
||||||
|
case 7:
|
||||||
|
return value >= imm32;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Puts the floating point unit into its initial state.
// With SSE2 the MXCSR register is written directly; otherwise only the
// rounding mode is reset to round-to-nearest.
void initFpu() {
#ifdef __SSE2__
	_mm_setcsr(0x9FC0); //Flush to zero, denormals are zero, default rounding mode, all exceptions disabled
#else
	// setRoundMode() takes the 2-bit RandomX rounding flag, not a <cfenv>
	// constant, so pass RoundToNearest rather than FE_TONEAREST.
	setRoundMode(RoundToNearest);
#endif
}
|
||||||
|
|
||||||
namespace RandomX {
|
namespace RandomX {
|
||||||
|
|
||||||
extern "C" {
|
extern "C" {
|
||||||
|
/*void DIV_64(convertible_t& a, convertible_t& b, convertible_t& c) {
|
||||||
void ADD_64(convertible_t& a, convertible_t& b, convertible_t& c) {
|
|
||||||
c.u64 = a.u64 + b.u64;
|
|
||||||
}
|
|
||||||
|
|
||||||
void ADD_32(convertible_t& a, convertible_t& b, convertible_t& c) {
|
|
||||||
c.u64 = a.u32 + b.u32;
|
|
||||||
}
|
|
||||||
|
|
||||||
void SUB_64(convertible_t& a, convertible_t& b, convertible_t& c) {
|
|
||||||
c.u64 = a.u64 - b.u64;
|
|
||||||
}
|
|
||||||
|
|
||||||
void SUB_32(convertible_t& a, convertible_t& b, convertible_t& c) {
|
|
||||||
c.u64 = a.u32 - b.u32;
|
|
||||||
}
|
|
||||||
|
|
||||||
void MUL_64(convertible_t& a, convertible_t& b, convertible_t& c) {
|
|
||||||
c.u64 = a.u64 * b.u64;
|
|
||||||
}
|
|
||||||
|
|
||||||
void MULH_64(convertible_t& a, convertible_t& b, convertible_t& c) {
|
|
||||||
c.u64 = umulhi64(a.u64, b.u64);
|
|
||||||
}
|
|
||||||
|
|
||||||
void MUL_32(convertible_t& a, convertible_t& b, convertible_t& c) {
|
|
||||||
c.u64 = (uint64_t)a.u32 * b.u32;
|
|
||||||
}
|
|
||||||
|
|
||||||
void IMUL_32(convertible_t& a, convertible_t& b, convertible_t& c) {
|
|
||||||
c.i64 = (int64_t)a.i32 * b.i32;
|
|
||||||
}
|
|
||||||
|
|
||||||
void IMULH_64(convertible_t& a, convertible_t& b, convertible_t& c) {
|
|
||||||
c.i64 = imulhi64(a.i64, b.i64);
|
|
||||||
}
|
|
||||||
|
|
||||||
void DIV_64(convertible_t& a, convertible_t& b, convertible_t& c) {
|
|
||||||
c.u64 = a.u64 / (b.u32 != 0 ? b.u32 : 1U);
|
c.u64 = a.u64 / (b.u32 != 0 ? b.u32 : 1U);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -216,80 +225,6 @@ namespace RandomX {
|
||||||
c.i64 = a.i64 / (b.i32 != 0 ? b.i32 : 1);
|
c.i64 = a.i64 / (b.i32 != 0 ? b.i32 : 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void AND_64(convertible_t& a, convertible_t& b, convertible_t& c) {
|
|
||||||
c.u64 = a.u64 & b.u64;
|
|
||||||
}
|
|
||||||
|
|
||||||
void AND_32(convertible_t& a, convertible_t& b, convertible_t& c) {
|
|
||||||
c.u64 = a.u32 & b.u32;
|
|
||||||
}
|
|
||||||
|
|
||||||
void OR_64(convertible_t& a, convertible_t& b, convertible_t& c) {
|
|
||||||
c.u64 = a.u64 | b.u64;
|
|
||||||
}
|
|
||||||
|
|
||||||
void OR_32(convertible_t& a, convertible_t& b, convertible_t& c) {
|
|
||||||
c.u64 = a.u32 | b.u32;
|
|
||||||
}
|
|
||||||
|
|
||||||
void XOR_64(convertible_t& a, convertible_t& b, convertible_t& c) {
|
|
||||||
c.u64 = a.u64 ^ b.u64;
|
|
||||||
}
|
|
||||||
|
|
||||||
void XOR_32(convertible_t& a, convertible_t& b, convertible_t& c) {
|
|
||||||
c.u64 = a.u32 ^ b.u32;
|
|
||||||
}
|
|
||||||
|
|
||||||
void SHL_64(convertible_t& a, convertible_t& b, convertible_t& c) {
|
|
||||||
c.u64 = a.u64 << (b.u64 & 63);
|
|
||||||
}
|
|
||||||
|
|
||||||
void SHR_64(convertible_t& a, convertible_t& b, convertible_t& c) {
|
|
||||||
c.u64 = a.u64 >> (b.u64 & 63);
|
|
||||||
}
|
|
||||||
|
|
||||||
void SAR_64(convertible_t& a, convertible_t& b, convertible_t& c) {
|
|
||||||
c.u64 = sar64(a.i64, b.u64 & 63);
|
|
||||||
}
|
|
||||||
|
|
||||||
void ROL_64(convertible_t& a, convertible_t& b, convertible_t& c) {
|
|
||||||
c.u64 = rol64(a.u64, (b.u64 & 63));
|
|
||||||
}
|
|
||||||
|
|
||||||
void ROR_64(convertible_t& a, convertible_t& b, convertible_t& c) {
|
|
||||||
c.u64 = ror64(a.u64, (b.u64 & 63));
|
|
||||||
}
|
|
||||||
|
|
||||||
bool JMP_COND(uint8_t type, convertible_t& regb, int32_t imm32) {
|
|
||||||
switch (type & 7)
|
|
||||||
{
|
|
||||||
case 0:
|
|
||||||
return regb.u32 <= (uint32_t)imm32;
|
|
||||||
case 1:
|
|
||||||
return regb.u32 > (uint32_t)imm32;
|
|
||||||
case 2:
|
|
||||||
return safeSub(regb.i32, imm32) < 0;
|
|
||||||
case 3:
|
|
||||||
return safeSub(regb.i32, imm32) >= 0;
|
|
||||||
case 4:
|
|
||||||
return subOverflow(regb.i32, imm32);
|
|
||||||
case 5:
|
|
||||||
return !subOverflow(regb.i32, imm32);
|
|
||||||
case 6:
|
|
||||||
return regb.i32 < imm32;
|
|
||||||
case 7:
|
|
||||||
return regb.i32 >= imm32;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void FPINIT() {
|
|
||||||
#ifdef __SSE2__
|
|
||||||
_mm_setcsr(0x9FC0); //Flush to zero, denormals are zero, default rounding mode, all exceptions disabled
|
|
||||||
#else
|
|
||||||
setRoundMode(FE_TONEAREST);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
void FPADD(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c) {
|
void FPADD(convertible_t& a, fpu_reg_t& b, fpu_reg_t& c) {
|
||||||
#ifdef __SSE2__
|
#ifdef __SSE2__
|
||||||
__m128i ai = _mm_loadl_epi64((const __m128i*)&a);
|
__m128i ai = _mm_loadl_epi64((const __m128i*)&a);
|
||||||
|
@ -368,48 +303,8 @@ namespace RandomX {
|
||||||
c.lo.f64 = sqrt(std::abs(alo));
|
c.lo.f64 = sqrt(std::abs(alo));
|
||||||
c.hi.f64 = sqrt(std::abs(ahi));
|
c.hi.f64 = sqrt(std::abs(ahi));
|
||||||
#endif
|
#endif
|
||||||
}
|
}*/
|
||||||
|
|
||||||
|
|
||||||
void FPROUND(convertible_t a, uint8_t rot) {
|
|
||||||
uint64_t flag = ror64(a.u64, rot);
|
|
||||||
switch (flag & 3) {
|
|
||||||
case RoundDown:
|
|
||||||
#ifdef DEBUG
|
|
||||||
std::cout << "Round FE_DOWNWARD (" << FE_DOWNWARD << ") = " <<
|
|
||||||
#endif
|
|
||||||
setRoundMode(FE_DOWNWARD);
|
|
||||||
#ifdef DEBUG
|
|
||||||
std::cout << std::endl;
|
|
||||||
#endif
|
|
||||||
break;
|
|
||||||
case RoundUp:
|
|
||||||
#ifdef DEBUG
|
|
||||||
std::cout << "Round FE_UPWARD (" << FE_UPWARD << ") = " <<
|
|
||||||
#endif
|
|
||||||
setRoundMode(FE_UPWARD);
|
|
||||||
#ifdef DEBUG
|
|
||||||
std::cout << std::endl;
|
|
||||||
#endif
|
|
||||||
break;
|
|
||||||
case RoundToZero:
|
|
||||||
#ifdef DEBUG
|
|
||||||
std::cout << "Round FE_TOWARDZERO (" << FE_TOWARDZERO << ") = " <<
|
|
||||||
#endif
|
|
||||||
setRoundMode(FE_TOWARDZERO);
|
|
||||||
#ifdef DEBUG
|
|
||||||
std::cout << std::endl;
|
|
||||||
#endif
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
#ifdef DEBUG
|
|
||||||
std::cout << "Round FE_TONEAREST (" << FE_TONEAREST << ") = " <<
|
|
||||||
#endif
|
|
||||||
setRoundMode(FE_TONEAREST);
|
|
||||||
#ifdef DEBUG
|
|
||||||
std::cout << std::endl;
|
|
||||||
#endif
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -19,6 +19,8 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <cstdint>
|
||||||
|
|
||||||
#if defined(_MSC_VER)
|
#if defined(_MSC_VER)
|
||||||
#if defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP == 2)
|
#if defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP == 2)
|
||||||
#define __SSE2__ 1
|
#define __SSE2__ 1
|
||||||
|
@ -45,6 +47,18 @@ typedef union {
|
||||||
uint8_t u8[16];
|
uint8_t u8[16];
|
||||||
} __m128i;
|
} __m128i;
|
||||||
|
|
||||||
|
// Stand-in for a packed pair of doubles: lo is element 0, hi is element 1.
typedef struct {
	double lo;
	double hi;
} __m128d;

// Reads two consecutive doubles starting at pd into a __m128d.
inline __m128d _mm_load_pd(const double* pd) {
	__m128d packed;
	packed.lo = pd[0];
	packed.hi = pd[1];
	return packed;
}
|
||||||
|
|
||||||
static const char* platformError = "Platform doesn't support hardware AES";
|
static const char* platformError = "Platform doesn't support hardware AES";
|
||||||
|
|
||||||
inline __m128i _mm_aeskeygenassist_si128(__m128i key, uint8_t rcon) {
|
inline __m128i _mm_aeskeygenassist_si128(__m128i key, uint8_t rcon) {
|
||||||
|
@ -132,3 +146,16 @@ inline __m128i _mm_slli_si128(__m128i _A, int _Imm) {
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
constexpr int RoundToNearest = 0;
|
||||||
|
constexpr int RoundDown = 1;
|
||||||
|
constexpr int RoundUp = 2;
|
||||||
|
constexpr int RoundToZero = 3;
|
||||||
|
|
||||||
|
uint64_t mulh(uint64_t, uint64_t);
|
||||||
|
int64_t smulh(int64_t, int64_t);
|
||||||
|
uint64_t rotl(uint64_t, int);
|
||||||
|
uint64_t rotr(uint64_t, int);
|
||||||
|
void initFpu();
|
||||||
|
void setRoundMode(uint32_t);
|
||||||
|
bool condition(uint32_t, int32_t, int32_t);
|
||||||
|
|
|
@ -35,6 +35,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||||
#include "dataset.hpp"
|
#include "dataset.hpp"
|
||||||
#include "Cache.hpp"
|
#include "Cache.hpp"
|
||||||
#include "Pcg32.hpp"
|
#include "Pcg32.hpp"
|
||||||
|
#include "hashAes1Rx4.hpp"
|
||||||
|
|
||||||
const uint8_t seed[32] = { 191, 182, 222, 175, 249, 89, 134, 104, 241, 68, 191, 62, 162, 166, 61, 64, 123, 191, 227, 193, 118, 60, 188, 53, 223, 133, 175, 24, 123, 230, 55, 74 };
|
const uint8_t seed[32] = { 191, 182, 222, 175, 249, 89, 134, 104, 241, 68, 191, 62, 162, 166, 61, 64, 123, 191, 227, 193, 118, 60, 188, 53, 223, 133, 175, 24, 123, 230, 55, 74 };
|
||||||
|
|
||||||
|
@ -153,7 +154,7 @@ void generateNative(int nonce) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void mine(RandomX::VirtualMachine* vm, std::atomic<int>& atomicNonce, AtomicHash& result, int noncesCount, int thread, uint8_t* scratchpad) {
|
void mine(RandomX::VirtualMachine* vm, std::atomic<int>& atomicNonce, AtomicHash& result, int noncesCount, int thread, uint8_t* scratchpad) {
|
||||||
uint64_t hash[4];
|
alignas(16) uint64_t hash[8];
|
||||||
unsigned char blockTemplate[] = {
|
unsigned char blockTemplate[] = {
|
||||||
0x07, 0x07, 0xf7, 0xa4, 0xf0, 0xd6, 0x05, 0xb3, 0x03, 0x26, 0x08, 0x16, 0xba, 0x3f, 0x10, 0x90, 0x2e, 0x1a, 0x14,
|
0x07, 0x07, 0xf7, 0xa4, 0xf0, 0xd6, 0x05, 0xb3, 0x03, 0x26, 0x08, 0x16, 0xba, 0x3f, 0x10, 0x90, 0x2e, 0x1a, 0x14,
|
||||||
0x5a, 0xc5, 0xfa, 0xd3, 0xaa, 0x3a, 0xf6, 0xea, 0x44, 0xc1, 0x18, 0x69, 0xdc, 0x4f, 0x85, 0x3f, 0x00, 0x2b, 0x2e,
|
0x5a, 0xc5, 0xfa, 0xd3, 0xaa, 0x3a, 0xf6, 0xea, 0x44, 0xc1, 0x18, 0x69, 0xdc, 0x4f, 0x85, 0x3f, 0x00, 0x2b, 0x2e,
|
||||||
|
@ -167,8 +168,8 @@ void mine(RandomX::VirtualMachine* vm, std::atomic<int>& atomicNonce, AtomicHash
|
||||||
//std::cout << "Thread " << thread << " nonce " << nonce << std::endl;
|
//std::cout << "Thread " << thread << " nonce " << nonce << std::endl;
|
||||||
*noncePtr = nonce;
|
*noncePtr = nonce;
|
||||||
blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0);
|
blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0);
|
||||||
int spIndex = ((uint8_t*)hash)[24] | ((((uint8_t*)hash)[25] & 15) << 8);
|
fillAes1Rx4<false>((void*)hash, RandomX::ScratchpadSize, scratchpad);
|
||||||
vm->initializeScratchpad(scratchpad, spIndex);
|
//vm->initializeScratchpad(scratchpad, spIndex);
|
||||||
vm->setScratchpad(scratchpad);
|
vm->setScratchpad(scratchpad);
|
||||||
//dump((char*)((RandomX::CompiledVirtualMachine*)vm)->getProgram(), RandomX::CodeSize, "code-1337-jmp.txt");
|
//dump((char*)((RandomX::CompiledVirtualMachine*)vm)->getProgram(), RandomX::CodeSize, "code-1337-jmp.txt");
|
||||||
for (int chain = 0; chain < 16; ++chain) {
|
for (int chain = 0; chain < 16; ++chain) {
|
||||||
|
@ -309,7 +310,7 @@ int main(int argc, char** argv) {
|
||||||
}
|
}
|
||||||
uint8_t* scratchpadMem;
|
uint8_t* scratchpadMem;
|
||||||
if (largePages) {
|
if (largePages) {
|
||||||
scratchpadMem = (uint8_t*)allocLargePagesMemory(RandomX::ScratchpadSize * (threadCount + 1) / 2);
|
scratchpadMem = (uint8_t*)allocLargePagesMemory(threadCount * RandomX::ScratchpadSize);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
scratchpadMem = (uint8_t*)_mm_malloc(threadCount * RandomX::ScratchpadSize, RandomX::CacheLineSize);
|
scratchpadMem = (uint8_t*)_mm_malloc(threadCount * RandomX::ScratchpadSize, RandomX::CacheLineSize);
|
||||||
|
|
17
src/squareHash.S
Normal file
17
src/squareHash.S
Normal file
|
@ -0,0 +1,17 @@
|
||||||
|
.intel_syntax noprefix
|
||||||
|
#if defined(__APPLE__)
|
||||||
|
.text
|
||||||
|
#else
|
||||||
|
.section .text
|
||||||
|
#endif
|
||||||
|
#if defined(__WIN32__) || defined(__APPLE__)
|
||||||
|
#define DECL(x) _##x
|
||||||
|
#else
|
||||||
|
#define DECL(x) x
|
||||||
|
#endif
|
||||||
|
|
||||||
|
.global DECL(squareHash)
|
||||||
|
|
||||||
|
DECL(squareHash):
	/* squareHash takes a single 64-bit argument, which the System V AMD64
	   calling convention passes in rdi (rsi would be a second argument and is
	   not set by callers). The shared body in asm/squareHash.inc is also
	   included by the MASM build without any register move, so it presumably
	   expects its input in rcx -- hence the copy below. */
	mov rcx, rdi
	#include "asm/squareHash.inc"
|
9
src/squareHash.asm
Normal file
9
src/squareHash.asm
Normal file
|
@ -0,0 +1,9 @@
|
||||||
|
PUBLIC squareHash
|
||||||
|
|
||||||
|
.code
|
||||||
|
|
||||||
|
squareHash PROC
|
||||||
|
include asm/squareHash.inc
|
||||||
|
squareHash ENDP
|
||||||
|
|
||||||
|
END
|
71
src/squareHash.h
Normal file
71
src/squareHash.h
Normal file
|
@ -0,0 +1,71 @@
|
||||||
|
/*
|
||||||
|
Copyright (c) 2019 tevador
|
||||||
|
|
||||||
|
This file is part of RandomX.
|
||||||
|
|
||||||
|
RandomX is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
RandomX is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#if !defined(_M_X64) && !defined(__x86_64__)
|
||||||
|
|
||||||
|
/* Unsigned 128-bit value stored as two 64-bit halves. */
typedef struct {
	uint64_t lo;
	uint64_t hi;
} uint128_t;

/* Low / high 32 bits of a 64-bit value; only needed inside square128. */
#define LO(x) ((x)&0xffffffff)
#define HI(x) ((x)>>32)
/*
	Returns the full 128-bit square of x.
	x is split into 32-bit halves xh:xl, so that
	x^2 = xh*xh * 2^64 + 2 * xl*xh * 2^32 + xl*xl.
	m1, m2 and m3 collect the partial products for bits 32-63, 64-95 and
	96-127 respectively, each including the carry out of the column below.
*/
static inline uint128_t square128(uint64_t x) {
	uint64_t xh = HI(x), xl = LO(x);
	uint64_t xll = xl * xl;
	uint64_t xlh = xl * xh;
	uint64_t xhh = xh * xh;
	uint64_t m1 = 2 * LO(xlh) + HI(xll);
	uint64_t m2 = 2 * HI(xlh) + LO(xhh) + HI(m1);
	uint64_t m3 = HI(xhh) + HI(m2);

	uint128_t x2;

	x2.lo = (m1 << 32) + LO(xll);
	x2.hi = (m3 << 32) + LO(m2);

	return x2;
}
/* #undef takes only the macro name, without a parameter list. */
#undef LO
#undef HI
|
||||||
|
|
||||||
|
inline uint64_t squareHash(uint64_t x) {
|
||||||
|
x += 1613783669344650115;
|
||||||
|
for (int i = 0; i < 42; ++i) {
|
||||||
|
uint128_t x2 = square128(x);
|
||||||
|
x = x2.lo - x2.hi;
|
||||||
|
}
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#if defined(__cplusplus)
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
uint64_t squareHash(uint64_t);
|
||||||
|
|
||||||
|
#if defined(__cplusplus)
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
Loading…
Reference in a new issue