mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2024-08-15 00:23:14 +00:00
Support for multiple threads
This commit is contained in:
parent
cb12feaf91
commit
b9d2d853aa
13 changed files with 436 additions and 304 deletions
10
makefile
10
makefile
|
@ -9,10 +9,9 @@ endif
|
||||||
BINDIR=bin
|
BINDIR=bin
|
||||||
SRCDIR=src
|
SRCDIR=src
|
||||||
OBJDIR=obj
|
OBJDIR=obj
|
||||||
LDFLAGS=
|
LDFLAGS=-lpthread
|
||||||
TOBJS=$(addprefix $(OBJDIR)/,instructionsPortable.o TestAluFpu.o)
|
TOBJS=$(addprefix $(OBJDIR)/,instructionsPortable.o TestAluFpu.o)
|
||||||
ROBJS=$(addprefix $(OBJDIR)/,argon2_core.o argon2_ref.o AssemblyGeneratorX86.o blake2b.o CompiledVirtualMachine.o dataset.o JitCompilerX86.o instructionsPortable.o Instruction.o InterpretedVirtualMachine.o main.o Program.o softAes.o VirtualMachine.o t1ha2.o)
|
ROBJS=$(addprefix $(OBJDIR)/,argon2_core.o argon2_ref.o AssemblyGeneratorX86.o blake2b.o CompiledVirtualMachine.o dataset.o JitCompilerX86.o instructionsPortable.o Instruction.o InterpretedVirtualMachine.o main.o Program.o softAes.o VirtualMachine.o t1ha2.o Cache.o)
|
||||||
SRC1=$(addprefix $(SRCDIR)/,TestAluFpu.cpp instructions.hpp Pcg32.hpp)
|
|
||||||
|
|
||||||
all: release test
|
all: release test
|
||||||
|
|
||||||
|
@ -52,7 +51,7 @@ $(OBJDIR)/blake2b.o: $(addprefix $(SRCDIR)/blake2/,blake2b.c blake2.h blake2-imp
|
||||||
$(OBJDIR)/CompiledVirtualMachine.o: $(addprefix $(SRCDIR)/,CompiledVirtualMachine.cpp CompiledVirtualMachine.hpp Pcg32.hpp common.hpp instructions.hpp) | $(OBJDIR)
|
$(OBJDIR)/CompiledVirtualMachine.o: $(addprefix $(SRCDIR)/,CompiledVirtualMachine.cpp CompiledVirtualMachine.hpp Pcg32.hpp common.hpp instructions.hpp) | $(OBJDIR)
|
||||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/CompiledVirtualMachine.cpp -o $@
|
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/CompiledVirtualMachine.cpp -o $@
|
||||||
|
|
||||||
$(OBJDIR)/dataset.o: $(addprefix $(SRCDIR)/,dataset.cpp common.hpp Pcg32.hpp argon2_core.h) | $(OBJDIR)
|
$(OBJDIR)/dataset.o: $(addprefix $(SRCDIR)/,dataset.cpp common.hpp Pcg32.hpp) | $(OBJDIR)
|
||||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/dataset.cpp -o $@
|
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/dataset.cpp -o $@
|
||||||
|
|
||||||
$(OBJDIR)/JitCompilerX86.o: $(addprefix $(SRCDIR)/,JitCompilerX86.cpp JitCompilerX86.hpp Instruction.hpp) | $(OBJDIR)
|
$(OBJDIR)/JitCompilerX86.o: $(addprefix $(SRCDIR)/,JitCompilerX86.cpp JitCompilerX86.hpp Instruction.hpp) | $(OBJDIR)
|
||||||
|
@ -73,6 +72,9 @@ $(OBJDIR)/main.o: $(addprefix $(SRCDIR)/,main.cpp InterpretedVirtualMachine.hpp
|
||||||
$(OBJDIR)/Program.o: $(addprefix $(SRCDIR)/,Program.cpp Program.hpp Pcg32.hpp) | $(OBJDIR)
|
$(OBJDIR)/Program.o: $(addprefix $(SRCDIR)/,Program.cpp Program.hpp Pcg32.hpp) | $(OBJDIR)
|
||||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/Program.cpp -o $@
|
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/Program.cpp -o $@
|
||||||
|
|
||||||
|
$(OBJDIR)/Cache.o: $(addprefix $(SRCDIR)/,Cache.cpp Cache.hpp Pcg32.hpp argon2_core.h) | $(OBJDIR)
|
||||||
|
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/Cache.cpp -o $@
|
||||||
|
|
||||||
$(OBJDIR)/softAes.o: $(addprefix $(SRCDIR)/,softAes.cpp softAes.h) | $(OBJDIR)
|
$(OBJDIR)/softAes.o: $(addprefix $(SRCDIR)/,softAes.cpp softAes.h) | $(OBJDIR)
|
||||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/softAes.cpp -o $@
|
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/softAes.cpp -o $@
|
||||||
|
|
||||||
|
|
147
src/Cache.cpp
Normal file
147
src/Cache.cpp
Normal file
|
@ -0,0 +1,147 @@
|
||||||
|
/*
|
||||||
|
Copyright (c) 2018 tevador
|
||||||
|
|
||||||
|
This file is part of RandomX.
|
||||||
|
|
||||||
|
RandomX is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
RandomX is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <cstring>
|
||||||
|
#include "Cache.hpp"
|
||||||
|
#include "softAes.h"
|
||||||
|
#include "argon2.h"
|
||||||
|
#include "Pcg32.hpp"
|
||||||
|
#include "argon2_core.h"
|
||||||
|
|
||||||
|
namespace RandomX {
|
||||||
|
|
||||||
|
//Cache::argonFill computes segment_length as ArgonMemorySize / (ArgonLanes * ARGON2_SYNC_POINTS);
//this check rejects configurations where that division would leave a remainder.
static_assert(ArgonMemorySize % (ArgonLanes * ARGON2_SYNC_POINTS) == 0, "ArgonMemorySize - invalid value");
|
||||||
|
|
||||||
|
// Prefix-xor over the four 32-bit lanes of tmp1:
// sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1)
// Built from the input and three copies of it shifted left by 4, 8 and 12 bytes
// (zero-filled), all xored together.
static inline __m128i sl_xor(__m128i tmp1) {
	const __m128i byOneLane = _mm_slli_si128(tmp1, 4);
	const __m128i byTwoLanes = _mm_slli_si128(tmp1, 8);
	const __m128i byThreeLanes = _mm_slli_si128(tmp1, 12);
	const __m128i lowPair = _mm_xor_si128(tmp1, byOneLane);
	const __m128i highPair = _mm_xor_si128(byTwoLanes, byThreeLanes);
	return _mm_xor_si128(lowPair, highPair);
}
|
||||||
|
|
||||||
|
template<uint8_t rcon, bool soft>
|
||||||
|
static inline void aesGenKeys(__m128i* xout0, __m128i* xout2) {
|
||||||
|
__m128i xout1 = soft ? soft_aeskeygenassist(*xout2, rcon) : _mm_aeskeygenassist_si128(*xout2, rcon);
|
||||||
|
xout1 = _mm_shuffle_epi32(xout1, 0xFF);
|
||||||
|
*xout0 = sl_xor(*xout0);
|
||||||
|
*xout0 = _mm_xor_si128(*xout0, xout1);
|
||||||
|
xout1 = soft ? soft_aeskeygenassist(*xout0, 0x00) : _mm_aeskeygenassist_si128(*xout0, 0x00);
|
||||||
|
xout1 = _mm_shuffle_epi32(xout1, 0xAA);
|
||||||
|
*xout2 = sl_xor(*xout2);
|
||||||
|
*xout2 = _mm_xor_si128(*xout2, xout1);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<bool soft>
|
||||||
|
static inline void expandAesKeys(const __m128i* seed, __m128i* keys) {
|
||||||
|
__m128i xout0, xout2;
|
||||||
|
xout0 = _mm_load_si128(seed);
|
||||||
|
xout2 = _mm_load_si128(seed + 1);
|
||||||
|
*keys++ = xout0;
|
||||||
|
*keys++ = xout2;
|
||||||
|
aesGenKeys<0x01, soft>(&xout0, &xout2);
|
||||||
|
*keys++ = xout0;
|
||||||
|
*keys++ = xout2;
|
||||||
|
aesGenKeys<0x02, soft>(&xout0, &xout2);
|
||||||
|
*keys++ = xout0;
|
||||||
|
*keys++ = xout2;
|
||||||
|
aesGenKeys<0x04, soft>(&xout0, &xout2);
|
||||||
|
*keys++ = xout0;
|
||||||
|
*keys++ = xout2;
|
||||||
|
aesGenKeys<0x08, soft>(&xout0, &xout2);
|
||||||
|
*keys++ = xout0;
|
||||||
|
*keys++ = xout2;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Cache::argonFill(const void* seed, size_t seedSize) {
|
||||||
|
uint32_t memory_blocks, segment_length;
|
||||||
|
argon2_instance_t instance;
|
||||||
|
argon2_context context;
|
||||||
|
|
||||||
|
context.out = nullptr;
|
||||||
|
context.outlen = 0;
|
||||||
|
context.pwd = CONST_CAST(uint8_t *)seed;
|
||||||
|
context.pwdlen = (uint32_t)seedSize;
|
||||||
|
context.salt = CONST_CAST(uint8_t *)ArgonSalt;
|
||||||
|
context.saltlen = (uint32_t)ArgonSaltSize;
|
||||||
|
context.secret = NULL;
|
||||||
|
context.secretlen = 0;
|
||||||
|
context.ad = NULL;
|
||||||
|
context.adlen = 0;
|
||||||
|
context.t_cost = ArgonIterations;
|
||||||
|
context.m_cost = ArgonMemorySize;
|
||||||
|
context.lanes = ArgonLanes;
|
||||||
|
context.threads = 1;
|
||||||
|
context.allocate_cbk = NULL;
|
||||||
|
context.free_cbk = NULL;
|
||||||
|
context.flags = ARGON2_DEFAULT_FLAGS;
|
||||||
|
context.version = ARGON2_VERSION_NUMBER;
|
||||||
|
|
||||||
|
/* 2. Align memory size */
|
||||||
|
/* Minimum memory_blocks = 8L blocks, where L is the number of lanes */
|
||||||
|
memory_blocks = context.m_cost;
|
||||||
|
|
||||||
|
segment_length = memory_blocks / (context.lanes * ARGON2_SYNC_POINTS);
|
||||||
|
|
||||||
|
instance.version = context.version;
|
||||||
|
instance.memory = NULL;
|
||||||
|
instance.passes = context.t_cost;
|
||||||
|
instance.memory_blocks = memory_blocks;
|
||||||
|
instance.segment_length = segment_length;
|
||||||
|
instance.lane_length = segment_length * ARGON2_SYNC_POINTS;
|
||||||
|
instance.lanes = context.lanes;
|
||||||
|
instance.threads = context.threads;
|
||||||
|
instance.type = Argon2_d;
|
||||||
|
instance.memory = (block*)memory;
|
||||||
|
|
||||||
|
if (instance.threads > instance.lanes) {
|
||||||
|
instance.threads = instance.lanes;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 3. Initialization: Hashing inputs, allocating memory, filling first
|
||||||
|
* blocks
|
||||||
|
*/
|
||||||
|
argon_initialize(&instance, &context);
|
||||||
|
|
||||||
|
fill_memory_blocks(&instance);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<bool softAes>
|
||||||
|
void Cache::initialize(const void* seed, size_t seedSize) {
|
||||||
|
//Argon2d memory fill
|
||||||
|
argonFill(seed, seedSize);
|
||||||
|
|
||||||
|
//Circular shift of the cache buffer by 512 bytes
|
||||||
|
//realized by copying the first 512 bytes to the back
|
||||||
|
//of the buffer and shifting the start by 512 bytes
|
||||||
|
memcpy(memory + CacheSize, memory, CacheShift);
|
||||||
|
|
||||||
|
//AES keys
|
||||||
|
expandAesKeys<softAes>((__m128i*)seed, keys.data());
|
||||||
|
}
|
||||||
|
|
||||||
|
//Explicit instantiation for softAes == true; the template body lives in this file only.
template void Cache::initialize<true>(const void*, size_t);
|
||||||
|
|
||||||
|
//Explicit instantiation for softAes == false; the template body lives in this file only.
template void Cache::initialize<false>(const void*, size_t);
|
||||||
|
}
|
57
src/Cache.hpp
Normal file
57
src/Cache.hpp
Normal file
|
@ -0,0 +1,57 @@
|
||||||
|
/*
|
||||||
|
Copyright (c) 2018 tevador
|
||||||
|
|
||||||
|
This file is part of RandomX.
|
||||||
|
|
||||||
|
RandomX is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
RandomX is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <cstdint>
|
||||||
|
#include <new>
|
||||||
|
#include "common.hpp"
|
||||||
|
#include "dataset.hpp"
|
||||||
|
|
||||||
|
namespace RandomX {
|
||||||
|
|
||||||
|
class Cache {
|
||||||
|
public:
|
||||||
|
void* operator new(size_t size) {
|
||||||
|
void* ptr = _mm_malloc(size, sizeof(__m128i));
|
||||||
|
if (ptr == nullptr)
|
||||||
|
throw std::bad_alloc();
|
||||||
|
return ptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
void operator delete(void* ptr) {
|
||||||
|
_mm_free(ptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<bool softAes>
|
||||||
|
void initialize(const void* seed, size_t seedSize);
|
||||||
|
|
||||||
|
const KeysContainer& getKeys() const {
|
||||||
|
return keys;
|
||||||
|
}
|
||||||
|
|
||||||
|
const uint8_t* getCache() {
|
||||||
|
return memory + CacheShift;
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
alignas(16) KeysContainer keys;
|
||||||
|
uint8_t memory[CacheSize + CacheShift];
|
||||||
|
void argonFill(const void* seed, size_t seedSize);
|
||||||
|
};
|
||||||
|
}
|
|
@ -31,11 +31,11 @@ namespace RandomX {
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void CompiledVirtualMachine::initializeDataset(const void* seed, bool lightClient) {
|
void CompiledVirtualMachine::setDataset(dataset_t ds, bool lightClient) {
|
||||||
if (lightClient) {
|
if (lightClient) {
|
||||||
throw std::runtime_error("Compiled VM does not support light-client mode");
|
throw std::runtime_error("Compiled VM does not support light-client mode");
|
||||||
}
|
}
|
||||||
VirtualMachine::initializeDataset(seed, lightClient);
|
VirtualMachine::setDataset(ds, lightClient);
|
||||||
}
|
}
|
||||||
|
|
||||||
void CompiledVirtualMachine::initializeProgram(const void* seed) {
|
void CompiledVirtualMachine::initializeProgram(const void* seed) {
|
||||||
|
|
|
@ -27,7 +27,7 @@ namespace RandomX {
|
||||||
class CompiledVirtualMachine : public VirtualMachine {
|
class CompiledVirtualMachine : public VirtualMachine {
|
||||||
public:
|
public:
|
||||||
CompiledVirtualMachine(bool softAes);
|
CompiledVirtualMachine(bool softAes);
|
||||||
void initializeDataset(const void* seed, bool light = false) override;
|
void setDataset(dataset_t ds, bool light = false) override;
|
||||||
void initializeProgram(const void* seed) override;
|
void initializeProgram(const void* seed) override;
|
||||||
virtual void execute() override;
|
virtual void execute() override;
|
||||||
void* getProgram() {
|
void* getProgram() {
|
||||||
|
|
|
@ -20,58 +20,65 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||||
#include "VirtualMachine.hpp"
|
#include "VirtualMachine.hpp"
|
||||||
#include "common.hpp"
|
#include "common.hpp"
|
||||||
#include "dataset.hpp"
|
#include "dataset.hpp"
|
||||||
|
#include "Cache.hpp"
|
||||||
#include "t1ha/t1ha.h"
|
#include "t1ha/t1ha.h"
|
||||||
#include "blake2/blake2.h"
|
#include "blake2/blake2.h"
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
|
||||||
namespace RandomX {
|
namespace RandomX {
|
||||||
VirtualMachine::VirtualMachine(bool softAes) : softAes(softAes), lightClient(false) {
|
VirtualMachine::VirtualMachine(bool softAes) : softAes(softAes), lightClient(false) {
|
||||||
mem.dataset = nullptr;
|
mem.ds.dataset = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
void VirtualMachine::initializeDataset(const void* seed, bool light) {
|
//Releases the light-client structures created by setDataset. A full dataset or a
//Cache passed in by the caller is not owned by the VM and is left untouched.
VirtualMachine::~VirtualMachine() {
	//lightClient is set before setDataset allocates, so the pointer may still be null
	//if that allocation threw
	if (lightClient && mem.ds.lightDataset != nullptr) {
		//block is obtained from _mm_malloc in setDataset, so it has to be returned
		//with _mm_free; releasing it with delete mismatches the allocator
		_mm_free(mem.ds.lightDataset->block);
		//the LightClientDataset itself is created with new
		delete mem.ds.lightDataset;
	}
}
|
||||||
|
|
||||||
|
void VirtualMachine::setDataset(dataset_t ds, bool light) {
|
||||||
|
if (mem.ds.dataset != nullptr) {
|
||||||
|
throw std::runtime_error("Dataset is already initialized");
|
||||||
}
|
}
|
||||||
_mm_free(mem.dataset);
|
|
||||||
lightClient = light;
|
lightClient = light;
|
||||||
if (light) {
|
if (light) {
|
||||||
|
auto lds = mem.ds.lightDataset = new LightClientDataset();
|
||||||
|
lds->cache = ds.cache;
|
||||||
|
lds->block = (uint8_t*)_mm_malloc(DatasetBlockSize, sizeof(__m128i));
|
||||||
|
lds->blockNumber = -1;
|
||||||
|
if (lds->block == nullptr) {
|
||||||
|
throw std::bad_alloc();
|
||||||
|
}
|
||||||
if (softAes) {
|
if (softAes) {
|
||||||
datasetInitLight<true>(seed, mem.lcm);
|
|
||||||
readDataset = &datasetReadLight<true>;
|
readDataset = &datasetReadLight<true>;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
datasetInitLight<false>(seed, mem.lcm);
|
|
||||||
readDataset = &datasetReadLight<false>;
|
readDataset = &datasetReadLight<false>;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
mem.ds = ds;
|
||||||
readDataset = &datasetRead;
|
readDataset = &datasetRead;
|
||||||
if (softAes) {
|
|
||||||
datasetInit<true>(seed, mem.dataset);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
datasetInit<false>(seed, mem.dataset);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void VirtualMachine::initializeScratchpad(uint32_t index) {
|
void VirtualMachine::initializeScratchpad(uint32_t index) {
|
||||||
if (lightClient) {
|
if (lightClient) {
|
||||||
|
auto cache = mem.ds.lightDataset->cache;
|
||||||
if (softAes) {
|
if (softAes) {
|
||||||
for (int i = 0; i < ScratchpadSize / DatasetBlockSize; ++i) {
|
for (int i = 0; i < ScratchpadSize / DatasetBlockSize; ++i) {
|
||||||
initBlock<true>(mem.lcm->cache + CacheShift, ((uint8_t*)scratchpad) + DatasetBlockSize * i, (ScratchpadSize / DatasetBlockSize) * index + i, mem.lcm->keys);
|
initBlock<true>(cache->getCache(), ((uint8_t*)scratchpad) + DatasetBlockSize * i, (ScratchpadSize / DatasetBlockSize) * index + i, cache->getKeys());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
for (int i = 0; i < ScratchpadSize / DatasetBlockSize; ++i) {
|
for (int i = 0; i < ScratchpadSize / DatasetBlockSize; ++i) {
|
||||||
initBlock<false>(mem.lcm->cache + CacheShift, ((uint8_t*)scratchpad) + DatasetBlockSize * i, (ScratchpadSize / DatasetBlockSize) * index + i, mem.lcm->keys);
|
initBlock<false>(cache->getCache(), ((uint8_t*)scratchpad) + DatasetBlockSize * i, (ScratchpadSize / DatasetBlockSize) * index + i, cache->getKeys());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
memcpy(scratchpad, mem.dataset + ScratchpadSize * index, ScratchpadSize);
|
memcpy(scratchpad, mem.ds.dataset + ScratchpadSize * index, ScratchpadSize);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -26,30 +26,12 @@ namespace RandomX {
|
||||||
class VirtualMachine {
|
class VirtualMachine {
|
||||||
public:
|
public:
|
||||||
VirtualMachine(bool softAes);
|
VirtualMachine(bool softAes);
|
||||||
virtual ~VirtualMachine() {}
|
virtual ~VirtualMachine();
|
||||||
virtual void initializeDataset(const void* seed, bool light = false);
|
virtual void setDataset(dataset_t ds, bool light = false);
|
||||||
void initializeScratchpad(uint32_t index);
|
void initializeScratchpad(uint32_t index);
|
||||||
virtual void initializeProgram(const void* seed) = 0;
|
virtual void initializeProgram(const void* seed) = 0;
|
||||||
virtual void execute() = 0;
|
virtual void execute() = 0;
|
||||||
void getResult(void*);
|
void getResult(void*);
|
||||||
const RegisterFile& getRegisterFile() const {
|
|
||||||
return reg;
|
|
||||||
}
|
|
||||||
const convertible_t* getScratchpad() const {
|
|
||||||
return scratchpad;
|
|
||||||
}
|
|
||||||
const void* getCache() {
|
|
||||||
if (lightClient) {
|
|
||||||
return mem.lcm->cache;
|
|
||||||
}
|
|
||||||
return nullptr;
|
|
||||||
}
|
|
||||||
const __m128i* getKeys() {
|
|
||||||
if (lightClient) {
|
|
||||||
return mem.lcm->keys;
|
|
||||||
}
|
|
||||||
return nullptr;
|
|
||||||
}
|
|
||||||
protected:
|
protected:
|
||||||
bool softAes, lightClient;
|
bool softAes, lightClient;
|
||||||
RegisterFile reg;
|
RegisterFile reg;
|
||||||
|
|
|
@ -473,7 +473,7 @@ void initial_hash(uint8_t *blockhash, argon2_context *context, argon2_type type)
|
||||||
blake2b_final(&BlakeHash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH);
|
blake2b_final(&BlakeHash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH);
|
||||||
}
|
}
|
||||||
|
|
||||||
int initialize(argon2_instance_t *instance, argon2_context *context) {
|
int argon_initialize(argon2_instance_t *instance, argon2_context *context) {
|
||||||
uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH];
|
uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH];
|
||||||
int result = ARGON2_OK;
|
int result = ARGON2_OK;
|
||||||
|
|
||||||
|
|
|
@ -204,7 +204,7 @@ void fill_first_blocks(uint8_t *blockhash, const argon2_instance_t *instance);
|
||||||
* @return Zero if successful, -1 if memory failed to allocate. @context->state
|
* @return Zero if successful, -1 if memory failed to allocate. @context->state
|
||||||
* will be modified if successful.
|
* will be modified if successful.
|
||||||
*/
|
*/
|
||||||
int initialize(argon2_instance_t *instance, argon2_context *context);
|
int argon_initialize(argon2_instance_t *instance, argon2_context *context);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* XORing the last block of each lane, hashing it, making the tag. Deallocates
|
* XORing the last block of each lane, hashing it, making the tag. Deallocates
|
||||||
|
|
|
@ -20,8 +20,6 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <new>
|
|
||||||
#include "intrinPortable.h"
|
|
||||||
|
|
||||||
namespace RandomX {
|
namespace RandomX {
|
||||||
|
|
||||||
|
@ -55,13 +53,13 @@ namespace RandomX {
|
||||||
constexpr bool trace = false;
|
constexpr bool trace = false;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
typedef union {
|
union convertible_t {
|
||||||
double f64;
|
double f64;
|
||||||
int64_t i64;
|
int64_t i64;
|
||||||
uint64_t u64;
|
uint64_t u64;
|
||||||
int32_t i32;
|
int32_t i32;
|
||||||
uint32_t u32;
|
uint32_t u32;
|
||||||
} convertible_t;
|
};
|
||||||
|
|
||||||
constexpr int ProgramLength = 512;
|
constexpr int ProgramLength = 512;
|
||||||
constexpr int InstructionCount = 1024 * 1024;
|
constexpr int InstructionCount = 1024 * 1024;
|
||||||
|
@ -71,34 +69,27 @@ namespace RandomX {
|
||||||
constexpr uint32_t ScratchpadL2 = ScratchpadSize / sizeof(convertible_t);
|
constexpr uint32_t ScratchpadL2 = ScratchpadSize / sizeof(convertible_t);
|
||||||
constexpr int RegistersCount = 8;
|
constexpr int RegistersCount = 8;
|
||||||
|
|
||||||
|
class Cache;
|
||||||
|
|
||||||
inline int wrapInstr(int i) {
|
inline int wrapInstr(int i) {
|
||||||
return i % RandomX::ProgramLength;
|
return i % RandomX::ProgramLength;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct LightClientMemory {
|
struct LightClientDataset {
|
||||||
uint8_t* cache;
|
Cache* cache;
|
||||||
uint8_t* block;
|
uint8_t* block;
|
||||||
uint32_t blockNumber;
|
uint32_t blockNumber;
|
||||||
alignas(16) __m128i keys[10];
|
};
|
||||||
|
|
||||||
void* operator new(size_t size) {
|
union dataset_t {
|
||||||
void* ptr = _mm_malloc(size, sizeof(__m128i));
|
uint8_t* dataset;
|
||||||
if (ptr == nullptr)
|
Cache* cache;
|
||||||
throw std::bad_alloc();
|
LightClientDataset* lightDataset;
|
||||||
return ptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
void operator delete(void* ptr) {
|
|
||||||
_mm_free(ptr);
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct MemoryRegisters {
|
struct MemoryRegisters {
|
||||||
addr_t ma, mx;
|
addr_t ma, mx;
|
||||||
union {
|
dataset_t ds;
|
||||||
uint8_t* dataset;
|
|
||||||
LightClientMemory* lcm;
|
|
||||||
};
|
|
||||||
};
|
};
|
||||||
|
|
||||||
static_assert(sizeof(MemoryRegisters) == 2 * sizeof(addr_t) + sizeof(uintptr_t), "Invalid alignment of struct RandomX::MemoryRegisters");
|
static_assert(sizeof(MemoryRegisters) == 2 * sizeof(addr_t) + sizeof(uintptr_t), "Invalid alignment of struct RandomX::MemoryRegisters");
|
||||||
|
|
216
src/dataset.cpp
216
src/dataset.cpp
|
@ -19,135 +19,25 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
// Parts of this file are originally copyright (c) xmr-stak
|
// Parts of this file are originally copyright (c) xmr-stak
|
||||||
|
|
||||||
#include "common.hpp"
|
|
||||||
#include "dataset.hpp"
|
|
||||||
#include "Pcg32.hpp"
|
|
||||||
#include "argon2_core.h"
|
|
||||||
#include <new>
|
#include <new>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
|
||||||
#if defined(_MSC_VER)
|
#include "common.hpp"
|
||||||
#if defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP == 2)
|
#include "dataset.hpp"
|
||||||
#define __SSE2__ 1
|
#include "Pcg32.hpp"
|
||||||
#endif
|
#include "Cache.hpp"
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(__SSE2__)
|
#if defined(__SSE2__)
|
||||||
#include <wmmintrin.h>
|
#include <wmmintrin.h>
|
||||||
#define PREFETCH(memory) _mm_prefetch((const char *)((memory).dataset + (memory).ma), _MM_HINT_T0)
|
#define PREFETCH(memory) _mm_prefetch((const char *)((memory).ds.dataset + (memory).ma), _MM_HINT_T0)
|
||||||
#else
|
#else
|
||||||
#define PREFETCH(memory)
|
#define PREFETCH(memory)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
namespace RandomX {
|
namespace RandomX {
|
||||||
|
|
||||||
void initializeCache(const void* input, size_t inputLength, void* memory) {
|
|
||||||
uint32_t memory_blocks, segment_length;
|
|
||||||
argon2_instance_t instance;
|
|
||||||
argon2_context context;
|
|
||||||
|
|
||||||
context.out = nullptr;
|
|
||||||
context.outlen = 0;
|
|
||||||
context.pwd = CONST_CAST(uint8_t *)input;
|
|
||||||
context.pwdlen = (uint32_t)inputLength;
|
|
||||||
context.salt = CONST_CAST(uint8_t *)ArgonSalt;
|
|
||||||
context.saltlen = (uint32_t)ArgonSaltSize;
|
|
||||||
context.secret = NULL;
|
|
||||||
context.secretlen = 0;
|
|
||||||
context.ad = NULL;
|
|
||||||
context.adlen = 0;
|
|
||||||
context.t_cost = ArgonIterations;
|
|
||||||
context.m_cost = ArgonMemorySize;
|
|
||||||
context.lanes = ArgonLanes;
|
|
||||||
context.threads = 1;
|
|
||||||
context.allocate_cbk = NULL;
|
|
||||||
context.free_cbk = NULL;
|
|
||||||
context.flags = ARGON2_DEFAULT_FLAGS;
|
|
||||||
context.version = ARGON2_VERSION_NUMBER;
|
|
||||||
|
|
||||||
/* 2. Align memory size */
|
|
||||||
/* Minimum memory_blocks = 8L blocks, where L is the number of lanes */
|
|
||||||
memory_blocks = context.m_cost;
|
|
||||||
|
|
||||||
segment_length = memory_blocks / (context.lanes * ARGON2_SYNC_POINTS);
|
|
||||||
|
|
||||||
instance.version = context.version;
|
|
||||||
instance.memory = NULL;
|
|
||||||
instance.passes = context.t_cost;
|
|
||||||
instance.memory_blocks = memory_blocks;
|
|
||||||
instance.segment_length = segment_length;
|
|
||||||
instance.lane_length = segment_length * ARGON2_SYNC_POINTS;
|
|
||||||
instance.lanes = context.lanes;
|
|
||||||
instance.threads = context.threads;
|
|
||||||
instance.type = Argon2_d;
|
|
||||||
instance.memory = (block*)memory;
|
|
||||||
|
|
||||||
if (instance.threads > instance.lanes) {
|
|
||||||
instance.threads = instance.lanes;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* 3. Initialization: Hashing inputs, allocating memory, filling first
|
|
||||||
* blocks
|
|
||||||
*/
|
|
||||||
initialize(&instance, &context);
|
|
||||||
|
|
||||||
fill_memory_blocks(&instance);
|
|
||||||
}
|
|
||||||
|
|
||||||
// This will shift and xor tmp1 into itself as 4 32-bit vals such as
|
|
||||||
// sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1)
|
|
||||||
static inline __m128i sl_xor(__m128i tmp1) {
|
|
||||||
__m128i tmp4;
|
|
||||||
tmp4 = _mm_slli_si128(tmp1, 0x04);
|
|
||||||
tmp1 = _mm_xor_si128(tmp1, tmp4);
|
|
||||||
tmp4 = _mm_slli_si128(tmp4, 0x04);
|
|
||||||
tmp1 = _mm_xor_si128(tmp1, tmp4);
|
|
||||||
tmp4 = _mm_slli_si128(tmp4, 0x04);
|
|
||||||
tmp1 = _mm_xor_si128(tmp1, tmp4);
|
|
||||||
return tmp1;
|
|
||||||
}
|
|
||||||
|
|
||||||
template<uint8_t rcon, bool soft>
|
|
||||||
static inline void aes_genkey_sub(__m128i* xout0, __m128i* xout2) {
|
|
||||||
__m128i xout1 = soft ? soft_aeskeygenassist(*xout2, rcon) : _mm_aeskeygenassist_si128(*xout2, rcon);
|
|
||||||
xout1 = _mm_shuffle_epi32(xout1, 0xFF);
|
|
||||||
*xout0 = sl_xor(*xout0);
|
|
||||||
*xout0 = _mm_xor_si128(*xout0, xout1);
|
|
||||||
xout1 = soft ? soft_aeskeygenassist(*xout0, 0x00) : _mm_aeskeygenassist_si128(*xout0, 0x00);
|
|
||||||
xout1 = _mm_shuffle_epi32(xout1, 0xAA);
|
|
||||||
*xout2 = sl_xor(*xout2);
|
|
||||||
*xout2 = _mm_xor_si128(*xout2, xout1);
|
|
||||||
}
|
|
||||||
|
|
||||||
template<bool soft>
|
|
||||||
void expandAesKeys(const __m128i* seed, __m128i* keys) {
|
|
||||||
__m128i xout0, xout2;
|
|
||||||
xout0 = _mm_load_si128(seed);
|
|
||||||
xout2 = _mm_load_si128(seed + 1);
|
|
||||||
*keys++ = xout0;
|
|
||||||
*keys++ = xout2;
|
|
||||||
aes_genkey_sub<0x01, soft>(&xout0, &xout2);
|
|
||||||
*keys++ = xout0;
|
|
||||||
*keys++ = xout2;
|
|
||||||
aes_genkey_sub<0x02, soft>(&xout0, &xout2);
|
|
||||||
*keys++ = xout0;
|
|
||||||
*keys++ = xout2;
|
|
||||||
aes_genkey_sub<0x04, soft>(&xout0, &xout2);
|
|
||||||
*keys++ = xout0;
|
|
||||||
*keys++ = xout2;
|
|
||||||
aes_genkey_sub<0x08, soft>(&xout0, &xout2);
|
|
||||||
*keys++ = xout0;
|
|
||||||
*keys++ = xout2;
|
|
||||||
}
|
|
||||||
|
|
||||||
template
|
|
||||||
void expandAesKeys<true>(const __m128i* seed, __m128i* keys);
|
|
||||||
|
|
||||||
template
|
|
||||||
void expandAesKeys<false>(const __m128i* seed, __m128i* keys);
|
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
static inline void shuffle(T* buffer, size_t bytes, Pcg32& gen) {
|
static inline void shuffle(T* buffer, size_t bytes, Pcg32& gen) {
|
||||||
auto count = bytes / sizeof(T);
|
auto count = bytes / sizeof(T);
|
||||||
|
@ -157,8 +47,18 @@ namespace RandomX {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<bool soft>
|
||||||
|
static inline __m128i aesenc(__m128i in, __m128i key) {
|
||||||
|
return soft ? soft_aesenc(in, key) : _mm_aesenc_si128(in, key);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<bool soft>
|
||||||
|
static inline __m128i aesdec(__m128i in, __m128i key) {
|
||||||
|
return soft ? soft_aesdec(in, key) : _mm_aesdec_si128(in, key);
|
||||||
|
}
|
||||||
|
|
||||||
template<bool soft, bool enc>
|
template<bool soft, bool enc>
|
||||||
void initBlock(uint8_t* in, uint8_t* out, uint32_t blockNumber, const __m128i keys[10]) {
|
void initBlock(const uint8_t* in, uint8_t* out, uint32_t blockNumber, const KeysContainer& keys) {
|
||||||
__m128i xin, xout;
|
__m128i xin, xout;
|
||||||
//Initialization vector = block number extended to 128 bits
|
//Initialization vector = block number extended to 128 bits
|
||||||
xout = _mm_cvtsi32_si128(blockNumber);
|
xout = _mm_cvtsi32_si128(blockNumber);
|
||||||
|
@ -200,20 +100,20 @@ namespace RandomX {
|
||||||
}
|
}
|
||||||
|
|
||||||
template
|
template
|
||||||
void initBlock<true, true>(uint8_t* in, uint8_t* out, uint32_t blockNumber, const __m128i keys[10]);
|
void initBlock<true, true>(const uint8_t*, uint8_t*, uint32_t, const KeysContainer&);
|
||||||
|
|
||||||
template
|
template
|
||||||
void initBlock<true, false>(uint8_t* in, uint8_t* out, uint32_t blockNumber, const __m128i keys[10]);
|
void initBlock<true, false>(const uint8_t*, uint8_t*, uint32_t, const KeysContainer&);
|
||||||
|
|
||||||
template
|
template
|
||||||
void initBlock<false, true>(uint8_t* in, uint8_t* out, uint32_t blockNumber, const __m128i keys[10]);
|
void initBlock<false, true>(const uint8_t*, uint8_t*, uint32_t, const KeysContainer&);
|
||||||
|
|
||||||
template
|
template
|
||||||
void initBlock<false, false>(uint8_t* in, uint8_t* out, uint32_t blockNumber, const __m128i keys[10]);
|
void initBlock<false, false>(const uint8_t*, uint8_t*, uint32_t, const KeysContainer&);
|
||||||
|
|
||||||
convertible_t datasetRead(addr_t addr, MemoryRegisters& memory) {
|
convertible_t datasetRead(addr_t addr, MemoryRegisters& memory) {
|
||||||
convertible_t data;
|
convertible_t data;
|
||||||
data.u64 = *(uint64_t*)(memory.dataset + memory.ma);
|
data.u64 = *(uint64_t*)(memory.ds.dataset + memory.ma);
|
||||||
memory.ma += 8;
|
memory.ma += 8;
|
||||||
memory.mx ^= addr;
|
memory.mx ^= addr;
|
||||||
if ((memory.mx & 0xFFF8) == 0) {
|
if ((memory.mx & 0xFFF8) == 0) {
|
||||||
|
@ -224,24 +124,25 @@ namespace RandomX {
|
||||||
}
|
}
|
||||||
|
|
||||||
template<bool softAes>
|
template<bool softAes>
|
||||||
void initBlock(uint8_t* cache, uint8_t* block, uint32_t blockNumber, const __m128i k[10]) {
|
void initBlock(const uint8_t* cache, uint8_t* block, uint32_t blockNumber, const KeysContainer& keys) {
|
||||||
if (blockNumber % 2 == 1) {
|
if (blockNumber % 2 == 1) {
|
||||||
initBlock<softAes, true>(cache + blockNumber * CacheBlockSize, block, blockNumber, k);
|
initBlock<softAes, true>(cache + blockNumber * CacheBlockSize, block, blockNumber, keys);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
initBlock<softAes, false>(cache + blockNumber * CacheBlockSize, block, blockNumber, k);
|
initBlock<softAes, false>(cache + blockNumber * CacheBlockSize, block, blockNumber, keys);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template<bool softAes>
|
template<bool softAes>
|
||||||
convertible_t datasetReadLight(addr_t addr, MemoryRegisters& memory) {
|
convertible_t datasetReadLight(addr_t addr, MemoryRegisters& memory) {
|
||||||
convertible_t data;
|
convertible_t data;
|
||||||
|
LightClientDataset* lds = memory.ds.lightDataset;
|
||||||
auto blockNumber = memory.ma / DatasetBlockSize;
|
auto blockNumber = memory.ma / DatasetBlockSize;
|
||||||
if (memory.lcm->blockNumber != blockNumber) {
|
if (lds->blockNumber != blockNumber) {
|
||||||
initBlock<softAes>(memory.lcm->cache + CacheShift, (uint8_t*)memory.lcm->block, blockNumber, memory.lcm->keys);
|
initBlock<softAes>(lds->cache->getCache(), (uint8_t*)lds->block, blockNumber, lds->cache->getKeys());
|
||||||
memory.lcm->blockNumber = blockNumber;
|
lds->blockNumber = blockNumber;
|
||||||
}
|
}
|
||||||
data.u64 = *(uint64_t*)(memory.lcm->block + (memory.ma % DatasetBlockSize));
|
data.u64 = *(uint64_t*)(lds->block + (memory.ma % DatasetBlockSize));
|
||||||
memory.ma += 8;
|
memory.ma += 8;
|
||||||
memory.mx ^= addr;
|
memory.mx ^= addr;
|
||||||
if ((memory.mx & 0xFFF8) == 0) {
|
if ((memory.mx & 0xFFF8) == 0) {
|
||||||
|
@ -256,54 +157,37 @@ namespace RandomX {
|
||||||
template
|
template
|
||||||
convertible_t datasetReadLight<true>(addr_t addr, MemoryRegisters& memory);
|
convertible_t datasetReadLight<true>(addr_t addr, MemoryRegisters& memory);
|
||||||
|
|
||||||
template<bool softAes>
|
void datasetAlloc(dataset_t& ds) {
|
||||||
void datasetInit(const void* seed, uint8_t*& dataset) {
|
|
||||||
if (sizeof(size_t) <= 4)
|
if (sizeof(size_t) <= 4)
|
||||||
throw std::runtime_error("Platform doesn't support enough memory for the dataset");
|
throw std::runtime_error("Platform doesn't support enough memory for the dataset");
|
||||||
dataset = (uint8_t*)_mm_malloc(DatasetSize, sizeof(__m128i));
|
ds.dataset = (uint8_t*)_mm_malloc(DatasetSize, /*sizeof(__m128i)*/ 64);
|
||||||
if (dataset == nullptr) {
|
if (ds.dataset == nullptr) {
|
||||||
throw std::runtime_error("Dataset memory allocation failed. >4 GiB of virtual memory is needed.");
|
throw std::runtime_error("Dataset memory allocation failed. >4 GiB of free virtual memory is needed.");
|
||||||
}
|
}
|
||||||
uint8_t* cache = (uint8_t*)_mm_malloc(CacheSize + CacheShift, sizeof(__m128i));
|
|
||||||
if (cache == nullptr) {
|
|
||||||
throw std::bad_alloc();
|
|
||||||
}
|
}
|
||||||
initializeCache(seed, SeedSize, cache);
|
|
||||||
memcpy(cache + CacheSize, cache, CacheShift);
|
|
||||||
alignas(16) __m128i keys[10];
|
|
||||||
expandAesKeys<softAes>((const __m128i*)seed, keys);
|
|
||||||
for (uint32_t i = 0; i < DatasetBlockCount; ++i) {
|
|
||||||
initBlock<softAes>(cache + CacheShift, dataset + i * DatasetBlockSize, i, keys);
|
|
||||||
}
|
|
||||||
_mm_free(cache);
|
|
||||||
}
|
|
||||||
|
|
||||||
template
|
|
||||||
void datasetInit<false>(const void*, uint8_t*&);
|
|
||||||
|
|
||||||
template
|
|
||||||
void datasetInit<true>(const void*, uint8_t*&);
|
|
||||||
|
|
||||||
template<bool softAes>
|
template<bool softAes>
|
||||||
void datasetInitLight(const void* seed, LightClientMemory*& lcm) {
|
void datasetInit(Cache* cache, dataset_t ds, uint32_t startBlock, uint32_t blockCount) {
|
||||||
lcm = new LightClientMemory();
|
for (uint32_t i = startBlock; i < startBlock + blockCount; ++i) {
|
||||||
lcm->cache = (uint8_t*)_mm_malloc(CacheSize + CacheShift, sizeof(__m128i));
|
initBlock<softAes>(cache->getCache(), ds.dataset + i * DatasetBlockSize, i, cache->getKeys());
|
||||||
if (lcm->cache == nullptr) {
|
|
||||||
throw std::bad_alloc();
|
|
||||||
}
|
}
|
||||||
initializeCache(seed, SeedSize, lcm->cache);
|
|
||||||
memcpy(lcm->cache + CacheSize, lcm->cache, CacheShift);
|
|
||||||
expandAesKeys<softAes>((__m128i*)seed, lcm->keys);
|
|
||||||
lcm->block = (uint8_t*)_mm_malloc(DatasetBlockSize, sizeof(__m128i));
|
|
||||||
if (lcm->block == nullptr) {
|
|
||||||
throw std::bad_alloc();
|
|
||||||
}
|
|
||||||
lcm->blockNumber = -1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template
|
template
|
||||||
void datasetInitLight<false>(const void*, LightClientMemory*&);
|
void datasetInit<false>(Cache*, dataset_t, uint32_t, uint32_t);
|
||||||
|
|
||||||
template
|
template
|
||||||
void datasetInitLight<true>(const void*, LightClientMemory*&);
|
void datasetInit<true>(Cache*, dataset_t, uint32_t, uint32_t);
|
||||||
|
|
||||||
|
template<bool softAes>
|
||||||
|
void datasetInitCache(const void* seed, dataset_t& ds) {
|
||||||
|
ds.cache = new Cache();
|
||||||
|
ds.cache->initialize<softAes>(seed, SeedSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
template
|
||||||
|
void datasetInitCache<false>(const void*, dataset_t&);
|
||||||
|
|
||||||
|
template
|
||||||
|
void datasetInitCache<true>(const void*, dataset_t&);
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,43 +20,30 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
|
#include <array>
|
||||||
#include "intrinPortable.h"
|
#include "intrinPortable.h"
|
||||||
#include "argon2.h"
|
|
||||||
#include "common.hpp"
|
#include "common.hpp"
|
||||||
#include "softAes.h"
|
#include "softAes.h"
|
||||||
|
|
||||||
namespace RandomX {
|
namespace RandomX {
|
||||||
|
|
||||||
static_assert(ArgonMemorySize % (ArgonLanes * ARGON2_SYNC_POINTS) == 0, "ArgonMemorySize - invalid value");
|
using KeysContainer = std::array<__m128i, 10>;
|
||||||
|
|
||||||
void initializeCache(const void* input, size_t inputLength, void* memory);
|
|
||||||
|
|
||||||
template<bool soft>
|
|
||||||
void expandAesKeys(const __m128i* seed, __m128i* keys);
|
|
||||||
|
|
||||||
template<bool soft>
|
|
||||||
inline __m128i aesenc(__m128i in, __m128i key) {
|
|
||||||
return soft ? soft_aesenc(in, key) : _mm_aesenc_si128(in, key);
|
|
||||||
}
|
|
||||||
|
|
||||||
template<bool soft>
|
|
||||||
inline __m128i aesdec(__m128i in, __m128i key) {
|
|
||||||
return soft ? soft_aesdec(in, key) : _mm_aesdec_si128(in, key);
|
|
||||||
}
|
|
||||||
|
|
||||||
template<bool soft, bool enc>
|
template<bool soft, bool enc>
|
||||||
void initBlock(uint8_t* in, uint8_t* out, uint32_t blockNumber, const __m128i keys[10]);
|
void initBlock(const uint8_t* in, uint8_t* out, uint32_t blockNumber, const KeysContainer& keys);
|
||||||
|
|
||||||
template<bool softAes>
|
template<bool softAes>
|
||||||
void initBlock(uint8_t* cache, uint8_t* block, uint32_t blockNumber, const __m128i keys[10]);
|
void initBlock(const uint8_t* cache, uint8_t* block, uint32_t blockNumber, const KeysContainer& keys);
|
||||||
|
|
||||||
|
void datasetAlloc(dataset_t& ds);
|
||||||
|
|
||||||
template<bool softAes>
|
template<bool softAes>
|
||||||
void datasetInit(const void* seed, uint8_t*& dataset);
|
void datasetInit(Cache* cache, dataset_t ds, uint32_t startBlock, uint32_t blockCount);
|
||||||
|
|
||||||
convertible_t datasetRead(addr_t addr, MemoryRegisters& memory);
|
convertible_t datasetRead(addr_t addr, MemoryRegisters& memory);
|
||||||
|
|
||||||
template<bool softAes>
|
template<bool softAes>
|
||||||
void datasetInitLight(const void* seed, LightClientMemory*& lcm);
|
void datasetInitCache(const void* seed, dataset_t& dataset);
|
||||||
|
|
||||||
template<bool softAes>
|
template<bool softAes>
|
||||||
convertible_t datasetReadLight(addr_t addr, MemoryRegisters& memory);
|
convertible_t datasetReadLight(addr_t addr, MemoryRegisters& memory);
|
||||||
|
|
173
src/main.cpp
173
src/main.cpp
|
@ -30,6 +30,10 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||||
#include "Program.hpp"
|
#include "Program.hpp"
|
||||||
#include <string>
|
#include <string>
|
||||||
#include "instructions.hpp"
|
#include "instructions.hpp"
|
||||||
|
#include <thread>
|
||||||
|
#include <atomic>
|
||||||
|
#include "dataset.hpp"
|
||||||
|
#include "Cache.hpp"
|
||||||
|
|
||||||
const uint8_t seed[32] = { 191, 182, 222, 175, 249, 89, 134, 104, 241, 68, 191, 62, 162, 166, 61, 64, 123, 191, 227, 193, 118, 60, 188, 53, 223, 133, 175, 24, 123, 230, 55, 74 };
|
const uint8_t seed[32] = { 191, 182, 222, 175, 249, 89, 134, 104, 241, 68, 191, 62, 162, 166, 61, 64, 123, 191, 227, 193, 118, 60, 188, 53, 223, 133, 175, 24, 123, 230, 55, 74 };
|
||||||
|
|
||||||
|
@ -45,7 +49,6 @@ void outputHex(std::ostream& os, const char* data, int length) {
|
||||||
os << hexmap[(data[i] & 0xF0) >> 4];
|
os << hexmap[(data[i] & 0xF0) >> 4];
|
||||||
os << hexmap[data[i] & 0x0F];
|
os << hexmap[data[i] & 0x0F];
|
||||||
}
|
}
|
||||||
os << std::endl;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void readOption(const char* option, int argc, char** argv, bool& out) {
|
void readOption(const char* option, int argc, char** argv, bool& out) {
|
||||||
|
@ -58,6 +61,15 @@ void readOption(const char* option, int argc, char** argv, bool& out) {
|
||||||
out = false;
|
out = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void readIntOption(const char* option, int argc, char** argv, int& out, int defaultValue) {
|
||||||
|
for (int i = 0; i < argc - 1; ++i) {
|
||||||
|
if (strcmp(argv[i], option) == 0 && (out = atoi(argv[i + 1])) > 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
out = defaultValue;
|
||||||
|
}
|
||||||
|
|
||||||
void readInt(int argc, char** argv, int& out, int defaultValue) {
|
void readInt(int argc, char** argv, int& out, int defaultValue) {
|
||||||
for (int i = 0; i < argc; ++i) {
|
for (int i = 0; i < argc; ++i) {
|
||||||
if (*argv[i] != '-' && (out = atoi(argv[i])) > 0) {
|
if (*argv[i] != '-' && (out = atoi(argv[i])) > 0) {
|
||||||
|
@ -75,81 +87,144 @@ std::ostream& operator<<(std::ostream& os, const RandomX::RegisterFile& rf) {
|
||||||
return os;
|
return os;
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
class AtomicHash {
|
||||||
bool softAes, lightClient, genAsm, compiled;
|
public:
|
||||||
int programCount;
|
AtomicHash() {
|
||||||
readOption("--softAes", argc, argv, softAes);
|
for (int i = 0; i < 4; ++i)
|
||||||
readOption("--lightClient", argc, argv, lightClient);
|
hash[i].store(0);
|
||||||
readOption("--genAsm", argc, argv, genAsm);
|
}
|
||||||
readOption("--compiled", argc, argv, compiled);
|
void xorWith(uint64_t update[4]) {
|
||||||
readInt(argc, argv, programCount, 1000);
|
for (int i = 0; i < 4; ++i)
|
||||||
|
hash[i].fetch_xor(update[i]);
|
||||||
|
}
|
||||||
|
void print(std::ostream& os) {
|
||||||
|
for (int i = 0; i < 4; ++i)
|
||||||
|
print(hash[i], os);
|
||||||
|
os << std::endl;
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
void print(std::atomic<uint64_t>& hash, std::ostream& os) {
|
||||||
|
auto h = hash.load();
|
||||||
|
outputHex(std::cout, (char*)&h, sizeof(h));
|
||||||
|
}
|
||||||
|
std::atomic<uint64_t> hash[4];
|
||||||
|
};
|
||||||
|
|
||||||
|
void mine(RandomX::VirtualMachine* vm, std::atomic<int>& atomicNonce, AtomicHash& result, int noncesCount, int thread) {
|
||||||
|
uint64_t hash[4];
|
||||||
unsigned char blockTemplate[] = {
|
unsigned char blockTemplate[] = {
|
||||||
0x07, 0x07, 0xf7, 0xa4, 0xf0, 0xd6, 0x05, 0xb3, 0x03, 0x26, 0x08, 0x16, 0xba, 0x3f, 0x10, 0x90, 0x2e, 0x1a, 0x14,
|
0x07, 0x07, 0xf7, 0xa4, 0xf0, 0xd6, 0x05, 0xb3, 0x03, 0x26, 0x08, 0x16, 0xba, 0x3f, 0x10, 0x90, 0x2e, 0x1a, 0x14,
|
||||||
0x5a, 0xc5, 0xfa, 0xd3, 0xaa, 0x3a, 0xf6, 0xea, 0x44, 0xc1, 0x18, 0x69, 0xdc, 0x4f, 0x85, 0x3f, 0x00, 0x2b, 0x2e,
|
0x5a, 0xc5, 0xfa, 0xd3, 0xaa, 0x3a, 0xf6, 0xea, 0x44, 0xc1, 0x18, 0x69, 0xdc, 0x4f, 0x85, 0x3f, 0x00, 0x2b, 0x2e,
|
||||||
0xea, 0x00, 0x00, 0x00, 0x00, 0x77, 0xb2, 0x06, 0xa0, 0x2c, 0xa5, 0xb1, 0xd4, 0xce, 0x6b, 0xbf, 0xdf, 0x0a, 0xca,
|
0xea, 0x00, 0x00, 0x00, 0x00, 0x77, 0xb2, 0x06, 0xa0, 0x2c, 0xa5, 0xb1, 0xd4, 0xce, 0x6b, 0xbf, 0xdf, 0x0a, 0xca,
|
||||||
0xc3, 0x8b, 0xde, 0xd3, 0x4d, 0x2d, 0xcd, 0xee, 0xf9, 0x5c, 0xd2, 0x0c, 0xef, 0xc1, 0x2f, 0x61, 0xd5, 0x61, 0x09
|
0xc3, 0x8b, 0xde, 0xd3, 0x4d, 0x2d, 0xcd, 0xee, 0xf9, 0x5c, 0xd2, 0x0c, 0xef, 0xc1, 0x2f, 0x61, 0xd5, 0x61, 0x09
|
||||||
};
|
};
|
||||||
int* nonce = (int*)(blockTemplate + 39);
|
int* noncePtr = (int*)(blockTemplate + 39);
|
||||||
uint8_t hash[RandomX::ResultSize];
|
int nonce = atomicNonce.fetch_add(1);
|
||||||
|
|
||||||
if (genAsm) {
|
while (nonce < noncesCount) {
|
||||||
*nonce = programCount;
|
//std::cout << "Thread " << thread << " nonce " << nonce << std::endl;
|
||||||
|
*noncePtr = nonce;
|
||||||
blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0);
|
blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0);
|
||||||
RandomX::AssemblyGeneratorX86 asmX86;
|
int spIndex = ((uint8_t*)hash)[24] | ((((uint8_t*)hash)[25] & 63) << 8);
|
||||||
asmX86.generateProgram(hash);
|
vm->initializeScratchpad(spIndex);
|
||||||
asmX86.printCode(std::cout);
|
vm->initializeProgram(hash);
|
||||||
return 0;
|
vm->execute();
|
||||||
|
vm->getResult(hash);
|
||||||
|
result.xorWith(hash);
|
||||||
|
if (RandomX::trace) {
|
||||||
|
std::cout << "Nonce: " << nonce << " ";
|
||||||
|
outputHex(std::cout, (char*)hash, sizeof(hash));
|
||||||
|
std::cout << std::endl;
|
||||||
}
|
}
|
||||||
|
nonce = atomicNonce.fetch_add(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char** argv) {
|
||||||
|
bool softAes, lightClient, genAsm, compiled;
|
||||||
|
int programCount, threadCount;
|
||||||
|
readOption("--softAes", argc, argv, softAes);
|
||||||
|
readOption("--lightClient", argc, argv, lightClient);
|
||||||
|
readOption("--genAsm", argc, argv, genAsm);
|
||||||
|
readOption("--compiled", argc, argv, compiled);
|
||||||
|
readIntOption("--threads", argc, argv, threadCount, 1);
|
||||||
|
readIntOption("--nonces", argc, argv, programCount, 1000);
|
||||||
|
|
||||||
|
std::atomic<int> atomicNonce(0);
|
||||||
|
AtomicHash result;
|
||||||
|
std::vector<RandomX::VirtualMachine*> vms;
|
||||||
|
std::vector<std::thread> threads;
|
||||||
|
RandomX::dataset_t dataset;
|
||||||
|
|
||||||
if (softAes)
|
if (softAes)
|
||||||
std::cout << "Using software AES." << std::endl;
|
std::cout << "Using software AES." << std::endl;
|
||||||
|
std::cout << "Initializing..." << std::endl;
|
||||||
char cumulative[RandomX::ResultSize] = { 0 };
|
|
||||||
|
|
||||||
RandomX::VirtualMachine* vm;
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
Stopwatch sw(true);
|
||||||
|
if (softAes) {
|
||||||
|
RandomX::datasetInitCache<true>(seed, dataset);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
RandomX::datasetInitCache<false>(seed, dataset);
|
||||||
|
}
|
||||||
|
if (RandomX::trace) {
|
||||||
|
std::cout << "Keys: " << std::endl;
|
||||||
|
for (int i = 0; i < dataset.cache->getKeys().size(); ++i) {
|
||||||
|
outputHex(std::cout, (char*)&dataset.cache->getKeys()[i], sizeof(__m128i));
|
||||||
|
}
|
||||||
|
std::cout << std::endl;
|
||||||
|
std::cout << "Cache: " << std::endl;
|
||||||
|
outputHex(std::cout, (char*)dataset.cache->getCache(), sizeof(__m128i));
|
||||||
|
std::cout << std::endl;
|
||||||
|
}
|
||||||
|
if (lightClient) {
|
||||||
|
std::cout << "Cache (64 MiB) initialized in " << sw.getElapsed() << " s" << std::endl;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
RandomX::Cache* cache = dataset.cache;
|
||||||
|
RandomX::datasetAlloc(dataset);
|
||||||
|
auto perThread = RandomX::DatasetBlockCount / threadCount;
|
||||||
|
auto remainder = RandomX::DatasetBlockCount % threadCount;
|
||||||
|
for (int i = 0; i < threadCount; ++i) {
|
||||||
|
auto count = perThread + (i == threadCount - 1 ? remainder : 0);
|
||||||
|
if (softAes) {
|
||||||
|
threads.push_back(std::thread(&RandomX::datasetInit<true>, cache, dataset, i * perThread, count));
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
threads.push_back(std::thread(&RandomX::datasetInit<false>, cache, dataset, i * perThread, count));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (int i = 0; i < threads.size(); ++i) {
|
||||||
|
threads[i].join();
|
||||||
|
}
|
||||||
|
delete cache;
|
||||||
|
threads.clear();
|
||||||
|
std::cout << "Dataset (4 GiB) initialized in " << sw.getElapsed() << " s" << std::endl;
|
||||||
|
}
|
||||||
|
std::cout << "Initializing " << threadCount << " virtual machine(s)..." << std::endl;
|
||||||
|
for (int i = 0; i < threadCount; ++i) {
|
||||||
|
RandomX::VirtualMachine* vm;
|
||||||
if (compiled) {
|
if (compiled) {
|
||||||
vm = new RandomX::CompiledVirtualMachine(softAes);
|
vm = new RandomX::CompiledVirtualMachine(softAes);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
vm = new RandomX::InterpretedVirtualMachine(softAes);
|
vm = new RandomX::InterpretedVirtualMachine(softAes);
|
||||||
}
|
}
|
||||||
std::cout << "Initializing..." << std::endl;
|
vm->setDataset(dataset, lightClient);
|
||||||
Stopwatch sw(true);
|
vms.push_back(vm);
|
||||||
vm->initializeDataset(seed, lightClient);
|
}
|
||||||
if(lightClient)
|
|
||||||
std::cout << "Cache (64 MiB) initialized in " << sw.getElapsed() << " s" << std::endl;
|
|
||||||
else
|
|
||||||
std::cout << "Dataset (4 GiB) initialized in " << sw.getElapsed() << " s" << std::endl;
|
|
||||||
std::cout << "Running benchmark (" << programCount << " programs) ..." << std::endl;
|
std::cout << "Running benchmark (" << programCount << " programs) ..." << std::endl;
|
||||||
sw.restart();
|
sw.restart();
|
||||||
for (int i = 0; i < programCount; ++i) {
|
for (int i = 0; i < vms.size(); ++i) {
|
||||||
*nonce = i;
|
threads.push_back(std::thread(&mine, vms[i], std::ref(atomicNonce), std::ref(result), programCount, i));
|
||||||
if (RandomX::trace) std::cout << "Nonce: " << i << " ";
|
|
||||||
blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0);
|
|
||||||
int spIndex = hash[24] | ((hash[25] & 63) << 8);
|
|
||||||
vm->initializeScratchpad(spIndex);
|
|
||||||
//dump((const char *)vm.getScratchpad(), RandomX::ScratchpadSize, "scratchpad-before.txt");
|
|
||||||
//return 0;
|
|
||||||
vm->initializeProgram(hash);
|
|
||||||
vm->execute();
|
|
||||||
/*std::string fileName("scratchpad-after-");
|
|
||||||
fileName = fileName + std::to_string(i) + ".txt";
|
|
||||||
dump((const char *)vm.getScratchpad(), RandomX::ScratchpadSize, fileName.c_str());*/
|
|
||||||
vm->getResult(hash);
|
|
||||||
if (RandomX::trace) {
|
|
||||||
outputHex(std::cout, (char*)hash, sizeof(hash));
|
|
||||||
}
|
}
|
||||||
((uint64_t*)cumulative)[0] ^= ((uint64_t*)hash)[0];
|
for (int i = 0; i < threads.size(); ++i) {
|
||||||
((uint64_t*)cumulative)[1] ^= ((uint64_t*)hash)[1];
|
threads[i].join();
|
||||||
((uint64_t*)cumulative)[2] ^= ((uint64_t*)hash)[2];
|
|
||||||
((uint64_t*)cumulative)[3] ^= ((uint64_t*)hash)[3];
|
|
||||||
}
|
}
|
||||||
double elapsed = sw.getElapsed();
|
double elapsed = sw.getElapsed();
|
||||||
std::cout << "Calculated result: ";
|
std::cout << "Calculated result: ";
|
||||||
outputHex(std::cout, cumulative, sizeof(cumulative));
|
result.print(std::cout);
|
||||||
if(programCount == 1000)
|
if(programCount == 1000)
|
||||||
std::cout << "Reference result: d62ed85c39030cd2c5704fca3a23019f1244f2b03447c9a6b39dea5390ed1d10" << std::endl;
|
std::cout << "Reference result: d62ed85c39030cd2c5704fca3a23019f1244f2b03447c9a6b39dea5390ed1d10" << std::endl;
|
||||||
std::cout << "Performance: " << programCount / elapsed << " programs per second" << std::endl;
|
std::cout << "Performance: " << programCount / elapsed << " programs per second" << std::endl;
|
||||||
|
|
Loading…
Reference in a new issue