mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2024-08-15 00:23:14 +00:00
Support for multiple threads
This commit is contained in:
parent
cb12feaf91
commit
b9d2d853aa
13 changed files with 436 additions and 304 deletions
10
makefile
10
makefile
|
@ -9,10 +9,9 @@ endif
|
|||
BINDIR=bin
|
||||
SRCDIR=src
|
||||
OBJDIR=obj
|
||||
LDFLAGS=
|
||||
LDFLAGS=-lpthread
|
||||
TOBJS=$(addprefix $(OBJDIR)/,instructionsPortable.o TestAluFpu.o)
|
||||
ROBJS=$(addprefix $(OBJDIR)/,argon2_core.o argon2_ref.o AssemblyGeneratorX86.o blake2b.o CompiledVirtualMachine.o dataset.o JitCompilerX86.o instructionsPortable.o Instruction.o InterpretedVirtualMachine.o main.o Program.o softAes.o VirtualMachine.o t1ha2.o)
|
||||
SRC1=$(addprefix $(SRCDIR)/,TestAluFpu.cpp instructions.hpp Pcg32.hpp)
|
||||
ROBJS=$(addprefix $(OBJDIR)/,argon2_core.o argon2_ref.o AssemblyGeneratorX86.o blake2b.o CompiledVirtualMachine.o dataset.o JitCompilerX86.o instructionsPortable.o Instruction.o InterpretedVirtualMachine.o main.o Program.o softAes.o VirtualMachine.o t1ha2.o Cache.o)
|
||||
|
||||
all: release test
|
||||
|
||||
|
@ -52,7 +51,7 @@ $(OBJDIR)/blake2b.o: $(addprefix $(SRCDIR)/blake2/,blake2b.c blake2.h blake2-imp
|
|||
$(OBJDIR)/CompiledVirtualMachine.o: $(addprefix $(SRCDIR)/,CompiledVirtualMachine.cpp CompiledVirtualMachine.hpp Pcg32.hpp common.hpp instructions.hpp) | $(OBJDIR)
|
||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/CompiledVirtualMachine.cpp -o $@
|
||||
|
||||
$(OBJDIR)/dataset.o: $(addprefix $(SRCDIR)/,dataset.cpp common.hpp Pcg32.hpp argon2_core.h) | $(OBJDIR)
|
||||
$(OBJDIR)/dataset.o: $(addprefix $(SRCDIR)/,dataset.cpp common.hpp Pcg32.hpp) | $(OBJDIR)
|
||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/dataset.cpp -o $@
|
||||
|
||||
$(OBJDIR)/JitCompilerX86.o: $(addprefix $(SRCDIR)/,JitCompilerX86.cpp JitCompilerX86.hpp Instruction.hpp) | $(OBJDIR)
|
||||
|
@ -73,6 +72,9 @@ $(OBJDIR)/main.o: $(addprefix $(SRCDIR)/,main.cpp InterpretedVirtualMachine.hpp
|
|||
$(OBJDIR)/Program.o: $(addprefix $(SRCDIR)/,Program.cpp Program.hpp Pcg32.hpp) | $(OBJDIR)
|
||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/Program.cpp -o $@
|
||||
|
||||
$(OBJDIR)/Cache.o: $(addprefix $(SRCDIR)/,Cache.cpp Cache.hpp Pcg32.hpp argon2_core.h) | $(OBJDIR)
|
||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/Cache.cpp -o $@
|
||||
|
||||
$(OBJDIR)/softAes.o: $(addprefix $(SRCDIR)/,softAes.cpp softAes.h) | $(OBJDIR)
|
||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/softAes.cpp -o $@
|
||||
|
||||
|
|
147
src/Cache.cpp
Normal file
147
src/Cache.cpp
Normal file
|
@ -0,0 +1,147 @@
|
|||
/*
|
||||
Copyright (c) 2018 tevador
|
||||
|
||||
This file is part of RandomX.
|
||||
|
||||
RandomX is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
RandomX is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <cstring>
|
||||
#include "Cache.hpp"
|
||||
#include "softAes.h"
|
||||
#include "argon2.h"
|
||||
#include "Pcg32.hpp"
|
||||
#include "argon2_core.h"
|
||||
|
||||
namespace RandomX {
|
||||
|
||||
static_assert(ArgonMemorySize % (ArgonLanes * ARGON2_SYNC_POINTS) == 0, "ArgonMemorySize - invalid value");
|
||||
|
||||
// Running XOR across the four 32-bit lanes of the input, lowest lane first:
// sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1)
// Each step moves the previous term up by one lane (4 bytes) and folds it in.
static inline __m128i sl_xor(__m128i tmp1) {
	__m128i shifted = _mm_slli_si128(tmp1, 0x04);
	__m128i folded = _mm_xor_si128(tmp1, shifted);
	shifted = _mm_slli_si128(shifted, 0x04);
	folded = _mm_xor_si128(folded, shifted);
	shifted = _mm_slli_si128(shifted, 0x04);
	return _mm_xor_si128(folded, shifted);
}
|
||||
|
||||
template<uint8_t rcon, bool soft>
|
||||
static inline void aesGenKeys(__m128i* xout0, __m128i* xout2) {
|
||||
__m128i xout1 = soft ? soft_aeskeygenassist(*xout2, rcon) : _mm_aeskeygenassist_si128(*xout2, rcon);
|
||||
xout1 = _mm_shuffle_epi32(xout1, 0xFF);
|
||||
*xout0 = sl_xor(*xout0);
|
||||
*xout0 = _mm_xor_si128(*xout0, xout1);
|
||||
xout1 = soft ? soft_aeskeygenassist(*xout0, 0x00) : _mm_aeskeygenassist_si128(*xout0, 0x00);
|
||||
xout1 = _mm_shuffle_epi32(xout1, 0xAA);
|
||||
*xout2 = sl_xor(*xout2);
|
||||
*xout2 = _mm_xor_si128(*xout2, xout1);
|
||||
}
|
||||
|
||||
template<bool soft>
|
||||
static inline void expandAesKeys(const __m128i* seed, __m128i* keys) {
|
||||
__m128i xout0, xout2;
|
||||
xout0 = _mm_load_si128(seed);
|
||||
xout2 = _mm_load_si128(seed + 1);
|
||||
*keys++ = xout0;
|
||||
*keys++ = xout2;
|
||||
aesGenKeys<0x01, soft>(&xout0, &xout2);
|
||||
*keys++ = xout0;
|
||||
*keys++ = xout2;
|
||||
aesGenKeys<0x02, soft>(&xout0, &xout2);
|
||||
*keys++ = xout0;
|
||||
*keys++ = xout2;
|
||||
aesGenKeys<0x04, soft>(&xout0, &xout2);
|
||||
*keys++ = xout0;
|
||||
*keys++ = xout2;
|
||||
aesGenKeys<0x08, soft>(&xout0, &xout2);
|
||||
*keys++ = xout0;
|
||||
*keys++ = xout2;
|
||||
}
|
||||
|
||||
void Cache::argonFill(const void* seed, size_t seedSize) {
|
||||
uint32_t memory_blocks, segment_length;
|
||||
argon2_instance_t instance;
|
||||
argon2_context context;
|
||||
|
||||
context.out = nullptr;
|
||||
context.outlen = 0;
|
||||
context.pwd = CONST_CAST(uint8_t *)seed;
|
||||
context.pwdlen = (uint32_t)seedSize;
|
||||
context.salt = CONST_CAST(uint8_t *)ArgonSalt;
|
||||
context.saltlen = (uint32_t)ArgonSaltSize;
|
||||
context.secret = NULL;
|
||||
context.secretlen = 0;
|
||||
context.ad = NULL;
|
||||
context.adlen = 0;
|
||||
context.t_cost = ArgonIterations;
|
||||
context.m_cost = ArgonMemorySize;
|
||||
context.lanes = ArgonLanes;
|
||||
context.threads = 1;
|
||||
context.allocate_cbk = NULL;
|
||||
context.free_cbk = NULL;
|
||||
context.flags = ARGON2_DEFAULT_FLAGS;
|
||||
context.version = ARGON2_VERSION_NUMBER;
|
||||
|
||||
/* 2. Align memory size */
|
||||
/* Minimum memory_blocks = 8L blocks, where L is the number of lanes */
|
||||
memory_blocks = context.m_cost;
|
||||
|
||||
segment_length = memory_blocks / (context.lanes * ARGON2_SYNC_POINTS);
|
||||
|
||||
instance.version = context.version;
|
||||
instance.memory = NULL;
|
||||
instance.passes = context.t_cost;
|
||||
instance.memory_blocks = memory_blocks;
|
||||
instance.segment_length = segment_length;
|
||||
instance.lane_length = segment_length * ARGON2_SYNC_POINTS;
|
||||
instance.lanes = context.lanes;
|
||||
instance.threads = context.threads;
|
||||
instance.type = Argon2_d;
|
||||
instance.memory = (block*)memory;
|
||||
|
||||
if (instance.threads > instance.lanes) {
|
||||
instance.threads = instance.lanes;
|
||||
}
|
||||
|
||||
/* 3. Initialization: Hashing inputs, allocating memory, filling first
|
||||
* blocks
|
||||
*/
|
||||
argon_initialize(&instance, &context);
|
||||
|
||||
fill_memory_blocks(&instance);
|
||||
}
|
||||
|
||||
template<bool softAes>
|
||||
void Cache::initialize(const void* seed, size_t seedSize) {
|
||||
//Argon2d memory fill
|
||||
argonFill(seed, seedSize);
|
||||
|
||||
//Circular shift of the cache buffer by 512 bytes
|
||||
//realized by copying the first 512 bytes to the back
|
||||
//of the buffer and shifting the start by 512 bytes
|
||||
memcpy(memory + CacheSize, memory, CacheShift);
|
||||
|
||||
//AES keys
|
||||
expandAesKeys<softAes>((__m128i*)seed, keys.data());
|
||||
}
|
||||
|
||||
template void Cache::initialize<true>(const void*, size_t);
|
||||
|
||||
template void Cache::initialize<false>(const void*, size_t);
|
||||
}
|
57
src/Cache.hpp
Normal file
57
src/Cache.hpp
Normal file
|
@ -0,0 +1,57 @@
|
|||
/*
|
||||
Copyright (c) 2018 tevador
|
||||
|
||||
This file is part of RandomX.
|
||||
|
||||
RandomX is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
RandomX is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <new>
|
||||
#include "common.hpp"
|
||||
#include "dataset.hpp"
|
||||
|
||||
namespace RandomX {
|
||||
|
||||
class Cache {
|
||||
public:
|
||||
void* operator new(size_t size) {
|
||||
void* ptr = _mm_malloc(size, sizeof(__m128i));
|
||||
if (ptr == nullptr)
|
||||
throw std::bad_alloc();
|
||||
return ptr;
|
||||
}
|
||||
|
||||
void operator delete(void* ptr) {
|
||||
_mm_free(ptr);
|
||||
}
|
||||
|
||||
template<bool softAes>
|
||||
void initialize(const void* seed, size_t seedSize);
|
||||
|
||||
const KeysContainer& getKeys() const {
|
||||
return keys;
|
||||
}
|
||||
|
||||
const uint8_t* getCache() {
|
||||
return memory + CacheShift;
|
||||
}
|
||||
private:
|
||||
alignas(16) KeysContainer keys;
|
||||
uint8_t memory[CacheSize + CacheShift];
|
||||
void argonFill(const void* seed, size_t seedSize);
|
||||
};
|
||||
}
|
|
@ -31,11 +31,11 @@ namespace RandomX {
|
|||
#endif
|
||||
}
|
||||
|
||||
void CompiledVirtualMachine::initializeDataset(const void* seed, bool lightClient) {
|
||||
void CompiledVirtualMachine::setDataset(dataset_t ds, bool lightClient) {
|
||||
if (lightClient) {
|
||||
throw std::runtime_error("Compiled VM does not support light-client mode");
|
||||
}
|
||||
VirtualMachine::initializeDataset(seed, lightClient);
|
||||
VirtualMachine::setDataset(ds, lightClient);
|
||||
}
|
||||
|
||||
void CompiledVirtualMachine::initializeProgram(const void* seed) {
|
||||
|
|
|
@ -27,7 +27,7 @@ namespace RandomX {
|
|||
class CompiledVirtualMachine : public VirtualMachine {
|
||||
public:
|
||||
CompiledVirtualMachine(bool softAes);
|
||||
void initializeDataset(const void* seed, bool light = false) override;
|
||||
void setDataset(dataset_t ds, bool light = false) override;
|
||||
void initializeProgram(const void* seed) override;
|
||||
virtual void execute() override;
|
||||
void* getProgram() {
|
||||
|
|
|
@ -20,58 +20,65 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
|||
#include "VirtualMachine.hpp"
|
||||
#include "common.hpp"
|
||||
#include "dataset.hpp"
|
||||
#include "Cache.hpp"
|
||||
#include "t1ha/t1ha.h"
|
||||
#include "blake2/blake2.h"
|
||||
#include <cstring>
|
||||
|
||||
namespace RandomX {
|
||||
VirtualMachine::VirtualMachine(bool softAes) : softAes(softAes), lightClient(false) {
|
||||
mem.dataset = nullptr;
|
||||
mem.ds.dataset = nullptr;
|
||||
}
|
||||
|
||||
void VirtualMachine::initializeDataset(const void* seed, bool light) {
|
||||
// Releases the light-client state that setDataset() created for this VM.
// The Cache referenced by the light dataset is not released here.
VirtualMachine::~VirtualMachine() {
	// lightDataset can still be null if allocation in setDataset() threw after
	// lightClient had already been set.
	if (lightClient && mem.ds.lightDataset != nullptr) {
		// block is obtained from _mm_malloc in setDataset(), so it has to be
		// returned with _mm_free; plain delete on it is a mismatched deallocation.
		_mm_free(mem.ds.lightDataset->block);
		delete mem.ds.lightDataset;
	}
}
|
||||
|
||||
void VirtualMachine::setDataset(dataset_t ds, bool light) {
|
||||
if (mem.ds.dataset != nullptr) {
|
||||
throw std::runtime_error("Dataset is already initialized");
|
||||
}
|
||||
_mm_free(mem.dataset);
|
||||
lightClient = light;
|
||||
if (light) {
|
||||
auto lds = mem.ds.lightDataset = new LightClientDataset();
|
||||
lds->cache = ds.cache;
|
||||
lds->block = (uint8_t*)_mm_malloc(DatasetBlockSize, sizeof(__m128i));
|
||||
lds->blockNumber = -1;
|
||||
if (lds->block == nullptr) {
|
||||
throw std::bad_alloc();
|
||||
}
|
||||
if (softAes) {
|
||||
datasetInitLight<true>(seed, mem.lcm);
|
||||
readDataset = &datasetReadLight<true>;
|
||||
}
|
||||
else {
|
||||
datasetInitLight<false>(seed, mem.lcm);
|
||||
readDataset = &datasetReadLight<false>;
|
||||
}
|
||||
}
|
||||
else {
|
||||
mem.ds = ds;
|
||||
readDataset = &datasetRead;
|
||||
if (softAes) {
|
||||
datasetInit<true>(seed, mem.dataset);
|
||||
}
|
||||
else {
|
||||
datasetInit<false>(seed, mem.dataset);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void VirtualMachine::initializeScratchpad(uint32_t index) {
|
||||
if (lightClient) {
|
||||
auto cache = mem.ds.lightDataset->cache;
|
||||
if (softAes) {
|
||||
for (int i = 0; i < ScratchpadSize / DatasetBlockSize; ++i) {
|
||||
initBlock<true>(mem.lcm->cache + CacheShift, ((uint8_t*)scratchpad) + DatasetBlockSize * i, (ScratchpadSize / DatasetBlockSize) * index + i, mem.lcm->keys);
|
||||
initBlock<true>(cache->getCache(), ((uint8_t*)scratchpad) + DatasetBlockSize * i, (ScratchpadSize / DatasetBlockSize) * index + i, cache->getKeys());
|
||||
}
|
||||
}
|
||||
else {
|
||||
for (int i = 0; i < ScratchpadSize / DatasetBlockSize; ++i) {
|
||||
initBlock<false>(mem.lcm->cache + CacheShift, ((uint8_t*)scratchpad) + DatasetBlockSize * i, (ScratchpadSize / DatasetBlockSize) * index + i, mem.lcm->keys);
|
||||
initBlock<false>(cache->getCache(), ((uint8_t*)scratchpad) + DatasetBlockSize * i, (ScratchpadSize / DatasetBlockSize) * index + i, cache->getKeys());
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
memcpy(scratchpad, mem.dataset + ScratchpadSize * index, ScratchpadSize);
|
||||
memcpy(scratchpad, mem.ds.dataset + ScratchpadSize * index, ScratchpadSize);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -26,30 +26,12 @@ namespace RandomX {
|
|||
class VirtualMachine {
|
||||
public:
|
||||
VirtualMachine(bool softAes);
|
||||
virtual ~VirtualMachine() {}
|
||||
virtual void initializeDataset(const void* seed, bool light = false);
|
||||
virtual ~VirtualMachine();
|
||||
virtual void setDataset(dataset_t ds, bool light = false);
|
||||
void initializeScratchpad(uint32_t index);
|
||||
virtual void initializeProgram(const void* seed) = 0;
|
||||
virtual void execute() = 0;
|
||||
void getResult(void*);
|
||||
const RegisterFile& getRegisterFile() const {
|
||||
return reg;
|
||||
}
|
||||
const convertible_t* getScratchpad() const {
|
||||
return scratchpad;
|
||||
}
|
||||
const void* getCache() {
|
||||
if (lightClient) {
|
||||
return mem.lcm->cache;
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
const __m128i* getKeys() {
|
||||
if (lightClient) {
|
||||
return mem.lcm->keys;
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
protected:
|
||||
bool softAes, lightClient;
|
||||
RegisterFile reg;
|
||||
|
|
|
@ -473,7 +473,7 @@ void initial_hash(uint8_t *blockhash, argon2_context *context, argon2_type type)
|
|||
blake2b_final(&BlakeHash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH);
|
||||
}
|
||||
|
||||
int initialize(argon2_instance_t *instance, argon2_context *context) {
|
||||
int argon_initialize(argon2_instance_t *instance, argon2_context *context) {
|
||||
uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH];
|
||||
int result = ARGON2_OK;
|
||||
|
||||
|
|
|
@ -204,7 +204,7 @@ void fill_first_blocks(uint8_t *blockhash, const argon2_instance_t *instance);
|
|||
* @return Zero if successful, -1 if memory failed to allocate. @context->state
|
||||
* will be modified if successful.
|
||||
*/
|
||||
int initialize(argon2_instance_t *instance, argon2_context *context);
|
||||
int argon_initialize(argon2_instance_t *instance, argon2_context *context);
|
||||
|
||||
/*
|
||||
* XORing the last block of each lane, hashing it, making the tag. Deallocates
|
||||
|
|
|
@ -20,8 +20,6 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
|||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <new>
|
||||
#include "intrinPortable.h"
|
||||
|
||||
namespace RandomX {
|
||||
|
||||
|
@ -55,13 +53,13 @@ namespace RandomX {
|
|||
constexpr bool trace = false;
|
||||
#endif
|
||||
|
||||
typedef union {
|
||||
union convertible_t {
|
||||
double f64;
|
||||
int64_t i64;
|
||||
uint64_t u64;
|
||||
int32_t i32;
|
||||
uint32_t u32;
|
||||
} convertible_t;
|
||||
};
|
||||
|
||||
constexpr int ProgramLength = 512;
|
||||
constexpr int InstructionCount = 1024 * 1024;
|
||||
|
@ -71,34 +69,27 @@ namespace RandomX {
|
|||
constexpr uint32_t ScratchpadL2 = ScratchpadSize / sizeof(convertible_t);
|
||||
constexpr int RegistersCount = 8;
|
||||
|
||||
class Cache;
|
||||
|
||||
inline int wrapInstr(int i) {
|
||||
return i % RandomX::ProgramLength;
|
||||
}
|
||||
|
||||
struct LightClientMemory {
|
||||
uint8_t* cache;
|
||||
struct LightClientDataset {
|
||||
Cache* cache;
|
||||
uint8_t* block;
|
||||
uint32_t blockNumber;
|
||||
alignas(16) __m128i keys[10];
|
||||
};
|
||||
|
||||
void* operator new(size_t size) {
|
||||
void* ptr = _mm_malloc(size, sizeof(__m128i));
|
||||
if (ptr == nullptr)
|
||||
throw std::bad_alloc();
|
||||
return ptr;
|
||||
}
|
||||
|
||||
void operator delete(void* ptr) {
|
||||
_mm_free(ptr);
|
||||
}
|
||||
union dataset_t {
|
||||
uint8_t* dataset;
|
||||
Cache* cache;
|
||||
LightClientDataset* lightDataset;
|
||||
};
|
||||
|
||||
struct MemoryRegisters {
|
||||
addr_t ma, mx;
|
||||
union {
|
||||
uint8_t* dataset;
|
||||
LightClientMemory* lcm;
|
||||
};
|
||||
dataset_t ds;
|
||||
};
|
||||
|
||||
static_assert(sizeof(MemoryRegisters) == 2 * sizeof(addr_t) + sizeof(uintptr_t), "Invalid alignment of struct RandomX::MemoryRegisters");
|
||||
|
|
216
src/dataset.cpp
216
src/dataset.cpp
|
@ -19,135 +19,25 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
|||
|
||||
// Parts of this file are originally copyright (c) xmr-stak
|
||||
|
||||
#include "common.hpp"
|
||||
#include "dataset.hpp"
|
||||
#include "Pcg32.hpp"
|
||||
#include "argon2_core.h"
|
||||
#include <new>
|
||||
#include <algorithm>
|
||||
#include <stdexcept>
|
||||
#include <cstring>
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#if defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP == 2)
|
||||
#define __SSE2__ 1
|
||||
#endif
|
||||
#endif
|
||||
#include "common.hpp"
|
||||
#include "dataset.hpp"
|
||||
#include "Pcg32.hpp"
|
||||
#include "Cache.hpp"
|
||||
|
||||
#if defined(__SSE2__)
|
||||
#include <wmmintrin.h>
|
||||
#define PREFETCH(memory) _mm_prefetch((const char *)((memory).dataset + (memory).ma), _MM_HINT_T0)
|
||||
#define PREFETCH(memory) _mm_prefetch((const char *)((memory).ds.dataset + (memory).ma), _MM_HINT_T0)
|
||||
#else
|
||||
#define PREFETCH(memory)
|
||||
#endif
|
||||
|
||||
namespace RandomX {
|
||||
|
||||
void initializeCache(const void* input, size_t inputLength, void* memory) {
|
||||
uint32_t memory_blocks, segment_length;
|
||||
argon2_instance_t instance;
|
||||
argon2_context context;
|
||||
|
||||
context.out = nullptr;
|
||||
context.outlen = 0;
|
||||
context.pwd = CONST_CAST(uint8_t *)input;
|
||||
context.pwdlen = (uint32_t)inputLength;
|
||||
context.salt = CONST_CAST(uint8_t *)ArgonSalt;
|
||||
context.saltlen = (uint32_t)ArgonSaltSize;
|
||||
context.secret = NULL;
|
||||
context.secretlen = 0;
|
||||
context.ad = NULL;
|
||||
context.adlen = 0;
|
||||
context.t_cost = ArgonIterations;
|
||||
context.m_cost = ArgonMemorySize;
|
||||
context.lanes = ArgonLanes;
|
||||
context.threads = 1;
|
||||
context.allocate_cbk = NULL;
|
||||
context.free_cbk = NULL;
|
||||
context.flags = ARGON2_DEFAULT_FLAGS;
|
||||
context.version = ARGON2_VERSION_NUMBER;
|
||||
|
||||
/* 2. Align memory size */
|
||||
/* Minimum memory_blocks = 8L blocks, where L is the number of lanes */
|
||||
memory_blocks = context.m_cost;
|
||||
|
||||
segment_length = memory_blocks / (context.lanes * ARGON2_SYNC_POINTS);
|
||||
|
||||
instance.version = context.version;
|
||||
instance.memory = NULL;
|
||||
instance.passes = context.t_cost;
|
||||
instance.memory_blocks = memory_blocks;
|
||||
instance.segment_length = segment_length;
|
||||
instance.lane_length = segment_length * ARGON2_SYNC_POINTS;
|
||||
instance.lanes = context.lanes;
|
||||
instance.threads = context.threads;
|
||||
instance.type = Argon2_d;
|
||||
instance.memory = (block*)memory;
|
||||
|
||||
if (instance.threads > instance.lanes) {
|
||||
instance.threads = instance.lanes;
|
||||
}
|
||||
|
||||
/* 3. Initialization: Hashing inputs, allocating memory, filling first
|
||||
* blocks
|
||||
*/
|
||||
initialize(&instance, &context);
|
||||
|
||||
fill_memory_blocks(&instance);
|
||||
}
|
||||
|
||||
// This will shift and xor tmp1 into itself as 4 32-bit vals such as
|
||||
// sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1)
|
||||
static inline __m128i sl_xor(__m128i tmp1) {
|
||||
__m128i tmp4;
|
||||
tmp4 = _mm_slli_si128(tmp1, 0x04);
|
||||
tmp1 = _mm_xor_si128(tmp1, tmp4);
|
||||
tmp4 = _mm_slli_si128(tmp4, 0x04);
|
||||
tmp1 = _mm_xor_si128(tmp1, tmp4);
|
||||
tmp4 = _mm_slli_si128(tmp4, 0x04);
|
||||
tmp1 = _mm_xor_si128(tmp1, tmp4);
|
||||
return tmp1;
|
||||
}
|
||||
|
||||
template<uint8_t rcon, bool soft>
|
||||
static inline void aes_genkey_sub(__m128i* xout0, __m128i* xout2) {
|
||||
__m128i xout1 = soft ? soft_aeskeygenassist(*xout2, rcon) : _mm_aeskeygenassist_si128(*xout2, rcon);
|
||||
xout1 = _mm_shuffle_epi32(xout1, 0xFF);
|
||||
*xout0 = sl_xor(*xout0);
|
||||
*xout0 = _mm_xor_si128(*xout0, xout1);
|
||||
xout1 = soft ? soft_aeskeygenassist(*xout0, 0x00) : _mm_aeskeygenassist_si128(*xout0, 0x00);
|
||||
xout1 = _mm_shuffle_epi32(xout1, 0xAA);
|
||||
*xout2 = sl_xor(*xout2);
|
||||
*xout2 = _mm_xor_si128(*xout2, xout1);
|
||||
}
|
||||
|
||||
template<bool soft>
|
||||
void expandAesKeys(const __m128i* seed, __m128i* keys) {
|
||||
__m128i xout0, xout2;
|
||||
xout0 = _mm_load_si128(seed);
|
||||
xout2 = _mm_load_si128(seed + 1);
|
||||
*keys++ = xout0;
|
||||
*keys++ = xout2;
|
||||
aes_genkey_sub<0x01, soft>(&xout0, &xout2);
|
||||
*keys++ = xout0;
|
||||
*keys++ = xout2;
|
||||
aes_genkey_sub<0x02, soft>(&xout0, &xout2);
|
||||
*keys++ = xout0;
|
||||
*keys++ = xout2;
|
||||
aes_genkey_sub<0x04, soft>(&xout0, &xout2);
|
||||
*keys++ = xout0;
|
||||
*keys++ = xout2;
|
||||
aes_genkey_sub<0x08, soft>(&xout0, &xout2);
|
||||
*keys++ = xout0;
|
||||
*keys++ = xout2;
|
||||
}
|
||||
|
||||
template
|
||||
void expandAesKeys<true>(const __m128i* seed, __m128i* keys);
|
||||
|
||||
template
|
||||
void expandAesKeys<false>(const __m128i* seed, __m128i* keys);
|
||||
|
||||
template<typename T>
|
||||
static inline void shuffle(T* buffer, size_t bytes, Pcg32& gen) {
|
||||
auto count = bytes / sizeof(T);
|
||||
|
@ -157,8 +47,18 @@ namespace RandomX {
|
|||
}
|
||||
}
|
||||
|
||||
template<bool soft>
|
||||
static inline __m128i aesenc(__m128i in, __m128i key) {
|
||||
return soft ? soft_aesenc(in, key) : _mm_aesenc_si128(in, key);
|
||||
}
|
||||
|
||||
template<bool soft>
|
||||
static inline __m128i aesdec(__m128i in, __m128i key) {
|
||||
return soft ? soft_aesdec(in, key) : _mm_aesdec_si128(in, key);
|
||||
}
|
||||
|
||||
template<bool soft, bool enc>
|
||||
void initBlock(uint8_t* in, uint8_t* out, uint32_t blockNumber, const __m128i keys[10]) {
|
||||
void initBlock(const uint8_t* in, uint8_t* out, uint32_t blockNumber, const KeysContainer& keys) {
|
||||
__m128i xin, xout;
|
||||
//Initialization vector = block number extended to 128 bits
|
||||
xout = _mm_cvtsi32_si128(blockNumber);
|
||||
|
@ -200,20 +100,20 @@ namespace RandomX {
|
|||
}
|
||||
|
||||
template
|
||||
void initBlock<true, true>(uint8_t* in, uint8_t* out, uint32_t blockNumber, const __m128i keys[10]);
|
||||
void initBlock<true, true>(const uint8_t*, uint8_t*, uint32_t, const KeysContainer&);
|
||||
|
||||
template
|
||||
void initBlock<true, false>(uint8_t* in, uint8_t* out, uint32_t blockNumber, const __m128i keys[10]);
|
||||
void initBlock<true, false>(const uint8_t*, uint8_t*, uint32_t, const KeysContainer&);
|
||||
|
||||
template
|
||||
void initBlock<false, true>(uint8_t* in, uint8_t* out, uint32_t blockNumber, const __m128i keys[10]);
|
||||
void initBlock<false, true>(const uint8_t*, uint8_t*, uint32_t, const KeysContainer&);
|
||||
|
||||
template
|
||||
void initBlock<false, false>(uint8_t* in, uint8_t* out, uint32_t blockNumber, const __m128i keys[10]);
|
||||
void initBlock<false, false>(const uint8_t*, uint8_t*, uint32_t, const KeysContainer&);
|
||||
|
||||
convertible_t datasetRead(addr_t addr, MemoryRegisters& memory) {
|
||||
convertible_t data;
|
||||
data.u64 = *(uint64_t*)(memory.dataset + memory.ma);
|
||||
data.u64 = *(uint64_t*)(memory.ds.dataset + memory.ma);
|
||||
memory.ma += 8;
|
||||
memory.mx ^= addr;
|
||||
if ((memory.mx & 0xFFF8) == 0) {
|
||||
|
@ -224,24 +124,25 @@ namespace RandomX {
|
|||
}
|
||||
|
||||
template<bool softAes>
|
||||
void initBlock(uint8_t* cache, uint8_t* block, uint32_t blockNumber, const __m128i k[10]) {
|
||||
void initBlock(const uint8_t* cache, uint8_t* block, uint32_t blockNumber, const KeysContainer& keys) {
|
||||
if (blockNumber % 2 == 1) {
|
||||
initBlock<softAes, true>(cache + blockNumber * CacheBlockSize, block, blockNumber, k);
|
||||
initBlock<softAes, true>(cache + blockNumber * CacheBlockSize, block, blockNumber, keys);
|
||||
}
|
||||
else {
|
||||
initBlock<softAes, false>(cache + blockNumber * CacheBlockSize, block, blockNumber, k);
|
||||
initBlock<softAes, false>(cache + blockNumber * CacheBlockSize, block, blockNumber, keys);
|
||||
}
|
||||
}
|
||||
|
||||
template<bool softAes>
|
||||
convertible_t datasetReadLight(addr_t addr, MemoryRegisters& memory) {
|
||||
convertible_t data;
|
||||
LightClientDataset* lds = memory.ds.lightDataset;
|
||||
auto blockNumber = memory.ma / DatasetBlockSize;
|
||||
if (memory.lcm->blockNumber != blockNumber) {
|
||||
initBlock<softAes>(memory.lcm->cache + CacheShift, (uint8_t*)memory.lcm->block, blockNumber, memory.lcm->keys);
|
||||
memory.lcm->blockNumber = blockNumber;
|
||||
if (lds->blockNumber != blockNumber) {
|
||||
initBlock<softAes>(lds->cache->getCache(), (uint8_t*)lds->block, blockNumber, lds->cache->getKeys());
|
||||
lds->blockNumber = blockNumber;
|
||||
}
|
||||
data.u64 = *(uint64_t*)(memory.lcm->block + (memory.ma % DatasetBlockSize));
|
||||
data.u64 = *(uint64_t*)(lds->block + (memory.ma % DatasetBlockSize));
|
||||
memory.ma += 8;
|
||||
memory.mx ^= addr;
|
||||
if ((memory.mx & 0xFFF8) == 0) {
|
||||
|
@ -256,54 +157,37 @@ namespace RandomX {
|
|||
template
|
||||
convertible_t datasetReadLight<true>(addr_t addr, MemoryRegisters& memory);
|
||||
|
||||
template<bool softAes>
|
||||
void datasetInit(const void* seed, uint8_t*& dataset) {
|
||||
void datasetAlloc(dataset_t& ds) {
|
||||
if (sizeof(size_t) <= 4)
|
||||
throw std::runtime_error("Platform doesn't support enough memory for the dataset");
|
||||
dataset = (uint8_t*)_mm_malloc(DatasetSize, sizeof(__m128i));
|
||||
if (dataset == nullptr) {
|
||||
throw std::runtime_error("Dataset memory allocation failed. >4 GiB of virtual memory is needed.");
|
||||
ds.dataset = (uint8_t*)_mm_malloc(DatasetSize, /*sizeof(__m128i)*/ 64);
|
||||
if (ds.dataset == nullptr) {
|
||||
throw std::runtime_error("Dataset memory allocation failed. >4 GiB of free virtual memory is needed.");
|
||||
}
|
||||
uint8_t* cache = (uint8_t*)_mm_malloc(CacheSize + CacheShift, sizeof(__m128i));
|
||||
if (cache == nullptr) {
|
||||
throw std::bad_alloc();
|
||||
}
|
||||
initializeCache(seed, SeedSize, cache);
|
||||
memcpy(cache + CacheSize, cache, CacheShift);
|
||||
alignas(16) __m128i keys[10];
|
||||
expandAesKeys<softAes>((const __m128i*)seed, keys);
|
||||
for (uint32_t i = 0; i < DatasetBlockCount; ++i) {
|
||||
initBlock<softAes>(cache + CacheShift, dataset + i * DatasetBlockSize, i, keys);
|
||||
}
|
||||
_mm_free(cache);
|
||||
}
|
||||
|
||||
template
|
||||
void datasetInit<false>(const void*, uint8_t*&);
|
||||
|
||||
template
|
||||
void datasetInit<true>(const void*, uint8_t*&);
|
||||
|
||||
template<bool softAes>
|
||||
void datasetInitLight(const void* seed, LightClientMemory*& lcm) {
|
||||
lcm = new LightClientMemory();
|
||||
lcm->cache = (uint8_t*)_mm_malloc(CacheSize + CacheShift, sizeof(__m128i));
|
||||
if (lcm->cache == nullptr) {
|
||||
throw std::bad_alloc();
|
||||
void datasetInit(Cache* cache, dataset_t ds, uint32_t startBlock, uint32_t blockCount) {
|
||||
for (uint32_t i = startBlock; i < startBlock + blockCount; ++i) {
|
||||
initBlock<softAes>(cache->getCache(), ds.dataset + i * DatasetBlockSize, i, cache->getKeys());
|
||||
}
|
||||
initializeCache(seed, SeedSize, lcm->cache);
|
||||
memcpy(lcm->cache + CacheSize, lcm->cache, CacheShift);
|
||||
expandAesKeys<softAes>((__m128i*)seed, lcm->keys);
|
||||
lcm->block = (uint8_t*)_mm_malloc(DatasetBlockSize, sizeof(__m128i));
|
||||
if (lcm->block == nullptr) {
|
||||
throw std::bad_alloc();
|
||||
}
|
||||
lcm->blockNumber = -1;
|
||||
}
|
||||
|
||||
template
|
||||
void datasetInitLight<false>(const void*, LightClientMemory*&);
|
||||
void datasetInit<false>(Cache*, dataset_t, uint32_t, uint32_t);
|
||||
|
||||
template
|
||||
void datasetInitLight<true>(const void*, LightClientMemory*&);
|
||||
void datasetInit<true>(Cache*, dataset_t, uint32_t, uint32_t);
|
||||
|
||||
template<bool softAes>
|
||||
void datasetInitCache(const void* seed, dataset_t& ds) {
|
||||
ds.cache = new Cache();
|
||||
ds.cache->initialize<softAes>(seed, SeedSize);
|
||||
}
|
||||
|
||||
template
|
||||
void datasetInitCache<false>(const void*, dataset_t&);
|
||||
|
||||
template
|
||||
void datasetInitCache<true>(const void*, dataset_t&);
|
||||
}
|
||||
|
|
|
@ -20,43 +20,30 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
|||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <array>
|
||||
#include "intrinPortable.h"
|
||||
#include "argon2.h"
|
||||
#include "common.hpp"
|
||||
#include "softAes.h"
|
||||
|
||||
namespace RandomX {
|
||||
|
||||
static_assert(ArgonMemorySize % (ArgonLanes * ARGON2_SYNC_POINTS) == 0, "ArgonMemorySize - invalid value");
|
||||
|
||||
void initializeCache(const void* input, size_t inputLength, void* memory);
|
||||
|
||||
template<bool soft>
|
||||
void expandAesKeys(const __m128i* seed, __m128i* keys);
|
||||
|
||||
template<bool soft>
|
||||
inline __m128i aesenc(__m128i in, __m128i key) {
|
||||
return soft ? soft_aesenc(in, key) : _mm_aesenc_si128(in, key);
|
||||
}
|
||||
|
||||
template<bool soft>
|
||||
inline __m128i aesdec(__m128i in, __m128i key) {
|
||||
return soft ? soft_aesdec(in, key) : _mm_aesdec_si128(in, key);
|
||||
}
|
||||
using KeysContainer = std::array<__m128i, 10>;
|
||||
|
||||
template<bool soft, bool enc>
|
||||
void initBlock(uint8_t* in, uint8_t* out, uint32_t blockNumber, const __m128i keys[10]);
|
||||
void initBlock(const uint8_t* in, uint8_t* out, uint32_t blockNumber, const KeysContainer& keys);
|
||||
|
||||
template<bool softAes>
|
||||
void initBlock(uint8_t* cache, uint8_t* block, uint32_t blockNumber, const __m128i keys[10]);
|
||||
void initBlock(const uint8_t* cache, uint8_t* block, uint32_t blockNumber, const KeysContainer& keys);
|
||||
|
||||
void datasetAlloc(dataset_t& ds);
|
||||
|
||||
template<bool softAes>
|
||||
void datasetInit(const void* seed, uint8_t*& dataset);
|
||||
void datasetInit(Cache* cache, dataset_t ds, uint32_t startBlock, uint32_t blockCount);
|
||||
|
||||
convertible_t datasetRead(addr_t addr, MemoryRegisters& memory);
|
||||
|
||||
template<bool softAes>
|
||||
void datasetInitLight(const void* seed, LightClientMemory*& lcm);
|
||||
void datasetInitCache(const void* seed, dataset_t& dataset);
|
||||
|
||||
template<bool softAes>
|
||||
convertible_t datasetReadLight(addr_t addr, MemoryRegisters& memory);
|
||||
|
|
173
src/main.cpp
173
src/main.cpp
|
@ -30,6 +30,10 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
|||
#include "Program.hpp"
|
||||
#include <string>
|
||||
#include "instructions.hpp"
|
||||
#include <thread>
|
||||
#include <atomic>
|
||||
#include "dataset.hpp"
|
||||
#include "Cache.hpp"
|
||||
|
||||
const uint8_t seed[32] = { 191, 182, 222, 175, 249, 89, 134, 104, 241, 68, 191, 62, 162, 166, 61, 64, 123, 191, 227, 193, 118, 60, 188, 53, 223, 133, 175, 24, 123, 230, 55, 74 };
|
||||
|
||||
|
@ -45,7 +49,6 @@ void outputHex(std::ostream& os, const char* data, int length) {
|
|||
os << hexmap[(data[i] & 0xF0) >> 4];
|
||||
os << hexmap[data[i] & 0x0F];
|
||||
}
|
||||
os << std::endl;
|
||||
}
|
||||
|
||||
void readOption(const char* option, int argc, char** argv, bool& out) {
|
||||
|
@ -58,6 +61,15 @@ void readOption(const char* option, int argc, char** argv, bool& out) {
|
|||
out = false;
|
||||
}
|
||||
|
||||
// Scans the argument list for `option` and parses the argument that follows it
// as a positive decimal integer.
//   option       - exact option name to look for (compared with strcmp)
//   argc, argv   - argument list to scan; the last argument is never matched as
//                  the option name because no value can follow it
//   out          - receives the parsed value, or defaultValue when no occurrence
//                  of the option is followed by a usable value
//   defaultValue - fallback stored in `out`
// A value is usable when it starts with a decimal number that is greater than
// zero and fits in an int; trailing non-digit characters are ignored, as before.
// If an occurrence has an unusable value, scanning continues with later arguments.
void readIntOption(const char* option, int argc, char** argv, int& out, int defaultValue) {
	for (int i = 0; i < argc - 1; ++i) {
		if (strcmp(argv[i], option) != 0) {
			continue;
		}
		// strtoll instead of atoi: atoi has undefined behavior when the number
		// does not fit in int, which in practice silently wraps huge inputs
		// into small "valid" values. Parse wide, then range-check.
		const long long parsed = strtoll(argv[i + 1], nullptr, 10);
		const int narrowed = static_cast<int>(parsed);
		if (parsed > 0 && narrowed == parsed) {
			out = narrowed;
			return;
		}
	}
	out = defaultValue;
}
|
||||
|
||||
void readInt(int argc, char** argv, int& out, int defaultValue) {
|
||||
for (int i = 0; i < argc; ++i) {
|
||||
if (*argv[i] != '-' && (out = atoi(argv[i])) > 0) {
|
||||
|
@ -75,81 +87,144 @@ std::ostream& operator<<(std::ostream& os, const RandomX::RegisterFile& rf) {
|
|||
return os;
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
bool softAes, lightClient, genAsm, compiled;
|
||||
int programCount;
|
||||
readOption("--softAes", argc, argv, softAes);
|
||||
readOption("--lightClient", argc, argv, lightClient);
|
||||
readOption("--genAsm", argc, argv, genAsm);
|
||||
readOption("--compiled", argc, argv, compiled);
|
||||
readInt(argc, argv, programCount, 1000);
|
||||
class AtomicHash {
|
||||
public:
|
||||
AtomicHash() {
|
||||
for (int i = 0; i < 4; ++i)
|
||||
hash[i].store(0);
|
||||
}
|
||||
void xorWith(uint64_t update[4]) {
|
||||
for (int i = 0; i < 4; ++i)
|
||||
hash[i].fetch_xor(update[i]);
|
||||
}
|
||||
void print(std::ostream& os) {
|
||||
for (int i = 0; i < 4; ++i)
|
||||
print(hash[i], os);
|
||||
os << std::endl;
|
||||
}
|
||||
private:
|
||||
void print(std::atomic<uint64_t>& hash, std::ostream& os) {
|
||||
auto h = hash.load();
|
||||
outputHex(std::cout, (char*)&h, sizeof(h));
|
||||
}
|
||||
std::atomic<uint64_t> hash[4];
|
||||
};
|
||||
|
||||
void mine(RandomX::VirtualMachine* vm, std::atomic<int>& atomicNonce, AtomicHash& result, int noncesCount, int thread) {
|
||||
uint64_t hash[4];
|
||||
unsigned char blockTemplate[] = {
|
||||
0x07, 0x07, 0xf7, 0xa4, 0xf0, 0xd6, 0x05, 0xb3, 0x03, 0x26, 0x08, 0x16, 0xba, 0x3f, 0x10, 0x90, 0x2e, 0x1a, 0x14,
|
||||
0x5a, 0xc5, 0xfa, 0xd3, 0xaa, 0x3a, 0xf6, 0xea, 0x44, 0xc1, 0x18, 0x69, 0xdc, 0x4f, 0x85, 0x3f, 0x00, 0x2b, 0x2e,
|
||||
0xea, 0x00, 0x00, 0x00, 0x00, 0x77, 0xb2, 0x06, 0xa0, 0x2c, 0xa5, 0xb1, 0xd4, 0xce, 0x6b, 0xbf, 0xdf, 0x0a, 0xca,
|
||||
0xc3, 0x8b, 0xde, 0xd3, 0x4d, 0x2d, 0xcd, 0xee, 0xf9, 0x5c, 0xd2, 0x0c, 0xef, 0xc1, 0x2f, 0x61, 0xd5, 0x61, 0x09
|
||||
};
|
||||
int* nonce = (int*)(blockTemplate + 39);
|
||||
uint8_t hash[RandomX::ResultSize];
|
||||
int* noncePtr = (int*)(blockTemplate + 39);
|
||||
int nonce = atomicNonce.fetch_add(1);
|
||||
|
||||
if (genAsm) {
|
||||
*nonce = programCount;
|
||||
while (nonce < noncesCount) {
|
||||
//std::cout << "Thread " << thread << " nonce " << nonce << std::endl;
|
||||
*noncePtr = nonce;
|
||||
blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0);
|
||||
RandomX::AssemblyGeneratorX86 asmX86;
|
||||
asmX86.generateProgram(hash);
|
||||
asmX86.printCode(std::cout);
|
||||
return 0;
|
||||
int spIndex = ((uint8_t*)hash)[24] | ((((uint8_t*)hash)[25] & 63) << 8);
|
||||
vm->initializeScratchpad(spIndex);
|
||||
vm->initializeProgram(hash);
|
||||
vm->execute();
|
||||
vm->getResult(hash);
|
||||
result.xorWith(hash);
|
||||
if (RandomX::trace) {
|
||||
std::cout << "Nonce: " << nonce << " ";
|
||||
outputHex(std::cout, (char*)hash, sizeof(hash));
|
||||
std::cout << std::endl;
|
||||
}
|
||||
nonce = atomicNonce.fetch_add(1);
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
bool softAes, lightClient, genAsm, compiled;
|
||||
int programCount, threadCount;
|
||||
readOption("--softAes", argc, argv, softAes);
|
||||
readOption("--lightClient", argc, argv, lightClient);
|
||||
readOption("--genAsm", argc, argv, genAsm);
|
||||
readOption("--compiled", argc, argv, compiled);
|
||||
readIntOption("--threads", argc, argv, threadCount, 1);
|
||||
readIntOption("--nonces", argc, argv, programCount, 1000);
|
||||
|
||||
std::atomic<int> atomicNonce(0);
|
||||
AtomicHash result;
|
||||
std::vector<RandomX::VirtualMachine*> vms;
|
||||
std::vector<std::thread> threads;
|
||||
RandomX::dataset_t dataset;
|
||||
|
||||
if (softAes)
|
||||
std::cout << "Using software AES." << std::endl;
|
||||
|
||||
char cumulative[RandomX::ResultSize] = { 0 };
|
||||
|
||||
RandomX::VirtualMachine* vm;
|
||||
std::cout << "Initializing..." << std::endl;
|
||||
|
||||
try {
|
||||
Stopwatch sw(true);
|
||||
if (softAes) {
|
||||
RandomX::datasetInitCache<true>(seed, dataset);
|
||||
}
|
||||
else {
|
||||
RandomX::datasetInitCache<false>(seed, dataset);
|
||||
}
|
||||
if (RandomX::trace) {
|
||||
std::cout << "Keys: " << std::endl;
|
||||
for (int i = 0; i < dataset.cache->getKeys().size(); ++i) {
|
||||
outputHex(std::cout, (char*)&dataset.cache->getKeys()[i], sizeof(__m128i));
|
||||
}
|
||||
std::cout << std::endl;
|
||||
std::cout << "Cache: " << std::endl;
|
||||
outputHex(std::cout, (char*)dataset.cache->getCache(), sizeof(__m128i));
|
||||
std::cout << std::endl;
|
||||
}
|
||||
if (lightClient) {
|
||||
std::cout << "Cache (64 MiB) initialized in " << sw.getElapsed() << " s" << std::endl;
|
||||
}
|
||||
else {
|
||||
RandomX::Cache* cache = dataset.cache;
|
||||
RandomX::datasetAlloc(dataset);
|
||||
auto perThread = RandomX::DatasetBlockCount / threadCount;
|
||||
auto remainder = RandomX::DatasetBlockCount % threadCount;
|
||||
for (int i = 0; i < threadCount; ++i) {
|
||||
auto count = perThread + (i == threadCount - 1 ? remainder : 0);
|
||||
if (softAes) {
|
||||
threads.push_back(std::thread(&RandomX::datasetInit<true>, cache, dataset, i * perThread, count));
|
||||
}
|
||||
else {
|
||||
threads.push_back(std::thread(&RandomX::datasetInit<false>, cache, dataset, i * perThread, count));
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < threads.size(); ++i) {
|
||||
threads[i].join();
|
||||
}
|
||||
delete cache;
|
||||
threads.clear();
|
||||
std::cout << "Dataset (4 GiB) initialized in " << sw.getElapsed() << " s" << std::endl;
|
||||
}
|
||||
std::cout << "Initializing " << threadCount << " virtual machine(s)..." << std::endl;
|
||||
for (int i = 0; i < threadCount; ++i) {
|
||||
RandomX::VirtualMachine* vm;
|
||||
if (compiled) {
|
||||
vm = new RandomX::CompiledVirtualMachine(softAes);
|
||||
}
|
||||
else {
|
||||
vm = new RandomX::InterpretedVirtualMachine(softAes);
|
||||
}
|
||||
std::cout << "Initializing..." << std::endl;
|
||||
Stopwatch sw(true);
|
||||
vm->initializeDataset(seed, lightClient);
|
||||
if(lightClient)
|
||||
std::cout << "Cache (64 MiB) initialized in " << sw.getElapsed() << " s" << std::endl;
|
||||
else
|
||||
std::cout << "Dataset (4 GiB) initialized in " << sw.getElapsed() << " s" << std::endl;
|
||||
vm->setDataset(dataset, lightClient);
|
||||
vms.push_back(vm);
|
||||
}
|
||||
std::cout << "Running benchmark (" << programCount << " programs) ..." << std::endl;
|
||||
sw.restart();
|
||||
for (int i = 0; i < programCount; ++i) {
|
||||
*nonce = i;
|
||||
if (RandomX::trace) std::cout << "Nonce: " << i << " ";
|
||||
blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0);
|
||||
int spIndex = hash[24] | ((hash[25] & 63) << 8);
|
||||
vm->initializeScratchpad(spIndex);
|
||||
//dump((const char *)vm.getScratchpad(), RandomX::ScratchpadSize, "scratchpad-before.txt");
|
||||
//return 0;
|
||||
vm->initializeProgram(hash);
|
||||
vm->execute();
|
||||
/*std::string fileName("scratchpad-after-");
|
||||
fileName = fileName + std::to_string(i) + ".txt";
|
||||
dump((const char *)vm.getScratchpad(), RandomX::ScratchpadSize, fileName.c_str());*/
|
||||
vm->getResult(hash);
|
||||
if (RandomX::trace) {
|
||||
outputHex(std::cout, (char*)hash, sizeof(hash));
|
||||
for (int i = 0; i < vms.size(); ++i) {
|
||||
threads.push_back(std::thread(&mine, vms[i], std::ref(atomicNonce), std::ref(result), programCount, i));
|
||||
}
|
||||
((uint64_t*)cumulative)[0] ^= ((uint64_t*)hash)[0];
|
||||
((uint64_t*)cumulative)[1] ^= ((uint64_t*)hash)[1];
|
||||
((uint64_t*)cumulative)[2] ^= ((uint64_t*)hash)[2];
|
||||
((uint64_t*)cumulative)[3] ^= ((uint64_t*)hash)[3];
|
||||
for (int i = 0; i < threads.size(); ++i) {
|
||||
threads[i].join();
|
||||
}
|
||||
double elapsed = sw.getElapsed();
|
||||
std::cout << "Calculated result: ";
|
||||
outputHex(std::cout, cumulative, sizeof(cumulative));
|
||||
result.print(std::cout);
|
||||
if(programCount == 1000)
|
||||
std::cout << "Reference result: d62ed85c39030cd2c5704fca3a23019f1244f2b03447c9a6b39dea5390ed1d10" << std::endl;
|
||||
std::cout << "Performance: " << programCount / elapsed << " programs per second" << std::endl;
|
||||
|
|
Loading…
Reference in a new issue