Support for multiple threads

This commit is contained in:
tevador 2018-12-19 21:54:44 +01:00
parent cb12feaf91
commit b9d2d853aa
13 changed files with 436 additions and 304 deletions

View file

@ -9,10 +9,9 @@ endif
BINDIR=bin BINDIR=bin
SRCDIR=src SRCDIR=src
OBJDIR=obj OBJDIR=obj
LDFLAGS= LDFLAGS=-lpthread
TOBJS=$(addprefix $(OBJDIR)/,instructionsPortable.o TestAluFpu.o) TOBJS=$(addprefix $(OBJDIR)/,instructionsPortable.o TestAluFpu.o)
ROBJS=$(addprefix $(OBJDIR)/,argon2_core.o argon2_ref.o AssemblyGeneratorX86.o blake2b.o CompiledVirtualMachine.o dataset.o JitCompilerX86.o instructionsPortable.o Instruction.o InterpretedVirtualMachine.o main.o Program.o softAes.o VirtualMachine.o t1ha2.o) ROBJS=$(addprefix $(OBJDIR)/,argon2_core.o argon2_ref.o AssemblyGeneratorX86.o blake2b.o CompiledVirtualMachine.o dataset.o JitCompilerX86.o instructionsPortable.o Instruction.o InterpretedVirtualMachine.o main.o Program.o softAes.o VirtualMachine.o t1ha2.o Cache.o)
SRC1=$(addprefix $(SRCDIR)/,TestAluFpu.cpp instructions.hpp Pcg32.hpp)
all: release test all: release test
@ -52,7 +51,7 @@ $(OBJDIR)/blake2b.o: $(addprefix $(SRCDIR)/blake2/,blake2b.c blake2.h blake2-imp
$(OBJDIR)/CompiledVirtualMachine.o: $(addprefix $(SRCDIR)/,CompiledVirtualMachine.cpp CompiledVirtualMachine.hpp Pcg32.hpp common.hpp instructions.hpp) | $(OBJDIR) $(OBJDIR)/CompiledVirtualMachine.o: $(addprefix $(SRCDIR)/,CompiledVirtualMachine.cpp CompiledVirtualMachine.hpp Pcg32.hpp common.hpp instructions.hpp) | $(OBJDIR)
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/CompiledVirtualMachine.cpp -o $@ $(CXX) $(CXXFLAGS) -c $(SRCDIR)/CompiledVirtualMachine.cpp -o $@
$(OBJDIR)/dataset.o: $(addprefix $(SRCDIR)/,dataset.cpp common.hpp Pcg32.hpp argon2_core.h) | $(OBJDIR) $(OBJDIR)/dataset.o: $(addprefix $(SRCDIR)/,dataset.cpp common.hpp Pcg32.hpp) | $(OBJDIR)
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/dataset.cpp -o $@ $(CXX) $(CXXFLAGS) -c $(SRCDIR)/dataset.cpp -o $@
$(OBJDIR)/JitCompilerX86.o: $(addprefix $(SRCDIR)/,JitCompilerX86.cpp JitCompilerX86.hpp Instruction.hpp) | $(OBJDIR) $(OBJDIR)/JitCompilerX86.o: $(addprefix $(SRCDIR)/,JitCompilerX86.cpp JitCompilerX86.hpp Instruction.hpp) | $(OBJDIR)
@ -73,6 +72,9 @@ $(OBJDIR)/main.o: $(addprefix $(SRCDIR)/,main.cpp InterpretedVirtualMachine.hpp
$(OBJDIR)/Program.o: $(addprefix $(SRCDIR)/,Program.cpp Program.hpp Pcg32.hpp) | $(OBJDIR) $(OBJDIR)/Program.o: $(addprefix $(SRCDIR)/,Program.cpp Program.hpp Pcg32.hpp) | $(OBJDIR)
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/Program.cpp -o $@ $(CXX) $(CXXFLAGS) -c $(SRCDIR)/Program.cpp -o $@
# Cache.o must be rebuilt when any header that Cache.cpp / Cache.hpp pull in changes;
# common.hpp and softAes.h are included by them, so they are listed as prerequisites too.
$(OBJDIR)/Cache.o: $(addprefix $(SRCDIR)/,Cache.cpp Cache.hpp Pcg32.hpp argon2_core.h common.hpp softAes.h) | $(OBJDIR)
	$(CXX) $(CXXFLAGS) -c $(SRCDIR)/Cache.cpp -o $@
$(OBJDIR)/softAes.o: $(addprefix $(SRCDIR)/,softAes.cpp softAes.h) | $(OBJDIR) $(OBJDIR)/softAes.o: $(addprefix $(SRCDIR)/,softAes.cpp softAes.h) | $(OBJDIR)
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/softAes.cpp -o $@ $(CXX) $(CXXFLAGS) -c $(SRCDIR)/softAes.cpp -o $@

147
src/Cache.cpp Normal file
View file

@ -0,0 +1,147 @@
/*
Copyright (c) 2018 tevador
This file is part of RandomX.
RandomX is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
RandomX is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with RandomX. If not, see <http://www.gnu.org/licenses/>.
*/
#include <cstring>
#include <stdexcept>
#include "Cache.hpp"
#include "softAes.h"
#include "argon2.h"
#include "Pcg32.hpp"
#include "argon2_core.h"
namespace RandomX {
static_assert(ArgonMemorySize % (ArgonLanes * ARGON2_SYNC_POINTS) == 0, "ArgonMemorySize - invalid value");
// Prefix-XOR over the four 32-bit lanes of a 128-bit value:
//   sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1)
// where a1 is the lowest lane. Each step moves the running copy up by one
// lane (4 bytes) and folds it into the accumulator.
static inline __m128i sl_xor(__m128i tmp1) {
	__m128i shifted = tmp1;
	__m128i acc = tmp1;
	for (int step = 0; step < 3; ++step) {
		shifted = _mm_slli_si128(shifted, 4);
		acc = _mm_xor_si128(acc, shifted);
	}
	return acc;
}
// Advances the pair of round keys (*xout0, *xout2) in place by one step.
//  rcon - round constant passed to the key-generation-assist for the first half
//  soft - true selects the software soft_aeskeygenassist, false the hardware intrinsic
// First *xout0 is updated from an assist value derived from *xout2 (lane 3 broadcast),
// then *xout2 is updated from an assist value derived from the NEW *xout0
// (round constant 0, lane 2 broadcast).
template<uint8_t rcon, bool soft>
static inline void aesGenKeys(__m128i* xout0, __m128i* xout2) {
	const __m128i assistFromHigh = soft
		? soft_aeskeygenassist(*xout2, rcon)
		: _mm_aeskeygenassist_si128(*xout2, rcon);
	*xout0 = _mm_xor_si128(sl_xor(*xout0), _mm_shuffle_epi32(assistFromHigh, 0xFF));

	// Must use the freshly updated *xout0 here.
	const __m128i assistFromLow = soft
		? soft_aeskeygenassist(*xout0, 0x00)
		: _mm_aeskeygenassist_si128(*xout0, 0x00);
	*xout2 = _mm_xor_si128(sl_xor(*xout2), _mm_shuffle_epi32(assistFromLow, 0xAA));
}
// Expands a 32-byte seed into ten 128-bit round keys.
//  seed - points to two consecutive 128-bit words (32 bytes); no alignment is required
//  keys - output, must have room for 10 __m128i values
//  soft - true selects the software AES helpers, false the hardware intrinsics
// keys[0..1] are the seed words themselves; every following pair is produced by
// one aesGenKeys step with round constants 0x01, 0x02, 0x04 and 0x08.
template<bool soft>
static inline void expandAesKeys(const __m128i* seed, __m128i* keys) {
	// Unaligned loads: the seed reaches this function through a plain byte
	// pointer, so a 16-byte alignment cannot be assumed.
	__m128i xout0 = _mm_loadu_si128(seed);
	__m128i xout2 = _mm_loadu_si128(seed + 1);
	keys[0] = xout0;
	keys[1] = xout2;
	aesGenKeys<0x01, soft>(&xout0, &xout2);
	keys[2] = xout0;
	keys[3] = xout2;
	aesGenKeys<0x02, soft>(&xout0, &xout2);
	keys[4] = xout0;
	keys[5] = xout2;
	aesGenKeys<0x04, soft>(&xout0, &xout2);
	keys[6] = xout0;
	keys[7] = xout2;
	aesGenKeys<0x08, soft>(&xout0, &xout2);
	keys[8] = xout0;
	keys[9] = xout2;
}
void Cache::argonFill(const void* seed, size_t seedSize) {
uint32_t memory_blocks, segment_length;
argon2_instance_t instance;
argon2_context context;
context.out = nullptr;
context.outlen = 0;
context.pwd = CONST_CAST(uint8_t *)seed;
context.pwdlen = (uint32_t)seedSize;
context.salt = CONST_CAST(uint8_t *)ArgonSalt;
context.saltlen = (uint32_t)ArgonSaltSize;
context.secret = NULL;
context.secretlen = 0;
context.ad = NULL;
context.adlen = 0;
context.t_cost = ArgonIterations;
context.m_cost = ArgonMemorySize;
context.lanes = ArgonLanes;
context.threads = 1;
context.allocate_cbk = NULL;
context.free_cbk = NULL;
context.flags = ARGON2_DEFAULT_FLAGS;
context.version = ARGON2_VERSION_NUMBER;
/* 2. Align memory size */
/* Minimum memory_blocks = 8L blocks, where L is the number of lanes */
memory_blocks = context.m_cost;
segment_length = memory_blocks / (context.lanes * ARGON2_SYNC_POINTS);
instance.version = context.version;
instance.memory = NULL;
instance.passes = context.t_cost;
instance.memory_blocks = memory_blocks;
instance.segment_length = segment_length;
instance.lane_length = segment_length * ARGON2_SYNC_POINTS;
instance.lanes = context.lanes;
instance.threads = context.threads;
instance.type = Argon2_d;
instance.memory = (block*)memory;
if (instance.threads > instance.lanes) {
instance.threads = instance.lanes;
}
/* 3. Initialization: Hashing inputs, allocating memory, filling first
* blocks
*/
argon_initialize(&instance, &context);
fill_memory_blocks(&instance);
}
template<bool softAes>
void Cache::initialize(const void* seed, size_t seedSize) {
//Argon2d memory fill
argonFill(seed, seedSize);
//Circular shift of the cache buffer by 512 bytes
//realized by copying the first 512 bytes to the back
//of the buffer and shifting the start by 512 bytes
memcpy(memory + CacheSize, memory, CacheShift);
//AES keys
expandAesKeys<softAes>((__m128i*)seed, keys.data());
}
template void Cache::initialize<true>(const void*, size_t);
template void Cache::initialize<false>(const void*, size_t);
}

57
src/Cache.hpp Normal file
View file

@ -0,0 +1,57 @@
/*
Copyright (c) 2018 tevador
This file is part of RandomX.
RandomX is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
RandomX is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with RandomX. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <cstdint>
#include <new>
#include "common.hpp"
#include "dataset.hpp"
namespace RandomX {
// Holds the seed-derived cache buffer together with the expanded AES keys.
// Objects created with `new` are allocated through _mm_malloc with
// sizeof(__m128i) alignment so that the alignas(16) key storage is honoured.
class Cache {
public:
	// Aligned allocation; throws std::bad_alloc when _mm_malloc fails.
	void* operator new(size_t size) {
		void* ptr = _mm_malloc(size, sizeof(__m128i));
		if (ptr == nullptr)
			throw std::bad_alloc();
		return ptr;
	}

	// Counterpart of operator new above: memory from _mm_malloc goes back via _mm_free.
	void operator delete(void* ptr) {
		_mm_free(ptr);
	}

	// Fills the cache buffer and expands the AES keys from the seed.
	// softAes selects the software (true) or hardware (false) AES key expansion.
	template<bool softAes>
	void initialize(const void* seed, size_t seedSize);

	// Read-only access to the expanded AES keys.
	const KeysContainer& getKeys() const {
		return keys;
	}

	// Read-only view of the cache, starting CacheShift bytes into the buffer.
	// Declared const, like getKeys(), since it does not modify the object.
	const uint8_t* getCache() const {
		return memory + CacheShift;
	}
private:
	alignas(16) KeysContainer keys;
	// CacheShift extra bytes at the end hold a copy of the buffer's first CacheShift bytes.
	uint8_t memory[CacheSize + CacheShift];
	void argonFill(const void* seed, size_t seedSize);
};
}

View file

@ -31,11 +31,11 @@ namespace RandomX {
#endif #endif
} }
void CompiledVirtualMachine::initializeDataset(const void* seed, bool lightClient) { void CompiledVirtualMachine::setDataset(dataset_t ds, bool lightClient) {
if (lightClient) { if (lightClient) {
throw std::runtime_error("Compiled VM does not support light-client mode"); throw std::runtime_error("Compiled VM does not support light-client mode");
} }
VirtualMachine::initializeDataset(seed, lightClient); VirtualMachine::setDataset(ds, lightClient);
} }
void CompiledVirtualMachine::initializeProgram(const void* seed) { void CompiledVirtualMachine::initializeProgram(const void* seed) {

View file

@ -27,7 +27,7 @@ namespace RandomX {
class CompiledVirtualMachine : public VirtualMachine { class CompiledVirtualMachine : public VirtualMachine {
public: public:
CompiledVirtualMachine(bool softAes); CompiledVirtualMachine(bool softAes);
void initializeDataset(const void* seed, bool light = false) override; void setDataset(dataset_t ds, bool light = false) override;
void initializeProgram(const void* seed) override; void initializeProgram(const void* seed) override;
virtual void execute() override; virtual void execute() override;
void* getProgram() { void* getProgram() {

View file

@ -20,58 +20,65 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
#include "VirtualMachine.hpp" #include "VirtualMachine.hpp"
#include "common.hpp" #include "common.hpp"
#include "dataset.hpp" #include "dataset.hpp"
#include "Cache.hpp"
#include "t1ha/t1ha.h" #include "t1ha/t1ha.h"
#include "blake2/blake2.h" #include "blake2/blake2.h"
#include <cstring> #include <cstring>
namespace RandomX { namespace RandomX {
VirtualMachine::VirtualMachine(bool softAes) : softAes(softAes), lightClient(false) { VirtualMachine::VirtualMachine(bool softAes) : softAes(softAes), lightClient(false) {
mem.dataset = nullptr; mem.ds.dataset = nullptr;
} }
void VirtualMachine::initializeDataset(const void* seed, bool light) { VirtualMachine::~VirtualMachine() {
if (lightClient) { if (lightClient) {
_mm_free(mem.lcm->cache); delete mem.ds.lightDataset->block;
_mm_free(mem.lcm->block); delete mem.ds.lightDataset;
}
}
void VirtualMachine::setDataset(dataset_t ds, bool light) {
if (mem.ds.dataset != nullptr) {
throw std::runtime_error("Dataset is already initialized");
} }
_mm_free(mem.dataset);
lightClient = light; lightClient = light;
if (light) { if (light) {
auto lds = mem.ds.lightDataset = new LightClientDataset();
lds->cache = ds.cache;
lds->block = (uint8_t*)_mm_malloc(DatasetBlockSize, sizeof(__m128i));
lds->blockNumber = -1;
if (lds->block == nullptr) {
throw std::bad_alloc();
}
if (softAes) { if (softAes) {
datasetInitLight<true>(seed, mem.lcm);
readDataset = &datasetReadLight<true>; readDataset = &datasetReadLight<true>;
} }
else { else {
datasetInitLight<false>(seed, mem.lcm);
readDataset = &datasetReadLight<false>; readDataset = &datasetReadLight<false>;
} }
} }
else { else {
mem.ds = ds;
readDataset = &datasetRead; readDataset = &datasetRead;
if (softAes) {
datasetInit<true>(seed, mem.dataset);
}
else {
datasetInit<false>(seed, mem.dataset);
}
} }
} }
void VirtualMachine::initializeScratchpad(uint32_t index) { void VirtualMachine::initializeScratchpad(uint32_t index) {
if (lightClient) { if (lightClient) {
auto cache = mem.ds.lightDataset->cache;
if (softAes) { if (softAes) {
for (int i = 0; i < ScratchpadSize / DatasetBlockSize; ++i) { for (int i = 0; i < ScratchpadSize / DatasetBlockSize; ++i) {
initBlock<true>(mem.lcm->cache + CacheShift, ((uint8_t*)scratchpad) + DatasetBlockSize * i, (ScratchpadSize / DatasetBlockSize) * index + i, mem.lcm->keys); initBlock<true>(cache->getCache(), ((uint8_t*)scratchpad) + DatasetBlockSize * i, (ScratchpadSize / DatasetBlockSize) * index + i, cache->getKeys());
} }
} }
else { else {
for (int i = 0; i < ScratchpadSize / DatasetBlockSize; ++i) { for (int i = 0; i < ScratchpadSize / DatasetBlockSize; ++i) {
initBlock<false>(mem.lcm->cache + CacheShift, ((uint8_t*)scratchpad) + DatasetBlockSize * i, (ScratchpadSize / DatasetBlockSize) * index + i, mem.lcm->keys); initBlock<false>(cache->getCache(), ((uint8_t*)scratchpad) + DatasetBlockSize * i, (ScratchpadSize / DatasetBlockSize) * index + i, cache->getKeys());
} }
} }
} }
else { else {
memcpy(scratchpad, mem.dataset + ScratchpadSize * index, ScratchpadSize); memcpy(scratchpad, mem.ds.dataset + ScratchpadSize * index, ScratchpadSize);
} }
} }

View file

@ -26,30 +26,12 @@ namespace RandomX {
class VirtualMachine { class VirtualMachine {
public: public:
VirtualMachine(bool softAes); VirtualMachine(bool softAes);
virtual ~VirtualMachine() {} virtual ~VirtualMachine();
virtual void initializeDataset(const void* seed, bool light = false); virtual void setDataset(dataset_t ds, bool light = false);
void initializeScratchpad(uint32_t index); void initializeScratchpad(uint32_t index);
virtual void initializeProgram(const void* seed) = 0; virtual void initializeProgram(const void* seed) = 0;
virtual void execute() = 0; virtual void execute() = 0;
void getResult(void*); void getResult(void*);
const RegisterFile& getRegisterFile() const {
return reg;
}
const convertible_t* getScratchpad() const {
return scratchpad;
}
const void* getCache() {
if (lightClient) {
return mem.lcm->cache;
}
return nullptr;
}
const __m128i* getKeys() {
if (lightClient) {
return mem.lcm->keys;
}
return nullptr;
}
protected: protected:
bool softAes, lightClient; bool softAes, lightClient;
RegisterFile reg; RegisterFile reg;

View file

@ -473,7 +473,7 @@ void initial_hash(uint8_t *blockhash, argon2_context *context, argon2_type type)
blake2b_final(&BlakeHash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH); blake2b_final(&BlakeHash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH);
} }
int initialize(argon2_instance_t *instance, argon2_context *context) { int argon_initialize(argon2_instance_t *instance, argon2_context *context) {
uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH]; uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH];
int result = ARGON2_OK; int result = ARGON2_OK;

View file

@ -204,7 +204,7 @@ void fill_first_blocks(uint8_t *blockhash, const argon2_instance_t *instance);
* @return Zero if successful, -1 if memory failed to allocate. @context->state * @return Zero if successful, -1 if memory failed to allocate. @context->state
* will be modified if successful. * will be modified if successful.
*/ */
int initialize(argon2_instance_t *instance, argon2_context *context); int argon_initialize(argon2_instance_t *instance, argon2_context *context);
/* /*
* XORing the last block of each lane, hashing it, making the tag. Deallocates * XORing the last block of each lane, hashing it, making the tag. Deallocates

View file

@ -20,8 +20,6 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
#pragma once #pragma once
#include <cstdint> #include <cstdint>
#include <new>
#include "intrinPortable.h"
namespace RandomX { namespace RandomX {
@ -55,13 +53,13 @@ namespace RandomX {
constexpr bool trace = false; constexpr bool trace = false;
#endif #endif
typedef union { union convertible_t {
double f64; double f64;
int64_t i64; int64_t i64;
uint64_t u64; uint64_t u64;
int32_t i32; int32_t i32;
uint32_t u32; uint32_t u32;
} convertible_t; };
constexpr int ProgramLength = 512; constexpr int ProgramLength = 512;
constexpr int InstructionCount = 1024 * 1024; constexpr int InstructionCount = 1024 * 1024;
@ -71,34 +69,27 @@ namespace RandomX {
constexpr uint32_t ScratchpadL2 = ScratchpadSize / sizeof(convertible_t); constexpr uint32_t ScratchpadL2 = ScratchpadSize / sizeof(convertible_t);
constexpr int RegistersCount = 8; constexpr int RegistersCount = 8;
class Cache;
inline int wrapInstr(int i) { inline int wrapInstr(int i) {
return i % RandomX::ProgramLength; return i % RandomX::ProgramLength;
} }
struct LightClientMemory { struct LightClientDataset {
uint8_t* cache; Cache* cache;
uint8_t* block; uint8_t* block;
uint32_t blockNumber; uint32_t blockNumber;
alignas(16) __m128i keys[10]; };
void* operator new(size_t size) { union dataset_t {
void* ptr = _mm_malloc(size, sizeof(__m128i)); uint8_t* dataset;
if (ptr == nullptr) Cache* cache;
throw std::bad_alloc(); LightClientDataset* lightDataset;
return ptr;
}
void operator delete(void* ptr) {
_mm_free(ptr);
}
}; };
struct MemoryRegisters { struct MemoryRegisters {
addr_t ma, mx; addr_t ma, mx;
union { dataset_t ds;
uint8_t* dataset;
LightClientMemory* lcm;
};
}; };
static_assert(sizeof(MemoryRegisters) == 2 * sizeof(addr_t) + sizeof(uintptr_t), "Invalid alignment of struct RandomX::MemoryRegisters"); static_assert(sizeof(MemoryRegisters) == 2 * sizeof(addr_t) + sizeof(uintptr_t), "Invalid alignment of struct RandomX::MemoryRegisters");

View file

@ -19,135 +19,25 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
// Parts of this file are originally copyright (c) xmr-stak // Parts of this file are originally copyright (c) xmr-stak
#include "common.hpp"
#include "dataset.hpp"
#include "Pcg32.hpp"
#include "argon2_core.h"
#include <new> #include <new>
#include <algorithm> #include <algorithm>
#include <stdexcept> #include <stdexcept>
#include <cstring> #include <cstring>
#if defined(_MSC_VER) #include "common.hpp"
#if defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP == 2) #include "dataset.hpp"
#define __SSE2__ 1 #include "Pcg32.hpp"
#endif #include "Cache.hpp"
#endif
#if defined(__SSE2__) #if defined(__SSE2__)
#include <wmmintrin.h> #include <wmmintrin.h>
#define PREFETCH(memory) _mm_prefetch((const char *)((memory).dataset + (memory).ma), _MM_HINT_T0) #define PREFETCH(memory) _mm_prefetch((const char *)((memory).ds.dataset + (memory).ma), _MM_HINT_T0)
#else #else
#define PREFETCH(memory) #define PREFETCH(memory)
#endif #endif
namespace RandomX { namespace RandomX {
void initializeCache(const void* input, size_t inputLength, void* memory) {
uint32_t memory_blocks, segment_length;
argon2_instance_t instance;
argon2_context context;
context.out = nullptr;
context.outlen = 0;
context.pwd = CONST_CAST(uint8_t *)input;
context.pwdlen = (uint32_t)inputLength;
context.salt = CONST_CAST(uint8_t *)ArgonSalt;
context.saltlen = (uint32_t)ArgonSaltSize;
context.secret = NULL;
context.secretlen = 0;
context.ad = NULL;
context.adlen = 0;
context.t_cost = ArgonIterations;
context.m_cost = ArgonMemorySize;
context.lanes = ArgonLanes;
context.threads = 1;
context.allocate_cbk = NULL;
context.free_cbk = NULL;
context.flags = ARGON2_DEFAULT_FLAGS;
context.version = ARGON2_VERSION_NUMBER;
/* 2. Align memory size */
/* Minimum memory_blocks = 8L blocks, where L is the number of lanes */
memory_blocks = context.m_cost;
segment_length = memory_blocks / (context.lanes * ARGON2_SYNC_POINTS);
instance.version = context.version;
instance.memory = NULL;
instance.passes = context.t_cost;
instance.memory_blocks = memory_blocks;
instance.segment_length = segment_length;
instance.lane_length = segment_length * ARGON2_SYNC_POINTS;
instance.lanes = context.lanes;
instance.threads = context.threads;
instance.type = Argon2_d;
instance.memory = (block*)memory;
if (instance.threads > instance.lanes) {
instance.threads = instance.lanes;
}
/* 3. Initialization: Hashing inputs, allocating memory, filling first
* blocks
*/
initialize(&instance, &context);
fill_memory_blocks(&instance);
}
// This will shift and xor tmp1 into itself as 4 32-bit vals such as
// sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1)
static inline __m128i sl_xor(__m128i tmp1) {
__m128i tmp4;
tmp4 = _mm_slli_si128(tmp1, 0x04);
tmp1 = _mm_xor_si128(tmp1, tmp4);
tmp4 = _mm_slli_si128(tmp4, 0x04);
tmp1 = _mm_xor_si128(tmp1, tmp4);
tmp4 = _mm_slli_si128(tmp4, 0x04);
tmp1 = _mm_xor_si128(tmp1, tmp4);
return tmp1;
}
template<uint8_t rcon, bool soft>
static inline void aes_genkey_sub(__m128i* xout0, __m128i* xout2) {
__m128i xout1 = soft ? soft_aeskeygenassist(*xout2, rcon) : _mm_aeskeygenassist_si128(*xout2, rcon);
xout1 = _mm_shuffle_epi32(xout1, 0xFF);
*xout0 = sl_xor(*xout0);
*xout0 = _mm_xor_si128(*xout0, xout1);
xout1 = soft ? soft_aeskeygenassist(*xout0, 0x00) : _mm_aeskeygenassist_si128(*xout0, 0x00);
xout1 = _mm_shuffle_epi32(xout1, 0xAA);
*xout2 = sl_xor(*xout2);
*xout2 = _mm_xor_si128(*xout2, xout1);
}
template<bool soft>
void expandAesKeys(const __m128i* seed, __m128i* keys) {
__m128i xout0, xout2;
xout0 = _mm_load_si128(seed);
xout2 = _mm_load_si128(seed + 1);
*keys++ = xout0;
*keys++ = xout2;
aes_genkey_sub<0x01, soft>(&xout0, &xout2);
*keys++ = xout0;
*keys++ = xout2;
aes_genkey_sub<0x02, soft>(&xout0, &xout2);
*keys++ = xout0;
*keys++ = xout2;
aes_genkey_sub<0x04, soft>(&xout0, &xout2);
*keys++ = xout0;
*keys++ = xout2;
aes_genkey_sub<0x08, soft>(&xout0, &xout2);
*keys++ = xout0;
*keys++ = xout2;
}
template
void expandAesKeys<true>(const __m128i* seed, __m128i* keys);
template
void expandAesKeys<false>(const __m128i* seed, __m128i* keys);
template<typename T> template<typename T>
static inline void shuffle(T* buffer, size_t bytes, Pcg32& gen) { static inline void shuffle(T* buffer, size_t bytes, Pcg32& gen) {
auto count = bytes / sizeof(T); auto count = bytes / sizeof(T);
@ -157,8 +47,18 @@ namespace RandomX {
} }
} }
// One AES encryption round of `in` with round key `key`.
// soft == true uses the software soft_aesenc, otherwise the hardware intrinsic.
template<bool soft>
static inline __m128i aesenc(__m128i in, __m128i key) {
	if (soft) {
		return soft_aesenc(in, key);
	}
	return _mm_aesenc_si128(in, key);
}
// One AES decryption round of `in` with round key `key`.
// soft == true uses the software soft_aesdec, otherwise the hardware intrinsic.
template<bool soft>
static inline __m128i aesdec(__m128i in, __m128i key) {
	if (soft) {
		return soft_aesdec(in, key);
	}
	return _mm_aesdec_si128(in, key);
}
template<bool soft, bool enc> template<bool soft, bool enc>
void initBlock(uint8_t* in, uint8_t* out, uint32_t blockNumber, const __m128i keys[10]) { void initBlock(const uint8_t* in, uint8_t* out, uint32_t blockNumber, const KeysContainer& keys) {
__m128i xin, xout; __m128i xin, xout;
//Initialization vector = block number extended to 128 bits //Initialization vector = block number extended to 128 bits
xout = _mm_cvtsi32_si128(blockNumber); xout = _mm_cvtsi32_si128(blockNumber);
@ -200,20 +100,20 @@ namespace RandomX {
} }
template template
void initBlock<true, true>(uint8_t* in, uint8_t* out, uint32_t blockNumber, const __m128i keys[10]); void initBlock<true, true>(const uint8_t*, uint8_t*, uint32_t, const KeysContainer&);
template template
void initBlock<true, false>(uint8_t* in, uint8_t* out, uint32_t blockNumber, const __m128i keys[10]); void initBlock<true, false>(const uint8_t*, uint8_t*, uint32_t, const KeysContainer&);
template template
void initBlock<false, true>(uint8_t* in, uint8_t* out, uint32_t blockNumber, const __m128i keys[10]); void initBlock<false, true>(const uint8_t*, uint8_t*, uint32_t, const KeysContainer&);
template template
void initBlock<false, false>(uint8_t* in, uint8_t* out, uint32_t blockNumber, const __m128i keys[10]); void initBlock<false, false>(const uint8_t*, uint8_t*, uint32_t, const KeysContainer&);
convertible_t datasetRead(addr_t addr, MemoryRegisters& memory) { convertible_t datasetRead(addr_t addr, MemoryRegisters& memory) {
convertible_t data; convertible_t data;
data.u64 = *(uint64_t*)(memory.dataset + memory.ma); data.u64 = *(uint64_t*)(memory.ds.dataset + memory.ma);
memory.ma += 8; memory.ma += 8;
memory.mx ^= addr; memory.mx ^= addr;
if ((memory.mx & 0xFFF8) == 0) { if ((memory.mx & 0xFFF8) == 0) {
@ -224,24 +124,25 @@ namespace RandomX {
} }
template<bool softAes> template<bool softAes>
void initBlock(uint8_t* cache, uint8_t* block, uint32_t blockNumber, const __m128i k[10]) { void initBlock(const uint8_t* cache, uint8_t* block, uint32_t blockNumber, const KeysContainer& keys) {
if (blockNumber % 2 == 1) { if (blockNumber % 2 == 1) {
initBlock<softAes, true>(cache + blockNumber * CacheBlockSize, block, blockNumber, k); initBlock<softAes, true>(cache + blockNumber * CacheBlockSize, block, blockNumber, keys);
} }
else { else {
initBlock<softAes, false>(cache + blockNumber * CacheBlockSize, block, blockNumber, k); initBlock<softAes, false>(cache + blockNumber * CacheBlockSize, block, blockNumber, keys);
} }
} }
template<bool softAes> template<bool softAes>
convertible_t datasetReadLight(addr_t addr, MemoryRegisters& memory) { convertible_t datasetReadLight(addr_t addr, MemoryRegisters& memory) {
convertible_t data; convertible_t data;
LightClientDataset* lds = memory.ds.lightDataset;
auto blockNumber = memory.ma / DatasetBlockSize; auto blockNumber = memory.ma / DatasetBlockSize;
if (memory.lcm->blockNumber != blockNumber) { if (lds->blockNumber != blockNumber) {
initBlock<softAes>(memory.lcm->cache + CacheShift, (uint8_t*)memory.lcm->block, blockNumber, memory.lcm->keys); initBlock<softAes>(lds->cache->getCache(), (uint8_t*)lds->block, blockNumber, lds->cache->getKeys());
memory.lcm->blockNumber = blockNumber; lds->blockNumber = blockNumber;
} }
data.u64 = *(uint64_t*)(memory.lcm->block + (memory.ma % DatasetBlockSize)); data.u64 = *(uint64_t*)(lds->block + (memory.ma % DatasetBlockSize));
memory.ma += 8; memory.ma += 8;
memory.mx ^= addr; memory.mx ^= addr;
if ((memory.mx & 0xFFF8) == 0) { if ((memory.mx & 0xFFF8) == 0) {
@ -256,54 +157,37 @@ namespace RandomX {
template template
convertible_t datasetReadLight<true>(addr_t addr, MemoryRegisters& memory); convertible_t datasetReadLight<true>(addr_t addr, MemoryRegisters& memory);
template<bool softAes> void datasetAlloc(dataset_t& ds) {
void datasetInit(const void* seed, uint8_t*& dataset) {
if (sizeof(size_t) <= 4) if (sizeof(size_t) <= 4)
throw std::runtime_error("Platform doesn't support enough memory for the dataset"); throw std::runtime_error("Platform doesn't support enough memory for the dataset");
dataset = (uint8_t*)_mm_malloc(DatasetSize, sizeof(__m128i)); ds.dataset = (uint8_t*)_mm_malloc(DatasetSize, /*sizeof(__m128i)*/ 64);
if (dataset == nullptr) { if (ds.dataset == nullptr) {
throw std::runtime_error("Dataset memory allocation failed. >4 GiB of virtual memory is needed."); throw std::runtime_error("Dataset memory allocation failed. >4 GiB of free virtual memory is needed.");
} }
uint8_t* cache = (uint8_t*)_mm_malloc(CacheSize + CacheShift, sizeof(__m128i));
if (cache == nullptr) {
throw std::bad_alloc();
} }
initializeCache(seed, SeedSize, cache);
memcpy(cache + CacheSize, cache, CacheShift);
alignas(16) __m128i keys[10];
expandAesKeys<softAes>((const __m128i*)seed, keys);
for (uint32_t i = 0; i < DatasetBlockCount; ++i) {
initBlock<softAes>(cache + CacheShift, dataset + i * DatasetBlockSize, i, keys);
}
_mm_free(cache);
}
template
void datasetInit<false>(const void*, uint8_t*&);
template
void datasetInit<true>(const void*, uint8_t*&);
template<bool softAes> template<bool softAes>
void datasetInitLight(const void* seed, LightClientMemory*& lcm) { void datasetInit(Cache* cache, dataset_t ds, uint32_t startBlock, uint32_t blockCount) {
lcm = new LightClientMemory(); for (uint32_t i = startBlock; i < startBlock + blockCount; ++i) {
lcm->cache = (uint8_t*)_mm_malloc(CacheSize + CacheShift, sizeof(__m128i)); initBlock<softAes>(cache->getCache(), ds.dataset + i * DatasetBlockSize, i, cache->getKeys());
if (lcm->cache == nullptr) {
throw std::bad_alloc();
} }
initializeCache(seed, SeedSize, lcm->cache);
memcpy(lcm->cache + CacheSize, lcm->cache, CacheShift);
expandAesKeys<softAes>((__m128i*)seed, lcm->keys);
lcm->block = (uint8_t*)_mm_malloc(DatasetBlockSize, sizeof(__m128i));
if (lcm->block == nullptr) {
throw std::bad_alloc();
}
lcm->blockNumber = -1;
} }
template template
void datasetInitLight<false>(const void*, LightClientMemory*&); void datasetInit<false>(Cache*, dataset_t, uint32_t, uint32_t);
template template
void datasetInitLight<true>(const void*, LightClientMemory*&); void datasetInit<true>(Cache*, dataset_t, uint32_t, uint32_t);
template<bool softAes>
void datasetInitCache(const void* seed, dataset_t& ds) {
ds.cache = new Cache();
ds.cache->initialize<softAes>(seed, SeedSize);
}
template
void datasetInitCache<false>(const void*, dataset_t&);
template
void datasetInitCache<true>(const void*, dataset_t&);
} }

View file

@ -20,43 +20,30 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
#pragma once #pragma once
#include <cstdint> #include <cstdint>
#include <array>
#include "intrinPortable.h" #include "intrinPortable.h"
#include "argon2.h"
#include "common.hpp" #include "common.hpp"
#include "softAes.h" #include "softAes.h"
namespace RandomX { namespace RandomX {
static_assert(ArgonMemorySize % (ArgonLanes * ARGON2_SYNC_POINTS) == 0, "ArgonMemorySize - invalid value"); using KeysContainer = std::array<__m128i, 10>;
void initializeCache(const void* input, size_t inputLength, void* memory);
template<bool soft>
void expandAesKeys(const __m128i* seed, __m128i* keys);
template<bool soft>
inline __m128i aesenc(__m128i in, __m128i key) {
return soft ? soft_aesenc(in, key) : _mm_aesenc_si128(in, key);
}
template<bool soft>
inline __m128i aesdec(__m128i in, __m128i key) {
return soft ? soft_aesdec(in, key) : _mm_aesdec_si128(in, key);
}
template<bool soft, bool enc> template<bool soft, bool enc>
void initBlock(uint8_t* in, uint8_t* out, uint32_t blockNumber, const __m128i keys[10]); void initBlock(const uint8_t* in, uint8_t* out, uint32_t blockNumber, const KeysContainer& keys);
template<bool softAes> template<bool softAes>
void initBlock(uint8_t* cache, uint8_t* block, uint32_t blockNumber, const __m128i keys[10]); void initBlock(const uint8_t* cache, uint8_t* block, uint32_t blockNumber, const KeysContainer& keys);
void datasetAlloc(dataset_t& ds);
template<bool softAes> template<bool softAes>
void datasetInit(const void* seed, uint8_t*& dataset); void datasetInit(Cache* cache, dataset_t ds, uint32_t startBlock, uint32_t blockCount);
convertible_t datasetRead(addr_t addr, MemoryRegisters& memory); convertible_t datasetRead(addr_t addr, MemoryRegisters& memory);
template<bool softAes> template<bool softAes>
void datasetInitLight(const void* seed, LightClientMemory*& lcm); void datasetInitCache(const void* seed, dataset_t& dataset);
template<bool softAes> template<bool softAes>
convertible_t datasetReadLight(addr_t addr, MemoryRegisters& memory); convertible_t datasetReadLight(addr_t addr, MemoryRegisters& memory);

View file

@ -30,6 +30,10 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
#include "Program.hpp" #include "Program.hpp"
#include <string> #include <string>
#include "instructions.hpp" #include "instructions.hpp"
#include <thread>
#include <atomic>
#include "dataset.hpp"
#include "Cache.hpp"
const uint8_t seed[32] = { 191, 182, 222, 175, 249, 89, 134, 104, 241, 68, 191, 62, 162, 166, 61, 64, 123, 191, 227, 193, 118, 60, 188, 53, 223, 133, 175, 24, 123, 230, 55, 74 }; const uint8_t seed[32] = { 191, 182, 222, 175, 249, 89, 134, 104, 241, 68, 191, 62, 162, 166, 61, 64, 123, 191, 227, 193, 118, 60, 188, 53, 223, 133, 175, 24, 123, 230, 55, 74 };
@ -45,7 +49,6 @@ void outputHex(std::ostream& os, const char* data, int length) {
os << hexmap[(data[i] & 0xF0) >> 4]; os << hexmap[(data[i] & 0xF0) >> 4];
os << hexmap[data[i] & 0x0F]; os << hexmap[data[i] & 0x0F];
} }
os << std::endl;
} }
void readOption(const char* option, int argc, char** argv, bool& out) { void readOption(const char* option, int argc, char** argv, bool& out) {
@ -58,6 +61,15 @@ void readOption(const char* option, int argc, char** argv, bool& out) {
out = false; out = false;
} }
// Looks for "<option> <value>" on the command line and stores the value in `out`.
// Only values that atoi() parses as strictly positive are accepted; when no
// occurrence of the option carries such a value, `out` is set to `defaultValue`.
void readIntOption(const char* option, int argc, char** argv, int& out, int defaultValue) {
	// The value follows the option name, so the last argument can never be a match.
	const int lastCandidate = argc - 1;
	int index = 0;
	while (index < lastCandidate) {
		if (strcmp(argv[index], option) == 0) {
			out = atoi(argv[index + 1]);
			if (out > 0)
				return;
		}
		++index;
	}
	out = defaultValue;
}
void readInt(int argc, char** argv, int& out, int defaultValue) { void readInt(int argc, char** argv, int& out, int defaultValue) {
for (int i = 0; i < argc; ++i) { for (int i = 0; i < argc; ++i) {
if (*argv[i] != '-' && (out = atoi(argv[i])) > 0) { if (*argv[i] != '-' && (out = atoi(argv[i])) > 0) {
@ -75,81 +87,144 @@ std::ostream& operator<<(std::ostream& os, const RandomX::RegisterFile& rf) {
return os; return os;
} }
// Accumulator of four 64-bit words that several threads can XOR into
// concurrently. Each word is an independent std::atomic, so updates to a
// single word are atomic, but the four words are not updated or read as one
// unit.
class AtomicHash {
public:
	// Starts with all four words set to zero.
	AtomicHash() {
		for (int i = 0; i < 4; ++i)
			hash[i].store(0);
	}
	// XORs the four words of `update` into the accumulator, one atomic
	// fetch_xor per word.
	void xorWith(const uint64_t update[4]) {
		for (int i = 0; i < 4; ++i)
			hash[i].fetch_xor(update[i]);
	}
	// Writes the four words as hex to `os`, followed by a newline and flush.
	// Each word is loaded separately, so this is only a consistent snapshot
	// when no thread is calling xorWith() at the same time.
	void print(std::ostream& os) {
		for (int i = 0; i < 4; ++i)
			print(hash[i], os);
		os << std::endl;
	}
private:
	// Writes the raw bytes of one word as hex to `os`. The stream argument is
	// honoured here (it used to be ignored in favour of std::cout), so the
	// digits and the trailing newline always end up in the same stream.
	void print(std::atomic<uint64_t>& word, std::ostream& os) {
		const uint64_t value = word.load();
		outputHex(os, reinterpret_cast<const char*>(&value), sizeof(value));
	}
	std::atomic<uint64_t> hash[4];
};
// Worker loop run by each mining thread.
//
// Nonces are claimed one at a time from the shared `atomicNonce` counter until
// the claimed value reaches `noncesCount`. For every nonce the block template
// is hashed with blake2b, the hash seeds the VM scratchpad and program, the VM
// is executed, and the VM result is XORed into the shared `result`.
//
//   vm           virtual machine used by this thread
//   atomicNonce  shared counter handing out the next nonce
//   result       shared accumulator of all per-nonce results
//   noncesCount  exclusive upper bound for nonces
//   thread       index of the worker; only used by the disabled debug output
void mine(RandomX::VirtualMachine* vm, std::atomic<int>& atomicNonce, AtomicHash& result, int noncesCount, int thread) {
	uint64_t hash[4];
	unsigned char blockTemplate[] = {
		0x07, 0x07, 0xf7, 0xa4, 0xf0, 0xd6, 0x05, 0xb3, 0x03, 0x26, 0x08, 0x16, 0xba, 0x3f, 0x10, 0x90, 0x2e, 0x1a, 0x14,
		0x5a, 0xc5, 0xfa, 0xd3, 0xaa, 0x3a, 0xf6, 0xea, 0x44, 0xc1, 0x18, 0x69, 0xdc, 0x4f, 0x85, 0x3f, 0x00, 0x2b, 0x2e,
		0xea, 0x00, 0x00, 0x00, 0x00, 0x77, 0xb2, 0x06, 0xa0, 0x2c, 0xa5, 0xb1, 0xd4, 0xce, 0x6b, 0xbf, 0xdf, 0x0a, 0xca,
		0xc3, 0x8b, 0xde, 0xd3, 0x4d, 0x2d, 0xcd, 0xee, 0xf9, 0x5c, 0xd2, 0x0c, 0xef, 0xc1, 0x2f, 0x61, 0xd5, 0x61, 0x09
	};
	// Byte offset inside the template where the nonce is stored.
	const int nonceOffset = 39;
	int nonce = atomicNonce.fetch_add(1);

	while (nonce < noncesCount) {
		//std::cout << "Thread " << thread << " nonce " << nonce << std::endl;

		// Copy the nonce into the template byte by byte (native byte order).
		// Offset 39 is not suitably aligned for an int, so storing through a
		// casted int* would be a misaligned, aliasing-violating access.
		const unsigned char* nonceBytes = reinterpret_cast<const unsigned char*>(&nonce);
		for (unsigned i = 0; i < sizeof(nonce); ++i)
			blockTemplate[nonceOffset + i] = nonceBytes[i];

		blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0);

		// Scratchpad index: 14-bit value built from hash bytes 24 and 25.
		const uint8_t* hashBytes = reinterpret_cast<const uint8_t*>(hash);
		int spIndex = hashBytes[24] | ((hashBytes[25] & 63) << 8);

		vm->initializeScratchpad(spIndex);
		vm->initializeProgram(hash);
		vm->execute();
		vm->getResult(hash);
		result.xorWith(hash);
		if (RandomX::trace) {
			std::cout << "Nonce: " << nonce << " ";
			outputHex(std::cout, (char*)hash, sizeof(hash));
			std::cout << std::endl;
		}
		nonce = atomicNonce.fetch_add(1);
	}
}
int main(int argc, char** argv) {
bool softAes, lightClient, genAsm, compiled;
int programCount, threadCount;
readOption("--softAes", argc, argv, softAes);
readOption("--lightClient", argc, argv, lightClient);
readOption("--genAsm", argc, argv, genAsm);
readOption("--compiled", argc, argv, compiled);
readIntOption("--threads", argc, argv, threadCount, 1);
readIntOption("--nonces", argc, argv, programCount, 1000);
std::atomic<int> atomicNonce(0);
AtomicHash result;
std::vector<RandomX::VirtualMachine*> vms;
std::vector<std::thread> threads;
RandomX::dataset_t dataset;
if (softAes) if (softAes)
std::cout << "Using software AES." << std::endl; std::cout << "Using software AES." << std::endl;
std::cout << "Initializing..." << std::endl;
char cumulative[RandomX::ResultSize] = { 0 };
RandomX::VirtualMachine* vm;
try { try {
Stopwatch sw(true);
if (softAes) {
RandomX::datasetInitCache<true>(seed, dataset);
}
else {
RandomX::datasetInitCache<false>(seed, dataset);
}
if (RandomX::trace) {
std::cout << "Keys: " << std::endl;
for (int i = 0; i < dataset.cache->getKeys().size(); ++i) {
outputHex(std::cout, (char*)&dataset.cache->getKeys()[i], sizeof(__m128i));
}
std::cout << std::endl;
std::cout << "Cache: " << std::endl;
outputHex(std::cout, (char*)dataset.cache->getCache(), sizeof(__m128i));
std::cout << std::endl;
}
if (lightClient) {
std::cout << "Cache (64 MiB) initialized in " << sw.getElapsed() << " s" << std::endl;
}
else {
RandomX::Cache* cache = dataset.cache;
RandomX::datasetAlloc(dataset);
auto perThread = RandomX::DatasetBlockCount / threadCount;
auto remainder = RandomX::DatasetBlockCount % threadCount;
for (int i = 0; i < threadCount; ++i) {
auto count = perThread + (i == threadCount - 1 ? remainder : 0);
if (softAes) {
threads.push_back(std::thread(&RandomX::datasetInit<true>, cache, dataset, i * perThread, count));
}
else {
threads.push_back(std::thread(&RandomX::datasetInit<false>, cache, dataset, i * perThread, count));
}
}
for (int i = 0; i < threads.size(); ++i) {
threads[i].join();
}
delete cache;
threads.clear();
std::cout << "Dataset (4 GiB) initialized in " << sw.getElapsed() << " s" << std::endl;
}
std::cout << "Initializing " << threadCount << " virtual machine(s)..." << std::endl;
for (int i = 0; i < threadCount; ++i) {
RandomX::VirtualMachine* vm;
if (compiled) { if (compiled) {
vm = new RandomX::CompiledVirtualMachine(softAes); vm = new RandomX::CompiledVirtualMachine(softAes);
} }
else { else {
vm = new RandomX::InterpretedVirtualMachine(softAes); vm = new RandomX::InterpretedVirtualMachine(softAes);
} }
std::cout << "Initializing..." << std::endl; vm->setDataset(dataset, lightClient);
Stopwatch sw(true); vms.push_back(vm);
vm->initializeDataset(seed, lightClient); }
if(lightClient)
std::cout << "Cache (64 MiB) initialized in " << sw.getElapsed() << " s" << std::endl;
else
std::cout << "Dataset (4 GiB) initialized in " << sw.getElapsed() << " s" << std::endl;
std::cout << "Running benchmark (" << programCount << " programs) ..." << std::endl; std::cout << "Running benchmark (" << programCount << " programs) ..." << std::endl;
sw.restart(); sw.restart();
for (int i = 0; i < programCount; ++i) { for (int i = 0; i < vms.size(); ++i) {
*nonce = i; threads.push_back(std::thread(&mine, vms[i], std::ref(atomicNonce), std::ref(result), programCount, i));
if (RandomX::trace) std::cout << "Nonce: " << i << " ";
blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0);
int spIndex = hash[24] | ((hash[25] & 63) << 8);
vm->initializeScratchpad(spIndex);
//dump((const char *)vm.getScratchpad(), RandomX::ScratchpadSize, "scratchpad-before.txt");
//return 0;
vm->initializeProgram(hash);
vm->execute();
/*std::string fileName("scratchpad-after-");
fileName = fileName + std::to_string(i) + ".txt";
dump((const char *)vm.getScratchpad(), RandomX::ScratchpadSize, fileName.c_str());*/
vm->getResult(hash);
if (RandomX::trace) {
outputHex(std::cout, (char*)hash, sizeof(hash));
} }
((uint64_t*)cumulative)[0] ^= ((uint64_t*)hash)[0]; for (int i = 0; i < threads.size(); ++i) {
((uint64_t*)cumulative)[1] ^= ((uint64_t*)hash)[1]; threads[i].join();
((uint64_t*)cumulative)[2] ^= ((uint64_t*)hash)[2];
((uint64_t*)cumulative)[3] ^= ((uint64_t*)hash)[3];
} }
double elapsed = sw.getElapsed(); double elapsed = sw.getElapsed();
std::cout << "Calculated result: "; std::cout << "Calculated result: ";
outputHex(std::cout, cumulative, sizeof(cumulative)); result.print(std::cout);
if(programCount == 1000) if(programCount == 1000)
std::cout << "Reference result: d62ed85c39030cd2c5704fca3a23019f1244f2b03447c9a6b39dea5390ed1d10" << std::endl; std::cout << "Reference result: d62ed85c39030cd2c5704fca3a23019f1244f2b03447c9a6b39dea5390ed1d10" << std::endl;
std::cout << "Performance: " << programCount / elapsed << " programs per second" << std::endl; std::cout << "Performance: " << programCount / elapsed << " programs per second" << std::endl;