diff --git a/makefile b/makefile
index 8aff8c5..3ffacec 100644
--- a/makefile
+++ b/makefile
@@ -9,10 +9,9 @@ endif
BINDIR=bin
SRCDIR=src
OBJDIR=obj
-LDFLAGS=
+LDFLAGS=-lpthread
TOBJS=$(addprefix $(OBJDIR)/,instructionsPortable.o TestAluFpu.o)
-ROBJS=$(addprefix $(OBJDIR)/,argon2_core.o argon2_ref.o AssemblyGeneratorX86.o blake2b.o CompiledVirtualMachine.o dataset.o JitCompilerX86.o instructionsPortable.o Instruction.o InterpretedVirtualMachine.o main.o Program.o softAes.o VirtualMachine.o t1ha2.o)
-SRC1=$(addprefix $(SRCDIR)/,TestAluFpu.cpp instructions.hpp Pcg32.hpp)
+ROBJS=$(addprefix $(OBJDIR)/,argon2_core.o argon2_ref.o AssemblyGeneratorX86.o blake2b.o CompiledVirtualMachine.o dataset.o JitCompilerX86.o instructionsPortable.o Instruction.o InterpretedVirtualMachine.o main.o Program.o softAes.o VirtualMachine.o t1ha2.o Cache.o)
all: release test
@@ -52,7 +51,7 @@ $(OBJDIR)/blake2b.o: $(addprefix $(SRCDIR)/blake2/,blake2b.c blake2.h blake2-imp
$(OBJDIR)/CompiledVirtualMachine.o: $(addprefix $(SRCDIR)/,CompiledVirtualMachine.cpp CompiledVirtualMachine.hpp Pcg32.hpp common.hpp instructions.hpp) | $(OBJDIR)
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/CompiledVirtualMachine.cpp -o $@
-$(OBJDIR)/dataset.o: $(addprefix $(SRCDIR)/,dataset.cpp common.hpp Pcg32.hpp argon2_core.h) | $(OBJDIR)
+$(OBJDIR)/dataset.o: $(addprefix $(SRCDIR)/,dataset.cpp dataset.hpp common.hpp Pcg32.hpp Cache.hpp) | $(OBJDIR) # dataset.cpp includes dataset.hpp and Cache.hpp
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/dataset.cpp -o $@
$(OBJDIR)/JitCompilerX86.o: $(addprefix $(SRCDIR)/,JitCompilerX86.cpp JitCompilerX86.hpp Instruction.hpp) | $(OBJDIR)
@@ -72,6 +71,9 @@ $(OBJDIR)/main.o: $(addprefix $(SRCDIR)/,main.cpp InterpretedVirtualMachine.hpp
$(OBJDIR)/Program.o: $(addprefix $(SRCDIR)/,Program.cpp Program.hpp Pcg32.hpp) | $(OBJDIR)
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/Program.cpp -o $@
+
+$(OBJDIR)/Cache.o: $(addprefix $(SRCDIR)/,Cache.cpp Cache.hpp Pcg32.hpp argon2_core.h common.hpp dataset.hpp softAes.h) | $(OBJDIR) # headers included by Cache.cpp and Cache.hpp
+ $(CXX) $(CXXFLAGS) -c $(SRCDIR)/Cache.cpp -o $@
$(OBJDIR)/softAes.o: $(addprefix $(SRCDIR)/,softAes.cpp softAes.h) | $(OBJDIR)
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/softAes.cpp -o $@
diff --git a/src/Cache.cpp b/src/Cache.cpp
new file mode 100644
index 0000000..171fa58
--- /dev/null
+++ b/src/Cache.cpp
@@ -0,0 +1,147 @@
+/*
+Copyright (c) 2018 tevador
+
+This file is part of RandomX.
+
+RandomX is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+RandomX is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with RandomX. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include
+#include "Cache.hpp"
+#include "softAes.h"
+#include "argon2.h"
+#include "Pcg32.hpp"
+#include "argon2_core.h"
+
+namespace RandomX {
+
+ static_assert(ArgonMemorySize % (ArgonLanes * ARGON2_SYNC_POINTS) == 0, "ArgonMemorySize - invalid value");
+
+ // Running XOR across the four 32-bit lanes of tmp1, built from three byte-wise left shifts:
+ // sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1)
+ static inline __m128i sl_xor(__m128i tmp1) {
+ __m128i tmp4;
+ tmp4 = _mm_slli_si128(tmp1, 0x04); // copy of tmp1 shifted left by 4 bytes (one 32-bit lane)
+ tmp1 = _mm_xor_si128(tmp1, tmp4);
+ tmp4 = _mm_slli_si128(tmp4, 0x04); // the shifted copy moves up one more lane
+ tmp1 = _mm_xor_si128(tmp1, tmp4);
+ tmp4 = _mm_slli_si128(tmp4, 0x04); // third and last shift; the lowest lane is never modified
+ tmp1 = _mm_xor_si128(tmp1, tmp4);
+ return tmp1;
+ }
+
+ template
+ static inline void aesGenKeys(__m128i* xout0, __m128i* xout2) { // one key-expansion step; updates *xout0 and *xout2 in place
+ __m128i xout1 = soft ? soft_aeskeygenassist(*xout2, rcon) : _mm_aeskeygenassist_si128(*xout2, rcon); // 'soft' selects the software routine instead of the intrinsic
+ xout1 = _mm_shuffle_epi32(xout1, 0xFF); // 0xFF: replicate 32-bit element 3 into every lane
+ *xout0 = sl_xor(*xout0);
+ *xout0 = _mm_xor_si128(*xout0, xout1);
+ xout1 = soft ? soft_aeskeygenassist(*xout0, 0x00) : _mm_aeskeygenassist_si128(*xout0, 0x00); // second half is derived from the freshly updated *xout0
+ xout1 = _mm_shuffle_epi32(xout1, 0xAA); // 0xAA: replicate 32-bit element 2 into every lane
+ *xout2 = sl_xor(*xout2);
+ *xout2 = _mm_xor_si128(*xout2, xout1);
+ }
+
+ template
+ static inline void expandAesKeys(const __m128i* seed, __m128i* keys) { // writes 10 keys to 'keys' from the two 128-bit words at 'seed'
+ __m128i xout0, xout2;
+ xout0 = _mm_load_si128(seed); // aligned load: 'seed' has to be 16-byte aligned
+ xout2 = _mm_load_si128(seed + 1);
+ *keys++ = xout0; // keys 0 and 1 are the seed words themselves
+ *keys++ = xout2;
+ aesGenKeys<0x01, soft>(&xout0, &xout2); // each step below yields the next pair of keys
+ *keys++ = xout0;
+ *keys++ = xout2;
+ aesGenKeys<0x02, soft>(&xout0, &xout2);
+ *keys++ = xout0;
+ *keys++ = xout2;
+ aesGenKeys<0x04, soft>(&xout0, &xout2);
+ *keys++ = xout0;
+ *keys++ = xout2;
+ aesGenKeys<0x08, soft>(&xout0, &xout2);
+ *keys++ = xout0; // keys 8 and 9: the tenth and last key is written on the next line
+ *keys++ = xout2;
+ }
+
+ void Cache::argonFill(const void* seed, size_t seedSize) { // fills 'memory' via Argon2d, using 'seed' as the password input
+ uint32_t memory_blocks, segment_length;
+ argon2_instance_t instance;
+ argon2_context context;
+
+ context.out = nullptr; // no output buffer is supplied; only the filled memory is used
+ context.outlen = 0;
+ context.pwd = CONST_CAST(uint8_t *)seed;
+ context.pwdlen = (uint32_t)seedSize;
+ context.salt = CONST_CAST(uint8_t *)ArgonSalt;
+ context.saltlen = (uint32_t)ArgonSaltSize;
+ context.secret = NULL;
+ context.secretlen = 0;
+ context.ad = NULL;
+ context.adlen = 0;
+ context.t_cost = ArgonIterations;
+ context.m_cost = ArgonMemorySize;
+ context.lanes = ArgonLanes;
+ context.threads = 1;
+ context.allocate_cbk = NULL;
+ context.free_cbk = NULL;
+ context.flags = ARGON2_DEFAULT_FLAGS;
+ context.version = ARGON2_VERSION_NUMBER;
+
+ /* 2. Align memory size */
+ /* Minimum memory_blocks = 8L blocks, where L is the number of lanes */
+ memory_blocks = context.m_cost; // taken as-is; divisibility is enforced by the static_assert at the top of this file
+
+ segment_length = memory_blocks / (context.lanes * ARGON2_SYNC_POINTS);
+
+ instance.version = context.version;
+ instance.memory = NULL;
+ instance.passes = context.t_cost;
+ instance.memory_blocks = memory_blocks;
+ instance.segment_length = segment_length;
+ instance.lane_length = segment_length * ARGON2_SYNC_POINTS;
+ instance.lanes = context.lanes;
+ instance.threads = context.threads;
+ instance.type = Argon2_d;
+ instance.memory = (block*)memory; // Argon2 works directly on this Cache's own buffer
+
+ if (instance.threads > instance.lanes) {
+ instance.threads = instance.lanes;
+ }
+
+ /* 3. Initialization: Hashing inputs, allocating memory, filling first
+ * blocks
+ */
+ argon_initialize(&instance, &context); // NOTE(review): the int status returned by argon_initialize is not checked
+
+ fill_memory_blocks(&instance);
+ }
+
+ template
+ void Cache::initialize(const void* seed, size_t seedSize) { // builds the cache from 'seed': Argon2d fill, wrap-around copy, AES key expansion
+ //Argon2d memory fill
+ argonFill(seed, seedSize);
+
+ //Circular shift of the cache buffer by 512 bytes
+ //realized by copying the first 512 bytes to the back
+ //of the buffer and shifting the start by 512 bytes
+ memcpy(memory + CacheSize, memory, CacheShift); // the shifted start is what getCache() returns (memory + CacheShift)
+
+ //AES keys
+ expandAesKeys((__m128i*)seed, keys.data()); // NOTE(review): expandAesKeys does aligned 16-byte loads from 'seed' - confirm callers pass an aligned buffer
+ }
+
+ template void Cache::initialize(const void*, size_t); // explicit instantiation; its template argument list is not visible in this view
+
+ template void Cache::initialize(const void*, size_t); // explicit instantiation; its template argument list is not visible in this view
+}
\ No newline at end of file
diff --git a/src/Cache.hpp b/src/Cache.hpp
new file mode 100644
index 0000000..7a34ee8
--- /dev/null
+++ b/src/Cache.hpp
@@ -0,0 +1,57 @@
+/*
+Copyright (c) 2018 tevador
+
+This file is part of RandomX.
+
+RandomX is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+RandomX is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with RandomX. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#pragma once
+
+#include
+#include
+#include "common.hpp"
+#include "dataset.hpp"
+
+namespace RandomX {
+
+ class Cache { // holds the Argon2d-filled cache buffer together with the expanded AES keys
+ public:
+ void* operator new(size_t size) { // heap instances are allocated with __m128i alignment; throws std::bad_alloc on failure
+ void* ptr = _mm_malloc(size, sizeof(__m128i));
+ if (ptr == nullptr)
+ throw std::bad_alloc();
+ return ptr;
+ }
+
+ void operator delete(void* ptr) { // counterpart of the _mm_malloc in operator new
+ _mm_free(ptr);
+ }
+
+ template
+ void initialize(const void* seed, size_t seedSize); // fills 'memory' and 'keys' from the seed (defined in Cache.cpp)
+
+ const KeysContainer& getKeys() const { // read-only access to the expanded keys
+ return keys;
+ }
+
+ const uint8_t* getCache() { // start of the usable cache: the buffer offset by CacheShift bytes
+ return memory + CacheShift;
+ }
+ private:
+ alignas(16) KeysContainer keys;
+ uint8_t memory[CacheSize + CacheShift]; // the CacheShift extra bytes receive a copy of the buffer's first bytes in initialize()
+ void argonFill(const void* seed, size_t seedSize);
+ };
+}
\ No newline at end of file
diff --git a/src/CompiledVirtualMachine.cpp b/src/CompiledVirtualMachine.cpp
index fdb1498..35e4ba5 100644
--- a/src/CompiledVirtualMachine.cpp
+++ b/src/CompiledVirtualMachine.cpp
@@ -31,11 +31,11 @@ namespace RandomX {
#endif
}
- void CompiledVirtualMachine::initializeDataset(const void* seed, bool lightClient) {
+ void CompiledVirtualMachine::setDataset(dataset_t ds, bool lightClient) { // rejects light-client mode, otherwise defers to the base class
if (lightClient) {
throw std::runtime_error("Compiled VM does not support light-client mode");
}
- VirtualMachine::initializeDataset(seed, lightClient);
+ VirtualMachine::setDataset(ds, lightClient); // lightClient is always false when this line is reached
}
void CompiledVirtualMachine::initializeProgram(const void* seed) {
diff --git a/src/CompiledVirtualMachine.hpp b/src/CompiledVirtualMachine.hpp
index b5b1d63..c2e108d 100644
--- a/src/CompiledVirtualMachine.hpp
+++ b/src/CompiledVirtualMachine.hpp
@@ -27,7 +27,7 @@ namespace RandomX {
class CompiledVirtualMachine : public VirtualMachine {
public:
CompiledVirtualMachine(bool softAes);
- void initializeDataset(const void* seed, bool light = false) override;
+ void setDataset(dataset_t ds, bool light = false) override;
void initializeProgram(const void* seed) override;
virtual void execute() override;
void* getProgram() {
diff --git a/src/VirtualMachine.cpp b/src/VirtualMachine.cpp
index 4ea7ac0..21c52ac 100644
--- a/src/VirtualMachine.cpp
+++ b/src/VirtualMachine.cpp
@@ -20,58 +20,65 @@ along with RandomX. If not, see.
#include "VirtualMachine.hpp"
#include "common.hpp"
#include "dataset.hpp"
+#include "Cache.hpp"
#include "t1ha/t1ha.h"
#include "blake2/blake2.h"
#include
namespace RandomX {
VirtualMachine::VirtualMachine(bool softAes) : softAes(softAes), lightClient(false) {
- mem.dataset = nullptr;
+ mem.ds.dataset = nullptr;
}
- void VirtualMachine::initializeDataset(const void* seed, bool light) {
+ VirtualMachine::~VirtualMachine() { // releases the per-VM light-client state; the Cache it points to is left untouched
if (lightClient) {
- _mm_free(mem.lcm->cache);
- _mm_free(mem.lcm->block);
+ _mm_free(mem.ds.lightDataset->block); // block is obtained with _mm_malloc in setDataset, so it must be released with _mm_free, not delete
+ delete mem.ds.lightDataset; // allocated with plain new in setDataset
+ }
+ }
+
+ void VirtualMachine::setDataset(dataset_t ds, bool light) { // attaches an already-prepared dataset (or, in light mode, a cache) to this VM
+ if (mem.ds.dataset != nullptr) { // a second call is rejected
+ throw std::runtime_error("Dataset is already initialized");
}
- _mm_free(mem.dataset);
lightClient = light;
if (light) {
+ auto lds = mem.ds.lightDataset = new LightClientDataset(); // per-VM wrapper around the caller-supplied cache
+ lds->cache = ds.cache;
+ lds->block = (uint8_t*)_mm_malloc(DatasetBlockSize, sizeof(__m128i)); // buffer for a single generated dataset block
+ lds->blockNumber = -1; // blockNumber is uint32_t, so this is the all-ones value: no block generated yet
+ if (lds->block == nullptr) {
+ throw std::bad_alloc();
+ }
if (softAes) {
- datasetInitLight(seed, mem.lcm);
readDataset = &datasetReadLight;
}
else {
- datasetInitLight(seed, mem.lcm);
readDataset = &datasetReadLight;
}
}
else {
+ mem.ds = ds; // full mode: ds.dataset is used directly by datasetRead
readDataset = &datasetRead;
- if (softAes) {
- datasetInit(seed, mem.dataset);
- }
- else {
- datasetInit(seed, mem.dataset);
- }
}
}
void VirtualMachine::initializeScratchpad(uint32_t index) {
if (lightClient) {
+ auto cache = mem.ds.lightDataset->cache; // light mode: scratchpad blocks are generated from the cache instead of copied
if (softAes) {
for (int i = 0; i < ScratchpadSize / DatasetBlockSize; ++i) {
- initBlock(mem.lcm->cache + CacheShift, ((uint8_t*)scratchpad) + DatasetBlockSize * i, (ScratchpadSize / DatasetBlockSize) * index + i, mem.lcm->keys);
+ initBlock(cache->getCache(), ((uint8_t*)scratchpad) + DatasetBlockSize * i, (ScratchpadSize / DatasetBlockSize) * index + i, cache->getKeys());
}
}
else {
for (int i = 0; i < ScratchpadSize / DatasetBlockSize; ++i) {
- initBlock(mem.lcm->cache + CacheShift, ((uint8_t*)scratchpad) + DatasetBlockSize * i, (ScratchpadSize / DatasetBlockSize) * index + i, mem.lcm->keys);
+ initBlock(cache->getCache(), ((uint8_t*)scratchpad) + DatasetBlockSize * i, (ScratchpadSize / DatasetBlockSize) * index + i, cache->getKeys());
}
}
}
else {
- memcpy(scratchpad, mem.dataset + ScratchpadSize * index, ScratchpadSize);
+ memcpy(scratchpad, mem.ds.dataset + ScratchpadSize * index, ScratchpadSize); // full mode: copy the index-th ScratchpadSize-byte slice of the dataset
}
}
diff --git a/src/VirtualMachine.hpp b/src/VirtualMachine.hpp
index 5c83fa5..569718c 100644
--- a/src/VirtualMachine.hpp
+++ b/src/VirtualMachine.hpp
@@ -26,30 +26,12 @@ namespace RandomX {
class VirtualMachine {
public:
VirtualMachine(bool softAes);
- virtual ~VirtualMachine() {}
- virtual void initializeDataset(const void* seed, bool light = false);
+ virtual ~VirtualMachine();
+ virtual void setDataset(dataset_t ds, bool light = false);
void initializeScratchpad(uint32_t index);
virtual void initializeProgram(const void* seed) = 0;
virtual void execute() = 0;
void getResult(void*);
- const RegisterFile& getRegisterFile() const {
- return reg;
- }
- const convertible_t* getScratchpad() const {
- return scratchpad;
- }
- const void* getCache() {
- if (lightClient) {
- return mem.lcm->cache;
- }
- return nullptr;
- }
- const __m128i* getKeys() {
- if (lightClient) {
- return mem.lcm->keys;
- }
- return nullptr;
- }
protected:
bool softAes, lightClient;
RegisterFile reg;
diff --git a/src/argon2_core.c b/src/argon2_core.c
index f90a0d7..cf07be5 100644
--- a/src/argon2_core.c
+++ b/src/argon2_core.c
@@ -473,7 +473,7 @@ void initial_hash(uint8_t *blockhash, argon2_context *context, argon2_type type)
blake2b_final(&BlakeHash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH);
}
-int initialize(argon2_instance_t *instance, argon2_context *context) {
+int argon_initialize(argon2_instance_t *instance, argon2_context *context) {
uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH];
int result = ARGON2_OK;
diff --git a/src/argon2_core.h b/src/argon2_core.h
index 6886fac..69a6339 100644
--- a/src/argon2_core.h
+++ b/src/argon2_core.h
@@ -204,7 +204,7 @@ void fill_first_blocks(uint8_t *blockhash, const argon2_instance_t *instance);
* @return Zero if successful, -1 if memory failed to allocate. @context->state
* will be modified if successful.
*/
-int initialize(argon2_instance_t *instance, argon2_context *context);
+int argon_initialize(argon2_instance_t *instance, argon2_context *context);
/*
* XORing the last block of each lane, hashing it, making the tag. Deallocates
diff --git a/src/common.hpp b/src/common.hpp
index 1343e62..04333a5 100644
--- a/src/common.hpp
+++ b/src/common.hpp
@@ -20,8 +20,6 @@ along with RandomX. If not, see.
#pragma once
#include
-#include
-#include "intrinPortable.h"
namespace RandomX {
@@ -55,13 +53,13 @@ namespace RandomX {
constexpr bool trace = false;
#endif
- typedef union {
+ union convertible_t { // overlays f64 / i64 / u64 / i32 / u32 views on the same storage
double f64;
int64_t i64;
uint64_t u64;
int32_t i32;
uint32_t u32;
- } convertible_t;
+ };
constexpr int ProgramLength = 512;
constexpr int InstructionCount = 1024 * 1024;
@@ -71,34 +69,27 @@ namespace RandomX {
constexpr uint32_t ScratchpadL2 = ScratchpadSize / sizeof(convertible_t);
constexpr int RegistersCount = 8;
+ class Cache;
+
inline int wrapInstr(int i) {
return i % RandomX::ProgramLength;
}
- struct LightClientMemory {
- uint8_t* cache;
+ struct LightClientDataset { // per-VM light-client state: a cache pointer plus a one-block buffer
+ Cache* cache; // source for on-demand block generation
uint8_t* block;
uint32_t blockNumber;
- alignas(16) __m128i keys[10];
+ };
- void* operator new(size_t size) {
- void* ptr = _mm_malloc(size, sizeof(__m128i));
- if (ptr == nullptr)
- throw std::bad_alloc();
- return ptr;
- }
-
- void operator delete(void* ptr) {
- _mm_free(ptr);
- }
+ union dataset_t { // a single pointer slot; which member is meaningful depends on the mode in use
+ uint8_t* dataset; // full mode: the precomputed dataset bytes
+ Cache* cache; // the cache, as produced by datasetInitCache and consumed by setDataset in light mode
+ LightClientDataset* lightDataset; // light mode inside a VirtualMachine
};
struct MemoryRegisters {
addr_t ma, mx;
- union {
- uint8_t* dataset;
- LightClientMemory* lcm;
- };
+ dataset_t ds; // pointer-sized union; the static_assert after this struct depends on that size
};
static_assert(sizeof(MemoryRegisters) == 2 * sizeof(addr_t) + sizeof(uintptr_t), "Invalid alignment of struct RandomX::MemoryRegisters");
diff --git a/src/dataset.cpp b/src/dataset.cpp
index 0738b4f..a265fdf 100644
--- a/src/dataset.cpp
+++ b/src/dataset.cpp
@@ -19,135 +19,25 @@ along with RandomX. If not, see.
// Parts of this file are originally copyright (c) xmr-stak
-#include "common.hpp"
-#include "dataset.hpp"
-#include "Pcg32.hpp"
-#include "argon2_core.h"
#include
#include
#include
#include
-#if defined(_MSC_VER)
-#if defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP == 2)
-#define __SSE2__ 1
-#endif
-#endif
+#include "common.hpp"
+#include "dataset.hpp"
+#include "Pcg32.hpp"
+#include "Cache.hpp"
#if defined(__SSE2__)
#include
-#define PREFETCH(memory) _mm_prefetch((const char *)((memory).dataset + (memory).ma), _MM_HINT_T0)
+#define PREFETCH(memory) _mm_prefetch((const char *)((memory).ds.dataset + (memory).ma), _MM_HINT_T0)
#else
#define PREFETCH(memory)
#endif
namespace RandomX {
- void initializeCache(const void* input, size_t inputLength, void* memory) {
- uint32_t memory_blocks, segment_length;
- argon2_instance_t instance;
- argon2_context context;
-
- context.out = nullptr;
- context.outlen = 0;
- context.pwd = CONST_CAST(uint8_t *)input;
- context.pwdlen = (uint32_t)inputLength;
- context.salt = CONST_CAST(uint8_t *)ArgonSalt;
- context.saltlen = (uint32_t)ArgonSaltSize;
- context.secret = NULL;
- context.secretlen = 0;
- context.ad = NULL;
- context.adlen = 0;
- context.t_cost = ArgonIterations;
- context.m_cost = ArgonMemorySize;
- context.lanes = ArgonLanes;
- context.threads = 1;
- context.allocate_cbk = NULL;
- context.free_cbk = NULL;
- context.flags = ARGON2_DEFAULT_FLAGS;
- context.version = ARGON2_VERSION_NUMBER;
-
- /* 2. Align memory size */
- /* Minimum memory_blocks = 8L blocks, where L is the number of lanes */
- memory_blocks = context.m_cost;
-
- segment_length = memory_blocks / (context.lanes * ARGON2_SYNC_POINTS);
-
- instance.version = context.version;
- instance.memory = NULL;
- instance.passes = context.t_cost;
- instance.memory_blocks = memory_blocks;
- instance.segment_length = segment_length;
- instance.lane_length = segment_length * ARGON2_SYNC_POINTS;
- instance.lanes = context.lanes;
- instance.threads = context.threads;
- instance.type = Argon2_d;
- instance.memory = (block*)memory;
-
- if (instance.threads > instance.lanes) {
- instance.threads = instance.lanes;
- }
-
- /* 3. Initialization: Hashing inputs, allocating memory, filling first
- * blocks
- */
- initialize(&instance, &context);
-
- fill_memory_blocks(&instance);
- }
-
- // This will shift and xor tmp1 into itself as 4 32-bit vals such as
- // sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1)
- static inline __m128i sl_xor(__m128i tmp1) {
- __m128i tmp4;
- tmp4 = _mm_slli_si128(tmp1, 0x04);
- tmp1 = _mm_xor_si128(tmp1, tmp4);
- tmp4 = _mm_slli_si128(tmp4, 0x04);
- tmp1 = _mm_xor_si128(tmp1, tmp4);
- tmp4 = _mm_slli_si128(tmp4, 0x04);
- tmp1 = _mm_xor_si128(tmp1, tmp4);
- return tmp1;
- }
-
- template
- static inline void aes_genkey_sub(__m128i* xout0, __m128i* xout2) {
- __m128i xout1 = soft ? soft_aeskeygenassist(*xout2, rcon) : _mm_aeskeygenassist_si128(*xout2, rcon);
- xout1 = _mm_shuffle_epi32(xout1, 0xFF);
- *xout0 = sl_xor(*xout0);
- *xout0 = _mm_xor_si128(*xout0, xout1);
- xout1 = soft ? soft_aeskeygenassist(*xout0, 0x00) : _mm_aeskeygenassist_si128(*xout0, 0x00);
- xout1 = _mm_shuffle_epi32(xout1, 0xAA);
- *xout2 = sl_xor(*xout2);
- *xout2 = _mm_xor_si128(*xout2, xout1);
- }
-
- template
- void expandAesKeys(const __m128i* seed, __m128i* keys) {
- __m128i xout0, xout2;
- xout0 = _mm_load_si128(seed);
- xout2 = _mm_load_si128(seed + 1);
- *keys++ = xout0;
- *keys++ = xout2;
- aes_genkey_sub<0x01, soft>(&xout0, &xout2);
- *keys++ = xout0;
- *keys++ = xout2;
- aes_genkey_sub<0x02, soft>(&xout0, &xout2);
- *keys++ = xout0;
- *keys++ = xout2;
- aes_genkey_sub<0x04, soft>(&xout0, &xout2);
- *keys++ = xout0;
- *keys++ = xout2;
- aes_genkey_sub<0x08, soft>(&xout0, &xout2);
- *keys++ = xout0;
- *keys++ = xout2;
- }
-
- template
- void expandAesKeys(const __m128i* seed, __m128i* keys);
-
- template
- void expandAesKeys(const __m128i* seed, __m128i* keys);
-
template
static inline void shuffle(T* buffer, size_t bytes, Pcg32& gen) {
auto count = bytes / sizeof(T);
@@ -157,8 +47,18 @@ namespace RandomX {
}
}
+ template
+ static inline __m128i aesenc(__m128i in, __m128i key) { // one AES encryption round; 'soft' selects soft_aesenc over the intrinsic
+ return soft ? soft_aesenc(in, key) : _mm_aesenc_si128(in, key);
+ }
+
+ template
+ static inline __m128i aesdec(__m128i in, __m128i key) { // one AES decryption round; 'soft' selects soft_aesdec over the intrinsic
+ return soft ? soft_aesdec(in, key) : _mm_aesdec_si128(in, key);
+ }
+
template
- void initBlock(uint8_t* in, uint8_t* out, uint32_t blockNumber, const __m128i keys[10]) {
+ void initBlock(const uint8_t* in, uint8_t* out, uint32_t blockNumber, const KeysContainer& keys) {
__m128i xin, xout;
//Initialization vector = block number extended to 128 bits
xout = _mm_cvtsi32_si128(blockNumber);
@@ -200,20 +100,20 @@ namespace RandomX {
}
template
- void initBlock(uint8_t* in, uint8_t* out, uint32_t blockNumber, const __m128i keys[10]);
+ void initBlock(const uint8_t*, uint8_t*, uint32_t, const KeysContainer&);
template
- void initBlock(uint8_t* in, uint8_t* out, uint32_t blockNumber, const __m128i keys[10]);
+ void initBlock(const uint8_t*, uint8_t*, uint32_t, const KeysContainer&);
template
- void initBlock(uint8_t* in, uint8_t* out, uint32_t blockNumber, const __m128i keys[10]);
+ void initBlock(const uint8_t*, uint8_t*, uint32_t, const KeysContainer&);
template
- void initBlock(uint8_t* in, uint8_t* out, uint32_t blockNumber, const __m128i keys[10]);
+ void initBlock(const uint8_t*, uint8_t*, uint32_t, const KeysContainer&);
convertible_t datasetRead(addr_t addr, MemoryRegisters& memory) {
convertible_t data;
- data.u64 = *(uint64_t*)(memory.dataset + memory.ma);
+ data.u64 = *(uint64_t*)(memory.ds.dataset + memory.ma);
memory.ma += 8;
memory.mx ^= addr;
if ((memory.mx & 0xFFF8) == 0) {
@@ -224,24 +124,25 @@ namespace RandomX {
}
template
- void initBlock(uint8_t* cache, uint8_t* block, uint32_t blockNumber, const __m128i k[10]) {
+ void initBlock(const uint8_t* cache, uint8_t* block, uint32_t blockNumber, const KeysContainer& keys) { // picks the variant by block-number parity; input starts at cache + blockNumber * CacheBlockSize
if (blockNumber % 2 == 1) {
- initBlock(cache + blockNumber * CacheBlockSize, block, blockNumber, k);
+ initBlock(cache + blockNumber * CacheBlockSize, block, blockNumber, keys); // NOTE(review): presumably differs from the else-branch only in template arguments not visible here
}
else {
- initBlock(cache + blockNumber * CacheBlockSize, block, blockNumber, k);
+ initBlock(cache + blockNumber * CacheBlockSize, block, blockNumber, keys);
}
}
template
convertible_t datasetReadLight(addr_t addr, MemoryRegisters& memory) {
convertible_t data;
+ LightClientDataset* lds = memory.ds.lightDataset;
auto blockNumber = memory.ma / DatasetBlockSize;
- if (memory.lcm->blockNumber != blockNumber) {
- initBlock(memory.lcm->cache + CacheShift, (uint8_t*)memory.lcm->block, blockNumber, memory.lcm->keys);
- memory.lcm->blockNumber = blockNumber;
+ if (lds->blockNumber != blockNumber) {
+ initBlock(lds->cache->getCache(), (uint8_t*)lds->block, blockNumber, lds->cache->getKeys());
+ lds->blockNumber = blockNumber;
}
- data.u64 = *(uint64_t*)(memory.lcm->block + (memory.ma % DatasetBlockSize));
+ data.u64 = *(uint64_t*)(lds->block + (memory.ma % DatasetBlockSize));
memory.ma += 8;
memory.mx ^= addr;
if ((memory.mx & 0xFFF8) == 0) {
@@ -256,54 +157,37 @@ namespace RandomX {
template
convertible_t datasetReadLight(addr_t addr, MemoryRegisters& memory);
- template
- void datasetInit(const void* seed, uint8_t*& dataset) {
+ void datasetAlloc(dataset_t& ds) { // allocates DatasetSize bytes into ds.dataset; throws std::runtime_error if that is not possible
if (sizeof(size_t) <= 4)
throw std::runtime_error("Platform doesn't support enough memory for the dataset");
- dataset = (uint8_t*)_mm_malloc(DatasetSize, sizeof(__m128i));
- if (dataset == nullptr) {
- throw std::runtime_error("Dataset memory allocation failed. >4 GiB of virtual memory is needed.");
+ ds.dataset = (uint8_t*)_mm_malloc(DatasetSize, /*sizeof(__m128i)*/ 64); // 64-byte alignment replaces the earlier __m128i alignment; only the buffer is allocated, nothing is filled in
+ if (ds.dataset == nullptr) {
+ throw std::runtime_error("Dataset memory allocation failed. >4 GiB of free virtual memory is needed.");
}
- uint8_t* cache = (uint8_t*)_mm_malloc(CacheSize + CacheShift, sizeof(__m128i));
- if (cache == nullptr) {
- throw std::bad_alloc();
- }
- initializeCache(seed, SeedSize, cache);
- memcpy(cache + CacheSize, cache, CacheShift);
- alignas(16) __m128i keys[10];
- expandAesKeys((const __m128i*)seed, keys);
- for (uint32_t i = 0; i < DatasetBlockCount; ++i) {
- initBlock(cache + CacheShift, dataset + i * DatasetBlockSize, i, keys);
- }
- _mm_free(cache);
}
- template
- void datasetInit(const void*, uint8_t*&);
-
- template
- void datasetInit(const void*, uint8_t*&);
-
template
- void datasetInitLight(const void* seed, LightClientMemory*& lcm) {
- lcm = new LightClientMemory();
- lcm->cache = (uint8_t*)_mm_malloc(CacheSize + CacheShift, sizeof(__m128i));
- if (lcm->cache == nullptr) {
- throw std::bad_alloc();
+ void datasetInit(Cache* cache, dataset_t ds, uint32_t startBlock, uint32_t blockCount) { // generates dataset blocks [startBlock, startBlock + blockCount) from the cache
+ for (uint32_t i = startBlock; i < startBlock + blockCount; ++i) { // main() calls this from several threads, each with its own block range
+ initBlock(cache->getCache(), ds.dataset + i * DatasetBlockSize, i, cache->getKeys()); // block i is written at byte offset i * DatasetBlockSize
}
- initializeCache(seed, SeedSize, lcm->cache);
- memcpy(lcm->cache + CacheSize, lcm->cache, CacheShift);
- expandAesKeys((__m128i*)seed, lcm->keys);
- lcm->block = (uint8_t*)_mm_malloc(DatasetBlockSize, sizeof(__m128i));
- if (lcm->block == nullptr) {
- throw std::bad_alloc();
- }
- lcm->blockNumber = -1;
}
template
- void datasetInitLight(const void*, LightClientMemory*&);
+ void datasetInit(Cache*, dataset_t, uint32_t, uint32_t);
template
- void datasetInitLight(const void*, LightClientMemory*&);
+ void datasetInit(Cache*, dataset_t, uint32_t, uint32_t);
+
+ template
+ void datasetInitCache(const void* seed, dataset_t& ds) { // creates a Cache in ds.cache and initializes it from SeedSize bytes at 'seed'
+ ds.cache = new Cache(); // nothing in this function frees it; releasing the Cache is left to the caller
+ ds.cache->initialize(seed, SeedSize);
+ }
+
+ template
+ void datasetInitCache(const void*, dataset_t&);
+
+ template
+ void datasetInitCache(const void*, dataset_t&);
}
diff --git a/src/dataset.hpp b/src/dataset.hpp
index 42d63e6..bb29197 100644
--- a/src/dataset.hpp
+++ b/src/dataset.hpp
@@ -20,43 +20,30 @@ along with RandomX. If not, see.
#pragma once
#include
+#include
#include "intrinPortable.h"
-#include "argon2.h"
#include "common.hpp"
#include "softAes.h"
namespace RandomX {
- static_assert(ArgonMemorySize % (ArgonLanes * ARGON2_SYNC_POINTS) == 0, "ArgonMemorySize - invalid value");
-
- void initializeCache(const void* input, size_t inputLength, void* memory);
-
- template
- void expandAesKeys(const __m128i* seed, __m128i* keys);
-
- template
- inline __m128i aesenc(__m128i in, __m128i key) {
- return soft ? soft_aesenc(in, key) : _mm_aesenc_si128(in, key);
- }
-
- template
- inline __m128i aesdec(__m128i in, __m128i key) {
- return soft ? soft_aesdec(in, key) : _mm_aesdec_si128(in, key);
- }
+ using KeysContainer = std::array<__m128i, 10>;
template
- void initBlock(uint8_t* in, uint8_t* out, uint32_t blockNumber, const __m128i keys[10]);
+ void initBlock(const uint8_t* in, uint8_t* out, uint32_t blockNumber, const KeysContainer& keys);
template
- void initBlock(uint8_t* cache, uint8_t* block, uint32_t blockNumber, const __m128i keys[10]);
+ void initBlock(const uint8_t* cache, uint8_t* block, uint32_t blockNumber, const KeysContainer& keys);
+
+ void datasetAlloc(dataset_t& ds);
template
- void datasetInit(const void* seed, uint8_t*& dataset);
+ void datasetInit(Cache* cache, dataset_t ds, uint32_t startBlock, uint32_t blockCount);
convertible_t datasetRead(addr_t addr, MemoryRegisters& memory);
template
- void datasetInitLight(const void* seed, LightClientMemory*& lcm);
+ void datasetInitCache(const void* seed, dataset_t& dataset);
template
convertible_t datasetReadLight(addr_t addr, MemoryRegisters& memory);
diff --git a/src/main.cpp b/src/main.cpp
index f3f8124..b486c1a 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -30,6 +30,10 @@ along with RandomX. If not, see.
#include "Program.hpp"
#include
#include "instructions.hpp"
+#include
+#include
+#include "dataset.hpp"
+#include "Cache.hpp"
const uint8_t seed[32] = { 191, 182, 222, 175, 249, 89, 134, 104, 241, 68, 191, 62, 162, 166, 61, 64, 123, 191, 227, 193, 118, 60, 188, 53, 223, 133, 175, 24, 123, 230, 55, 74 };
@@ -45,7 +49,6 @@ void outputHex(std::ostream& os, const char* data, int length) {
os << hexmap[(data[i] & 0xF0) >> 4];
os << hexmap[data[i] & 0x0F];
}
- os << std::endl;
}
void readOption(const char* option, int argc, char** argv, bool& out) {
@@ -58,6 +61,15 @@ void readOption(const char* option, int argc, char** argv, bool& out) {
out = false;
}
+void readIntOption(const char* option, int argc, char** argv, int& out, int defaultValue) { // sets 'out' to the positive integer following 'option', else to defaultValue
+ for (int i = 0; i < argc - 1; ++i) { // stops at argc - 1 so that argv[i + 1] is always a valid argument
+ if (strcmp(argv[i], option) == 0 && (out = atoi(argv[i + 1])) > 0) { // values that parse to zero or less are skipped and the scan continues
+ return;
+ }
+ }
+ out = defaultValue;
+}
+
void readInt(int argc, char** argv, int& out, int defaultValue) {
for (int i = 0; i < argc; ++i) {
if (*argv[i] != '-' && (out = atoi(argv[i])) > 0) {
@@ -75,81 +87,144 @@ std::ostream& operator<<(std::ostream& os, const RandomX::RegisterFile& rf) {
return os;
}
-int main(int argc, char** argv) {
- bool softAes, lightClient, genAsm, compiled;
- int programCount;
- readOption("--softAes", argc, argv, softAes);
- readOption("--lightClient", argc, argv, lightClient);
- readOption("--genAsm", argc, argv, genAsm);
- readOption("--compiled", argc, argv, compiled);
- readInt(argc, argv, programCount, 1000);
+class AtomicHash { // four-word accumulator that worker threads XOR their results into
+public:
+ AtomicHash() {
+ for (int i = 0; i < 4; ++i)
+ hash[i].store(0);
+ }
+ void xorWith(uint64_t update[4]) { // each word is XORed atomically on its own; the four words are not updated as one unit
+ for (int i = 0; i < 4; ++i)
+ hash[i].fetch_xor(update[i]);
+ }
+ void print(std::ostream& os) { // writes the four words through outputHex, then a newline, to 'os'
+ for (int i = 0; i < 4; ++i)
+ print(hash[i], os);
+ os << std::endl;
+ }
+private:
+ void print(std::atomic& hash, std::ostream& os) {
+ auto h = hash.load(); // take a plain copy so its bytes can be passed to outputHex
+ outputHex(os, (char*)&h, sizeof(h)); // fixed: honor the caller's stream instead of always writing to std::cout
+ }
+ std::atomic hash[4];
+};
+void mine(RandomX::VirtualMachine* vm, std::atomic& atomicNonce, AtomicHash& result, int noncesCount, int thread) { // worker loop: claims nonces from the shared counter and XORs each result into 'result'
+ uint64_t hash[4];
unsigned char blockTemplate[] = {
0x07, 0x07, 0xf7, 0xa4, 0xf0, 0xd6, 0x05, 0xb3, 0x03, 0x26, 0x08, 0x16, 0xba, 0x3f, 0x10, 0x90, 0x2e, 0x1a, 0x14,
0x5a, 0xc5, 0xfa, 0xd3, 0xaa, 0x3a, 0xf6, 0xea, 0x44, 0xc1, 0x18, 0x69, 0xdc, 0x4f, 0x85, 0x3f, 0x00, 0x2b, 0x2e,
0xea, 0x00, 0x00, 0x00, 0x00, 0x77, 0xb2, 0x06, 0xa0, 0x2c, 0xa5, 0xb1, 0xd4, 0xce, 0x6b, 0xbf, 0xdf, 0x0a, 0xca,
0xc3, 0x8b, 0xde, 0xd3, 0x4d, 0x2d, 0xcd, 0xee, 0xf9, 0x5c, 0xd2, 0x0c, 0xef, 0xc1, 0x2f, 0x61, 0xd5, 0x61, 0x09
};
- int* nonce = (int*)(blockTemplate + 39);
- uint8_t hash[RandomX::ResultSize];
+ int* noncePtr = (int*)(blockTemplate + 39); // the nonce is written at byte offset 39 of this thread's private template copy
+ int nonce = atomicNonce.fetch_add(1); // every fetch_add hands out a distinct nonce across threads
- if (genAsm) {
- *nonce = programCount;
+ while (nonce < noncesCount) {
+ //std::cout << "Thread " << thread << " nonce " << nonce << std::endl;
+ *noncePtr = nonce;
blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0);
- RandomX::AssemblyGeneratorX86 asmX86;
- asmX86.generateProgram(hash);
- asmX86.printCode(std::cout);
- return 0;
+ int spIndex = ((uint8_t*)hash)[24] | ((((uint8_t*)hash)[25] & 63) << 8); // 14-bit scratchpad index taken from hash bytes 24 and 25
+ vm->initializeScratchpad(spIndex);
+ vm->initializeProgram(hash);
+ vm->execute();
+ vm->getResult(hash); // 'hash' is reused as the output buffer for the VM result
+ result.xorWith(hash);
+ if (RandomX::trace) {
+ std::cout << "Nonce: " << nonce << " ";
+ outputHex(std::cout, (char*)hash, sizeof(hash));
+ std::cout << std::endl;
+ }
+ nonce = atomicNonce.fetch_add(1);
}
+}
+
+int main(int argc, char** argv) {
+ bool softAes, lightClient, genAsm, compiled;
+ int programCount, threadCount;
+ readOption("--softAes", argc, argv, softAes);
+ readOption("--lightClient", argc, argv, lightClient);
+ readOption("--genAsm", argc, argv, genAsm);
+ readOption("--compiled", argc, argv, compiled);
+ readIntOption("--threads", argc, argv, threadCount, 1);
+ readIntOption("--nonces", argc, argv, programCount, 1000);
+
+ std::atomic atomicNonce(0);
+ AtomicHash result;
+ std::vector vms;
+ std::vector threads;
+ RandomX::dataset_t dataset;
if (softAes)
std::cout << "Using software AES." << std::endl;
-
- char cumulative[RandomX::ResultSize] = { 0 };
-
- RandomX::VirtualMachine* vm;
+ std::cout << "Initializing..." << std::endl;
try {
- if (compiled) {
- vm = new RandomX::CompiledVirtualMachine(softAes);
+ Stopwatch sw(true);
+ if (softAes) {
+ RandomX::datasetInitCache(seed, dataset);
}
else {
- vm = new RandomX::InterpretedVirtualMachine(softAes);
+ RandomX::datasetInitCache(seed, dataset);
}
- std::cout << "Initializing..." << std::endl;
- Stopwatch sw(true);
- vm->initializeDataset(seed, lightClient);
- if(lightClient)
+ if (RandomX::trace) {
+ std::cout << "Keys: " << std::endl;
+ for (int i = 0; i < dataset.cache->getKeys().size(); ++i) {
+ outputHex(std::cout, (char*)&dataset.cache->getKeys()[i], sizeof(__m128i));
+ }
+ std::cout << std::endl;
+ std::cout << "Cache: " << std::endl;
+ outputHex(std::cout, (char*)dataset.cache->getCache(), sizeof(__m128i));
+ std::cout << std::endl;
+ }
+ if (lightClient) {
std::cout << "Cache (64 MiB) initialized in " << sw.getElapsed() << " s" << std::endl;
- else
+ }
+ else {
+ RandomX::Cache* cache = dataset.cache;
+ RandomX::datasetAlloc(dataset);
+ auto perThread = RandomX::DatasetBlockCount / threadCount;
+ auto remainder = RandomX::DatasetBlockCount % threadCount;
+ for (int i = 0; i < threadCount; ++i) {
+ auto count = perThread + (i == threadCount - 1 ? remainder : 0);
+ if (softAes) {
+ threads.push_back(std::thread(&RandomX::datasetInit, cache, dataset, i * perThread, count));
+ }
+ else {
+ threads.push_back(std::thread(&RandomX::datasetInit, cache, dataset, i * perThread, count));
+ }
+ }
+ for (int i = 0; i < threads.size(); ++i) {
+ threads[i].join();
+ }
+ delete cache;
+ threads.clear();
std::cout << "Dataset (4 GiB) initialized in " << sw.getElapsed() << " s" << std::endl;
+ }
+ std::cout << "Initializing " << threadCount << " virtual machine(s)..." << std::endl;
+ for (int i = 0; i < threadCount; ++i) {
+ RandomX::VirtualMachine* vm;
+ if (compiled) {
+ vm = new RandomX::CompiledVirtualMachine(softAes);
+ }
+ else {
+ vm = new RandomX::InterpretedVirtualMachine(softAes);
+ }
+ vm->setDataset(dataset, lightClient);
+ vms.push_back(vm);
+ }
std::cout << "Running benchmark (" << programCount << " programs) ..." << std::endl;
sw.restart();
- for (int i = 0; i < programCount; ++i) {
- *nonce = i;
- if (RandomX::trace) std::cout << "Nonce: " << i << " ";
- blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0);
- int spIndex = hash[24] | ((hash[25] & 63) << 8);
- vm->initializeScratchpad(spIndex);
- //dump((const char *)vm.getScratchpad(), RandomX::ScratchpadSize, "scratchpad-before.txt");
- //return 0;
- vm->initializeProgram(hash);
- vm->execute();
- /*std::string fileName("scratchpad-after-");
- fileName = fileName + std::to_string(i) + ".txt";
- dump((const char *)vm.getScratchpad(), RandomX::ScratchpadSize, fileName.c_str());*/
- vm->getResult(hash);
- if (RandomX::trace) {
- outputHex(std::cout, (char*)hash, sizeof(hash));
- }
- ((uint64_t*)cumulative)[0] ^= ((uint64_t*)hash)[0];
- ((uint64_t*)cumulative)[1] ^= ((uint64_t*)hash)[1];
- ((uint64_t*)cumulative)[2] ^= ((uint64_t*)hash)[2];
- ((uint64_t*)cumulative)[3] ^= ((uint64_t*)hash)[3];
+ for (int i = 0; i < vms.size(); ++i) {
+ threads.push_back(std::thread(&mine, vms[i], std::ref(atomicNonce), std::ref(result), programCount, i));
+ }
+ for (int i = 0; i < threads.size(); ++i) {
+ threads[i].join();
}
double elapsed = sw.getElapsed();
std::cout << "Calculated result: ";
- outputHex(std::cout, cumulative, sizeof(cumulative));
+ result.print(std::cout);
if(programCount == 1000)
std::cout << "Reference result: d62ed85c39030cd2c5704fca3a23019f1244f2b03447c9a6b39dea5390ed1d10" << std::endl;
std::cout << "Performance: " << programCount / elapsed << " programs per second" << std::endl;