Implemented virtual memory free

Removed legacy AES code
This commit is contained in:
tevador 2019-03-08 11:46:03 +01:00
parent 6e8c83fdb6
commit 096a7c0d7b
9 changed files with 44 additions and 134 deletions

View File

@ -17,11 +17,8 @@ You should have received a copy of the GNU General Public License
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
*/
// Parts of this file are originally copyright (c) xmr-stak
#include <cstring>
#include "Cache.hpp"
#include "softAes.h"
#include "argon2.h"
#include "argon2_core.h"
@ -29,52 +26,6 @@ namespace RandomX {
static_assert(ArgonMemorySize % (ArgonLanes * ARGON2_SYNC_POINTS) == 0, "ArgonMemorySize - invalid value");
// Running XOR across the four 32-bit lanes of a 128-bit value:
//   sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1)
// A copy of the input is shifted left by 4 bytes (one lane) three times, and each
// shifted copy is XOR-ed into the accumulator.
static inline __m128i sl_xor(__m128i tmp1) {
    __m128i shifted = tmp1;
    for (int lane = 1; lane < 4; ++lane) {
        shifted = _mm_slli_si128(shifted, 0x04);
        tmp1 = _mm_xor_si128(tmp1, shifted);
    }
    return tmp1;
}
// Advances the pair of 128-bit round keys (*xout0, *xout2) in place to the next pair,
// using round constant `rcon`. When `soft` is true the assist word comes from
// soft_aeskeygenassist, otherwise from the _mm_aeskeygenassist_si128 intrinsic.
// NOTE(review): the rcon/0x00 assists with 0xFF/0xAA shuffles look like the usual
// AES-256 key-schedule step — confirm against the reference this was taken from.
template<uint8_t rcon, bool soft>
static inline void aesGenKeys(__m128i* xout0, __m128i* xout2) {
    __m128i assist;

    // First key: the assist word is derived from the current second key.
    if (soft)
        assist = soft_aeskeygenassist(*xout2, rcon);
    else
        assist = _mm_aeskeygenassist_si128(*xout2, rcon);
    *xout0 = _mm_xor_si128(sl_xor(*xout0), _mm_shuffle_epi32(assist, 0xFF));

    // Second key: the assist word is derived from the freshly updated first key.
    if (soft)
        assist = soft_aeskeygenassist(*xout0, 0x00);
    else
        assist = _mm_aeskeygenassist_si128(*xout0, 0x00);
    *xout2 = _mm_xor_si128(sl_xor(*xout2), _mm_shuffle_epi32(assist, 0xAA));
}
// Expands the two 128-bit words at `seed` (read with aligned loads) into ten 128-bit
// keys written to keys[0..9]: the two seed words as-is, followed by the results of four
// aesGenKeys rounds with round constants 0x01, 0x02, 0x04 and 0x08.
// `soft` is forwarded to aesGenKeys.
template<bool soft>
static inline void expandAesKeys(const __m128i* seed, __m128i* keys) {
    __m128i even = _mm_load_si128(&seed[0]);
    __m128i odd = _mm_load_si128(&seed[1]);
    keys[0] = even;
    keys[1] = odd;

    aesGenKeys<0x01, soft>(&even, &odd);
    keys[2] = even;
    keys[3] = odd;

    aesGenKeys<0x02, soft>(&even, &odd);
    keys[4] = even;
    keys[5] = odd;

    aesGenKeys<0x04, soft>(&even, &odd);
    keys[6] = even;
    keys[7] = odd;

    aesGenKeys<0x08, soft>(&even, &odd);
    keys[8] = even;
    keys[9] = odd;
}
void Cache::argonFill(const void* seed, size_t seedSize) {
uint32_t memory_blocks, segment_length;
argon2_instance_t instance;
@ -128,16 +79,8 @@ namespace RandomX {
fill_memory_blocks(&instance);
}
// Initializes the cache from `seed`: first runs argonFill over the seed, then expands
// the beginning of the same seed into the `keys` member via expandAesKeys.
// `softAes` is forwarded to expandAesKeys.
template<bool softAes>
void Cache::initialize(const void* seed, size_t seedSize) {
    // Step 1: Argon2d memory fill.
    argonFill(seed, seedSize);

    // Step 2: AES keys; the seed bytes are reinterpreted as 128-bit words.
    const __m128i* seedWords = static_cast<const __m128i*>(seed);
    expandAesKeys<softAes>(seedWords, keys.data());
}

// Explicit instantiations for both values of softAes.
template void Cache::initialize<false>(const void*, size_t);
template void Cache::initialize<true>(const void*, size_t);
}

View File

@ -42,7 +42,7 @@ namespace RandomX {
}
static void dealloc(Cache* cache, bool largePages) {
if (largePages) {
//allocLargePagesMemory(sizeof(Cache));
freePagedMemory(cache, sizeof(Cache));
}
else {
_mm_free(cache);
@ -59,18 +59,12 @@ namespace RandomX {
_mm_free(ptr);
}*/
template<bool softAes>
void initialize(const void* seed, size_t seedSize);
// Read-only access to the `keys` member.
const KeysContainer& getKeys() const {
return keys;
}
// Read-only pointer to the first byte of the `memory` array (declared with CacheSize elements).
const uint8_t* getCache() const {
return memory;
}
private:
alignas(16) KeysContainer keys;
uint8_t memory[CacheSize];
void argonFill(const void* seed, size_t seedSize);
};

View File

@ -350,7 +350,7 @@ namespace RandomX {
mem.mx &= CacheLineAlignMask;
Cache* cache = mem.ds.cache;
uint64_t datasetLine[CacheLineSize / sizeof(uint64_t)];
initBlock(cache->getCache(), (uint8_t*)datasetLine, mem.ma / CacheLineSize, cache->getKeys());
initBlock(cache->getCache(), (uint8_t*)datasetLine, mem.ma / CacheLineSize);
for (int i = 0; i < RegistersCount; ++i)
r[i] ^= datasetLine[i];
std::swap(mem.mx, mem.ma);

View File

@ -57,7 +57,7 @@ namespace RandomX {
#endif
uint32_t currentBlock = addr / CacheLineSize;
if (currentBlock != startBlock || output != currentLine.data()) {
initBlock(cache->getCache(), (uint8_t*)currentLine.data(), currentBlock, cache->getKeys());
initBlock(cache->getCache(), (uint8_t*)currentLine.data(), currentBlock);
}
else {
sync();
@ -86,7 +86,7 @@ namespace RandomX {
// Fills `out` with `blockCount` consecutive blocks starting at block index `startBlock`.
// Block i is produced by initBlock and written at byte offset CacheLineSize * i from `out`.
// NOTE(review): the loop body shows two initBlock calls for the same destination (one with
// cache->getKeys(), one without); this looks like the before/after pair of a diff —
// confirm which call the real file contains.
template<bool softAes>
void LightClientAsyncWorker<softAes>::getBlocks(void* out, uint32_t startBlock, uint32_t blockCount) {
for (uint32_t i = 0; i < blockCount; ++i) {
initBlock(cache->getCache(), (uint8_t*)out + CacheLineSize * i, startBlock + i, cache->getKeys());
initBlock(cache->getCache(), (uint8_t*)out + CacheLineSize * i, startBlock + i);
}
}
@ -108,7 +108,7 @@ namespace RandomX {
std::cout << sw.getElapsed() << ": runWorker-getBlocks " << startBlock << "/" << blockCount << std::endl;
#endif
//getBlocks(output, startBlock, blockCount);
initBlock(cache->getCache(), (uint8_t*)output, startBlock, cache->getKeys());
initBlock(cache->getCache(), (uint8_t*)output, startBlock);
hasWork = false;
#ifdef TRACE
std::cout << sw.getElapsed() << ": runWorker-finished " << startBlock << "/" << blockCount << std::endl;

View File

@ -39,36 +39,36 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
namespace RandomX {
// Computes one 64-byte block for index `blockNumber` from `cache` and stores it at `out`.
// Eight 64-bit lanes are mixed with cache contents for DatasetIterations rounds and then
// written to out[0..63].
// NOTE(review): this span interleaves two versions of the function — locals r0..r7 with a
// `keys` parameter, and locals c0..c7 without it. It looks like before/after lines of a
// diff; confirm which lines belong to the real file.
void initBlock(const uint8_t* cache, uint8_t* out, uint32_t blockNumber, const KeysContainer& keys) {
uint64_t r0, r1, r2, r3, r4, r5, r6, r7;
void initBlock(const uint8_t* cache, uint8_t* out, uint32_t blockNumber) {
uint64_t c0, c1, c2, c3, c4, c5, c6, c7;
// Lane 0 is seeded with 4 * blockNumber; the other seven lanes start at zero.
r0 = 4ULL * blockNumber;
r1 = r2 = r3 = r4 = r5 = r6 = r7 = 0;
c0 = 4ULL * blockNumber;
c1 = c2 = c3 = c4 = c5 = c6 = c7 = 0;
// Offset mask applied to lane 0 before indexing the cache.
// Presumably CacheSize is a power of two so the offset stays inside the cache — TODO confirm.
constexpr uint32_t mask = (CacheSize - 1) & CacheLineAlignMask;
for (auto i = 0; i < DatasetIterations; ++i) {
// The cache location read in this round is chosen by the current value of lane 0.
const uint8_t* mixBlock = cache + (r0 & mask);
const uint8_t* mixBlock = cache + (c0 & mask);
PREFETCHNTA(mixBlock);
// Lane 0 goes through squareHash first; then every lane is XOR-ed with one
// 64-bit word of mixBlock (byte offsets 0, 8, ..., 56).
r0 = squareHash(r0);
r0 ^= load64(mixBlock + 0);
r1 ^= load64(mixBlock + 8);
r2 ^= load64(mixBlock + 16);
r3 ^= load64(mixBlock + 24);
r4 ^= load64(mixBlock + 32);
r5 ^= load64(mixBlock + 40);
r6 ^= load64(mixBlock + 48);
r7 ^= load64(mixBlock + 56);
c0 = squareHash(c0);
c0 ^= load64(mixBlock + 0);
c1 ^= load64(mixBlock + 8);
c2 ^= load64(mixBlock + 16);
c3 ^= load64(mixBlock + 24);
c4 ^= load64(mixBlock + 32);
c5 ^= load64(mixBlock + 40);
c6 ^= load64(mixBlock + 48);
c7 ^= load64(mixBlock + 56);
}
// Write the eight lanes to the output block.
store64(out + 0, r0);
store64(out + 8, r1);
store64(out + 16, r2);
store64(out + 24, r3);
store64(out + 32, r4);
store64(out + 40, r5);
store64(out + 48, r6);
store64(out + 56, r7);
store64(out + 0, c0);
store64(out + 8, c1);
store64(out + 16, c2);
store64(out + 24, c3);
store64(out + 32, c4);
store64(out + 40, c5);
store64(out + 48, c6);
store64(out + 56, c7);
}
void datasetRead(addr_t addr, MemoryRegisters& memory, RegisterFile& reg) {
@ -86,7 +86,7 @@ namespace RandomX {
memory.mx &= CacheLineAlignMask; //align to cache line
Cache* cache = memory.ds.cache;
uint64_t datasetLine[CacheLineSize / sizeof(uint64_t)];
initBlock(cache->getCache(), (uint8_t*)datasetLine, memory.ma / CacheLineSize, cache->getKeys());
initBlock(cache->getCache(), (uint8_t*)datasetLine, memory.ma / CacheLineSize);
for (int i = 0; i < RegistersCount; ++i)
reg[i] ^= datasetLine[i];
std::swap(memory.mx, memory.ma);
@ -119,31 +119,12 @@ namespace RandomX {
// Initializes dataset blocks with indices in [startBlock, startBlock + blockCount):
// block i is produced by initBlock from the cache and written at ds.dataset + i * CacheLineSize.
// NOTE(review): the loop body shows two initBlock calls (with and without cache->getKeys());
// this looks like the before/after pair of a diff — confirm which call is live.
// NOTE(review): `startBlock + blockCount` is evaluated in uint32_t, and `i * CacheLineSize`
// may be too depending on CacheLineSize's type — confirm neither can wrap for the
// configured dataset size.
void datasetInit(Cache* cache, dataset_t ds, uint32_t startBlock, uint32_t blockCount) {
for (uint32_t i = startBlock; i < startBlock + blockCount; ++i) {
initBlock(cache->getCache(), ds.dataset + i * CacheLineSize, i, cache->getKeys());
initBlock(cache->getCache(), ds.dataset + i * CacheLineSize, i);
}
}
// Creates and initializes the cache for `ds`.
//   seed       - seed data passed to Cache::initialize together with SeedSize
//   ds         - receives the new cache pointer in ds.cache
//   largePages - forwarded to Cache::alloc to select the allocation method
// NOTE(review): the template header, the two `initialize` calls (templated and
// non-templated) and the explicit instantiations below look like interleaved before/after
// lines of a diff — confirm which form the real file uses.
template<bool softAes>
void datasetInitCache(const void* seed, dataset_t& ds, bool largePages) {
// Placement-new: construct the Cache object inside the storage returned by Cache::alloc.
ds.cache = new(Cache::alloc(largePages)) Cache();
ds.cache->initialize<softAes>(seed, SeedSize);
ds.cache->initialize(seed, SeedSize);
}
// Explicit instantiations for both values of softAes.
template
void datasetInitCache<false>(const void*, dataset_t&, bool);
template
void datasetInitCache<true>(const void*, dataset_t&, bool);
// Benchmark helper: calls initBlock `blockCount` times, always for block number 0, using a
// single CacheLineSize-byte scratch buffer as both the input and the output.
// NOTE(review): `keys` and `buffer` are handed to initBlock without being initialized, and
// `softAes` is not referenced in the body — confirm this is intentional for a benchmark.
template<bool softAes>
void aesBench(uint32_t blockCount) {
    alignas(16) KeysContainer keys;
    alignas(16) uint8_t buffer[CacheLineSize];

    uint32_t remaining = blockCount;
    while (remaining != 0) {
        initBlock(buffer, buffer, 0, keys);
        --remaining;
    }
}

// Explicit instantiations for both values of softAes.
template void aesBench<true>(uint32_t blockCount);
template void aesBench<false>(uint32_t blockCount);
}

View File

@ -20,18 +20,15 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
#pragma once
#include <cstdint>
#include <array>
#include "intrinPortable.h"
#include "common.hpp"
namespace RandomX {
using KeysContainer = std::array<__m128i, 10>;
template<bool soft, bool enc>
void initBlock(const uint8_t* in, uint8_t* out, uint32_t blockNumber, const KeysContainer& keys);
void initBlock(const uint8_t* in, uint8_t* out, uint32_t blockNumber);
void initBlock(const uint8_t* cache, uint8_t* block, uint32_t blockNumber, const KeysContainer& keys);
void initBlock(const uint8_t* cache, uint8_t* block, uint32_t blockNumber);
void datasetAlloc(dataset_t& ds, bool largePages);
@ -39,14 +36,10 @@ namespace RandomX {
void datasetRead(addr_t addr, MemoryRegisters& memory, RegisterFile&);
template<bool softAes>
void datasetInitCache(const void* seed, dataset_t& dataset, bool largePages);
void datasetReadLight(addr_t addr, MemoryRegisters& memory, int_reg_t(&reg)[RegistersCount]);
void datasetReadLightAsync(addr_t addr, MemoryRegisters& memory, int_reg_t(&reg)[RegistersCount]);
template<bool softAes>
void aesBench(uint32_t blockCount);
}

View File

@ -256,18 +256,8 @@ int main(int argc, char** argv) {
try {
Stopwatch sw(true);
if (softAes) {
RandomX::datasetInitCache<true>(seed, dataset, largePages);
}
else {
RandomX::datasetInitCache<false>(seed, dataset, largePages);
}
RandomX::datasetInitCache(seed, dataset, largePages);
if (RandomX::trace) {
std::cout << "Keys: " << std::endl;
for (unsigned i = 0; i < dataset.cache->getKeys().size(); ++i) {
outputHex(std::cout, (char*)&dataset.cache->getKeys()[i], sizeof(__m128i));
}
std::cout << std::endl;
std::cout << "Cache: " << std::endl;
outputHex(std::cout, (char*)dataset.cache->getCache(), sizeof(__m128i));
std::cout << std::endl;

View File

@ -109,4 +109,12 @@ void* allocLargePagesMemory(std::size_t bytes) {
throw std::runtime_error("allocLargePagesMemory - mmap failed");
#endif
return mem;
}
}
// Releases a region of paged memory.
//   ptr   - start of the region; a null pointer is accepted and ignored, so callers can
//           treat this like free() / _mm_free().
//   bytes - size of the region; used only by the munmap path (VirtualFree with
//           MEM_RELEASE is always passed a size of 0).
// The function returns void and is used on cleanup paths, so a failure reported by the OS
// is explicitly discarded rather than thrown.
// NOTE(review): for huge-page mappings, munmap may reject a length that is not a multiple of
// the huge page size — confirm callers pass a suitably sized `bytes`.
void freePagedMemory(void* ptr, std::size_t bytes) {
    if (ptr == nullptr)
        return; // nothing to release
#ifdef _WIN32
    (void)bytes; // unused on this path
    (void)VirtualFree(ptr, 0, MEM_RELEASE);
#else
    (void)munmap(ptr, bytes);
#endif
}

View File

@ -22,4 +22,5 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
#include <cstddef>
void* allocExecutableMemory(std::size_t);
void* allocLargePagesMemory(std::size_t);
void* allocLargePagesMemory(std::size_t);
void freePagedMemory(void*, std::size_t);