mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2024-08-15 00:23:14 +00:00
Memory-bound dataset initialization
This commit is contained in:
parent
4fb168e249
commit
89bc68d093
2 changed files with 34 additions and 33 deletions
|
@ -34,8 +34,8 @@ namespace RandomX {
|
|||
constexpr int SeedSize = 32;
|
||||
constexpr int ResultSize = 32;
|
||||
|
||||
constexpr int ArgonIterations = 6;
|
||||
constexpr uint32_t ArgonMemorySize = 131072; //KiB
|
||||
constexpr int ArgonIterations = 3;
|
||||
constexpr uint32_t ArgonMemorySize = 262144; //KiB
|
||||
constexpr int ArgonLanes = 1;
|
||||
const char ArgonSalt[] = "Monero\x1A$";
|
||||
constexpr int ArgonSaltSize = sizeof(ArgonSalt) - 1;
|
||||
|
@ -46,7 +46,7 @@ namespace RandomX {
|
|||
constexpr int CacheBlockCount = CacheSize / CacheLineSize;
|
||||
constexpr int BlockExpansionRatio = DatasetSize / CacheSize;
|
||||
constexpr int DatasetBlockCount = BlockExpansionRatio * CacheBlockCount;
|
||||
constexpr int DatasetIterations = 32;
|
||||
constexpr int DatasetIterations = 10;
|
||||
|
||||
|
||||
#ifdef TRACE
|
||||
|
|
|
@ -62,42 +62,43 @@ namespace RandomX {
|
|||
x3 = aesenc<soft>(x3, keys[i])
|
||||
|
||||
template<bool soft>
|
||||
void initBlock(const uint8_t* in, uint8_t* out, uint32_t blockNumber, const KeysContainer& keys) {
|
||||
__m128i x0, x1, x2, x3, iv;
|
||||
//block number 0..67108863
|
||||
//Initialization vector = block number extended to 128 bits
|
||||
iv = _mm_cvtsi32_si128(blockNumber);
|
||||
uint32_t cacheBlockNumber = blockNumber / BlockExpansionRatio; //0..2097151
|
||||
__m128i* cacheCacheLine = (__m128i*)(in + cacheBlockNumber * CacheLineSize);
|
||||
__m128i* datasetCacheLine = (__m128i*)out;
|
||||
void initBlock(const uint8_t* intermediate, uint8_t* out, uint32_t blockNumber, const KeysContainer& keys) {
|
||||
__m128i x0, x1, x2, x3;
|
||||
|
||||
x0 = _mm_load_si128(cacheCacheLine + 0);
|
||||
x1 = _mm_load_si128(cacheCacheLine + 1);
|
||||
x2 = _mm_load_si128(cacheCacheLine + 2);
|
||||
x3 = _mm_load_si128(cacheCacheLine + 3);
|
||||
__m128i* xit = (__m128i*)intermediate;
|
||||
__m128i* xout = (__m128i*)out;
|
||||
|
||||
x0 = _mm_xor_si128(x0, iv);
|
||||
x1 = _mm_xor_si128(x1, iv);
|
||||
x2 = _mm_xor_si128(x2, iv);
|
||||
x3 = _mm_xor_si128(x3, iv);
|
||||
x0 = _mm_cvtsi32_si128(blockNumber);
|
||||
constexpr int mask = (CacheSize / CacheLineSize) - 1;
|
||||
|
||||
for (auto i = 0; i < DatasetIterations; ++i) {
|
||||
AES_ROUND(0);
|
||||
AES_ROUND(1);
|
||||
AES_ROUND(2);
|
||||
AES_ROUND(3);
|
||||
AES_ROUND(4);
|
||||
AES_ROUND(5);
|
||||
AES_ROUND(6);
|
||||
AES_ROUND(7);
|
||||
AES_ROUND(8);
|
||||
AES_ROUND(9);
|
||||
x0 = aesenc<soft>(x0, keys[0]);
|
||||
x0 = aesenc<soft>(x0, keys[1]);
|
||||
x1 = aesenc<soft>(x0, keys[2]);
|
||||
x1 = aesenc<soft>(x1, keys[3]);
|
||||
x2 = aesenc<soft>(x1, keys[4]);
|
||||
x2 = aesenc<soft>(x2, keys[5]);
|
||||
x3 = aesenc<soft>(x2, keys[6]);
|
||||
x3 = aesenc<soft>(x3, keys[7]);
|
||||
|
||||
int index = _mm_cvtsi128_si32(x3);
|
||||
index &= mask;
|
||||
|
||||
__m128i t0 = _mm_load_si128(xit + 4 * index + 0);
|
||||
__m128i t1 = _mm_load_si128(xit + 4 * index + 1);
|
||||
__m128i t2 = _mm_load_si128(xit + 4 * index + 2);
|
||||
__m128i t3 = _mm_load_si128(xit + 4 * index + 3);
|
||||
|
||||
x0 = _mm_xor_si128(x0, t0);
|
||||
x1 = _mm_xor_si128(x1, t1);
|
||||
x2 = _mm_xor_si128(x2, t2);
|
||||
x3 = _mm_xor_si128(x3, t3);
|
||||
}
|
||||
|
||||
_mm_store_si128(datasetCacheLine + 0, x0);
|
||||
_mm_store_si128(datasetCacheLine + 1, x1);
|
||||
_mm_store_si128(datasetCacheLine + 2, x2);
|
||||
_mm_store_si128(datasetCacheLine + 3, x3);
|
||||
_mm_store_si128(xout + 0, x0);
|
||||
_mm_store_si128(xout + 1, x1);
|
||||
_mm_store_si128(xout + 2, x2);
|
||||
_mm_store_si128(xout + 3, x3);
|
||||
}
|
||||
|
||||
template
|
||||
|
|
Loading…
Reference in a new issue