Updated documentation and test vectors

Added AesGenerator1R test
Added benchmark hints if large pages fail
This commit is contained in:
tevador 2019-06-22 17:42:26 +02:00
parent 91cd35ff13
commit 8282413154
5 changed files with 67 additions and 23 deletions

View file

@ -157,7 +157,11 @@ The IADD_RS instruction utilizes the address calculation logic of CPUs and can b
Because integer division is not fully pipelined in CPUs and can be made faster in ASICs, the IMUL_RCP instruction requires only one division per program to calculate the reciprocal. This forces an ASIC to include a hardware divider without giving them a performance advantage during program execution. Because integer division is not fully pipelined in CPUs and can be made faster in ASICs, the IMUL_RCP instruction requires only one division per program to calculate the reciprocal. This forces an ASIC to include a hardware divider without giving them a performance advantage during program execution.
#### 2.4.3 ISWAP_R #### 2.4.3 IROR_R/IROL_R
Rotation instructions are split between rotate right and rotate left with a 4:1 ratio. Rotate right has a higher frequency because some architectures (like ARM) don't support rotate left natively (it must be emulated using rotate right).
#### 2.4.4 ISWAP_R
This instruction can be executed efficiently by CPUs that support register renaming/move elimination. This instruction can be executed efficiently by CPUs that support register renaming/move elimination.

View file

@ -567,8 +567,8 @@ For integer instructions, the destination is always an integer register (registe
|2/256|INEG_R|R|-|-|`dst = -dst`| |2/256|INEG_R|R|-|-|`dst = -dst`|
|15/256|IXOR_R|R|R|`src = imm32`|`dst = dst ^ src`| |15/256|IXOR_R|R|R|`src = imm32`|`dst = dst ^ src`|
|5/256|IXOR_M|R|R|`src = 0`|`dst = dst ^ [mem]`| |5/256|IXOR_M|R|R|`src = 0`|`dst = dst ^ [mem]`|
|10/256|IROR_R|R|R|`src = imm32`|`dst = dst >>> src`| |8/256|IROR_R|R|R|`src = imm32`|`dst = dst >>> src`|
|0/256|IROL_R|R|R|`src = imm32`|`dst = dst <<< src`| |2/256|IROL_R|R|R|`src = imm32`|`dst = dst <<< src`|
|4/256|ISWAP_R|R|R|`src = dst`|`temp = src; src = dst; dst = temp`| |4/256|ISWAP_R|R|R|`src = dst`|`temp = src; src = dst; dst = temp`|
#### 5.2.1 IADD_RS #### 5.2.1 IADD_RS
@ -616,13 +616,13 @@ All floating point operations are rounded according to the current value of the
|frequency|instruction|dst|src|operation| |frequency|instruction|dst|src|operation|
|-|-|-|-|-| |-|-|-|-|-|
|8/256|FSWAP_R|F+E|-|`(dst0, dst1) = (dst1, dst0)`| |4/256|FSWAP_R|F+E|-|`(dst0, dst1) = (dst1, dst0)`|
|20/256|FADD_R|F|A|`(dst0, dst1) = (dst0 + src0, dst1 + src1)`| |16/256|FADD_R|F|A|`(dst0, dst1) = (dst0 + src0, dst1 + src1)`|
|5/256|FADD_M|F|R|`(dst0, dst1) = (dst0 + [mem][0], dst1 + [mem][1])`| |5/256|FADD_M|F|R|`(dst0, dst1) = (dst0 + [mem][0], dst1 + [mem][1])`|
|20/256|FSUB_R|F|A|`(dst0, dst1) = (dst0 - src0, dst1 - src1)`| |16/256|FSUB_R|F|A|`(dst0, dst1) = (dst0 - src0, dst1 - src1)`|
|5/256|FSUB_M|F|R|`(dst0, dst1) = (dst0 - [mem][0], dst1 - [mem][1])`| |5/256|FSUB_M|F|R|`(dst0, dst1) = (dst0 - [mem][0], dst1 - [mem][1])`|
|6/256|FSCAL_R|F|-|<code>(dst0, dst1) = (-2<sup>x0</sup> * dst0, -2<sup>x1</sup> * dst1)</code>| |6/256|FSCAL_R|F|-|<code>(dst0, dst1) = (-2<sup>x0</sup> * dst0, -2<sup>x1</sup> * dst1)</code>|
|20/256|FMUL_R|E|A|`(dst0, dst1) = (dst0 * src0, dst1 * src1)`| |32/256|FMUL_R|E|A|`(dst0, dst1) = (dst0 * src0, dst1 * src1)`|
|4/256|FDIV_M|E|R|`(dst0, dst1) = (dst0 / [mem][0], dst1 / [mem][1])`| |4/256|FDIV_M|E|R|`(dst0, dst1) = (dst0 / [mem][0], dst1 / [mem][1])`|
|6/256|FSQRT_R|E|-|`(dst0, dst1) = (√dst0, √dst1)`| |6/256|FSQRT_R|E|-|`(dst0, dst1) = (√dst0, √dst1)`|

View file

@ -38,6 +38,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "utility.hpp" #include "utility.hpp"
#include "../randomx.h" #include "../randomx.h"
#include "../blake2/endian.h" #include "../blake2/endian.h"
#include "../common.hpp"
#ifdef _WIN32
#include <windows.h>
#include <VersionHelpers.h>
#endif
const uint8_t blockTemplate_[] = { const uint8_t blockTemplate_[] = {
0x07, 0x07, 0xf7, 0xa4, 0xf0, 0xd6, 0x05, 0xb3, 0x03, 0x26, 0x08, 0x16, 0xba, 0x3f, 0x10, 0x90, 0x2e, 0x1a, 0x14, 0x07, 0x07, 0xf7, 0xa4, 0xf0, 0xd6, 0x05, 0xb3, 0x03, 0x26, 0x08, 0x16, 0xba, 0x3f, 0x10, 0x90, 0x2e, 0x1a, 0x14,
@ -84,6 +89,19 @@ void printUsage(const char* executable) {
std::cout << " --seed S seed for cache initialization (default: 0)" << std::endl; std::cout << " --seed S seed for cache initialization (default: 0)" << std::endl;
} }
// Common base for memory allocation failures in the benchmark. Having a
// dedicated base lets main() catch these separately from other
// std::exception errors and print large-page configuration hints.
struct MemoryException : public std::exception {
};

// Thrown when randomx_alloc_cache() returns nullptr.
struct CacheAllocException : public MemoryException {
	// Returns a static, human-readable description of the failure.
	// `noexcept override` replaces the deprecated `throw ()` specification
	// and makes the compiler verify that this overrides std::exception::what().
	const char * what() const noexcept override {
		return "Cache allocation failed";
	}
};

// Thrown when randomx_alloc_dataset() returns nullptr.
struct DatasetAllocException : public MemoryException {
	// Returns a static, human-readable description of the failure.
	const char * what() const noexcept override {
		return "Dataset allocation failed";
	}
};
void mine(randomx_vm* vm, std::atomic<uint32_t>& atomicNonce, AtomicHash& result, uint32_t noncesCount, int thread) { void mine(randomx_vm* vm, std::atomic<uint32_t>& atomicNonce, AtomicHash& result, uint32_t noncesCount, int thread) {
uint64_t hash[RANDOMX_HASH_SIZE / sizeof(uint64_t)]; uint64_t hash[RANDOMX_HASH_SIZE / sizeof(uint64_t)];
uint8_t blockTemplate[sizeof(blockTemplate_)]; uint8_t blockTemplate[sizeof(blockTemplate_)];
@ -118,7 +136,7 @@ int main(int argc, char** argv) {
store32(&seed, seedValue); store32(&seed, seedValue);
std::cout << "RandomX benchmark" << std::endl; std::cout << "RandomX benchmark v1.0.4" << std::endl;
if (help || (!miningMode && !verificationMode)) { if (help || (!miningMode && !verificationMode)) {
printUsage(argv[0]); printUsage(argv[0]);
@ -171,19 +189,20 @@ int main(int argc, char** argv) {
std::cout << " ..." << std::endl; std::cout << " ..." << std::endl;
try { try {
if (jit && !RANDOMX_HAVE_COMPILER) {
throw std::runtime_error("JIT compilation is not supported on this platform");
}
Stopwatch sw(true); Stopwatch sw(true);
cache = randomx_alloc_cache(flags); cache = randomx_alloc_cache(flags);
if (cache == nullptr) { if (cache == nullptr) {
if (jit) { throw CacheAllocException();
throw std::runtime_error("JIT compilation is not supported or cache allocation failed");
}
throw std::runtime_error("Cache allocation failed");
} }
randomx_init_cache(cache, &seed, sizeof(seed)); randomx_init_cache(cache, &seed, sizeof(seed));
if (miningMode) { if (miningMode) {
dataset = randomx_alloc_dataset(flags); dataset = randomx_alloc_dataset(flags);
if (dataset == nullptr) { if (dataset == nullptr) {
throw std::runtime_error("Dataset allocation failed"); throw DatasetAllocException();
} }
uint32_t datasetItemCount = randomx_dataset_item_count(); uint32_t datasetItemCount = randomx_dataset_item_count();
if (initThreadCount > 1) { if (initThreadCount > 1) {
@ -241,7 +260,7 @@ int main(int argc, char** argv) {
std::cout << "Calculated result: "; std::cout << "Calculated result: ";
result.print(std::cout); result.print(std::cout);
if (noncesCount == 1000 && seedValue == 0) if (noncesCount == 1000 && seedValue == 0)
std::cout << "Reference result: a925d346195ef38048e714709e0b24a88fef565fa02fa97127e00fac08ee6eb8" << std::endl; std::cout << "Reference result: 38d47ea494480bff8d621189e8e92747288bb1da6c75dc401f2ab4b6807b6010" << std::endl;
if (!miningMode) { if (!miningMode) {
std::cout << "Performance: " << 1000 * elapsed / noncesCount << " ms per hash" << std::endl; std::cout << "Performance: " << 1000 * elapsed / noncesCount << " ms per hash" << std::endl;
} }
@ -249,6 +268,20 @@ int main(int argc, char** argv) {
std::cout << "Performance: " << noncesCount / elapsed << " hashes per second" << std::endl; std::cout << "Performance: " << noncesCount / elapsed << " hashes per second" << std::endl;
} }
} }
catch (MemoryException& e) {
std::cout << "ERROR: " << e.what() << std::endl;
if (largePages) {
#ifdef _WIN32
std::cout << "To use large pages, please enable the \"Lock Pages in Memory\" policy and reboot." << std::endl;
if (!IsWindows8OrGreater()) {
std::cout << "Additionally, you have to run the benchmark from elevated command prompt." << std::endl;
}
#else
std::cout << "To use large pages, please run: sudo sysctl -w vm.nr_hugepages=1250" << std::endl;
#endif
}
return 1;
}
catch (std::exception& e) { catch (std::exception& e) {
std::cout << "ERROR: " << e.what() << std::endl; std::cout << "ERROR: " << e.what() << std::endl;
return 1; return 1;

View file

@ -14,6 +14,7 @@
#include "../reciprocal.h" #include "../reciprocal.h"
#include "../intrin_portable.h" #include "../intrin_portable.h"
#include "../jit_compiler.hpp" #include "../jit_compiler.hpp"
#include "../aes_hash.hpp"
struct CacheKey { struct CacheKey {
void* key; void* key;
@ -146,6 +147,13 @@ int main() {
assert(datasetItem[0] == 0x145a5091f7853099); assert(datasetItem[0] == 0x145a5091f7853099);
}); });
runTest("AesGenerator1R", true, []() {
char state[64] = { 0 };
hex2bin("6c19536eb2de31b6c0065f7f116e86f960d8af0c57210a6584c3237b9d064dc7", 64, state);
fillAes1Rx4<true>(state, sizeof(state), state);
assert(equalsHex(state, "fa89397dd6ca422513aeadba3f124b5540324c4ad4b6db434394307a17c833ab"));
});
runTest("randomx_reciprocal", true, []() { runTest("randomx_reciprocal", true, []() {
assert(randomx_reciprocal(3) == 12297829382473034410U); assert(randomx_reciprocal(3) == 12297829382473034410U);
assert(randomx_reciprocal(13) == 11351842506898185609U); assert(randomx_reciprocal(13) == 11351842506898185609U);
@ -959,35 +967,34 @@ int main() {
auto test_a = [&] { auto test_a = [&] {
char hash[RANDOMX_HASH_SIZE]; char hash[RANDOMX_HASH_SIZE];
calcStringHash("test key 000", "This is a test", &hash); calcStringHash("test key 000", "This is a test", &hash);
assert(equalsHex(hash, "207d7cedf2a16590bd33d758e413ad129ce9888e05417984f46296252a7ba3d0")); assert(equalsHex(hash, "b33f8d10a8655d6f1925e3754adeb0a6da4c2f48a81cd4c220a412f1ef016a15"));
}; };
auto test_b = [&] { auto test_b = [&] {
char hash[RANDOMX_HASH_SIZE]; char hash[RANDOMX_HASH_SIZE];
calcStringHash("test key 000", "Lorem ipsum dolor sit amet", &hash); calcStringHash("test key 000", "Lorem ipsum dolor sit amet", &hash);
assert(equalsHex(hash, "76dd2da840d56d38153e0beaca33e7f862c5ead91a052380d99f3a62bf84579b")); assert(equalsHex(hash, "62ac336786ad3a7aff990beb2f643bd748d81dba585a52149d0baebdea0e9823"));
}; };
auto test_c = [&] { auto test_c = [&] {
char hash[RANDOMX_HASH_SIZE]; char hash[RANDOMX_HASH_SIZE];
calcStringHash("test key 000", "sed do eiusmod tempor incididunt ut labore et dolore magna aliqua", &hash); calcStringHash("test key 000", "sed do eiusmod tempor incididunt ut labore et dolore magna aliqua", &hash);
assert(equalsHex(hash, "109f6a405efe09d302336dce4389127e33aa62d4c782aca7797a628e87839a61")); assert(equalsHex(hash, "6c550ebe765f7b784d2c183552fbb6048b58f17a3f115baf2b968724eb2f7a23"));
}; };
auto test_d = [&] { auto test_d = [&] {
char hash[RANDOMX_HASH_SIZE]; char hash[RANDOMX_HASH_SIZE];
calcStringHash("test key 001", "sed do eiusmod tempor incididunt ut labore et dolore magna aliqua", &hash); calcStringHash("test key 001", "sed do eiusmod tempor incididunt ut labore et dolore magna aliqua", &hash);
assert(equalsHex(hash, "3cbb82edf9541ab80233cdc47384cea719c8567a8bbaca8f3ff038488ce9c16c")); assert(equalsHex(hash, "cb602b9c498b67e31e519fbdc07e288de46f949b14ad620380df6250eaffbd4e"));
}; };
auto test_e = [&] { auto test_e = [&] {
char hash[RANDOMX_HASH_SIZE]; char hash[RANDOMX_HASH_SIZE];
calcHexHash("test key 001", "0b0b98bea7e805e0010a2126d287a2a0cc833d312cb786385a7c2f9de69d25537f584a9bc9977b00000000666fd8753bf61a8631f12984e3fd44f4014eca629276817b56f32e9b68bd82f416", &hash); calcHexHash("test key 001", "0b0b98bea7e805e0010a2126d287a2a0cc833d312cb786385a7c2f9de69d25537f584a9bc9977b00000000666fd8753bf61a8631f12984e3fd44f4014eca629276817b56f32e9b68bd82f416", &hash);
//std::cout << std::endl;
//outputHex(std::cout, (const char*)hash, sizeof(hash)); //outputHex(std::cout, (const char*)hash, sizeof(hash));
//std::cout << std::endl; //std::cout << std::endl;
assert(equalsHex(hash, "f60caf300917760337e8ce51487484e6a33d4aaa15aa79c985efb4ea00390918"));
assert(equalsHex(hash, "e003ef128b1f96d99d4a0490e03253ef11186002a8ec018cbd4e07b8ec8c82e8"));
}; };
runTest("Hash test 1a (interpreter)", stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), test_a); runTest("Hash test 1a (interpreter)", stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), test_a);

View file

@ -52,7 +52,7 @@ char parseNibble(char hex) {
return hex; return hex;
} }
void hex2bin(char *in, int length, char *out) { void hex2bin(const char *in, int length, char *out) {
for (int i = 0; i < length; i += 2) { for (int i = 0; i < length; i += 2) {
char nibble1 = parseNibble(*in++); char nibble1 = parseNibble(*in++);
char nibble2 = parseNibble(*in++); char nibble2 = parseNibble(*in++);
@ -67,7 +67,7 @@ constexpr bool stringsEqual(char const * a, char const * b) {
template<size_t N> template<size_t N>
bool equalsHex(const void* hash, const char (&hex)[N]) { bool equalsHex(const void* hash, const char (&hex)[N]) {
char reference[N / 2]; char reference[N / 2];
hex2bin((char*)hex, N - 1, reference); hex2bin(hex, N - 1, reference);
return memcmp(hash, reference, sizeof(reference)) == 0; return memcmp(hash, reference, sizeof(reference)) == 0;
} }