diff --git a/README.md b/README.md index 2c9b876..fed319c 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,3 @@ - - - # RandomX RandomX is a proof-of-work (PoW) algorithm that is optimized for general-purpose CPUs. RandomX uses random code execution (hence the name) together with several memory-hard techniques to achieve the following goals: @@ -26,7 +23,7 @@ The structure of the VM mimics the components that are found in a typical genera The VM executes programs in a special instruction set, which was designed in such way that any random 8-byte word is a valid instruction and any sequence of valid instructions is a valid program. For more details see [RandomX ISA documentation](doc/isa.md). Because there are no "syntax" rules, generating a random program is as easy as filling the program buffer with random data. A RandomX program consists of 256 instructions. See [program.inc](src/program.inc) as an example of a RandomX program translated into x86-64 assembly. -#### Hash calculation +### Hash calculation Calculating a RandomX hash consists of initializing the 2 MiB scratchpad with random data, executing 8 RandomX loops and calculating a hash of the scratchpad. @@ -40,15 +37,27 @@ Hash of the register state after 2048 interations is used to initialize the rand The loads from the dataset are fully prefetched, so they don't slow down the loop. -RandomX uses the [Blake2b](https://en.wikipedia.org/wiki/BLAKE_%28hash_function%29#BLAKE2) cryptographic hash function. Special hashing functions based on [AES](https://en.wikipedia.org/wiki/Advanced_Encryption_Standard) encryption are used to initialize and hash the scratchpad. +RandomX uses the [Blake2b](https://en.wikipedia.org/wiki/BLAKE_%28hash_function%29#BLAKE2) cryptographic hash function. Special hashing functions `fillAes1Rx4` and `hashAes1Rx4` based on [AES](https://en.wikipedia.org/wiki/Advanced_Encryption_Standard) encryption are used to initialize and hash the scratchpad ([hashAes1Rx4.cpp](src/hashAes1Rx4.cpp)). 
-#### Hash verification +### Hash verification RandomX is a symmetric PoW algorithm, so the verifying party has to repeat the same steps as when a hash is calculated. However, to allow hash verification on devices that cannot store the whole 4 GiB dataset, RandomX allows a time-memory tradeoff by using just 256 MiB of memory at the cost of 16 times more random memory accesses. See [Dataset initialization](doc/dataset.md) for more details. -#### Documentation +### Performance +Preliminary mining performance with the x86-64 JIT compiled VM: + +|CPU|RAM|threads|hashrate [H/s]|comment| +|-----|-----|----|----------|-----| +|AMD Ryzen 1700|DDR4-2933|8|4100| | +|Intel i5-3230M|DDR3-1333|1|280|without large pages| +|Intel i7-8550U|DDR4-2400|4|1200|limited by thermals| +|Intel i5-2500K|DDR3-1333|3|1350| | +Hash verification is performed using the portable interpreter in "light-client mode" and takes 30-70 ms depending on RAM latency and CPU clock speed. Hash verification in "mining mode" takes 2-4 ms. 
+ +### Documentation * [RandomX ISA](doc/isa.md) * [RandomX instruction listing](doc/isa-ops.md) * [Dataset initialization](doc/dataset.md) diff --git a/makefile b/makefile index 87fef86..77788dc 100644 --- a/makefile +++ b/makefile @@ -11,12 +11,12 @@ SRCDIR=src OBJDIR=obj LDFLAGS=-lpthread TOBJS=$(addprefix $(OBJDIR)/,instructionsPortable.o TestAluFpu.o) -ROBJS=$(addprefix $(OBJDIR)/,argon2_core.o argon2_ref.o AssemblyGeneratorX86.o blake2b.o CompiledVirtualMachine.o dataset.o JitCompilerX86.o instructionsPortable.o Instruction.o InterpretedVirtualMachine.o main.o Program.o softAes.o VirtualMachine.o t1ha2.o Cache.o virtualMemory.o divideByConstantCodegen.o LightClientAsyncWorker.o AddressTransform.o hashAes1Rx4.o) +ROBJS=$(addprefix $(OBJDIR)/,argon2_core.o argon2_ref.o AssemblyGeneratorX86.o blake2b.o CompiledVirtualMachine.o dataset.o JitCompilerX86.o instructionsPortable.o Instruction.o InterpretedVirtualMachine.o main.o Program.o softAes.o VirtualMachine.o Cache.o virtualMemory.o divideByConstantCodegen.o LightClientAsyncWorker.o hashAes1Rx4.o) ifeq ($(PLATFORM),x86_64) ROBJS += $(OBJDIR)/JitCompilerX86-static.o $(OBJDIR)/squareHash.o endif -all: release test +all: release release: CXXFLAGS += -march=native -O3 -flto release: CCFLAGS += -march=native -O3 -flto @@ -41,11 +41,8 @@ $(BINDIR)/randomx: $(ROBJS) | $(BINDIR) $(BINDIR)/AluFpuTest: $(TOBJS) | $(BINDIR) $(CXX) $(TOBJS) $(LDFLAGS) -o $@ -$(OBJDIR)/TestAluFpu.o: $(addprefix $(SRCDIR)/,TestAluFpu.cpp instructions.hpp Pcg32.hpp) | $(OBJDIR) +$(OBJDIR)/TestAluFpu.o: $(addprefix $(SRCDIR)/,TestAluFpu.cpp instructions.hpp) | $(OBJDIR) $(CXX) $(CXXFLAGS) -c $(SRCDIR)/TestAluFpu.cpp -o $@ - -$(OBJDIR)/AddressTransform.o: $(addprefix $(SRCDIR)/,AddressTransform.cpp InterpretedVirtualMachine.hpp common.hpp) | $(OBJDIR) - $(CXX) $(CXXFLAGS) -c $(SRCDIR)/AddressTransform.cpp -o $@ $(OBJDIR)/argon2_core.o: $(addprefix $(SRCDIR)/,argon2_core.c argon2_core.h blake2/blake2.h blake2/blake2-impl.h) | $(OBJDIR) $(CC) 
$(CCFLAGS) -c $(SRCDIR)/argon2_core.c -o $@ @@ -53,16 +50,16 @@ $(OBJDIR)/argon2_core.o: $(addprefix $(SRCDIR)/,argon2_core.c argon2_core.h blak $(OBJDIR)/argon2_ref.o: $(addprefix $(SRCDIR)/,argon2_ref.c argon2.h argon2_core.h blake2/blake2.h blake2/blake2-impl.h blake2/blamka-round-ref.h) | $(OBJDIR) $(CC) $(CCFLAGS) -c $(SRCDIR)/argon2_ref.c -o $@ -$(OBJDIR)/AssemblyGeneratorX86.o: $(addprefix $(SRCDIR)/,AssemblyGeneratorX86.cpp AssemblyGeneratorX86.hpp Instruction.hpp Pcg32.hpp common.hpp instructions.hpp instructionWeights.hpp) | $(OBJDIR) +$(OBJDIR)/AssemblyGeneratorX86.o: $(addprefix $(SRCDIR)/,AssemblyGeneratorX86.cpp AssemblyGeneratorX86.hpp Instruction.hpp common.hpp instructionWeights.hpp) | $(OBJDIR) $(CXX) $(CXXFLAGS) -c $(SRCDIR)/AssemblyGeneratorX86.cpp -o $@ $(OBJDIR)/blake2b.o: $(addprefix $(SRCDIR)/blake2/,blake2b.c blake2.h blake2-impl.h) | $(OBJDIR) $(CC) $(CCFLAGS) -c $(SRCDIR)/blake2/blake2b.c -o $@ -$(OBJDIR)/CompiledVirtualMachine.o: $(addprefix $(SRCDIR)/,CompiledVirtualMachine.cpp CompiledVirtualMachine.hpp Pcg32.hpp common.hpp instructions.hpp) | $(OBJDIR) +$(OBJDIR)/CompiledVirtualMachine.o: $(addprefix $(SRCDIR)/,CompiledVirtualMachine.cpp CompiledVirtualMachine.hpp common.hpp) | $(OBJDIR) $(CXX) $(CXXFLAGS) -c $(SRCDIR)/CompiledVirtualMachine.cpp -o $@ -$(OBJDIR)/dataset.o: $(addprefix $(SRCDIR)/,dataset.cpp common.hpp Pcg32.hpp) | $(OBJDIR) +$(OBJDIR)/dataset.o: $(addprefix $(SRCDIR)/,dataset.cpp common.hpp) | $(OBJDIR) $(CXX) $(CXXFLAGS) -c $(SRCDIR)/dataset.cpp -o $@ $(OBJDIR)/divideByConstantCodegen.o: $(addprefix $(SRCDIR)/,divideByConstantCodegen.c divideByConstantCodegen.h) | $(OBJDIR) @@ -74,19 +71,19 @@ $(OBJDIR)/hashAes1Rx4.o: $(addprefix $(SRCDIR)/,hashAes1Rx4.cpp softAes.h) | $(O $(OBJDIR)/JitCompilerX86.o: $(addprefix $(SRCDIR)/,JitCompilerX86.cpp JitCompilerX86.hpp Instruction.hpp instructionWeights.hpp) | $(OBJDIR) $(CXX) $(CXXFLAGS) -c $(SRCDIR)/JitCompilerX86.cpp -o $@ -$(OBJDIR)/JitCompilerX86-static.o: $(addprefix 
$(SRCDIR)/,JitCompilerX86-static.S $(addprefix asm/program_, prologue_linux.inc prologue_load.inc epilogue_linux.inc epilogue_store.inc read.inc)) | $(OBJDIR) +$(OBJDIR)/JitCompilerX86-static.o: $(addprefix $(SRCDIR)/,JitCompilerX86-static.S $(addprefix asm/program_, prologue_linux.inc prologue_load.inc epilogue_linux.inc epilogue_store.inc read_dataset.inc loop_load.inc loop_store.inc xmm_constants.inc)) | $(OBJDIR) $(CXX) -x assembler-with-cpp -c $(SRCDIR)/JitCompilerX86-static.S -o $@ $(OBJDIR)/squareHash.o: $(addprefix $(SRCDIR)/,squareHash.S $(addprefix asm/, squareHash.inc)) | $(OBJDIR) $(CXX) -x assembler-with-cpp -c $(SRCDIR)/squareHash.S -o $@ -$(OBJDIR)/instructionsPortable.o: $(addprefix $(SRCDIR)/,instructionsPortable.cpp instructions.hpp intrinPortable.h) | $(OBJDIR) +$(OBJDIR)/instructionsPortable.o: $(addprefix $(SRCDIR)/,instructionsPortable.cpp intrinPortable.h) | $(OBJDIR) $(CXX) $(CXXFLAGS) -c $(SRCDIR)/instructionsPortable.cpp -o $@ $(OBJDIR)/Instruction.o: $(addprefix $(SRCDIR)/,Instruction.cpp Instruction.hpp instructionWeights.hpp) | $(OBJDIR) $(CXX) $(CXXFLAGS) -c $(SRCDIR)/Instruction.cpp -o $@ -$(OBJDIR)/InterpretedVirtualMachine.o: $(addprefix $(SRCDIR)/,InterpretedVirtualMachine.cpp InterpretedVirtualMachine.hpp Pcg32.hpp instructions.hpp instructionWeights.hpp) | $(OBJDIR) +$(OBJDIR)/InterpretedVirtualMachine.o: $(addprefix $(SRCDIR)/,InterpretedVirtualMachine.cpp InterpretedVirtualMachine.hpp instructionWeights.hpp) | $(OBJDIR) $(CXX) $(CXXFLAGS) -c $(SRCDIR)/InterpretedVirtualMachine.cpp -o $@ $(OBJDIR)/LightClientAsyncWorker.o: $(addprefix $(SRCDIR)/,LightClientAsyncWorker.cpp LightClientAsyncWorker.hpp common.hpp) | $(OBJDIR) @@ -95,10 +92,10 @@ $(OBJDIR)/LightClientAsyncWorker.o: $(addprefix $(SRCDIR)/,LightClientAsyncWorke $(OBJDIR)/main.o: $(addprefix $(SRCDIR)/,main.cpp InterpretedVirtualMachine.hpp Stopwatch.hpp blake2/blake2.h) | $(OBJDIR) $(CXX) $(CXXFLAGS) -c $(SRCDIR)/main.cpp -o $@ -$(OBJDIR)/Program.o: $(addprefix 
$(SRCDIR)/,Program.cpp Program.hpp Pcg32.hpp) | $(OBJDIR) +$(OBJDIR)/Program.o: $(addprefix $(SRCDIR)/,Program.cpp Program.hpp) | $(OBJDIR) $(CXX) $(CXXFLAGS) -c $(SRCDIR)/Program.cpp -o $@ -$(OBJDIR)/Cache.o: $(addprefix $(SRCDIR)/,Cache.cpp Cache.hpp Pcg32.hpp argon2_core.h) | $(OBJDIR) +$(OBJDIR)/Cache.o: $(addprefix $(SRCDIR)/,Cache.cpp Cache.hpp argon2_core.h) | $(OBJDIR) $(CXX) $(CXXFLAGS) -c $(SRCDIR)/Cache.cpp -o $@ $(OBJDIR)/softAes.o: $(addprefix $(SRCDIR)/,softAes.cpp softAes.h) | $(OBJDIR) @@ -109,9 +106,6 @@ $(OBJDIR)/VirtualMachine.o: $(addprefix $(SRCDIR)/,VirtualMachine.cpp VirtualMac $(OBJDIR)/virtualMemory.o: $(addprefix $(SRCDIR)/,virtualMemory.cpp virtualMemory.hpp) | $(OBJDIR) $(CXX) $(CXXFLAGS) -c $(SRCDIR)/virtualMemory.cpp -o $@ - -$(OBJDIR)/t1ha2.o: $(addprefix $(SRCDIR)/t1ha/,t1ha2.c t1ha.h t1ha_bits.h) | $(OBJDIR) - $(CC) $(CCFLAGS) -c $(SRCDIR)/t1ha/t1ha2.c -o $@ $(OBJDIR): mkdir $(OBJDIR) diff --git a/src/AddressTransform.cpp b/src/AddressTransform.cpp deleted file mode 100644 index b8070a0..0000000 --- a/src/AddressTransform.cpp +++ /dev/null @@ -1,292 +0,0 @@ -/* -Copyright (c) 2019 tevador - -This file is part of RandomX. - -RandomX is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -RandomX is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with RandomX. If not, see. 
-*/ - -#include "common.hpp" -#include "InterpretedVirtualMachine.hpp" - -#include -#include -#include - -namespace RandomX { - - class Mul9Transform : public ITransform { - public: - Mul9Transform(int32_t cc) : c(cc) { - std::ostringstream oss; - oss << "mul9_" << std::hex << (cc & 255); - name = oss.str(); - } - int32_t apply(int32_t x) const override { - return 9 * x + c; - } - const char* getName() const override { - return name.c_str(); - } - std::ostream& printAsm(std::ostream& os) const override { - os << "lea ecx, [rcx+rcx*8" << std::showpos << c << "]" << std::noshowpos << std::endl; - return os; - } - std::ostream& printCxx(std::ostream& os) const override { - os << "static const Mul9Transform " << name << "(" << c << ");" << std::endl; - return os; - } - private: - int32_t c; - std::string name; - }; - - class AddTransform : public ITransform { - public: - AddTransform(int32_t cc) : c(cc) { - std::ostringstream oss; - oss << "add_" << std::hex << (cc & 255); - name = oss.str(); - } - int32_t apply(int32_t x) const override { - return x + c; - } - const char* getName() const override { - return name.c_str(); - } - std::ostream& printAsm(std::ostream& os) const override { - os << "db 64" << std::endl; - os << "add ecx, " << c << std::endl; - return os; - } - std::ostream& printCxx(std::ostream& os) const override { - os << "static const AddTransform " << name << "(" << c << ");" << std::endl; - return os; - } - private: - int32_t c; - std::string name; - }; - - class XorTransform : public ITransform { - public: - XorTransform(int32_t cc) : c(cc) { - std::ostringstream oss; - oss << "xor_" << std::hex << (cc & 255); - name = oss.str(); - } - int32_t apply(int32_t x) const override { - return x ^ c; - } - const char* getName() const override { - return name.c_str(); - } - std::ostream& printAsm(std::ostream& os) const override { - os << "db 64" << std::endl; - os << "xor ecx, " << c << std::endl; - return os; - } - std::ostream& printCxx(std::ostream& os) 
const override { - os << "static const XorTransform " << name << "(" << c << ");" << std::endl; - return os; - } - private: - int32_t c; - std::string name; - }; - - static const Mul9Transform mul9_6d(109); - static const XorTransform xor_60(96); - static const Mul9Transform mul9_ed(-19); - static const AddTransform add_9e(-98); - static const AddTransform add_eb(-21); - static const XorTransform xor_b0(-80); - static const Mul9Transform mul9_a4(-92); - static const AddTransform add_71(113); - static const Mul9Transform mul9_64(100); - static const AddTransform add_d9(-39); - static const XorTransform xor_78(120); - static const Mul9Transform mul9_89(-119); - static const AddTransform add_8f(-113); - static const AddTransform add_6f(111); - static const XorTransform xor_68(104); - static const Mul9Transform mul9_ad(-83); - static const Mul9Transform mul9_7f(127); - static const XorTransform xor_90(-112); - static const AddTransform add_59(89); - static const AddTransform add_e0(-32); - static const AddTransform add_68(104); - static const XorTransform xor_88(-120); - static const XorTransform xor_18(24); - static const Mul9Transform mul9_9(9); - static const AddTransform add_e1(-31); - static const XorTransform xor_f0(-16); - static const AddTransform add_44(68); - static const Mul9Transform mul9_92(-110); - static const XorTransform xor_40(64); - static const XorTransform xor_d8(-40); - static const XorTransform xor_f8(-8); - static const AddTransform add_f6(-10); - static const XorTransform xor_e0(-32); - static const AddTransform add_e(14); - static const Mul9Transform mul9_d2(-46); - static const XorTransform xor_98(-104); - static const Mul9Transform mul9_24(36); - static const AddTransform add_64(100); - static const Mul9Transform mul9_bf(-65); - static const Mul9Transform mul9_1b(27); - static const Mul9Transform mul9_5b(91); - static const AddTransform add_9b(-101); - static const AddTransform add_a2(-94); - static const Mul9Transform mul9_f6(-10); - static 
const XorTransform xor_50(80); - static const AddTransform add_94(-108); - static const AddTransform add_c6(-58); - static const XorTransform xor_30(48); - static const Mul9Transform mul9_49(73); - static const XorTransform xor_d0(-48); - static const XorTransform xor_20(32); - static const XorTransform xor_a0(-96); - static const AddTransform add_76(118); - static const AddTransform add_5b(91); - static const Mul9Transform mul9_12(18); - static const AddTransform add_f5(-11); - static const Mul9Transform mul9_3f(63); - static const AddTransform add_72(114); - static const Mul9Transform mul9_2d(45); - static const AddTransform add_bd(-67); - static const AddTransform add_35(53); - static const Mul9Transform mul9_9b(-101); - static const Mul9Transform mul9_ff(-1); - static const XorTransform xor_10(16); - static const Mul9Transform mul9_db(-37); - static const Mul9Transform mul9_e4(-28); - static const Mul9Transform mul9_c9(-55); - static const XorTransform xor_a8(-88); - static const XorTransform xor_b8(-72); - static const AddTransform add_24(36); - static const XorTransform xor_c8(-56); - static const AddTransform add_74(116); - static const XorTransform xor_58(88); - static const XorTransform xor_80(-128); - static const AddTransform add_32(50); - static const AddTransform add_69(105); - static const AddTransform add_db(-37); - static const XorTransform xor_70(112); - static const XorTransform xor_8(8); - static const XorTransform xor_e8(-24); - static const Mul9Transform mul9_76(118); - static const XorTransform xor_48(72); - static const XorTransform xor_c0(-64); - static const AddTransform add_28(40); - static const Mul9Transform mul9_b6(-74); - static const Mul9Transform mul9_52(82); - static const Mul9Transform mul9_36(54); - static const XorTransform xor_38(56); - static const XorTransform xor_28(40); - static const AddTransform add_57(87); - - const ITransform* InterpretedVirtualMachine::addressTransformations[TransformationCount] = { - 
(ITransform*)&mul9_6d, - (ITransform*)&xor_60, - (ITransform*)&mul9_ed, - (ITransform*)&add_9e, - (ITransform*)&add_eb, - (ITransform*)&xor_b0, - (ITransform*)&mul9_a4, - (ITransform*)&add_71, - (ITransform*)&mul9_64, - (ITransform*)&add_d9, - (ITransform*)&xor_78, - (ITransform*)&mul9_89, - (ITransform*)&add_8f, - (ITransform*)&add_6f, - (ITransform*)&xor_68, - (ITransform*)&mul9_ad, - (ITransform*)&mul9_7f, - (ITransform*)&xor_90, - (ITransform*)&add_59, - (ITransform*)&add_e0, - (ITransform*)&add_68, - (ITransform*)&xor_88, - (ITransform*)&xor_18, - (ITransform*)&mul9_9, - (ITransform*)&add_e1, - (ITransform*)&xor_f0, - (ITransform*)&add_44, - (ITransform*)&mul9_92, - (ITransform*)&xor_40, - (ITransform*)&xor_d8, - (ITransform*)&xor_f8, - (ITransform*)&add_f6, - (ITransform*)&xor_e0, - (ITransform*)&add_e, - (ITransform*)&mul9_d2, - (ITransform*)&xor_98, - (ITransform*)&mul9_24, - (ITransform*)&add_64, - (ITransform*)&mul9_bf, - (ITransform*)&mul9_1b, - (ITransform*)&mul9_5b, - (ITransform*)&add_9b, - (ITransform*)&add_a2, - (ITransform*)&mul9_f6, - (ITransform*)&xor_50, - (ITransform*)&add_94, - (ITransform*)&add_c6, - (ITransform*)&xor_30, - (ITransform*)&mul9_49, - (ITransform*)&xor_d0, - (ITransform*)&xor_20, - (ITransform*)&xor_a0, - (ITransform*)&add_76, - (ITransform*)&add_5b, - (ITransform*)&mul9_12, - (ITransform*)&add_f5, - (ITransform*)&mul9_3f, - (ITransform*)&add_72, - (ITransform*)&mul9_2d, - (ITransform*)&add_bd, - (ITransform*)&add_35, - (ITransform*)&mul9_9b, - (ITransform*)&mul9_ff, - (ITransform*)&xor_10, - (ITransform*)&mul9_db, - (ITransform*)&mul9_e4, - (ITransform*)&mul9_c9, - (ITransform*)&xor_a8, - (ITransform*)&xor_b8, - (ITransform*)&add_24, - (ITransform*)&xor_c8, - (ITransform*)&add_74, - (ITransform*)&xor_58, - (ITransform*)&xor_80, - (ITransform*)&add_32, - (ITransform*)&add_69, - (ITransform*)&add_db, - (ITransform*)&xor_70, - (ITransform*)&xor_8, - (ITransform*)&xor_e8, - (ITransform*)&mul9_76, - (ITransform*)&xor_48, - 
(ITransform*)&xor_c0, - (ITransform*)&add_28, - (ITransform*)&mul9_b6, - (ITransform*)&mul9_52, - (ITransform*)&mul9_36, - (ITransform*)&xor_38, - (ITransform*)&xor_28, - (ITransform*)&add_57, - }; -} \ No newline at end of file diff --git a/src/AssemblyGeneratorX86.cpp b/src/AssemblyGeneratorX86.cpp index 9f03da1..bb50718 100644 --- a/src/AssemblyGeneratorX86.cpp +++ b/src/AssemblyGeneratorX86.cpp @@ -450,6 +450,8 @@ namespace RandomX { return "l"; case 7: return "ge"; + default: + UNREACHABLE; } } diff --git a/src/Instruction.cpp b/src/Instruction.cpp index 2fefcf3..bdcaf39 100644 --- a/src/Instruction.cpp +++ b/src/Instruction.cpp @@ -296,6 +296,8 @@ namespace RandomX { return "lt"; case 7: return "ge"; + default: + UNREACHABLE; } } diff --git a/src/InterpretedVirtualMachine.cpp b/src/InterpretedVirtualMachine.cpp index 0757f43..c5a6d53 100644 --- a/src/InterpretedVirtualMachine.cpp +++ b/src/InterpretedVirtualMachine.cpp @@ -27,6 +27,7 @@ along with RandomX. If not, see. #include #include #include +#include #include #include "intrinPortable.h" #ifdef STATS @@ -262,7 +263,7 @@ namespace RandomX { uint32_t spAddr0 = mem.mx; uint32_t spAddr1 = mem.ma; - for(int iter = 0; iter < InstructionCount; ++iter) { + for(unsigned iter = 0; iter < InstructionCount; ++iter) { //std::cout << "Iteration " << iter << std::endl; spAddr0 ^= r[readReg0]; spAddr0 &= ScratchpadL3Mask64; diff --git a/src/JitCompilerX86.cpp b/src/JitCompilerX86.cpp index b77da17..0c2fac0 100644 --- a/src/JitCompilerX86.cpp +++ b/src/JitCompilerX86.cpp @@ -705,6 +705,8 @@ namespace RandomX { return 0x9c; //setl case 7: return 0x9d; //setge + default: + UNREACHABLE; } } diff --git a/src/JitCompilerX86.hpp b/src/JitCompilerX86.hpp index e790cfe..fedcf20 100644 --- a/src/JitCompilerX86.hpp +++ b/src/JitCompilerX86.hpp @@ -83,7 +83,7 @@ namespace RandomX { template void emit(const uint8_t (&src)[N]) { - for (int i = 0; i < N; ++i) { + for (unsigned i = 0; i < N; ++i) { code[codePos + i] = src[i]; } codePos 
+= N; diff --git a/src/VirtualMachine.cpp b/src/VirtualMachine.cpp index 2adf4e4..057026c 100644 --- a/src/VirtualMachine.cpp +++ b/src/VirtualMachine.cpp @@ -28,9 +28,15 @@ along with RandomX. If not, see. std::ostream& operator<<(std::ostream& os, const RandomX::RegisterFile& rf) { for (int i = 0; i < RandomX::RegistersCount; ++i) os << std::hex << "r" << i << " = " << rf.r[i] << std::endl << std::dec; - for (int i = 0; i < RandomX::RegistersCount; ++i) + for (int i = 0; i < 4; ++i) os << std::hex << "f" << i << " = " << *(uint64_t*)&rf.f[i].hi << " (" << rf.f[i].hi << ")" << std::endl << " = " << *(uint64_t*)&rf.f[i].lo << " (" << rf.f[i].lo << ")" << std::endl << std::dec; + for (int i = 0; i < 4; ++i) + os << std::hex << "e" << i << " = " << *(uint64_t*)&rf.e[i].hi << " (" << rf.e[i].hi << ")" << std::endl + << " = " << *(uint64_t*)&rf.e[i].lo << " (" << rf.e[i].lo << ")" << std::endl << std::dec; + for (int i = 0; i < 4; ++i) + os << std::hex << "a" << i << " = " << *(uint64_t*)&rf.a[i].hi << " (" << rf.a[i].hi << ")" << std::endl + << " = " << *(uint64_t*)&rf.a[i].lo << " (" << rf.a[i].lo << ")" << std::endl << std::dec; return os; } diff --git a/src/common.hpp b/src/common.hpp index 8c16825..1d7f597 100644 --- a/src/common.hpp +++ b/src/common.hpp @@ -93,6 +93,7 @@ namespace RandomX { class ILightClientAsyncWorker { public: + virtual ~ILightClientAsyncWorker() {} virtual void prepareBlock(addr_t) = 0; virtual void prepareBlocks(void* out, uint32_t startBlock, uint32_t blockCount) = 0; virtual const uint64_t* getBlock(addr_t) = 0; diff --git a/src/hashAes1Rx4.cpp b/src/hashAes1Rx4.cpp index 623d4b6..db1c6a2 100644 --- a/src/hashAes1Rx4.cpp +++ b/src/hashAes1Rx4.cpp @@ -19,6 +19,18 @@ along with RandomX. If not, see. #include "softAes.h" +/* + Calculate a 512-bit hash of 'input' using 4 lanes of AES. + The input is treated as a set of round keys for the encryption + of the initial state. + + 'inputSize' must be a multiple of 64. 
+ + For a 2 MiB input, this has the same security as 32768-round + AES encryption. + + Hashing throughput: >20 GiB/s per CPU core with hardware AES +*/ template void hashAes1Rx4(const void *input, size_t inputSize, void *hash) { const uint8_t* inptr = (uint8_t*)input; @@ -72,6 +84,16 @@ void hashAes1Rx4(const void *input, size_t inputSize, void *hash) { template void hashAes1Rx4(const void *input, size_t inputSize, void *hash); template void hashAes1Rx4(const void *input, size_t inputSize, void *hash); +/* + Fill 'buffer' with pseudorandom data based on 512-bit 'state'. + The state is encrypted using a single AES round per 16 bytes of output + in 4 lanes. + + 'outputSize' must be a multiple of 64. + + The modified state is written back to 'state' to allow multiple + calls to this function. +*/ template void fillAes1Rx4(void *state, size_t outputSize, void *buffer) { const uint8_t* outptr = (uint8_t*)buffer; diff --git a/src/main.cpp b/src/main.cpp index 0a10d8f..1229feb 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -254,7 +254,7 @@ int main(int argc, char** argv) { } if (RandomX::trace) { std::cout << "Keys: " << std::endl; - for (int i = 0; i < dataset.cache->getKeys().size(); ++i) { + for (unsigned i = 0; i < dataset.cache->getKeys().size(); ++i) { outputHex(std::cout, (char*)&dataset.cache->getKeys()[i], sizeof(__m128i)); } std::cout << std::endl; @@ -280,7 +280,7 @@ int main(int argc, char** argv) { threads.push_back(std::thread(&RandomX::datasetInit, cache, dataset, i * perThread, count)); } } - for (int i = 0; i < threads.size(); ++i) { + for (unsigned i = 0; i < threads.size(); ++i) { threads[i].join(); } } @@ -318,10 +318,10 @@ int main(int argc, char** argv) { std::cout << "Running benchmark (" << programCount << " programs) ..." 
<< std::endl; sw.restart(); if (threadCount > 1) { - for (int i = 0; i < vms.size(); ++i) { + for (unsigned i = 0; i < vms.size(); ++i) { threads.push_back(std::thread(&mine, vms[i], std::ref(atomicNonce), std::ref(result), programCount, i, scratchpadMem + RandomX::ScratchpadSize * i)); } - for (int i = 0; i < threads.size(); ++i) { + for (unsigned i = 0; i < threads.size(); ++i) { threads[i].join(); } } @@ -336,10 +336,10 @@ int main(int argc, char** argv) { if(programCount == 1000) std::cout << "Reference result: 3e1c5f9b9d0bf8ffa250f860bf5f7ab76ac823b206ddee6a592660119a3640c6" << std::endl; if (lightClient) { - std::cout << "Performance: " << 1000 * elapsed / programCount << " ms per program" << std::endl; + std::cout << "Performance: " << 1000 * elapsed / programCount << " ms per hash" << std::endl; } else { - std::cout << "Performance: " << programCount / elapsed << " programs per second" << std::endl; + std::cout << "Performance: " << programCount / elapsed << " hashes per second" << std::endl; } } catch (std::exception& e) {