mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2024-08-15 00:23:14 +00:00
initBlock asm version (disabled)
This commit is contained in:
parent
91063aac91
commit
6b344b81fd
6 changed files with 194 additions and 170 deletions
22
makefile
22
makefile
|
@ -53,16 +53,16 @@ $(OBJDIR)/argon2_core.o: $(addprefix $(SRCDIR)/,argon2_core.c argon2_core.h blak
|
||||||
$(OBJDIR)/argon2_ref.o: $(addprefix $(SRCDIR)/,argon2_ref.c argon2.h argon2_core.h blake2/blake2.h blake2/blake2-impl.h blake2/blamka-round-ref.h blake2/endian.h) | $(OBJDIR)
|
$(OBJDIR)/argon2_ref.o: $(addprefix $(SRCDIR)/,argon2_ref.c argon2.h argon2_core.h blake2/blake2.h blake2/blake2-impl.h blake2/blamka-round-ref.h blake2/endian.h) | $(OBJDIR)
|
||||||
$(CC) $(CCFLAGS) -c $(SRCDIR)/argon2_ref.c -o $@
|
$(CC) $(CCFLAGS) -c $(SRCDIR)/argon2_ref.c -o $@
|
||||||
|
|
||||||
$(OBJDIR)/AssemblyGeneratorX86.o: $(addprefix $(SRCDIR)/,AssemblyGeneratorX86.cpp AssemblyGeneratorX86.hpp Instruction.hpp common.hpp instructionWeights.hpp blake2/endian.h reciprocal.h Program.hpp) | $(OBJDIR)
|
$(OBJDIR)/AssemblyGeneratorX86.o: $(addprefix $(SRCDIR)/,AssemblyGeneratorX86.cpp AssemblyGeneratorX86.hpp Instruction.hpp common.hpp instructionWeights.hpp blake2/endian.h reciprocal.h Program.hpp configuration.h) | $(OBJDIR)
|
||||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/AssemblyGeneratorX86.cpp -o $@
|
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/AssemblyGeneratorX86.cpp -o $@
|
||||||
|
|
||||||
$(OBJDIR)/blake2b.o: $(addprefix $(SRCDIR)/blake2/,blake2b.c blake2.h blake2-impl.h endian.h) | $(OBJDIR)
|
$(OBJDIR)/blake2b.o: $(addprefix $(SRCDIR)/blake2/,blake2b.c blake2.h blake2-impl.h endian.h) | $(OBJDIR)
|
||||||
$(CC) $(CCFLAGS) -c $(SRCDIR)/blake2/blake2b.c -o $@
|
$(CC) $(CCFLAGS) -c $(SRCDIR)/blake2/blake2b.c -o $@
|
||||||
|
|
||||||
$(OBJDIR)/CompiledVirtualMachine.o: $(addprefix $(SRCDIR)/,CompiledVirtualMachine.cpp CompiledVirtualMachine.hpp common.hpp) | $(OBJDIR)
|
$(OBJDIR)/CompiledVirtualMachine.o: $(addprefix $(SRCDIR)/,CompiledVirtualMachine.cpp CompiledVirtualMachine.hpp common.hpp configuration.h) | $(OBJDIR)
|
||||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/CompiledVirtualMachine.cpp -o $@
|
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/CompiledVirtualMachine.cpp -o $@
|
||||||
|
|
||||||
$(OBJDIR)/dataset.o: $(addprefix $(SRCDIR)/,dataset.cpp common.hpp blake2/endian.h dataset.hpp intrinPortable.h Cache.hpp virtualMemory.hpp) | $(OBJDIR)
|
$(OBJDIR)/dataset.o: $(addprefix $(SRCDIR)/,dataset.cpp common.hpp blake2/endian.h dataset.hpp intrinPortable.h Cache.hpp virtualMemory.hpp configuration.h) | $(OBJDIR)
|
||||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/dataset.cpp -o $@
|
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/dataset.cpp -o $@
|
||||||
|
|
||||||
$(OBJDIR)/reciprocal.o: $(addprefix $(SRCDIR)/,reciprocal.c reciprocal.h) | $(OBJDIR)
|
$(OBJDIR)/reciprocal.o: $(addprefix $(SRCDIR)/,reciprocal.c reciprocal.h) | $(OBJDIR)
|
||||||
|
@ -71,40 +71,40 @@ $(OBJDIR)/reciprocal.o: $(addprefix $(SRCDIR)/,reciprocal.c reciprocal.h) | $(OB
|
||||||
$(OBJDIR)/hashAes1Rx4.o: $(addprefix $(SRCDIR)/,hashAes1Rx4.cpp softAes.h intrinPortable.h blake2/endian.h) | $(OBJDIR)
|
$(OBJDIR)/hashAes1Rx4.o: $(addprefix $(SRCDIR)/,hashAes1Rx4.cpp softAes.h intrinPortable.h blake2/endian.h) | $(OBJDIR)
|
||||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/hashAes1Rx4.cpp -o $@
|
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/hashAes1Rx4.cpp -o $@
|
||||||
|
|
||||||
$(OBJDIR)/JitCompilerX86.o: $(addprefix $(SRCDIR)/,JitCompilerX86.cpp JitCompilerX86.hpp Instruction.hpp instructionWeights.hpp common.hpp blake2/endian.h Program.hpp reciprocal.h virtualMemory.hpp) | $(OBJDIR)
|
$(OBJDIR)/JitCompilerX86.o: $(addprefix $(SRCDIR)/,JitCompilerX86.cpp JitCompilerX86.hpp Instruction.hpp instructionWeights.hpp common.hpp blake2/endian.h Program.hpp reciprocal.h virtualMemory.hpp configuration.h) | $(OBJDIR)
|
||||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/JitCompilerX86.cpp -o $@
|
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/JitCompilerX86.cpp -o $@
|
||||||
|
|
||||||
$(OBJDIR)/JitCompilerX86-static.o: $(addprefix $(SRCDIR)/,JitCompilerX86-static.S $(addprefix asm/program_, prologue_linux.inc prologue_load.inc epilogue_linux.inc epilogue_store.inc read_dataset.inc loop_load.inc loop_store.inc xmm_constants.inc)) | $(OBJDIR)
|
$(OBJDIR)/JitCompilerX86-static.o: $(addprefix $(SRCDIR)/,JitCompilerX86-static.S $(addprefix asm/program_, prologue_linux.inc prologue_load.inc epilogue_linux.inc epilogue_store.inc read_dataset.inc loop_load.inc loop_store.inc xmm_constants.inc)) | $(OBJDIR)
|
||||||
$(CXX) -x assembler-with-cpp -c $(SRCDIR)/JitCompilerX86-static.S -o $@
|
$(CXX) -x assembler-with-cpp -c $(SRCDIR)/JitCompilerX86-static.S -o $@
|
||||||
|
|
||||||
$(OBJDIR)/squareHash.o: $(addprefix $(SRCDIR)/,squareHash.S $(addprefix asm/, squareHash.inc)) | $(OBJDIR)
|
# squareHash.S pulls in asm/squareHash.inc, asm/initBlock.inc and configuration.h; every included file is listed so that editing any of them rebuilds squareHash.o.
$(OBJDIR)/squareHash.o: $(addprefix $(SRCDIR)/,squareHash.S $(addprefix asm/, squareHash.inc initBlock.inc) configuration.h) | $(OBJDIR)
|
||||||
$(CXX) -x assembler-with-cpp -c $(SRCDIR)/squareHash.S -o $@
|
$(CXX) -x assembler-with-cpp -c $(SRCDIR)/squareHash.S -o $@
|
||||||
|
|
||||||
$(OBJDIR)/instructionsPortable.o: $(addprefix $(SRCDIR)/,instructionsPortable.cpp intrinPortable.h blake2/endian.h common.hpp) | $(OBJDIR)
|
$(OBJDIR)/instructionsPortable.o: $(addprefix $(SRCDIR)/,instructionsPortable.cpp intrinPortable.h blake2/endian.h common.hpp) | $(OBJDIR)
|
||||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/instructionsPortable.cpp -o $@
|
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/instructionsPortable.cpp -o $@
|
||||||
|
|
||||||
$(OBJDIR)/Instruction.o: $(addprefix $(SRCDIR)/,Instruction.cpp Instruction.hpp instructionWeights.hpp blake2/endian.h common.hpp) | $(OBJDIR)
|
$(OBJDIR)/Instruction.o: $(addprefix $(SRCDIR)/,Instruction.cpp Instruction.hpp instructionWeights.hpp blake2/endian.h common.hpp configuration.h) | $(OBJDIR)
|
||||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/Instruction.cpp -o $@
|
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/Instruction.cpp -o $@
|
||||||
|
|
||||||
$(OBJDIR)/InterpretedVirtualMachine.o: $(addprefix $(SRCDIR)/,InterpretedVirtualMachine.cpp InterpretedVirtualMachine.hpp instructionWeights.hpp VirtualMachine.hpp common.hpp blake2/endian.h Program.hpp Instruction.hpp intrinPortable.h dataset.hpp Cache.hpp virtualMemory.hpp LightClientAsyncWorker.hpp) | $(OBJDIR)
|
$(OBJDIR)/InterpretedVirtualMachine.o: $(addprefix $(SRCDIR)/,InterpretedVirtualMachine.cpp InterpretedVirtualMachine.hpp instructionWeights.hpp VirtualMachine.hpp common.hpp blake2/endian.h Program.hpp Instruction.hpp intrinPortable.h dataset.hpp Cache.hpp virtualMemory.hpp LightClientAsyncWorker.hpp configuration.h) | $(OBJDIR)
|
||||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/InterpretedVirtualMachine.cpp -o $@
|
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/InterpretedVirtualMachine.cpp -o $@
|
||||||
|
|
||||||
$(OBJDIR)/LightClientAsyncWorker.o: $(addprefix $(SRCDIR)/,LightClientAsyncWorker.cpp LightClientAsyncWorker.hpp common.hpp) | $(OBJDIR)
|
$(OBJDIR)/LightClientAsyncWorker.o: $(addprefix $(SRCDIR)/,LightClientAsyncWorker.cpp LightClientAsyncWorker.hpp common.hpp) | $(OBJDIR)
|
||||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/LightClientAsyncWorker.cpp -o $@
|
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/LightClientAsyncWorker.cpp -o $@
|
||||||
|
|
||||||
$(OBJDIR)/main.o: $(addprefix $(SRCDIR)/,main.cpp InterpretedVirtualMachine.hpp Stopwatch.hpp blake2/blake2.h VirtualMachine.hpp common.hpp blake2/endian.h Program.hpp Instruction.hpp intrinPortable.h CompiledVirtualMachine.hpp JitCompilerX86.hpp AssemblyGeneratorX86.hpp dataset.hpp Cache.hpp virtualMemory.hpp hashAes1Rx4.hpp softAes.h) | $(OBJDIR)
|
$(OBJDIR)/main.o: $(addprefix $(SRCDIR)/,main.cpp InterpretedVirtualMachine.hpp Stopwatch.hpp blake2/blake2.h VirtualMachine.hpp common.hpp blake2/endian.h Program.hpp Instruction.hpp intrinPortable.h CompiledVirtualMachine.hpp JitCompilerX86.hpp AssemblyGeneratorX86.hpp dataset.hpp Cache.hpp virtualMemory.hpp hashAes1Rx4.hpp softAes.h configuration.h) | $(OBJDIR)
|
||||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/main.cpp -o $@
|
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/main.cpp -o $@
|
||||||
|
|
||||||
$(OBJDIR)/Program.o: $(addprefix $(SRCDIR)/,Program.cpp Program.hpp) | $(OBJDIR)
|
$(OBJDIR)/Program.o: $(addprefix $(SRCDIR)/,Program.cpp Program.hpp configuration.h) | $(OBJDIR)
|
||||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/Program.cpp -o $@
|
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/Program.cpp -o $@
|
||||||
|
|
||||||
$(OBJDIR)/Cache.o: $(addprefix $(SRCDIR)/,Cache.cpp Cache.hpp argon2_core.h) | $(OBJDIR)
|
$(OBJDIR)/Cache.o: $(addprefix $(SRCDIR)/,Cache.cpp Cache.hpp argon2_core.h configuration.h) | $(OBJDIR)
|
||||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/Cache.cpp -o $@
|
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/Cache.cpp -o $@
|
||||||
|
|
||||||
$(OBJDIR)/softAes.o: $(addprefix $(SRCDIR)/,softAes.cpp softAes.h) | $(OBJDIR)
|
$(OBJDIR)/softAes.o: $(addprefix $(SRCDIR)/,softAes.cpp softAes.h) | $(OBJDIR)
|
||||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/softAes.cpp -o $@
|
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/softAes.cpp -o $@
|
||||||
|
|
||||||
$(OBJDIR)/VirtualMachine.o: $(addprefix $(SRCDIR)/,VirtualMachine.cpp VirtualMachine.hpp common.hpp dataset.hpp blake2/endian.h Program.hpp Instruction.hpp hashAes1Rx4.hpp softAes.h intrinPortable.h blake2/blake2.h) | $(OBJDIR)
|
$(OBJDIR)/VirtualMachine.o: $(addprefix $(SRCDIR)/,VirtualMachine.cpp VirtualMachine.hpp common.hpp dataset.hpp blake2/endian.h Program.hpp Instruction.hpp hashAes1Rx4.hpp softAes.h intrinPortable.h blake2/blake2.h configuration.h) | $(OBJDIR)
|
||||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/VirtualMachine.cpp -o $@
|
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/VirtualMachine.cpp -o $@
|
||||||
|
|
||||||
$(OBJDIR)/virtualMemory.o: $(addprefix $(SRCDIR)/,virtualMemory.cpp virtualMemory.hpp) | $(OBJDIR)
|
$(OBJDIR)/virtualMemory.o: $(addprefix $(SRCDIR)/,virtualMemory.cpp virtualMemory.hpp) | $(OBJDIR)
|
||||||
|
|
155
src/asm/initBlock.inc
Normal file
155
src/asm/initBlock.inc
Normal file
|
@ -0,0 +1,155 @@
|
||||||
|
;# Shared initBlock body, included by both the GNU-as wrapper and the MASM wrapper.
;# Registers expected on entry, as used below:
;#   rbp = destination block; prefetched here and written with r8..r15 (8 qwords) after the loop
;#   rdi = base address that each masked index (shifted left by 6) is added to
;#   rsi = iteration count; decremented once per pass, the loop ends when it reaches zero
;#   r8  = first state word, loaded by the including wrapper
;# Inside the loop rbx holds the current line address, rcx carries the squareHash input
;# and rax is read back as its result.
prefetcht0 byte ptr [rbp]
|
||||||
|
;# the remaining seven state words start at zero
xor r9, r9
|
||||||
|
xor r10, r10
|
||||||
|
xor r11, r11
|
||||||
|
xor r12, r12
|
||||||
|
xor r13, r13
|
||||||
|
xor r14, r14
|
||||||
|
xor r15, r15
|
||||||
|
initBlock_loop:
|
||||||
|
;# c0
|
||||||
|
mov rbx, r8
|
||||||
|
and rbx, 4194303
|
||||||
|
shl rbx, 6
|
||||||
|
add rbx, rdi
|
||||||
|
prefetchnta byte ptr [rbx]
|
||||||
|
lea rcx, [r8+r9]
|
||||||
|
call squareHash
|
||||||
|
mov r9, rax
|
||||||
|
xor r8, qword ptr [rbx+0]
|
||||||
|
xor r9, qword ptr [rbx+8]
|
||||||
|
xor r10, qword ptr [rbx+16]
|
||||||
|
xor r11, qword ptr [rbx+24]
|
||||||
|
xor r12, qword ptr [rbx+32]
|
||||||
|
xor r13, qword ptr [rbx+40]
|
||||||
|
xor r14, qword ptr [rbx+48]
|
||||||
|
xor r15, qword ptr [rbx+56]
|
||||||
|
;# c1
|
||||||
|
mov rbx, r9
|
||||||
|
and rbx, 4194303
|
||||||
|
shl rbx, 6
|
||||||
|
add rbx, rdi
|
||||||
|
prefetchnta byte ptr [rbx]
|
||||||
|
lea rcx, [r9+r10]
|
||||||
|
call squareHash
|
||||||
|
mov r10, rax
|
||||||
|
xor r8, qword ptr [rbx+0]
|
||||||
|
xor r9, qword ptr [rbx+8]
|
||||||
|
xor r10, qword ptr [rbx+16]
|
||||||
|
xor r11, qword ptr [rbx+24]
|
||||||
|
xor r12, qword ptr [rbx+32]
|
||||||
|
xor r13, qword ptr [rbx+40]
|
||||||
|
xor r14, qword ptr [rbx+48]
|
||||||
|
xor r15, qword ptr [rbx+56]
|
||||||
|
;# c2
|
||||||
|
mov rbx, r10
|
||||||
|
and rbx, 4194303
|
||||||
|
shl rbx, 6
|
||||||
|
add rbx, rdi
|
||||||
|
prefetchnta byte ptr [rbx]
|
||||||
|
lea rcx, [r10+r11]
|
||||||
|
call squareHash
|
||||||
|
mov r11, rax
|
||||||
|
xor r8, qword ptr [rbx+0]
|
||||||
|
xor r9, qword ptr [rbx+8]
|
||||||
|
xor r10, qword ptr [rbx+16]
|
||||||
|
xor r11, qword ptr [rbx+24]
|
||||||
|
xor r12, qword ptr [rbx+32]
|
||||||
|
xor r13, qword ptr [rbx+40]
|
||||||
|
xor r14, qword ptr [rbx+48]
|
||||||
|
xor r15, qword ptr [rbx+56]
|
||||||
|
;# c3
|
||||||
|
mov rbx, r11
|
||||||
|
and rbx, 4194303
|
||||||
|
shl rbx, 6
|
||||||
|
add rbx, rdi
|
||||||
|
prefetchnta byte ptr [rbx]
|
||||||
|
lea rcx, [r11+r12]
|
||||||
|
call squareHash
|
||||||
|
mov r12, rax
|
||||||
|
xor r8, qword ptr [rbx+0]
|
||||||
|
xor r9, qword ptr [rbx+8]
|
||||||
|
xor r10, qword ptr [rbx+16]
|
||||||
|
xor r11, qword ptr [rbx+24]
|
||||||
|
xor r12, qword ptr [rbx+32]
|
||||||
|
xor r13, qword ptr [rbx+40]
|
||||||
|
xor r14, qword ptr [rbx+48]
|
||||||
|
xor r15, qword ptr [rbx+56]
|
||||||
|
;# c4
|
||||||
|
mov rbx, r12
|
||||||
|
and rbx, 4194303
|
||||||
|
shl rbx, 6
|
||||||
|
add rbx, rdi
|
||||||
|
prefetchnta byte ptr [rbx]
|
||||||
|
lea rcx, [r12+r13]
|
||||||
|
call squareHash
|
||||||
|
mov r13, rax
|
||||||
|
xor r8, qword ptr [rbx+0]
|
||||||
|
xor r9, qword ptr [rbx+8]
|
||||||
|
xor r10, qword ptr [rbx+16]
|
||||||
|
xor r11, qword ptr [rbx+24]
|
||||||
|
xor r12, qword ptr [rbx+32]
|
||||||
|
xor r13, qword ptr [rbx+40]
|
||||||
|
xor r14, qword ptr [rbx+48]
|
||||||
|
xor r15, qword ptr [rbx+56]
|
||||||
|
;# c5
|
||||||
|
mov rbx, r13
|
||||||
|
and rbx, 4194303
|
||||||
|
shl rbx, 6
|
||||||
|
add rbx, rdi
|
||||||
|
prefetchnta byte ptr [rbx]
|
||||||
|
lea rcx, [r13+r14]
|
||||||
|
call squareHash
|
||||||
|
mov r14, rax
|
||||||
|
xor r8, qword ptr [rbx+0]
|
||||||
|
xor r9, qword ptr [rbx+8]
|
||||||
|
xor r10, qword ptr [rbx+16]
|
||||||
|
xor r11, qword ptr [rbx+24]
|
||||||
|
xor r12, qword ptr [rbx+32]
|
||||||
|
xor r13, qword ptr [rbx+40]
|
||||||
|
xor r14, qword ptr [rbx+48]
|
||||||
|
xor r15, qword ptr [rbx+56]
|
||||||
|
;# c6
|
||||||
|
mov rbx, r14
|
||||||
|
and rbx, 4194303
|
||||||
|
shl rbx, 6
|
||||||
|
add rbx, rdi
|
||||||
|
prefetchnta byte ptr [rbx]
|
||||||
|
lea rcx, [r14+r15]
|
||||||
|
call squareHash
|
||||||
|
mov r15, rax
|
||||||
|
xor r8, qword ptr [rbx+0]
|
||||||
|
xor r9, qword ptr [rbx+8]
|
||||||
|
xor r10, qword ptr [rbx+16]
|
||||||
|
xor r11, qword ptr [rbx+24]
|
||||||
|
xor r12, qword ptr [rbx+32]
|
||||||
|
xor r13, qword ptr [rbx+40]
|
||||||
|
xor r14, qword ptr [rbx+48]
|
||||||
|
xor r15, qword ptr [rbx+56]
|
||||||
|
;# c7
|
||||||
|
mov rbx, r15
|
||||||
|
and rbx, 4194303
|
||||||
|
shl rbx, 6
|
||||||
|
add rbx, rdi
|
||||||
|
prefetchnta byte ptr [rbx]
|
||||||
|
lea rcx, [r15+r8]
|
||||||
|
call squareHash
|
||||||
|
mov r8, rax
|
||||||
|
xor r8, qword ptr [rbx+0]
|
||||||
|
xor r9, qword ptr [rbx+8]
|
||||||
|
xor r10, qword ptr [rbx+16]
|
||||||
|
xor r11, qword ptr [rbx+24]
|
||||||
|
xor r12, qword ptr [rbx+32]
|
||||||
|
xor r13, qword ptr [rbx+40]
|
||||||
|
xor r14, qword ptr [rbx+48]
|
||||||
|
xor r15, qword ptr [rbx+56]
|
||||||
|
sub rsi, 1
|
||||||
|
jnz initBlock_loop
|
||||||
|
mov qword ptr [rbp+0], r8
|
||||||
|
mov qword ptr [rbp+8], r9
|
||||||
|
mov qword ptr [rbp+16], r10
|
||||||
|
mov qword ptr [rbp+24], r11
|
||||||
|
mov qword ptr [rbp+32], r12
|
||||||
|
mov qword ptr [rbp+40], r13
|
||||||
|
mov qword ptr [rbp+48], r14
|
||||||
|
mov qword ptr [rbp+56], r15
|
|
@ -40,7 +40,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
namespace RandomX {
|
namespace RandomX {
|
||||||
|
|
||||||
#if !defined(_M_X64)
|
#if true //RANDOMX_ARGON_GROWTH != 0 || (!defined(_M_X64) && !defined(__x86_64__))
|
||||||
static FORCE_INLINE uint8_t* selectMixBlock(const Cache& cache, uint64_t& currentIndex, uint64_t& nextIndex) {
|
static FORCE_INLINE uint8_t* selectMixBlock(const Cache& cache, uint64_t& currentIndex, uint64_t& nextIndex) {
|
||||||
uint8_t* mixBlock;
|
uint8_t* mixBlock;
|
||||||
if (RANDOMX_ARGON_GROWTH == 0) {
|
if (RANDOMX_ARGON_GROWTH == 0) {
|
||||||
|
@ -75,7 +75,7 @@ namespace RandomX {
|
||||||
|
|
||||||
uint8_t* mixBlock;
|
uint8_t* mixBlock;
|
||||||
|
|
||||||
for (auto i = 0; i < RANDOMX_CACHE_ACCESSES / 8; ++i) {
|
for (auto i = 0; i < iterations; ++i) {
|
||||||
mixBlock = selectMixBlock(cache, c0, c1);
|
mixBlock = selectMixBlock(cache, c0, c1);
|
||||||
mixCache(mixBlock, c0, c1, c2, c3, c4, c5, c6, c7);
|
mixCache(mixBlock, c0, c1, c2, c3, c4, c5, c6, c7);
|
||||||
|
|
||||||
|
|
|
@ -25,7 +25,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
namespace RandomX {
|
namespace RandomX {
|
||||||
|
|
||||||
#if defined(_M_X64)
|
#if false //RANDOMX_ARGON_GROWTH == 0 && (defined(_M_X64) || defined(__x86_64__))
|
||||||
extern "C"
|
extern "C"
|
||||||
#endif
|
#endif
|
||||||
void initBlock(const Cache& cache, uint8_t* out, uint64_t blockNumber, unsigned iterations);
|
void initBlock(const Cache& cache, uint8_t* out, uint64_t blockNumber, unsigned iterations);
|
||||||
|
|
|
@ -10,8 +10,31 @@
|
||||||
#define DECL(x) x
|
#define DECL(x) x
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include "configuration.h"
|
||||||
|
|
||||||
.global DECL(squareHash)
|
.global DECL(squareHash)
|
||||||
|
.global DECL(initBlock)
|
||||||
|
|
||||||
DECL(squareHash):
|
DECL(squareHash):
|
||||||
mov rcx, rdi
|
mov rcx, rdi
|
||||||
#include "asm/squareHash.inc"
|
#include "asm/squareHash.inc"
|
||||||
|
|
||||||
|
;# initBlock(cache, out, blockNumber, iterations) - GNU-as wrapper around asm/initBlock.inc.
;# Assumes the System V AMD64 argument order rdi, rsi, rdx, rcx - TODO confirm for every target built from this file.
DECL(initBlock):
|
||||||
|
;# save the registers the include overwrites and the caller expects back (rbx, rbp, r12-r15)
push rbx
|
||||||
|
push rbp
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
push r14
|
||||||
|
push r15
|
||||||
|
;# rdi = first 8 bytes of the Cache object; presumably the cache memory pointer - verify against Cache.hpp
mov rdi, qword ptr [rdi]
|
||||||
|
;# rbp = out (destination written after the loop)
mov rbp, rsi
|
||||||
|
;# r8 = blockNumber (first state word)
mov r8, rdx
|
||||||
|
;# rsi = iterations (loop counter inside the include)
mov rsi, rcx
|
||||||
|
;# NOTE(review): the include puts the hash input in rcx before each call to squareHash, but the
;# squareHash entry point in this file begins with mov rcx, rdi, which would replace that input
;# with rdi (the pointer loaded above) - confirm and fix before enabling this path.
#include "asm/initBlock.inc"
|
||||||
|
;# restore the saved registers in reverse push order
pop r15
|
||||||
|
pop r14
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
pop rbp
|
||||||
|
pop rbx
|
||||||
|
ret
|
|
@ -22,165 +22,11 @@ initBlock PROC
|
||||||
push r13
|
push r13
|
||||||
push r14
|
push r14
|
||||||
push r15
|
push r15
|
||||||
mov rsi, r9
|
|
||||||
mov rdi, qword ptr [rcx]
|
mov rdi, qword ptr [rcx]
|
||||||
mov rbp, rdx
|
mov rbp, rdx
|
||||||
prefetcht0 byte ptr [rbp]
|
|
||||||
; r8 = blockNumber
|
; r8 = blockNumber
|
||||||
xor r9, r9
|
mov rsi, r9
|
||||||
xor r10, r10
|
include asm/initBlock.inc
|
||||||
xor r11, r11
|
|
||||||
xor r12, r12
|
|
||||||
xor r13, r13
|
|
||||||
xor r14, r14
|
|
||||||
xor r15, r15
|
|
||||||
initBlock_loop:
|
|
||||||
; c0
|
|
||||||
mov rbx, r8
|
|
||||||
and rbx, 4194303
|
|
||||||
shl rbx, 6
|
|
||||||
add rbx, rdi
|
|
||||||
prefetchnta byte ptr [rbx]
|
|
||||||
lea rcx, [r8+r9]
|
|
||||||
call squareHash
|
|
||||||
mov r9, rax
|
|
||||||
xor r8, qword ptr [rbx+0]
|
|
||||||
xor r9, qword ptr [rbx+8]
|
|
||||||
xor r10, qword ptr [rbx+16]
|
|
||||||
xor r11, qword ptr [rbx+24]
|
|
||||||
xor r12, qword ptr [rbx+32]
|
|
||||||
xor r13, qword ptr [rbx+40]
|
|
||||||
xor r14, qword ptr [rbx+48]
|
|
||||||
xor r15, qword ptr [rbx+56]
|
|
||||||
; c1
|
|
||||||
mov rbx, r9
|
|
||||||
and rbx, 4194303
|
|
||||||
shl rbx, 6
|
|
||||||
add rbx, rdi
|
|
||||||
prefetchnta byte ptr [rbx]
|
|
||||||
lea rcx, [r9+r10]
|
|
||||||
call squareHash
|
|
||||||
mov r10, rax
|
|
||||||
xor r8, qword ptr [rbx+0]
|
|
||||||
xor r9, qword ptr [rbx+8]
|
|
||||||
xor r10, qword ptr [rbx+16]
|
|
||||||
xor r11, qword ptr [rbx+24]
|
|
||||||
xor r12, qword ptr [rbx+32]
|
|
||||||
xor r13, qword ptr [rbx+40]
|
|
||||||
xor r14, qword ptr [rbx+48]
|
|
||||||
xor r15, qword ptr [rbx+56]
|
|
||||||
; c2
|
|
||||||
mov rbx, r10
|
|
||||||
and rbx, 4194303
|
|
||||||
shl rbx, 6
|
|
||||||
add rbx, rdi
|
|
||||||
prefetchnta byte ptr [rbx]
|
|
||||||
lea rcx, [r10+r11]
|
|
||||||
call squareHash
|
|
||||||
mov r11, rax
|
|
||||||
xor r8, qword ptr [rbx+0]
|
|
||||||
xor r9, qword ptr [rbx+8]
|
|
||||||
xor r10, qword ptr [rbx+16]
|
|
||||||
xor r11, qword ptr [rbx+24]
|
|
||||||
xor r12, qword ptr [rbx+32]
|
|
||||||
xor r13, qword ptr [rbx+40]
|
|
||||||
xor r14, qword ptr [rbx+48]
|
|
||||||
xor r15, qword ptr [rbx+56]
|
|
||||||
; c3
|
|
||||||
mov rbx, r11
|
|
||||||
and rbx, 4194303
|
|
||||||
shl rbx, 6
|
|
||||||
add rbx, rdi
|
|
||||||
prefetchnta byte ptr [rbx]
|
|
||||||
lea rcx, [r11+r12]
|
|
||||||
call squareHash
|
|
||||||
mov r12, rax
|
|
||||||
xor r8, qword ptr [rbx+0]
|
|
||||||
xor r9, qword ptr [rbx+8]
|
|
||||||
xor r10, qword ptr [rbx+16]
|
|
||||||
xor r11, qword ptr [rbx+24]
|
|
||||||
xor r12, qword ptr [rbx+32]
|
|
||||||
xor r13, qword ptr [rbx+40]
|
|
||||||
xor r14, qword ptr [rbx+48]
|
|
||||||
xor r15, qword ptr [rbx+56]
|
|
||||||
; c4
|
|
||||||
mov rbx, r12
|
|
||||||
and rbx, 4194303
|
|
||||||
shl rbx, 6
|
|
||||||
add rbx, rdi
|
|
||||||
prefetchnta byte ptr [rbx]
|
|
||||||
lea rcx, [r12+r13]
|
|
||||||
call squareHash
|
|
||||||
mov r13, rax
|
|
||||||
xor r8, qword ptr [rbx+0]
|
|
||||||
xor r9, qword ptr [rbx+8]
|
|
||||||
xor r10, qword ptr [rbx+16]
|
|
||||||
xor r11, qword ptr [rbx+24]
|
|
||||||
xor r12, qword ptr [rbx+32]
|
|
||||||
xor r13, qword ptr [rbx+40]
|
|
||||||
xor r14, qword ptr [rbx+48]
|
|
||||||
xor r15, qword ptr [rbx+56]
|
|
||||||
; c5
|
|
||||||
mov rbx, r13
|
|
||||||
and rbx, 4194303
|
|
||||||
shl rbx, 6
|
|
||||||
add rbx, rdi
|
|
||||||
prefetchnta byte ptr [rbx]
|
|
||||||
lea rcx, [r13+r14]
|
|
||||||
call squareHash
|
|
||||||
mov r14, rax
|
|
||||||
xor r8, qword ptr [rbx+0]
|
|
||||||
xor r9, qword ptr [rbx+8]
|
|
||||||
xor r10, qword ptr [rbx+16]
|
|
||||||
xor r11, qword ptr [rbx+24]
|
|
||||||
xor r12, qword ptr [rbx+32]
|
|
||||||
xor r13, qword ptr [rbx+40]
|
|
||||||
xor r14, qword ptr [rbx+48]
|
|
||||||
xor r15, qword ptr [rbx+56]
|
|
||||||
; c6
|
|
||||||
mov rbx, r14
|
|
||||||
and rbx, 4194303
|
|
||||||
shl rbx, 6
|
|
||||||
add rbx, rdi
|
|
||||||
prefetchnta byte ptr [rbx]
|
|
||||||
lea rcx, [r14+r15]
|
|
||||||
call squareHash
|
|
||||||
mov r15, rax
|
|
||||||
xor r8, qword ptr [rbx+0]
|
|
||||||
xor r9, qword ptr [rbx+8]
|
|
||||||
xor r10, qword ptr [rbx+16]
|
|
||||||
xor r11, qword ptr [rbx+24]
|
|
||||||
xor r12, qword ptr [rbx+32]
|
|
||||||
xor r13, qword ptr [rbx+40]
|
|
||||||
xor r14, qword ptr [rbx+48]
|
|
||||||
xor r15, qword ptr [rbx+56]
|
|
||||||
; c7
|
|
||||||
mov rbx, r15
|
|
||||||
and rbx, 4194303
|
|
||||||
shl rbx, 6
|
|
||||||
add rbx, rdi
|
|
||||||
prefetchnta byte ptr [rbx]
|
|
||||||
lea rcx, [r15+r8]
|
|
||||||
call squareHash
|
|
||||||
mov r8, rax
|
|
||||||
xor r8, qword ptr [rbx+0]
|
|
||||||
xor r9, qword ptr [rbx+8]
|
|
||||||
xor r10, qword ptr [rbx+16]
|
|
||||||
xor r11, qword ptr [rbx+24]
|
|
||||||
xor r12, qword ptr [rbx+32]
|
|
||||||
xor r13, qword ptr [rbx+40]
|
|
||||||
xor r14, qword ptr [rbx+48]
|
|
||||||
xor r15, qword ptr [rbx+56]
|
|
||||||
sub rsi, 1
|
|
||||||
jnz initBlock_loop
|
|
||||||
mov qword ptr [rbp+0], r8
|
|
||||||
mov qword ptr [rbp+8], r9
|
|
||||||
mov qword ptr [rbp+16], r10
|
|
||||||
mov qword ptr [rbp+24], r11
|
|
||||||
mov qword ptr [rbp+32], r12
|
|
||||||
mov qword ptr [rbp+40], r13
|
|
||||||
mov qword ptr [rbp+48], r14
|
|
||||||
mov qword ptr [rbp+56], r15
|
|
||||||
pop r15
|
pop r15
|
||||||
pop r14
|
pop r14
|
||||||
pop r13
|
pop r13
|
||||||
|
|
Loading…
Reference in a new issue