mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2024-08-15 00:23:14 +00:00
initBlock asm version (disabled)
This commit is contained in:
parent
91063aac91
commit
6b344b81fd
6 changed files with 194 additions and 170 deletions
22
makefile
22
makefile
|
@ -53,16 +53,16 @@ $(OBJDIR)/argon2_core.o: $(addprefix $(SRCDIR)/,argon2_core.c argon2_core.h blak
|
|||
$(OBJDIR)/argon2_ref.o: $(addprefix $(SRCDIR)/,argon2_ref.c argon2.h argon2_core.h blake2/blake2.h blake2/blake2-impl.h blake2/blamka-round-ref.h blake2/endian.h) | $(OBJDIR)
|
||||
$(CC) $(CCFLAGS) -c $(SRCDIR)/argon2_ref.c -o $@
|
||||
|
||||
$(OBJDIR)/AssemblyGeneratorX86.o: $(addprefix $(SRCDIR)/,AssemblyGeneratorX86.cpp AssemblyGeneratorX86.hpp Instruction.hpp common.hpp instructionWeights.hpp blake2/endian.h reciprocal.h Program.hpp) | $(OBJDIR)
|
||||
$(OBJDIR)/AssemblyGeneratorX86.o: $(addprefix $(SRCDIR)/,AssemblyGeneratorX86.cpp AssemblyGeneratorX86.hpp Instruction.hpp common.hpp instructionWeights.hpp blake2/endian.h reciprocal.h Program.hpp configuration.h) | $(OBJDIR)
|
||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/AssemblyGeneratorX86.cpp -o $@
|
||||
|
||||
$(OBJDIR)/blake2b.o: $(addprefix $(SRCDIR)/blake2/,blake2b.c blake2.h blake2-impl.h endian.h) | $(OBJDIR)
|
||||
$(CC) $(CCFLAGS) -c $(SRCDIR)/blake2/blake2b.c -o $@
|
||||
|
||||
$(OBJDIR)/CompiledVirtualMachine.o: $(addprefix $(SRCDIR)/,CompiledVirtualMachine.cpp CompiledVirtualMachine.hpp common.hpp) | $(OBJDIR)
|
||||
$(OBJDIR)/CompiledVirtualMachine.o: $(addprefix $(SRCDIR)/,CompiledVirtualMachine.cpp CompiledVirtualMachine.hpp common.hpp configuration.h) | $(OBJDIR)
|
||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/CompiledVirtualMachine.cpp -o $@
|
||||
|
||||
$(OBJDIR)/dataset.o: $(addprefix $(SRCDIR)/,dataset.cpp common.hpp blake2/endian.h dataset.hpp intrinPortable.h Cache.hpp virtualMemory.hpp) | $(OBJDIR)
|
||||
$(OBJDIR)/dataset.o: $(addprefix $(SRCDIR)/,dataset.cpp common.hpp blake2/endian.h dataset.hpp intrinPortable.h Cache.hpp virtualMemory.hpp configuration.h) | $(OBJDIR)
|
||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/dataset.cpp -o $@
|
||||
|
||||
$(OBJDIR)/reciprocal.o: $(addprefix $(SRCDIR)/,reciprocal.c reciprocal.h) | $(OBJDIR)
|
||||
|
@ -71,40 +71,40 @@ $(OBJDIR)/reciprocal.o: $(addprefix $(SRCDIR)/,reciprocal.c reciprocal.h) | $(OB
|
|||
$(OBJDIR)/hashAes1Rx4.o: $(addprefix $(SRCDIR)/,hashAes1Rx4.cpp softAes.h intrinPortable.h blake2/endian.h) | $(OBJDIR)
|
||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/hashAes1Rx4.cpp -o $@
|
||||
|
||||
$(OBJDIR)/JitCompilerX86.o: $(addprefix $(SRCDIR)/,JitCompilerX86.cpp JitCompilerX86.hpp Instruction.hpp instructionWeights.hpp common.hpp blake2/endian.h Program.hpp reciprocal.h virtualMemory.hpp) | $(OBJDIR)
|
||||
$(OBJDIR)/JitCompilerX86.o: $(addprefix $(SRCDIR)/,JitCompilerX86.cpp JitCompilerX86.hpp Instruction.hpp instructionWeights.hpp common.hpp blake2/endian.h Program.hpp reciprocal.h virtualMemory.hpp configuration.h) | $(OBJDIR)
|
||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/JitCompilerX86.cpp -o $@
|
||||
|
||||
$(OBJDIR)/JitCompilerX86-static.o: $(addprefix $(SRCDIR)/,JitCompilerX86-static.S $(addprefix asm/program_, prologue_linux.inc prologue_load.inc epilogue_linux.inc epilogue_store.inc read_dataset.inc loop_load.inc loop_store.inc xmm_constants.inc)) | $(OBJDIR)
|
||||
$(CXX) -x assembler-with-cpp -c $(SRCDIR)/JitCompilerX86-static.S -o $@
|
||||
|
||||
$(OBJDIR)/squareHash.o: $(addprefix $(SRCDIR)/,squareHash.S $(addprefix asm/, squareHash.inc)) | $(OBJDIR)
|
||||
# squareHash.S pulls in both asm/squareHash.inc and asm/initBlock.inc via #include,
# so both must be listed as prerequisites; otherwise an edit to initBlock.inc
# would not cause squareHash.o to be reassembled.
$(OBJDIR)/squareHash.o: $(addprefix $(SRCDIR)/,squareHash.S $(addprefix asm/, squareHash.inc initBlock.inc) configuration.h) | $(OBJDIR)
|
||||
$(CXX) -x assembler-with-cpp -c $(SRCDIR)/squareHash.S -o $@
|
||||
|
||||
$(OBJDIR)/instructionsPortable.o: $(addprefix $(SRCDIR)/,instructionsPortable.cpp intrinPortable.h blake2/endian.h common.hpp) | $(OBJDIR)
|
||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/instructionsPortable.cpp -o $@
|
||||
|
||||
$(OBJDIR)/Instruction.o: $(addprefix $(SRCDIR)/,Instruction.cpp Instruction.hpp instructionWeights.hpp blake2/endian.h common.hpp) | $(OBJDIR)
|
||||
$(OBJDIR)/Instruction.o: $(addprefix $(SRCDIR)/,Instruction.cpp Instruction.hpp instructionWeights.hpp blake2/endian.h common.hpp configuration.h) | $(OBJDIR)
|
||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/Instruction.cpp -o $@
|
||||
|
||||
$(OBJDIR)/InterpretedVirtualMachine.o: $(addprefix $(SRCDIR)/,InterpretedVirtualMachine.cpp InterpretedVirtualMachine.hpp instructionWeights.hpp VirtualMachine.hpp common.hpp blake2/endian.h Program.hpp Instruction.hpp intrinPortable.h dataset.hpp Cache.hpp virtualMemory.hpp LightClientAsyncWorker.hpp) | $(OBJDIR)
|
||||
$(OBJDIR)/InterpretedVirtualMachine.o: $(addprefix $(SRCDIR)/,InterpretedVirtualMachine.cpp InterpretedVirtualMachine.hpp instructionWeights.hpp VirtualMachine.hpp common.hpp blake2/endian.h Program.hpp Instruction.hpp intrinPortable.h dataset.hpp Cache.hpp virtualMemory.hpp LightClientAsyncWorker.hpp configuration.h) | $(OBJDIR)
|
||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/InterpretedVirtualMachine.cpp -o $@
|
||||
|
||||
$(OBJDIR)/LightClientAsyncWorker.o: $(addprefix $(SRCDIR)/,LightClientAsyncWorker.cpp LightClientAsyncWorker.hpp common.hpp) | $(OBJDIR)
|
||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/LightClientAsyncWorker.cpp -o $@
|
||||
|
||||
$(OBJDIR)/main.o: $(addprefix $(SRCDIR)/,main.cpp InterpretedVirtualMachine.hpp Stopwatch.hpp blake2/blake2.h VirtualMachine.hpp common.hpp blake2/endian.h Program.hpp Instruction.hpp intrinPortable.h CompiledVirtualMachine.hpp JitCompilerX86.hpp AssemblyGeneratorX86.hpp dataset.hpp Cache.hpp virtualMemory.hpp hashAes1Rx4.hpp softAes.h) | $(OBJDIR)
|
||||
$(OBJDIR)/main.o: $(addprefix $(SRCDIR)/,main.cpp InterpretedVirtualMachine.hpp Stopwatch.hpp blake2/blake2.h VirtualMachine.hpp common.hpp blake2/endian.h Program.hpp Instruction.hpp intrinPortable.h CompiledVirtualMachine.hpp JitCompilerX86.hpp AssemblyGeneratorX86.hpp dataset.hpp Cache.hpp virtualMemory.hpp hashAes1Rx4.hpp softAes.h configuration.h) | $(OBJDIR)
|
||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/main.cpp -o $@
|
||||
|
||||
$(OBJDIR)/Program.o: $(addprefix $(SRCDIR)/,Program.cpp Program.hpp) | $(OBJDIR)
|
||||
$(OBJDIR)/Program.o: $(addprefix $(SRCDIR)/,Program.cpp Program.hpp configuration.h) | $(OBJDIR)
|
||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/Program.cpp -o $@
|
||||
|
||||
$(OBJDIR)/Cache.o: $(addprefix $(SRCDIR)/,Cache.cpp Cache.hpp argon2_core.h) | $(OBJDIR)
|
||||
$(OBJDIR)/Cache.o: $(addprefix $(SRCDIR)/,Cache.cpp Cache.hpp argon2_core.h configuration.h) | $(OBJDIR)
|
||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/Cache.cpp -o $@
|
||||
|
||||
$(OBJDIR)/softAes.o: $(addprefix $(SRCDIR)/,softAes.cpp softAes.h) | $(OBJDIR)
|
||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/softAes.cpp -o $@
|
||||
|
||||
$(OBJDIR)/VirtualMachine.o: $(addprefix $(SRCDIR)/,VirtualMachine.cpp VirtualMachine.hpp common.hpp dataset.hpp blake2/endian.h Program.hpp Instruction.hpp hashAes1Rx4.hpp softAes.h intrinPortable.h blake2/blake2.h) | $(OBJDIR)
|
||||
$(OBJDIR)/VirtualMachine.o: $(addprefix $(SRCDIR)/,VirtualMachine.cpp VirtualMachine.hpp common.hpp dataset.hpp blake2/endian.h Program.hpp Instruction.hpp hashAes1Rx4.hpp softAes.h intrinPortable.h blake2/blake2.h configuration.h) | $(OBJDIR)
|
||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/VirtualMachine.cpp -o $@
|
||||
|
||||
$(OBJDIR)/virtualMemory.o: $(addprefix $(SRCDIR)/,virtualMemory.cpp virtualMemory.hpp) | $(OBJDIR)
|
||||
|
|
155
src/asm/initBlock.inc
Normal file
155
src/asm/initBlock.inc
Normal file
|
@ -0,0 +1,155 @@
|
|||
;# Body of initBlock, written so that it can be included by both the GNU (.S) and
;# the MASM (.asm) wrapper between their register save and restore sequences.
;# Registers expected on entry (as set up by those wrappers):
;#   rdi = base address that the selected block offsets are added to
;#   rbp = output pointer; 64 bytes are stored there at the end
;#   r8  = block number, used as the initial value of c0
;#   rsi = iteration count for initBlock_loop
;# r8..r15 hold the eight 64-bit state words c0..c7; rbx, rcx and rax are scratch.
;# NOTE(review): rbx, rdi, rsi, rbp and r8..r15 must survive each call to squareHash;
;# presumably squareHash only touches rax, rcx and rdx - confirm in asm/squareHash.inc.
;# NOTE(review): the counter is tested after the loop body, so rsi = 0 on entry would
;# wrap around instead of skipping the loop - callers presumably pass a positive count.
;# Hint the output line into cache early; it is only written after the loop.
prefetcht0 byte ptr [rbp]
|
||||
;# c1..c7 start at zero; c0 (r8) already holds the block number.
xor r9, r9
|
||||
xor r10, r10
|
||||
xor r11, r11
|
||||
xor r12, r12
|
||||
xor r13, r13
|
||||
xor r14, r14
|
||||
xor r15, r15
|
||||
initBlock_loop:
|
||||
;# c0: rbx = rdi + (c0 mod 2^22) * 64, the address of the 64-byte block to mix in
|
||||
mov rbx, r8
|
||||
and rbx, 4194303
|
||||
shl rbx, 6
|
||||
add rbx, rdi
|
||||
;# request the block now so the fetch can overlap the squareHash call below
prefetchnta byte ptr [rbx]
|
||||
;# c1 = squareHash(c0 + c1): argument goes in rcx, result comes back in rax
lea rcx, [r8+r9]
|
||||
call squareHash
|
||||
mov r9, rax
|
||||
;# XOR the eight qwords of the selected block into c0..c7
xor r8, qword ptr [rbx+0]
|
||||
xor r9, qword ptr [rbx+8]
|
||||
xor r10, qword ptr [rbx+16]
|
||||
xor r11, qword ptr [rbx+24]
|
||||
xor r12, qword ptr [rbx+32]
|
||||
xor r13, qword ptr [rbx+40]
|
||||
xor r14, qword ptr [rbx+48]
|
||||
xor r15, qword ptr [rbx+56]
|
||||
;# c1: same step with the block selected by c1, then c2 = squareHash(c1 + c2)
|
||||
mov rbx, r9
|
||||
and rbx, 4194303
|
||||
shl rbx, 6
|
||||
add rbx, rdi
|
||||
prefetchnta byte ptr [rbx]
|
||||
lea rcx, [r9+r10]
|
||||
call squareHash
|
||||
mov r10, rax
|
||||
xor r8, qword ptr [rbx+0]
|
||||
xor r9, qword ptr [rbx+8]
|
||||
xor r10, qword ptr [rbx+16]
|
||||
xor r11, qword ptr [rbx+24]
|
||||
xor r12, qword ptr [rbx+32]
|
||||
xor r13, qword ptr [rbx+40]
|
||||
xor r14, qword ptr [rbx+48]
|
||||
xor r15, qword ptr [rbx+56]
|
||||
;# c2: block selected by c2, then c3 = squareHash(c2 + c3)
|
||||
mov rbx, r10
|
||||
and rbx, 4194303
|
||||
shl rbx, 6
|
||||
add rbx, rdi
|
||||
prefetchnta byte ptr [rbx]
|
||||
lea rcx, [r10+r11]
|
||||
call squareHash
|
||||
mov r11, rax
|
||||
xor r8, qword ptr [rbx+0]
|
||||
xor r9, qword ptr [rbx+8]
|
||||
xor r10, qword ptr [rbx+16]
|
||||
xor r11, qword ptr [rbx+24]
|
||||
xor r12, qword ptr [rbx+32]
|
||||
xor r13, qword ptr [rbx+40]
|
||||
xor r14, qword ptr [rbx+48]
|
||||
xor r15, qword ptr [rbx+56]
|
||||
;# c3: block selected by c3, then c4 = squareHash(c3 + c4)
|
||||
mov rbx, r11
|
||||
and rbx, 4194303
|
||||
shl rbx, 6
|
||||
add rbx, rdi
|
||||
prefetchnta byte ptr [rbx]
|
||||
lea rcx, [r11+r12]
|
||||
call squareHash
|
||||
mov r12, rax
|
||||
xor r8, qword ptr [rbx+0]
|
||||
xor r9, qword ptr [rbx+8]
|
||||
xor r10, qword ptr [rbx+16]
|
||||
xor r11, qword ptr [rbx+24]
|
||||
xor r12, qword ptr [rbx+32]
|
||||
xor r13, qword ptr [rbx+40]
|
||||
xor r14, qword ptr [rbx+48]
|
||||
xor r15, qword ptr [rbx+56]
|
||||
;# c4: block selected by c4, then c5 = squareHash(c4 + c5)
|
||||
mov rbx, r12
|
||||
and rbx, 4194303
|
||||
shl rbx, 6
|
||||
add rbx, rdi
|
||||
prefetchnta byte ptr [rbx]
|
||||
lea rcx, [r12+r13]
|
||||
call squareHash
|
||||
mov r13, rax
|
||||
xor r8, qword ptr [rbx+0]
|
||||
xor r9, qword ptr [rbx+8]
|
||||
xor r10, qword ptr [rbx+16]
|
||||
xor r11, qword ptr [rbx+24]
|
||||
xor r12, qword ptr [rbx+32]
|
||||
xor r13, qword ptr [rbx+40]
|
||||
xor r14, qword ptr [rbx+48]
|
||||
xor r15, qword ptr [rbx+56]
|
||||
;# c5: block selected by c5, then c6 = squareHash(c5 + c6)
|
||||
mov rbx, r13
|
||||
and rbx, 4194303
|
||||
shl rbx, 6
|
||||
add rbx, rdi
|
||||
prefetchnta byte ptr [rbx]
|
||||
lea rcx, [r13+r14]
|
||||
call squareHash
|
||||
mov r14, rax
|
||||
xor r8, qword ptr [rbx+0]
|
||||
xor r9, qword ptr [rbx+8]
|
||||
xor r10, qword ptr [rbx+16]
|
||||
xor r11, qword ptr [rbx+24]
|
||||
xor r12, qword ptr [rbx+32]
|
||||
xor r13, qword ptr [rbx+40]
|
||||
xor r14, qword ptr [rbx+48]
|
||||
xor r15, qword ptr [rbx+56]
|
||||
;# c6: block selected by c6, then c7 = squareHash(c6 + c7)
|
||||
mov rbx, r14
|
||||
and rbx, 4194303
|
||||
shl rbx, 6
|
||||
add rbx, rdi
|
||||
prefetchnta byte ptr [rbx]
|
||||
lea rcx, [r14+r15]
|
||||
call squareHash
|
||||
mov r15, rax
|
||||
xor r8, qword ptr [rbx+0]
|
||||
xor r9, qword ptr [rbx+8]
|
||||
xor r10, qword ptr [rbx+16]
|
||||
xor r11, qword ptr [rbx+24]
|
||||
xor r12, qword ptr [rbx+32]
|
||||
xor r13, qword ptr [rbx+40]
|
||||
xor r14, qword ptr [rbx+48]
|
||||
xor r15, qword ptr [rbx+56]
|
||||
;# c7: block selected by c7, then c0 = squareHash(c7 + c0), closing the ring
|
||||
mov rbx, r15
|
||||
and rbx, 4194303
|
||||
shl rbx, 6
|
||||
add rbx, rdi
|
||||
prefetchnta byte ptr [rbx]
|
||||
lea rcx, [r15+r8]
|
||||
call squareHash
|
||||
mov r8, rax
|
||||
xor r8, qword ptr [rbx+0]
|
||||
xor r9, qword ptr [rbx+8]
|
||||
xor r10, qword ptr [rbx+16]
|
||||
xor r11, qword ptr [rbx+24]
|
||||
xor r12, qword ptr [rbx+32]
|
||||
xor r13, qword ptr [rbx+40]
|
||||
xor r14, qword ptr [rbx+48]
|
||||
xor r15, qword ptr [rbx+56]
|
||||
;# one full pass over c0..c7 done; repeat until the iteration counter reaches zero
sub rsi, 1
|
||||
jnz initBlock_loop
|
||||
;# write the final state c0..c7 to the 64-byte output buffer
mov qword ptr [rbp+0], r8
|
||||
mov qword ptr [rbp+8], r9
|
||||
mov qword ptr [rbp+16], r10
|
||||
mov qword ptr [rbp+24], r11
|
||||
mov qword ptr [rbp+32], r12
|
||||
mov qword ptr [rbp+40], r13
|
||||
mov qword ptr [rbp+48], r14
|
||||
mov qword ptr [rbp+56], r15
|
|
@ -40,7 +40,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
|||
|
||||
namespace RandomX {
|
||||
|
||||
#if !defined(_M_X64)
|
||||
#if true //RANDOMX_ARGON_GROWTH != 0 || (!defined(_M_X64) && !defined(__x86_64__))
|
||||
static FORCE_INLINE uint8_t* selectMixBlock(const Cache& cache, uint64_t& currentIndex, uint64_t& nextIndex) {
|
||||
uint8_t* mixBlock;
|
||||
if (RANDOMX_ARGON_GROWTH == 0) {
|
||||
|
@ -75,7 +75,7 @@ namespace RandomX {
|
|||
|
||||
uint8_t* mixBlock;
|
||||
|
||||
for (auto i = 0; i < RANDOMX_CACHE_ACCESSES / 8; ++i) {
|
||||
for (auto i = 0; i < iterations; ++i) {
|
||||
mixBlock = selectMixBlock(cache, c0, c1);
|
||||
mixCache(mixBlock, c0, c1, c2, c3, c4, c5, c6, c7);
|
||||
|
||||
|
|
|
@ -25,7 +25,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
|||
|
||||
namespace RandomX {
|
||||
|
||||
#if defined(_M_X64)
|
||||
#if false //RANDOMX_ARGON_GROWTH == 0 && (defined(_M_X64) || defined(__x86_64__))
|
||||
extern "C"
|
||||
#endif
|
||||
void initBlock(const Cache& cache, uint8_t* out, uint64_t blockNumber, unsigned iterations);
|
||||
|
|
|
@ -10,8 +10,31 @@
|
|||
#define DECL(x) x
|
||||
#endif
|
||||
|
||||
#include "configuration.h"
|
||||
|
||||
.global DECL(squareHash)
|
||||
.global DECL(initBlock)
|
||||
|
||||
DECL(squareHash):
|
||||
mov rcx, rdi
|
||||
#include "asm/squareHash.inc"
|
||||
|
||||
;# initBlock(cache, out, blockNumber, iterations) for the System V x86-64 calling
;# convention: the four arguments arrive in rdi, rsi, rdx and rcx and are moved into
;# the registers that asm/initBlock.inc works with. Nothing is returned in rax; the
;# result is the 64 bytes that the included body stores through rbp.
;# NOTE(review): asm/initBlock.inc passes the squareHash argument in rcx, but
;# DECL(squareHash) in this file begins with a mov rcx, rdi that would overwrite it.
;# Confirm the intended entry point before enabling this routine.
DECL(initBlock):
|
||||
;# save the callee-saved registers that the included body overwrites
push rbx
|
||||
push rbp
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
;# rdi = first qword of the cache object (presumably its memory pointer - confirm
;# against the Cache layout)
mov rdi, qword ptr [rdi]
|
||||
;# rbp = out
mov rbp, rsi
|
||||
;# r8 = blockNumber
mov r8, rdx
|
||||
;# rsi = iterations. The C++ prototype declares it as a 32-bit unsigned, and the
;# ABI leaves the upper half of rcx unspecified for such arguments, so copy only
;# ecx; writing esi zero-extends into rsi and gives the loop a clean 64-bit counter.
mov esi, ecx
|
||||
#include "asm/initBlock.inc"
|
||||
;# restore the saved registers in reverse order
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rbp
|
||||
pop rbx
|
||||
ret
|
|
@ -22,165 +22,11 @@ initBlock PROC
|
|||
push r13
|
||||
push r14
|
||||
push r15
|
||||
mov rsi, r9
|
||||
mov rdi, qword ptr [rcx]
|
||||
mov rbp, rdx
|
||||
prefetcht0 byte ptr [rbp]
|
||||
; r8 = blockNumber
|
||||
xor r9, r9
|
||||
xor r10, r10
|
||||
xor r11, r11
|
||||
xor r12, r12
|
||||
xor r13, r13
|
||||
xor r14, r14
|
||||
xor r15, r15
|
||||
initBlock_loop:
|
||||
; c0
|
||||
mov rbx, r8
|
||||
and rbx, 4194303
|
||||
shl rbx, 6
|
||||
add rbx, rdi
|
||||
prefetchnta byte ptr [rbx]
|
||||
lea rcx, [r8+r9]
|
||||
call squareHash
|
||||
mov r9, rax
|
||||
xor r8, qword ptr [rbx+0]
|
||||
xor r9, qword ptr [rbx+8]
|
||||
xor r10, qword ptr [rbx+16]
|
||||
xor r11, qword ptr [rbx+24]
|
||||
xor r12, qword ptr [rbx+32]
|
||||
xor r13, qword ptr [rbx+40]
|
||||
xor r14, qword ptr [rbx+48]
|
||||
xor r15, qword ptr [rbx+56]
|
||||
; c1
|
||||
mov rbx, r9
|
||||
and rbx, 4194303
|
||||
shl rbx, 6
|
||||
add rbx, rdi
|
||||
prefetchnta byte ptr [rbx]
|
||||
lea rcx, [r9+r10]
|
||||
call squareHash
|
||||
mov r10, rax
|
||||
xor r8, qword ptr [rbx+0]
|
||||
xor r9, qword ptr [rbx+8]
|
||||
xor r10, qword ptr [rbx+16]
|
||||
xor r11, qword ptr [rbx+24]
|
||||
xor r12, qword ptr [rbx+32]
|
||||
xor r13, qword ptr [rbx+40]
|
||||
xor r14, qword ptr [rbx+48]
|
||||
xor r15, qword ptr [rbx+56]
|
||||
; c2
|
||||
mov rbx, r10
|
||||
and rbx, 4194303
|
||||
shl rbx, 6
|
||||
add rbx, rdi
|
||||
prefetchnta byte ptr [rbx]
|
||||
lea rcx, [r10+r11]
|
||||
call squareHash
|
||||
mov r11, rax
|
||||
xor r8, qword ptr [rbx+0]
|
||||
xor r9, qword ptr [rbx+8]
|
||||
xor r10, qword ptr [rbx+16]
|
||||
xor r11, qword ptr [rbx+24]
|
||||
xor r12, qword ptr [rbx+32]
|
||||
xor r13, qword ptr [rbx+40]
|
||||
xor r14, qword ptr [rbx+48]
|
||||
xor r15, qword ptr [rbx+56]
|
||||
; c3
|
||||
mov rbx, r11
|
||||
and rbx, 4194303
|
||||
shl rbx, 6
|
||||
add rbx, rdi
|
||||
prefetchnta byte ptr [rbx]
|
||||
lea rcx, [r11+r12]
|
||||
call squareHash
|
||||
mov r12, rax
|
||||
xor r8, qword ptr [rbx+0]
|
||||
xor r9, qword ptr [rbx+8]
|
||||
xor r10, qword ptr [rbx+16]
|
||||
xor r11, qword ptr [rbx+24]
|
||||
xor r12, qword ptr [rbx+32]
|
||||
xor r13, qword ptr [rbx+40]
|
||||
xor r14, qword ptr [rbx+48]
|
||||
xor r15, qword ptr [rbx+56]
|
||||
; c4
|
||||
mov rbx, r12
|
||||
and rbx, 4194303
|
||||
shl rbx, 6
|
||||
add rbx, rdi
|
||||
prefetchnta byte ptr [rbx]
|
||||
lea rcx, [r12+r13]
|
||||
call squareHash
|
||||
mov r13, rax
|
||||
xor r8, qword ptr [rbx+0]
|
||||
xor r9, qword ptr [rbx+8]
|
||||
xor r10, qword ptr [rbx+16]
|
||||
xor r11, qword ptr [rbx+24]
|
||||
xor r12, qword ptr [rbx+32]
|
||||
xor r13, qword ptr [rbx+40]
|
||||
xor r14, qword ptr [rbx+48]
|
||||
xor r15, qword ptr [rbx+56]
|
||||
; c5
|
||||
mov rbx, r13
|
||||
and rbx, 4194303
|
||||
shl rbx, 6
|
||||
add rbx, rdi
|
||||
prefetchnta byte ptr [rbx]
|
||||
lea rcx, [r13+r14]
|
||||
call squareHash
|
||||
mov r14, rax
|
||||
xor r8, qword ptr [rbx+0]
|
||||
xor r9, qword ptr [rbx+8]
|
||||
xor r10, qword ptr [rbx+16]
|
||||
xor r11, qword ptr [rbx+24]
|
||||
xor r12, qword ptr [rbx+32]
|
||||
xor r13, qword ptr [rbx+40]
|
||||
xor r14, qword ptr [rbx+48]
|
||||
xor r15, qword ptr [rbx+56]
|
||||
; c6
|
||||
mov rbx, r14
|
||||
and rbx, 4194303
|
||||
shl rbx, 6
|
||||
add rbx, rdi
|
||||
prefetchnta byte ptr [rbx]
|
||||
lea rcx, [r14+r15]
|
||||
call squareHash
|
||||
mov r15, rax
|
||||
xor r8, qword ptr [rbx+0]
|
||||
xor r9, qword ptr [rbx+8]
|
||||
xor r10, qword ptr [rbx+16]
|
||||
xor r11, qword ptr [rbx+24]
|
||||
xor r12, qword ptr [rbx+32]
|
||||
xor r13, qword ptr [rbx+40]
|
||||
xor r14, qword ptr [rbx+48]
|
||||
xor r15, qword ptr [rbx+56]
|
||||
; c7
|
||||
mov rbx, r15
|
||||
and rbx, 4194303
|
||||
shl rbx, 6
|
||||
add rbx, rdi
|
||||
prefetchnta byte ptr [rbx]
|
||||
lea rcx, [r15+r8]
|
||||
call squareHash
|
||||
mov r8, rax
|
||||
xor r8, qword ptr [rbx+0]
|
||||
xor r9, qword ptr [rbx+8]
|
||||
xor r10, qword ptr [rbx+16]
|
||||
xor r11, qword ptr [rbx+24]
|
||||
xor r12, qword ptr [rbx+32]
|
||||
xor r13, qword ptr [rbx+40]
|
||||
xor r14, qword ptr [rbx+48]
|
||||
xor r15, qword ptr [rbx+56]
|
||||
sub rsi, 1
|
||||
jnz initBlock_loop
|
||||
mov qword ptr [rbp+0], r8
|
||||
mov qword ptr [rbp+8], r9
|
||||
mov qword ptr [rbp+16], r10
|
||||
mov qword ptr [rbp+24], r11
|
||||
mov qword ptr [rbp+32], r12
|
||||
mov qword ptr [rbp+40], r13
|
||||
mov qword ptr [rbp+48], r14
|
||||
mov qword ptr [rbp+56], r15
|
||||
mov rsi, r9
|
||||
include asm/initBlock.inc
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
|
|
Loading…
Reference in a new issue