mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2024-08-15 00:23:14 +00:00
Merged load/store of integer and FP registers
This commit is contained in:
parent
8f2abd6c05
commit
20eb549725
14 changed files with 88 additions and 114 deletions
|
@ -27,13 +27,11 @@
|
||||||
#define DECL(x) x
|
#define DECL(x) x
|
||||||
#endif
|
#endif
|
||||||
.global DECL(randomx_program_prologue)
|
.global DECL(randomx_program_prologue)
|
||||||
.global DECL(randomx_loop_begin)
|
.global DECL(randomx_program_loop_begin)
|
||||||
.global DECL(randomx_program_load_int)
|
.global DECL(randomx_program_loop_load)
|
||||||
.global DECL(randomx_program_load_flt)
|
|
||||||
.global DECL(randomx_program_start)
|
.global DECL(randomx_program_start)
|
||||||
.global DECL(randomx_program_read_dataset)
|
.global DECL(randomx_program_read_dataset)
|
||||||
.global DECL(randomx_program_store_int)
|
.global DECL(randomx_program_loop_store)
|
||||||
.global DECL(randomx_program_store_flt)
|
|
||||||
.global DECL(randomx_program_loop_end)
|
.global DECL(randomx_program_loop_end)
|
||||||
.global DECL(randomx_program_epilogue)
|
.global DECL(randomx_program_epilogue)
|
||||||
.global DECL(randomx_program_end)
|
.global DECL(randomx_program_end)
|
||||||
|
@ -48,14 +46,11 @@ DECL(randomx_program_prologue):
|
||||||
#include "asm/program_xmm_constants.inc"
|
#include "asm/program_xmm_constants.inc"
|
||||||
|
|
||||||
.align 64
|
.align 64
|
||||||
DECL(randomx_loop_begin):
|
DECL(randomx_program_loop_begin):
|
||||||
nop
|
nop
|
||||||
|
|
||||||
DECL(randomx_program_load_int):
|
DECL(randomx_program_loop_load):
|
||||||
#include "asm/program_load_int.inc"
|
#include "asm/program_loop_load.inc"
|
||||||
|
|
||||||
DECL(randomx_program_load_flt):
|
|
||||||
#include "asm/program_load_flt.inc"
|
|
||||||
|
|
||||||
DECL(randomx_program_start):
|
DECL(randomx_program_start):
|
||||||
nop
|
nop
|
||||||
|
@ -63,11 +58,8 @@ DECL(randomx_program_start):
|
||||||
DECL(randomx_program_read_dataset):
|
DECL(randomx_program_read_dataset):
|
||||||
#include "asm/program_read_dataset.inc"
|
#include "asm/program_read_dataset.inc"
|
||||||
|
|
||||||
DECL(randomx_program_store_int):
|
DECL(randomx_program_loop_store):
|
||||||
#include "asm/program_store_int.inc"
|
#include "asm/program_loop_store.inc"
|
||||||
|
|
||||||
DECL(randomx_program_store_flt):
|
|
||||||
#include "asm/program_store_flt.inc"
|
|
||||||
|
|
||||||
DECL(randomx_program_loop_end):
|
DECL(randomx_program_loop_end):
|
||||||
nop
|
nop
|
||||||
|
|
|
@ -20,13 +20,11 @@ IFDEF RAX
|
||||||
_RANDOMX_JITX86_STATIC SEGMENT PAGE READ EXECUTE
|
_RANDOMX_JITX86_STATIC SEGMENT PAGE READ EXECUTE
|
||||||
|
|
||||||
PUBLIC randomx_program_prologue
|
PUBLIC randomx_program_prologue
|
||||||
PUBLIC randomx_loop_begin
|
PUBLIC randomx_program_loop_begin
|
||||||
PUBLIC randomx_program_load_int
|
PUBLIC randomx_program_loop_load
|
||||||
PUBLIC randomx_program_load_flt
|
|
||||||
PUBLIC randomx_program_start
|
PUBLIC randomx_program_start
|
||||||
PUBLIC randomx_program_read_dataset
|
PUBLIC randomx_program_read_dataset
|
||||||
PUBLIC randomx_program_store_int
|
PUBLIC randomx_program_loop_store
|
||||||
PUBLIC randomx_program_store_flt
|
|
||||||
PUBLIC randomx_program_loop_end
|
PUBLIC randomx_program_loop_end
|
||||||
PUBLIC randomx_program_epilogue
|
PUBLIC randomx_program_epilogue
|
||||||
PUBLIC randomx_program_end
|
PUBLIC randomx_program_end
|
||||||
|
@ -40,17 +38,13 @@ ALIGN 64
|
||||||
include asm/program_xmm_constants.inc
|
include asm/program_xmm_constants.inc
|
||||||
|
|
||||||
ALIGN 64
|
ALIGN 64
|
||||||
randomx_loop_begin PROC
|
randomx_program_loop_begin PROC
|
||||||
nop
|
nop
|
||||||
randomx_loop_begin ENDP
|
randomx_program_loop_begin ENDP
|
||||||
|
|
||||||
randomx_program_load_int PROC
|
randomx_program_loop_load PROC
|
||||||
include asm/program_load_int.inc
|
include asm/program_loop_load.inc
|
||||||
randomx_program_load_int ENDP
|
randomx_program_loop_load ENDP
|
||||||
|
|
||||||
randomx_program_load_flt PROC
|
|
||||||
include asm/program_load_flt.inc
|
|
||||||
randomx_program_load_flt ENDP
|
|
||||||
|
|
||||||
randomx_program_start PROC
|
randomx_program_start PROC
|
||||||
nop
|
nop
|
||||||
|
@ -60,13 +54,9 @@ randomx_program_read_dataset PROC
|
||||||
include asm/program_read_dataset.inc
|
include asm/program_read_dataset.inc
|
||||||
randomx_program_read_dataset ENDP
|
randomx_program_read_dataset ENDP
|
||||||
|
|
||||||
randomx_program_store_int PROC
|
randomx_program_loop_store PROC
|
||||||
include asm/program_store_int.inc
|
include asm/program_loop_store.inc
|
||||||
randomx_program_store_int ENDP
|
randomx_program_loop_store ENDP
|
||||||
|
|
||||||
randomx_program_store_flt PROC
|
|
||||||
include asm/program_store_flt.inc
|
|
||||||
randomx_program_store_flt ENDP
|
|
||||||
|
|
||||||
randomx_program_loop_end PROC
|
randomx_program_loop_end PROC
|
||||||
nop
|
nop
|
||||||
|
|
|
@ -19,13 +19,11 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
extern "C" {
|
extern "C" {
|
||||||
void randomx_program_prologue();
|
void randomx_program_prologue();
|
||||||
void randomx_loop_begin();
|
void randomx_program_loop_begin();
|
||||||
void randomx_program_load_int();
|
void randomx_program_loop_load();
|
||||||
void randomx_program_load_flt();
|
|
||||||
void randomx_program_start();
|
void randomx_program_start();
|
||||||
void randomx_program_read_dataset();
|
void randomx_program_read_dataset();
|
||||||
void randomx_program_store_int();
|
void randomx_program_loop_store();
|
||||||
void randomx_program_store_flt();
|
|
||||||
void randomx_program_loop_end();
|
void randomx_program_loop_end();
|
||||||
void randomx_program_epilogue();
|
void randomx_program_epilogue();
|
||||||
void randomx_program_end();
|
void randomx_program_end();
|
||||||
|
|
|
@ -94,13 +94,11 @@ namespace RandomX {
|
||||||
#include "JitCompilerX86-static.hpp"
|
#include "JitCompilerX86-static.hpp"
|
||||||
|
|
||||||
const uint8_t* codePrologue = (uint8_t*)&randomx_program_prologue;
|
const uint8_t* codePrologue = (uint8_t*)&randomx_program_prologue;
|
||||||
const uint8_t* codeLoopBegin = (uint8_t*)&randomx_loop_begin;
|
const uint8_t* codeLoopBegin = (uint8_t*)&randomx_program_loop_begin;
|
||||||
const uint8_t* codeLoadInt = (uint8_t*)&randomx_program_load_int;
|
const uint8_t* codeLoopLoad = (uint8_t*)&randomx_program_loop_load;
|
||||||
const uint8_t* codeLoadFlt = (uint8_t*)&randomx_program_load_flt;
|
|
||||||
const uint8_t* codeProgamStart = (uint8_t*)&randomx_program_start;
|
const uint8_t* codeProgamStart = (uint8_t*)&randomx_program_start;
|
||||||
const uint8_t* codeReadDataset = (uint8_t*)&randomx_program_read_dataset;
|
const uint8_t* codeReadDataset = (uint8_t*)&randomx_program_read_dataset;
|
||||||
const uint8_t* codeStoreInt = (uint8_t*)&randomx_program_store_int;
|
const uint8_t* codeLoopStore = (uint8_t*)&randomx_program_loop_store;
|
||||||
const uint8_t* codeStoreFlt = (uint8_t*)&randomx_program_store_flt;
|
|
||||||
const uint8_t* codeLoopEnd = (uint8_t*)&randomx_program_loop_end;
|
const uint8_t* codeLoopEnd = (uint8_t*)&randomx_program_loop_end;
|
||||||
const uint8_t* codeEpilogue = (uint8_t*)&randomx_program_epilogue;
|
const uint8_t* codeEpilogue = (uint8_t*)&randomx_program_epilogue;
|
||||||
const uint8_t* codeProgramEnd = (uint8_t*)&randomx_program_end;
|
const uint8_t* codeProgramEnd = (uint8_t*)&randomx_program_end;
|
||||||
|
@ -108,11 +106,9 @@ namespace RandomX {
|
||||||
const int32_t prologueSize = codeLoopBegin - codePrologue;
|
const int32_t prologueSize = codeLoopBegin - codePrologue;
|
||||||
const int32_t epilogueSize = codeProgramEnd - codeEpilogue;
|
const int32_t epilogueSize = codeProgramEnd - codeEpilogue;
|
||||||
|
|
||||||
const int32_t loadIntSize = codeLoadFlt - codeLoadInt;
|
const int32_t loopLoadSize = codeProgamStart - codeLoopLoad;
|
||||||
const int32_t loadFltSize = codeProgamStart - codeLoadFlt;
|
const int32_t readDatasetSize = codeLoopStore - codeReadDataset;
|
||||||
const int32_t readDatasetSize = codeStoreInt - codeReadDataset;
|
const int32_t loopStoreSize = codeLoopEnd - codeLoopStore;
|
||||||
const int32_t storeIntSize = codeStoreFlt - codeStoreInt;
|
|
||||||
const int32_t storeFltSize = codeLoopEnd - codeStoreFlt;
|
|
||||||
|
|
||||||
const int32_t epilogueOffset = CodeSize - epilogueSize;
|
const int32_t epilogueOffset = CodeSize - epilogueSize;
|
||||||
|
|
||||||
|
@ -179,6 +175,7 @@ namespace RandomX {
|
||||||
static const uint8_t SUB_EBX[] = { 0x83, 0xEB, 0x01 };
|
static const uint8_t SUB_EBX[] = { 0x83, 0xEB, 0x01 };
|
||||||
static const uint8_t JNZ[] = { 0x0f, 0x85 };
|
static const uint8_t JNZ[] = { 0x0f, 0x85 };
|
||||||
static const uint8_t JMP = 0xe9;
|
static const uint8_t JMP = 0xe9;
|
||||||
|
static const uint8_t REX_XOR_RAX_R64[] = { 0x49, 0x33 };
|
||||||
|
|
||||||
size_t JitCompilerX86::getCodeSize() {
|
size_t JitCompilerX86::getCodeSize() {
|
||||||
return codePos - prologueSize;
|
return codePos - prologueSize;
|
||||||
|
@ -204,18 +201,16 @@ namespace RandomX {
|
||||||
addressRegisters >>= 1;
|
addressRegisters >>= 1;
|
||||||
int readReg2 = 2 + (addressRegisters & 1);
|
int readReg2 = 2 + (addressRegisters & 1);
|
||||||
addressRegisters >>= 1;
|
addressRegisters >>= 1;
|
||||||
int writeReg1 = 4 + (addressRegisters & 1);
|
int readReg3 = 4 + (addressRegisters & 1);
|
||||||
addressRegisters >>= 1;
|
addressRegisters >>= 1;
|
||||||
int writeReg2 = 6 + (addressRegisters & 1);
|
int readReg4 = 6 + (addressRegisters & 1);
|
||||||
codePos = prologueSize;
|
codePos = prologueSize;
|
||||||
emit(REX_XOR_EAX);
|
emit(REX_XOR_RAX_R64);
|
||||||
emitByte(0xc0 + readReg1);
|
emitByte(0xc0 + readReg1);
|
||||||
memcpy(code + codePos, codeLoadInt, loadIntSize);
|
emit(REX_XOR_RAX_R64);
|
||||||
codePos += loadIntSize;
|
|
||||||
emit(REX_XOR_EAX);
|
|
||||||
emitByte(0xc0 + readReg2);
|
emitByte(0xc0 + readReg2);
|
||||||
memcpy(code + codePos, codeLoadFlt, loadFltSize);
|
memcpy(code + codePos, codeLoopLoad, loopLoadSize);
|
||||||
codePos += loadFltSize;
|
codePos += loopLoadSize;
|
||||||
Instruction instr;
|
Instruction instr;
|
||||||
for (unsigned i = 0; i < ProgramLength; ++i) {
|
for (unsigned i = 0; i < ProgramLength; ++i) {
|
||||||
for (unsigned j = 0; j < sizeof(instr) / sizeof(Pcg32::result_type); ++j) {
|
for (unsigned j = 0; j < sizeof(instr) / sizeof(Pcg32::result_type); ++j) {
|
||||||
|
@ -226,19 +221,13 @@ namespace RandomX {
|
||||||
generateCode(instr);
|
generateCode(instr);
|
||||||
}
|
}
|
||||||
emit(REX_MOV_RR);
|
emit(REX_MOV_RR);
|
||||||
emitByte(0xc0 + readReg1);
|
emitByte(0xc0 + readReg3);
|
||||||
emit(REX_XOR_EAX);
|
emit(REX_XOR_EAX);
|
||||||
emitByte(0xc0 + readReg2);
|
emitByte(0xc0 + readReg4);
|
||||||
memcpy(code + codePos, codeReadDataset, readDatasetSize);
|
memcpy(code + codePos, codeReadDataset, readDatasetSize);
|
||||||
codePos += readDatasetSize;
|
codePos += readDatasetSize;
|
||||||
emit(REX_MOV_RR);
|
memcpy(code + codePos, codeLoopStore, loopStoreSize);
|
||||||
emitByte(0xc0 + writeReg1);
|
codePos += loopStoreSize;
|
||||||
memcpy(code + codePos, codeStoreInt, storeIntSize);
|
|
||||||
codePos += storeIntSize;
|
|
||||||
emit(REX_XOR_EAX);
|
|
||||||
emitByte(0xc0 + writeReg2);
|
|
||||||
memcpy(code + codePos, codeStoreFlt, storeFltSize);
|
|
||||||
codePos += storeFltSize;
|
|
||||||
emit(SUB_EBX);
|
emit(SUB_EBX);
|
||||||
emit(JNZ);
|
emit(JNZ);
|
||||||
emit32(prologueSize - codePos - 4);
|
emit32(prologueSize - codePos - 4);
|
||||||
|
|
|
@ -1,10 +0,0 @@
|
||||||
and eax, 1048512
|
|
||||||
lea rcx, [rsi+rax]
|
|
||||||
xor r8, qword ptr [rcx+0]
|
|
||||||
xor r9, qword ptr [rcx+8]
|
|
||||||
xor r10, qword ptr [rcx+16]
|
|
||||||
xor r11, qword ptr [rcx+24]
|
|
||||||
xor r12, qword ptr [rcx+32]
|
|
||||||
xor r13, qword ptr [rcx+40]
|
|
||||||
xor r14, qword ptr [rcx+48]
|
|
||||||
xor r15, qword ptr [rcx+56]
|
|
|
@ -1,5 +1,19 @@
|
||||||
|
mov rdx, rax
|
||||||
and eax, 1048512
|
and eax, 1048512
|
||||||
lea rcx, [rsi+rax]
|
lea rcx, [rsi+rax]
|
||||||
|
push rcx
|
||||||
|
xor r8, qword ptr [rcx+0]
|
||||||
|
xor r9, qword ptr [rcx+8]
|
||||||
|
xor r10, qword ptr [rcx+16]
|
||||||
|
xor r11, qword ptr [rcx+24]
|
||||||
|
xor r12, qword ptr [rcx+32]
|
||||||
|
xor r13, qword ptr [rcx+40]
|
||||||
|
xor r14, qword ptr [rcx+48]
|
||||||
|
xor r15, qword ptr [rcx+56]
|
||||||
|
ror rdx, 32
|
||||||
|
and edx, 1048512
|
||||||
|
lea rcx, [rsi+rdx]
|
||||||
|
push rcx
|
||||||
cvtdq2pd xmm0, qword ptr [rcx+0]
|
cvtdq2pd xmm0, qword ptr [rcx+0]
|
||||||
cvtdq2pd xmm1, qword ptr [rcx+8]
|
cvtdq2pd xmm1, qword ptr [rcx+8]
|
||||||
cvtdq2pd xmm2, qword ptr [rcx+16]
|
cvtdq2pd xmm2, qword ptr [rcx+16]
|
18
src/asm/program_loop_store.inc
Normal file
18
src/asm/program_loop_store.inc
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
pop rcx
|
||||||
|
mov qword ptr [rcx+0], r8
|
||||||
|
mov qword ptr [rcx+8], r9
|
||||||
|
mov qword ptr [rcx+16], r10
|
||||||
|
mov qword ptr [rcx+24], r11
|
||||||
|
mov qword ptr [rcx+32], r12
|
||||||
|
mov qword ptr [rcx+40], r13
|
||||||
|
mov qword ptr [rcx+48], r14
|
||||||
|
mov qword ptr [rcx+56], r15
|
||||||
|
pop rcx
|
||||||
|
mulpd xmm0, xmm4
|
||||||
|
mulpd xmm1, xmm5
|
||||||
|
mulpd xmm2, xmm6
|
||||||
|
mulpd xmm3, xmm7
|
||||||
|
movapd xmmword ptr [rcx+0], xmm0
|
||||||
|
movapd xmmword ptr [rcx+16], xmm1
|
||||||
|
movapd xmmword ptr [rcx+32], xmm2
|
||||||
|
movapd xmmword ptr [rcx+48], xmm3
|
|
@ -11,10 +11,9 @@
|
||||||
push rdi ;# RegisterFile& registerFile
|
push rdi ;# RegisterFile& registerFile
|
||||||
mov rcx, rdi
|
mov rcx, rdi
|
||||||
mov rbp, qword ptr [rsi] ;# "mx", "ma"
|
mov rbp, qword ptr [rsi] ;# "mx", "ma"
|
||||||
mov eax, ebp ;# "mx"
|
|
||||||
mov rdi, qword ptr [rsi+8] ;# uint8_t* dataset
|
mov rdi, qword ptr [rsi+8] ;# uint8_t* dataset
|
||||||
mov rsi, rdx ;# convertible_t* scratchpad
|
mov rsi, rdx ;# convertible_t* scratchpad
|
||||||
|
|
||||||
#include "program_prologue_load.inc"
|
#include "program_prologue_load.inc"
|
||||||
|
|
||||||
jmp DECL(randomx_loop_begin)
|
jmp DECL(randomx_program_loop_begin)
|
|
@ -1,3 +1,5 @@
|
||||||
|
mov rax, rbp
|
||||||
|
|
||||||
;# zero integer registers
|
;# zero integer registers
|
||||||
xor r8, r8
|
xor r8, r8
|
||||||
xor r9, r9
|
xor r9, r9
|
||||||
|
|
|
@ -23,11 +23,10 @@
|
||||||
; function arguments
|
; function arguments
|
||||||
push rcx ; RegisterFile& registerFile
|
push rcx ; RegisterFile& registerFile
|
||||||
mov rbp, qword ptr [rdx] ; "mx", "ma"
|
mov rbp, qword ptr [rdx] ; "mx", "ma"
|
||||||
mov eax, ebp ; "mx"
|
|
||||||
mov rdi, qword ptr [rdx+8] ; uint8_t* dataset
|
mov rdi, qword ptr [rdx+8] ; uint8_t* dataset
|
||||||
mov rsi, r8 ; convertible_t* scratchpad
|
mov rsi, r8 ; convertible_t* scratchpad
|
||||||
mov rbx, r9 ; loop counter
|
mov rbx, r9 ; loop counter
|
||||||
|
|
||||||
include program_prologue_load.inc
|
include program_prologue_load.inc
|
||||||
|
|
||||||
jmp randomx_loop_begin
|
jmp randomx_program_loop_begin
|
|
@ -1,4 +1,5 @@
|
||||||
xor rbp, rax ;# modify "mx"
|
xor rbp, rax ;# modify "mx"
|
||||||
|
xor eax, eax
|
||||||
and rbp, -64 ;# align "mx" to the start of a cache line
|
and rbp, -64 ;# align "mx" to the start of a cache line
|
||||||
mov edx, ebp ;# edx = mx
|
mov edx, ebp ;# edx = mx
|
||||||
prefetchnta byte ptr [rdi+rdx]
|
prefetchnta byte ptr [rdi+rdx]
|
||||||
|
|
|
@ -1,11 +0,0 @@
|
||||||
and eax, 1048512
|
|
||||||
lea rcx, [rsi+rax]
|
|
||||||
mulpd xmm0, xmm4
|
|
||||||
mulpd xmm1, xmm5
|
|
||||||
mulpd xmm2, xmm6
|
|
||||||
mulpd xmm3, xmm7
|
|
||||||
movapd xmmword ptr [rcx+0], xmm0
|
|
||||||
movapd xmmword ptr [rcx+16], xmm1
|
|
||||||
movapd xmmword ptr [rcx+32], xmm2
|
|
||||||
movapd xmmword ptr [rcx+48], xmm3
|
|
||||||
|
|
|
@ -1,10 +0,0 @@
|
||||||
and eax, 1048512
|
|
||||||
lea rcx, [rsi+rax]
|
|
||||||
mov qword ptr [rcx+0], r8
|
|
||||||
mov qword ptr [rcx+8], r9
|
|
||||||
mov qword ptr [rcx+16], r10
|
|
||||||
mov qword ptr [rcx+24], r11
|
|
||||||
mov qword ptr [rcx+32], r12
|
|
||||||
mov qword ptr [rcx+40], r13
|
|
||||||
mov qword ptr [rcx+48], r14
|
|
||||||
mov qword ptr [rcx+56], r15
|
|
|
@ -118,8 +118,11 @@ signMask:
|
||||||
|
|
||||||
ALIGN 64
|
ALIGN 64
|
||||||
program_begin:
|
program_begin:
|
||||||
xor eax, r8d ;# read address register 1
|
xor rax, r8 ;# read address register 1
|
||||||
|
xor rax, r9
|
||||||
|
mov rdx, rax
|
||||||
and eax, 1048512
|
and eax, 1048512
|
||||||
|
push rax
|
||||||
lea rcx, [rsi+rax]
|
lea rcx, [rsi+rax]
|
||||||
xor r8, qword ptr [rcx+0]
|
xor r8, qword ptr [rcx+0]
|
||||||
xor r9, qword ptr [rcx+8]
|
xor r9, qword ptr [rcx+8]
|
||||||
|
@ -129,9 +132,10 @@ program_begin:
|
||||||
xor r13, qword ptr [rcx+40]
|
xor r13, qword ptr [rcx+40]
|
||||||
xor r14, qword ptr [rcx+48]
|
xor r14, qword ptr [rcx+48]
|
||||||
xor r15, qword ptr [rcx+56]
|
xor r15, qword ptr [rcx+56]
|
||||||
xor eax, r9d ;# read address register 2
|
ror rdx, 32
|
||||||
and eax, 1048512
|
and edx, 1048512
|
||||||
lea rcx, [rsi+rax]
|
push rdx
|
||||||
|
lea rcx, [rsi+rdx]
|
||||||
cvtdq2pd xmm0, qword ptr [rcx+0]
|
cvtdq2pd xmm0, qword ptr [rcx+0]
|
||||||
cvtdq2pd xmm1, qword ptr [rcx+8]
|
cvtdq2pd xmm1, qword ptr [rcx+8]
|
||||||
cvtdq2pd xmm2, qword ptr [rcx+16]
|
cvtdq2pd xmm2, qword ptr [rcx+16]
|
||||||
|
@ -165,8 +169,7 @@ program_begin:
|
||||||
xor r13, qword ptr [rcx+40]
|
xor r13, qword ptr [rcx+40]
|
||||||
xor r14, qword ptr [rcx+48]
|
xor r14, qword ptr [rcx+48]
|
||||||
xor r15, qword ptr [rcx+56]
|
xor r15, qword ptr [rcx+56]
|
||||||
mov eax, r12d ;# write address register 1
|
pop rax
|
||||||
and eax, 1048512
|
|
||||||
lea rcx, [rsi+rax]
|
lea rcx, [rsi+rax]
|
||||||
mov qword ptr [rcx+0], r8
|
mov qword ptr [rcx+0], r8
|
||||||
mov qword ptr [rcx+8], r9
|
mov qword ptr [rcx+8], r9
|
||||||
|
@ -176,8 +179,7 @@ program_begin:
|
||||||
mov qword ptr [rcx+40], r13
|
mov qword ptr [rcx+40], r13
|
||||||
mov qword ptr [rcx+48], r14
|
mov qword ptr [rcx+48], r14
|
||||||
mov qword ptr [rcx+56], r15
|
mov qword ptr [rcx+56], r15
|
||||||
xor eax, r13d ;# write address register 2
|
pop rax
|
||||||
and eax, 1048512
|
|
||||||
lea rcx, [rsi+rax]
|
lea rcx, [rsi+rax]
|
||||||
mulpd xmm0, xmm4
|
mulpd xmm0, xmm4
|
||||||
mulpd xmm1, xmm5
|
mulpd xmm1, xmm5
|
||||||
|
@ -187,6 +189,7 @@ program_begin:
|
||||||
movapd xmmword ptr [rcx+16], xmm1
|
movapd xmmword ptr [rcx+16], xmm1
|
||||||
movapd xmmword ptr [rcx+32], xmm2
|
movapd xmmword ptr [rcx+32], xmm2
|
||||||
movapd xmmword ptr [rcx+48], xmm3
|
movapd xmmword ptr [rcx+48], xmm3
|
||||||
|
xor eax, eax
|
||||||
dec ebx
|
dec ebx
|
||||||
jnz program_begin
|
jnz program_begin
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue