mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2024-08-15 00:23:14 +00:00
NOP instruction
register load/store from L3
This commit is contained in:
parent
005c67f64c
commit
8f2abd6c05
15 changed files with 233 additions and 624 deletions
|
@ -491,6 +491,10 @@ namespace RandomX {
|
||||||
asmCode << "\tmovapd xmmword ptr [rsi+rax], " << regFE[instr.src] << std::endl;
|
asmCode << "\tmovapd xmmword ptr [rsi+rax], " << regFE[instr.src] << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void AssemblyGeneratorX86::h_NOP(Instruction& instr, int i) {
|
||||||
|
asmCode << "\tnop" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
#include "instructionWeights.hpp"
|
#include "instructionWeights.hpp"
|
||||||
#define INST_HANDLE(x) REPN(&AssemblyGeneratorX86::h_##x, WT(x))
|
#define INST_HANDLE(x) REPN(&AssemblyGeneratorX86::h_##x, WT(x))
|
||||||
|
|
||||||
|
@ -540,5 +544,7 @@ namespace RandomX {
|
||||||
|
|
||||||
INST_HANDLE(ISTORE)
|
INST_HANDLE(ISTORE)
|
||||||
INST_HANDLE(FSTORE)
|
INST_HANDLE(FSTORE)
|
||||||
|
|
||||||
|
INST_HANDLE(NOP)
|
||||||
};
|
};
|
||||||
}
|
}
|
|
@ -79,5 +79,6 @@ namespace RandomX {
|
||||||
void h_CFROUND(Instruction&, int);
|
void h_CFROUND(Instruction&, int);
|
||||||
void h_ISTORE(Instruction&, int);
|
void h_ISTORE(Instruction&, int);
|
||||||
void h_FSTORE(Instruction&, int);
|
void h_FSTORE(Instruction&, int);
|
||||||
|
void h_NOP(Instruction&, int);
|
||||||
};
|
};
|
||||||
}
|
}
|
|
@ -327,6 +327,10 @@ namespace RandomX {
|
||||||
os << ", " << reg << srcIndex << std::endl;
|
os << ", " << reg << srcIndex << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Instruction::h_NOP(std::ostream& os) const {
|
||||||
|
os << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
#include "instructionWeights.hpp"
|
#include "instructionWeights.hpp"
|
||||||
#define INST_NAME(x) REPN(#x, WT(x))
|
#define INST_NAME(x) REPN(#x, WT(x))
|
||||||
#define INST_HANDLE(x) REPN(&Instruction::h_##x, WT(x))
|
#define INST_HANDLE(x) REPN(&Instruction::h_##x, WT(x))
|
||||||
|
@ -377,6 +381,8 @@ namespace RandomX {
|
||||||
|
|
||||||
INST_NAME(ISTORE)
|
INST_NAME(ISTORE)
|
||||||
INST_NAME(FSTORE)
|
INST_NAME(FSTORE)
|
||||||
|
|
||||||
|
INST_NAME(NOP)
|
||||||
};
|
};
|
||||||
|
|
||||||
InstructionVisualizer Instruction::engine[256] = {
|
InstructionVisualizer Instruction::engine[256] = {
|
||||||
|
@ -425,6 +431,8 @@ namespace RandomX {
|
||||||
|
|
||||||
INST_HANDLE(ISTORE)
|
INST_HANDLE(ISTORE)
|
||||||
INST_HANDLE(FSTORE)
|
INST_HANDLE(FSTORE)
|
||||||
|
|
||||||
|
INST_HANDLE(NOP)
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
|
@ -86,6 +86,7 @@ namespace RandomX {
|
||||||
void h_CFROUND(std::ostream&) const;
|
void h_CFROUND(std::ostream&) const;
|
||||||
void h_ISTORE(std::ostream&) const;
|
void h_ISTORE(std::ostream&) const;
|
||||||
void h_FSTORE(std::ostream&) const;
|
void h_FSTORE(std::ostream&) const;
|
||||||
|
void h_NOP(std::ostream&) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
static_assert(sizeof(Instruction) == 8, "Invalid alignment of struct Instruction");
|
static_assert(sizeof(Instruction) == 8, "Invalid alignment of struct Instruction");
|
||||||
|
|
|
@ -181,7 +181,7 @@ namespace RandomX {
|
||||||
static const uint8_t JMP = 0xe9;
|
static const uint8_t JMP = 0xe9;
|
||||||
|
|
||||||
size_t JitCompilerX86::getCodeSize() {
|
size_t JitCompilerX86::getCodeSize() {
|
||||||
return codePos - prologueSize + readDatasetSize;
|
return codePos - prologueSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
JitCompilerX86::JitCompilerX86() {
|
JitCompilerX86::JitCompilerX86() {
|
||||||
|
@ -761,6 +761,10 @@ namespace RandomX {
|
||||||
emitByte(0x06);
|
emitByte(0x06);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void JitCompilerX86::h_NOP(Instruction& instr) {
|
||||||
|
emitByte(0x90);
|
||||||
|
}
|
||||||
|
|
||||||
#include "instructionWeights.hpp"
|
#include "instructionWeights.hpp"
|
||||||
#define INST_HANDLE(x) REPN(&JitCompilerX86::h_##x, WT(x))
|
#define INST_HANDLE(x) REPN(&JitCompilerX86::h_##x, WT(x))
|
||||||
|
|
||||||
|
@ -800,6 +804,7 @@ namespace RandomX {
|
||||||
INST_HANDLE(CFROUND)
|
INST_HANDLE(CFROUND)
|
||||||
INST_HANDLE(ISTORE)
|
INST_HANDLE(ISTORE)
|
||||||
INST_HANDLE(FSTORE)
|
INST_HANDLE(FSTORE)
|
||||||
|
INST_HANDLE(NOP)
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -125,6 +125,7 @@ namespace RandomX {
|
||||||
void h_CFROUND(Instruction&);
|
void h_CFROUND(Instruction&);
|
||||||
void h_ISTORE(Instruction&);
|
void h_ISTORE(Instruction&);
|
||||||
void h_FSTORE(Instruction&);
|
void h_FSTORE(Instruction&);
|
||||||
|
void h_NOP(Instruction&);
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
|
@ -1,4 +1,4 @@
|
||||||
and eax, 262080
|
and eax, 1048512
|
||||||
lea rcx, [rsi+rax]
|
lea rcx, [rsi+rax]
|
||||||
cvtdq2pd xmm0, qword ptr [rcx+0]
|
cvtdq2pd xmm0, qword ptr [rcx+0]
|
||||||
cvtdq2pd xmm1, qword ptr [rcx+8]
|
cvtdq2pd xmm1, qword ptr [rcx+8]
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
and eax, 262080
|
and eax, 1048512
|
||||||
lea rcx, [rsi+rax]
|
lea rcx, [rsi+rax]
|
||||||
xor r8, qword ptr [rcx+0]
|
xor r8, qword ptr [rcx+0]
|
||||||
xor r9, qword ptr [rcx+8]
|
xor r9, qword ptr [rcx+8]
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
and eax, 262080
|
and eax, 1048512
|
||||||
lea rcx, [rsi+rax]
|
lea rcx, [rsi+rax]
|
||||||
mulpd xmm0, xmm4
|
mulpd xmm0, xmm4
|
||||||
mulpd xmm1, xmm5
|
mulpd xmm1, xmm5
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
and eax, 262080
|
and eax, 1048512
|
||||||
lea rcx, [rsi+rax]
|
lea rcx, [rsi+rax]
|
||||||
mov qword ptr [rcx+0], r8
|
mov qword ptr [rcx+0], r8
|
||||||
mov qword ptr [rcx+8], r9
|
mov qword ptr [rcx+8], r9
|
||||||
|
|
|
@ -72,7 +72,7 @@ namespace RandomX {
|
||||||
convertible_t hi;
|
convertible_t hi;
|
||||||
};
|
};
|
||||||
|
|
||||||
constexpr int ProgramLength = 256;
|
constexpr int ProgramLength = 128;
|
||||||
constexpr uint32_t InstructionCount = 1024;
|
constexpr uint32_t InstructionCount = 1024;
|
||||||
constexpr uint32_t ScratchpadSize = 1024 * 1024;
|
constexpr uint32_t ScratchpadSize = 1024 * 1024;
|
||||||
constexpr uint32_t ScratchpadLength = ScratchpadSize / sizeof(convertible_t);
|
constexpr uint32_t ScratchpadLength = ScratchpadSize / sizeof(convertible_t);
|
||||||
|
|
|
@ -119,7 +119,7 @@ signMask:
|
||||||
ALIGN 64
|
ALIGN 64
|
||||||
program_begin:
|
program_begin:
|
||||||
xor eax, r8d ;# read address register 1
|
xor eax, r8d ;# read address register 1
|
||||||
and eax, 262080
|
and eax, 1048512
|
||||||
lea rcx, [rsi+rax]
|
lea rcx, [rsi+rax]
|
||||||
xor r8, qword ptr [rcx+0]
|
xor r8, qword ptr [rcx+0]
|
||||||
xor r9, qword ptr [rcx+8]
|
xor r9, qword ptr [rcx+8]
|
||||||
|
@ -130,7 +130,7 @@ program_begin:
|
||||||
xor r14, qword ptr [rcx+48]
|
xor r14, qword ptr [rcx+48]
|
||||||
xor r15, qword ptr [rcx+56]
|
xor r15, qword ptr [rcx+56]
|
||||||
xor eax, r9d ;# read address register 2
|
xor eax, r9d ;# read address register 2
|
||||||
and eax, 262080
|
and eax, 1048512
|
||||||
lea rcx, [rsi+rax]
|
lea rcx, [rsi+rax]
|
||||||
cvtdq2pd xmm0, qword ptr [rcx+0]
|
cvtdq2pd xmm0, qword ptr [rcx+0]
|
||||||
cvtdq2pd xmm1, qword ptr [rcx+8]
|
cvtdq2pd xmm1, qword ptr [rcx+8]
|
||||||
|
@ -166,7 +166,7 @@ program_begin:
|
||||||
xor r14, qword ptr [rcx+48]
|
xor r14, qword ptr [rcx+48]
|
||||||
xor r15, qword ptr [rcx+56]
|
xor r15, qword ptr [rcx+56]
|
||||||
mov eax, r12d ;# write address register 1
|
mov eax, r12d ;# write address register 1
|
||||||
and eax, 262080
|
and eax, 1048512
|
||||||
lea rcx, [rsi+rax]
|
lea rcx, [rsi+rax]
|
||||||
mov qword ptr [rcx+0], r8
|
mov qword ptr [rcx+0], r8
|
||||||
mov qword ptr [rcx+8], r9
|
mov qword ptr [rcx+8], r9
|
||||||
|
@ -177,7 +177,7 @@ program_begin:
|
||||||
mov qword ptr [rcx+48], r14
|
mov qword ptr [rcx+48], r14
|
||||||
mov qword ptr [rcx+56], r15
|
mov qword ptr [rcx+56], r15
|
||||||
xor eax, r13d ;# write address register 2
|
xor eax, r13d ;# write address register 2
|
||||||
and eax, 262080
|
and eax, 1048512
|
||||||
lea rcx, [rsi+rax]
|
lea rcx, [rsi+rax]
|
||||||
mulpd xmm0, xmm4
|
mulpd xmm0, xmm4
|
||||||
mulpd xmm1, xmm5
|
mulpd xmm1, xmm5
|
||||||
|
|
|
@ -20,51 +20,51 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
//Integer
|
//Integer
|
||||||
#define WT_IADD_R 10
|
#define WT_IADD_R 12
|
||||||
#define WT_IADD_M 3
|
#define WT_IADD_M 3
|
||||||
#define WT_IADD_RC 10
|
#define WT_IADD_RC 12
|
||||||
#define WT_ISUB_R 10
|
#define WT_ISUB_R 12
|
||||||
#define WT_ISUB_M 3
|
#define WT_ISUB_M 3
|
||||||
#define WT_IMUL_9C 10
|
#define WT_IMUL_9C 10
|
||||||
#define WT_IMUL_R 20
|
#define WT_IMUL_R 16
|
||||||
#define WT_IMUL_M 6
|
#define WT_IMUL_M 4
|
||||||
#define WT_IMULH_R 6
|
#define WT_IMULH_R 4
|
||||||
#define WT_IMULH_M 2
|
#define WT_IMULH_M 1
|
||||||
#define WT_ISMULH_R 6
|
#define WT_ISMULH_R 4
|
||||||
#define WT_ISMULH_M 2
|
#define WT_ISMULH_M 1
|
||||||
#define WT_IDIV_C 4
|
#define WT_IDIV_C 4
|
||||||
#define WT_ISDIV_C 4
|
#define WT_ISDIV_C 4
|
||||||
#define WT_INEG_R 2
|
#define WT_INEG_R 2
|
||||||
#define WT_IXOR_R 12
|
#define WT_IXOR_R 12
|
||||||
#define WT_IXOR_M 4
|
#define WT_IXOR_M 3
|
||||||
#define WT_IROR_R 10
|
#define WT_IROR_R 12
|
||||||
#define WT_IROL_R 10
|
#define WT_IROL_R 12
|
||||||
|
|
||||||
//Common floating point
|
//Common floating point
|
||||||
#define WT_FPSWAP_R 6
|
#define WT_FPSWAP_R 8
|
||||||
|
|
||||||
//Floating point group F
|
//Floating point group F
|
||||||
#define WT_FPADD_R 18
|
#define WT_FPADD_R 20
|
||||||
#define WT_FPADD_M 3
|
#define WT_FPADD_M 5
|
||||||
#define WT_FPSUB_R 18
|
#define WT_FPSUB_R 20
|
||||||
#define WT_FPSUB_M 3
|
#define WT_FPSUB_M 5
|
||||||
#define WT_FPNEG_R 5
|
#define WT_FPNEG_R 6
|
||||||
|
|
||||||
//Floating point group E
|
//Floating point group E
|
||||||
#define WT_FPMUL_R 18
|
#define WT_FPMUL_R 16
|
||||||
#define WT_FPMUL_M 3
|
#define WT_FPMUL_M 4
|
||||||
#define WT_FPDIV_R 6
|
#define WT_FPDIV_R 7
|
||||||
#define WT_FPDIV_M 1
|
#define WT_FPDIV_M 1
|
||||||
#define WT_FPSQRT_R 6
|
#define WT_FPSQRT_R 6
|
||||||
|
|
||||||
//Control
|
//Control
|
||||||
#define WT_COND_R 12
|
#define WT_COND_R 7
|
||||||
#define WT_COND_M 4
|
#define WT_COND_M 1
|
||||||
#define WT_CFROUND 1
|
#define WT_CFROUND 1
|
||||||
|
|
||||||
//Store
|
//Store
|
||||||
#define WT_ISTORE 12
|
#define WT_ISTORE 18
|
||||||
#define WT_FSTORE 6
|
#define WT_FSTORE 0
|
||||||
|
|
||||||
#define WT_NOP 0
|
#define WT_NOP 0
|
||||||
|
|
||||||
|
@ -115,6 +115,7 @@ static_assert(wtSum == 256,
|
||||||
#define REP33(x) REP32(x) x,
|
#define REP33(x) REP32(x) x,
|
||||||
#define REP40(x) REP32(x) REP8(x)
|
#define REP40(x) REP32(x) REP8(x)
|
||||||
#define REP128(x) REP32(x) REP32(x) REP32(x) REP32(x)
|
#define REP128(x) REP32(x) REP32(x) REP32(x) REP32(x)
|
||||||
|
#define REP232(x) REP128(x) REP40(x) REP40(x) REP24(x)
|
||||||
#define REP256(x) REP128(x) REP128(x)
|
#define REP256(x) REP128(x) REP128(x)
|
||||||
#define REPNX(x,N) REP##N(x)
|
#define REPNX(x,N) REP##N(x)
|
||||||
#define REPN(x,N) REPNX(x,N)
|
#define REPN(x,N) REPNX(x,N)
|
||||||
|
|
|
@ -169,12 +169,10 @@ void mine(RandomX::VirtualMachine* vm, std::atomic<int>& atomicNonce, AtomicHash
|
||||||
blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0);
|
blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0);
|
||||||
int spIndex = ((uint8_t*)hash)[24] | ((((uint8_t*)hash)[25] & 15) << 8);
|
int spIndex = ((uint8_t*)hash)[24] | ((((uint8_t*)hash)[25] & 15) << 8);
|
||||||
vm->initializeScratchpad(scratchpad, spIndex);
|
vm->initializeScratchpad(scratchpad, spIndex);
|
||||||
//vm->initializeProgram(hash);
|
vm->setScratchpad(scratchpad);
|
||||||
//dump((char*)((RandomX::CompiledVirtualMachine*)vm)->getProgram(), RandomX::CodeSize, "code-1337-jmp.txt");
|
//dump((char*)((RandomX::CompiledVirtualMachine*)vm)->getProgram(), RandomX::CodeSize, "code-1337-jmp.txt");
|
||||||
for (int chain = 0; chain < 16; ++chain) {
|
for (int chain = 0; chain < 16; ++chain) {
|
||||||
vm->initializeProgram(hash);
|
vm->initializeProgram(hash);
|
||||||
int segment = hash[3] & 3;
|
|
||||||
vm->setScratchpad(scratchpad + segment * RandomX::ScratchpadSize / 4);
|
|
||||||
vm->execute();
|
vm->execute();
|
||||||
vm->getResult(nullptr, 0, hash);
|
vm->getResult(nullptr, 0, hash);
|
||||||
}
|
}
|
||||||
|
|
760
src/program.inc
760
src/program.inc
|
@ -10,54 +10,54 @@
|
||||||
mulpd xmm6, xmm10
|
mulpd xmm6, xmm10
|
||||||
; IMUL_R r6, r3
|
; IMUL_R r6, r3
|
||||||
imul r14, r11
|
imul r14, r11
|
||||||
; FPMUL_R e1, a0
|
; FPSUB_M f1, L1[r4]
|
||||||
mulpd xmm5, xmm8
|
mov eax, r12d
|
||||||
; IROR_R r5, r3
|
and eax, 16376
|
||||||
|
cvtdq2pd xmm12, qword ptr [rsi+rax]
|
||||||
|
subpd xmm1, xmm12
|
||||||
|
; IROL_R r5, r3
|
||||||
mov ecx, r11d
|
mov ecx, r11d
|
||||||
ror r13, cl
|
rol r13, cl
|
||||||
; FPMUL_R e2, a0
|
; FPMUL_R e2, a0
|
||||||
mulpd xmm6, xmm8
|
mulpd xmm6, xmm8
|
||||||
; FPNEG_R f3
|
; FPSUB_R f3, a0
|
||||||
xorps xmm3, xmm15
|
subpd xmm3, xmm8
|
||||||
; IXOR_R r0, r4
|
; IXOR_R r0, r4
|
||||||
xor r8, r12
|
xor r8, r12
|
||||||
; ISMULH_R r3, r7
|
; ISMULH_M r3, L1[r7]
|
||||||
|
mov ecx, r15d
|
||||||
|
and ecx, 16376
|
||||||
mov rax, r11
|
mov rax, r11
|
||||||
imul r15
|
imul qword ptr [rsi+rcx]
|
||||||
mov r11, rdx
|
mov r11, rdx
|
||||||
; FPSWAP_R f2
|
; FPSWAP_R f2
|
||||||
shufpd xmm2, xmm2, 1
|
shufpd xmm2, xmm2, 1
|
||||||
; ISMULH_R r6, r0
|
; IDIV_C r6, 1248528248
|
||||||
mov rax, r14
|
mov rax, 15864311168205210203
|
||||||
imul r8
|
mul r14
|
||||||
mov r14, rdx
|
shr rdx, 30
|
||||||
|
add r14, rdx
|
||||||
; FPMUL_R e0, a2
|
; FPMUL_R e0, a2
|
||||||
mulpd xmm4, xmm10
|
mulpd xmm4, xmm10
|
||||||
; ISUB_R r3, r4
|
; IADD_RC r3, r4, -52260428
|
||||||
sub r11, r12
|
lea r11, [r11+r12-52260428]
|
||||||
; IADD_R r7, -1138617760
|
; IADD_R r7, -1138617760
|
||||||
add r15, -1138617760
|
add r15, -1138617760
|
||||||
; IROR_R r2, r6
|
; IROL_R r2, r6
|
||||||
mov ecx, r14d
|
mov ecx, r14d
|
||||||
ror r10, cl
|
rol r10, cl
|
||||||
; FPMUL_R e2, a1
|
; FPNEG_R f2
|
||||||
mulpd xmm6, xmm9
|
xorps xmm2, xmm15
|
||||||
; IROR_R r7, r1
|
; IROR_R r7, r1
|
||||||
mov ecx, r9d
|
mov ecx, r9d
|
||||||
ror r15, cl
|
ror r15, cl
|
||||||
; COND_M r2, lt(L1[r7], -41618808)
|
; COND_R r2, lt(r7, -41618808)
|
||||||
xor ecx, ecx
|
xor ecx, ecx
|
||||||
mov eax, r15d
|
cmp r15d, -41618808
|
||||||
and eax, 16376
|
|
||||||
cmp dword ptr [rsi+rax], -41618808
|
|
||||||
setl cl
|
setl cl
|
||||||
add r10, rcx
|
add r10, rcx
|
||||||
; FPMUL_M e3, L1[r0]
|
; FPMUL_R e3, a0
|
||||||
mov eax, r8d
|
mulpd xmm7, xmm8
|
||||||
and eax, 16376
|
|
||||||
cvtdq2pd xmm12, qword ptr [rsi+rax]
|
|
||||||
mulpd xmm7, xmm12
|
|
||||||
maxpd xmm7, xmm13
|
|
||||||
; CFROUND r1, 43
|
; CFROUND r1, 43
|
||||||
mov rax, r9
|
mov rax, r9
|
||||||
rol rax, 34
|
rol rax, 34
|
||||||
|
@ -67,14 +67,17 @@
|
||||||
ldmxcsr dword ptr [rsp-8]
|
ldmxcsr dword ptr [rsp-8]
|
||||||
; FPADD_R f2, a1
|
; FPADD_R f2, a1
|
||||||
addpd xmm2, xmm9
|
addpd xmm2, xmm9
|
||||||
; FPNEG_R f0
|
; FPSUB_M f0, L1[r7]
|
||||||
xorps xmm0, xmm15
|
mov eax, r15d
|
||||||
; FSTORE L1[r6], f2
|
and eax, 16376
|
||||||
|
cvtdq2pd xmm12, qword ptr [rsi+rax]
|
||||||
|
subpd xmm0, xmm12
|
||||||
|
; ISTORE L1[r6], r2
|
||||||
mov eax, r14d
|
mov eax, r14d
|
||||||
and eax, 16368
|
and eax, 16376
|
||||||
movapd xmmword ptr [rsi+rax], xmm2
|
mov qword ptr [rsi+rax], r10
|
||||||
; IMUL_9C r6, -45112665
|
; ISUB_R r6, r5
|
||||||
lea r14, [r14+r14*8-45112665]
|
sub r14, r13
|
||||||
; IADD_M r0, L1[r4]
|
; IADD_M r0, L1[r4]
|
||||||
mov eax, r12d
|
mov eax, r12d
|
||||||
and eax, 16376
|
and eax, 16376
|
||||||
|
@ -87,41 +90,30 @@
|
||||||
mov eax, r14d
|
mov eax, r14d
|
||||||
and eax, 16376
|
and eax, 16376
|
||||||
mov qword ptr [rsi+rax], r14
|
mov qword ptr [rsi+rax], r14
|
||||||
; COND_R r4, sg(r1, -1189096105)
|
; FPSQRT_R e0
|
||||||
xor ecx, ecx
|
sqrtpd xmm4, xmm4
|
||||||
cmp r9d, -1189096105
|
|
||||||
sets cl
|
|
||||||
add r12, rcx
|
|
||||||
; IXOR_R r2, r5
|
; IXOR_R r2, r5
|
||||||
xor r10, r13
|
xor r10, r13
|
||||||
; COND_R r1, be(r5, -965180434)
|
; FPSQRT_R e1
|
||||||
xor ecx, ecx
|
sqrtpd xmm5, xmm5
|
||||||
cmp r13d, -965180434
|
; FPMUL_R e1, a3
|
||||||
setbe cl
|
mulpd xmm5, xmm11
|
||||||
add r9, rcx
|
|
||||||
; FPMUL_M e1, L2[r3]
|
|
||||||
mov eax, r11d
|
|
||||||
and eax, 262136
|
|
||||||
cvtdq2pd xmm12, qword ptr [rsi+rax]
|
|
||||||
mulpd xmm5, xmm12
|
|
||||||
maxpd xmm5, xmm13
|
|
||||||
; IMULH_R r7, r6
|
; IMULH_R r7, r6
|
||||||
mov rax, r15
|
mov rax, r15
|
||||||
mul r14
|
mul r14
|
||||||
mov r15, rdx
|
mov r15, rdx
|
||||||
; ISMULH_M r0, L1[r4]
|
; ISDIV_C r0, -1706892622
|
||||||
mov ecx, r12d
|
mov rax, -5802075764249827661
|
||||||
and ecx, 16376
|
imul r8
|
||||||
mov rax, r8
|
xor eax, eax
|
||||||
imul qword ptr [rsi+rcx]
|
sar rdx, 29
|
||||||
mov r8, rdx
|
sets al
|
||||||
|
add rdx, rax
|
||||||
|
add r8, rdx
|
||||||
; IMUL_R r5, r3
|
; IMUL_R r5, r3
|
||||||
imul r13, r11
|
imul r13, r11
|
||||||
; COND_R r2, of(r0, -1045938770)
|
; FPSQRT_R e2
|
||||||
xor ecx, ecx
|
sqrtpd xmm6, xmm6
|
||||||
cmp r8d, -1045938770
|
|
||||||
seto cl
|
|
||||||
add r10, rcx
|
|
||||||
; FPADD_M f3, L1[r4]
|
; FPADD_M f3, L1[r4]
|
||||||
mov eax, r12d
|
mov eax, r12d
|
||||||
and eax, 16376
|
and eax, 16376
|
||||||
|
@ -131,18 +123,19 @@
|
||||||
add r11, r10
|
add r11, r10
|
||||||
; FPADD_R f1, a0
|
; FPADD_R f1, a0
|
||||||
addpd xmm1, xmm8
|
addpd xmm1, xmm8
|
||||||
; FPSQRT_R e3
|
; FPDIV_R e3, a2
|
||||||
sqrtpd xmm7, xmm7
|
divpd xmm7, xmm10
|
||||||
|
maxpd xmm7, xmm13
|
||||||
; FPSUB_R f0, a1
|
; FPSUB_R f0, a1
|
||||||
subpd xmm0, xmm9
|
subpd xmm0, xmm9
|
||||||
; IMUL_M r5, L1[r6]
|
; IMUL_M r5, L1[r6]
|
||||||
mov eax, r14d
|
mov eax, r14d
|
||||||
and eax, 16376
|
and eax, 16376
|
||||||
imul r13, qword ptr [rsi+rax]
|
imul r13, qword ptr [rsi+rax]
|
||||||
; ISUB_R r1, r2
|
; IADD_RC r1, r2, -1263285243
|
||||||
sub r9, r10
|
lea r9, [r9+r10-1263285243]
|
||||||
; IMUL_R r4, r6
|
; IMUL_9C r4, 1994773931
|
||||||
imul r12, r14
|
lea r12, [r12+r12*8+1994773931]
|
||||||
; FPSWAP_R e3
|
; FPSWAP_R e3
|
||||||
shufpd xmm7, xmm7, 1
|
shufpd xmm7, xmm7, 1
|
||||||
; IMUL_M r0, L1[r7]
|
; IMUL_M r0, L1[r7]
|
||||||
|
@ -152,69 +145,72 @@
|
||||||
; IROR_R r1, r6
|
; IROR_R r1, r6
|
||||||
mov ecx, r14d
|
mov ecx, r14d
|
||||||
ror r9, cl
|
ror r9, cl
|
||||||
; IROR_R r2, r4
|
; IROL_R r2, r4
|
||||||
mov ecx, r12d
|
mov ecx, r12d
|
||||||
ror r10, cl
|
rol r10, cl
|
||||||
; FPSUB_R f3, a1
|
; FPSUB_R f3, a1
|
||||||
subpd xmm3, xmm9
|
subpd xmm3, xmm9
|
||||||
; FSTORE L1[r0], e1
|
; ISTORE L1[r0], r5
|
||||||
mov eax, r8d
|
mov eax, r8d
|
||||||
and eax, 16368
|
and eax, 16376
|
||||||
movapd xmmword ptr [rsi+rax], xmm5
|
mov qword ptr [rsi+rax], r13
|
||||||
; COND_R r2, sg(r3, 1269153133)
|
; FPDIV_M e2, L2[r3]
|
||||||
xor ecx, ecx
|
mov eax, r11d
|
||||||
cmp r11d, 1269153133
|
and eax, 262136
|
||||||
sets cl
|
cvtdq2pd xmm12, qword ptr [rsi+rax]
|
||||||
add r10, rcx
|
divpd xmm6, xmm12
|
||||||
|
maxpd xmm6, xmm13
|
||||||
; FPSWAP_R f2
|
; FPSWAP_R f2
|
||||||
shufpd xmm2, xmm2, 1
|
shufpd xmm2, xmm2, 1
|
||||||
; IADD_R r7, r5
|
; IADD_R r7, r5
|
||||||
add r15, r13
|
add r15, r13
|
||||||
; COND_R r0, be(r4, -1486502150)
|
; FPDIV_M e0, L1[r4]
|
||||||
xor ecx, ecx
|
mov eax, r12d
|
||||||
cmp r12d, -1486502150
|
and eax, 16376
|
||||||
setbe cl
|
cvtdq2pd xmm12, qword ptr [rsi+rax]
|
||||||
add r8, rcx
|
divpd xmm4, xmm12
|
||||||
; FPSUB_R f3, a1
|
maxpd xmm4, xmm13
|
||||||
subpd xmm3, xmm9
|
; FPADD_M f3, L1[r5]
|
||||||
|
mov eax, r13d
|
||||||
|
and eax, 16376
|
||||||
|
cvtdq2pd xmm12, qword ptr [rsi+rax]
|
||||||
|
addpd xmm3, xmm12
|
||||||
; FPADD_R f0, a3
|
; FPADD_R f0, a3
|
||||||
addpd xmm0, xmm11
|
addpd xmm0, xmm11
|
||||||
; IADD_R r2, r0
|
; IADD_R r2, r0
|
||||||
add r10, r8
|
add r10, r8
|
||||||
; FSTORE L1[r3], e2
|
; ISTORE L1[r3], r6
|
||||||
mov eax, r11d
|
mov eax, r11d
|
||||||
and eax, 16368
|
and eax, 16376
|
||||||
movapd xmmword ptr [rsi+rax], xmm6
|
mov qword ptr [rsi+rax], r14
|
||||||
; IXOR_R r1, r7
|
; IROR_R r1, r7
|
||||||
xor r9, r15
|
mov ecx, r15d
|
||||||
; IMUL_R r5, r7
|
ror r9, cl
|
||||||
imul r13, r15
|
; IMUL_9C r5, 301671287
|
||||||
|
lea r13, [r13+r13*8+301671287]
|
||||||
; IXOR_R r7, 266992378
|
; IXOR_R r7, 266992378
|
||||||
xor r15, 266992378
|
xor r15, 266992378
|
||||||
; COND_R r7, no(r4, 1983804692)
|
; FPSQRT_R e3
|
||||||
xor ecx, ecx
|
sqrtpd xmm7, xmm7
|
||||||
cmp r12d, 1983804692
|
|
||||||
setno cl
|
|
||||||
add r15, rcx
|
|
||||||
; IMUL_M r2, L2[r0]
|
; IMUL_M r2, L2[r0]
|
||||||
mov eax, r8d
|
mov eax, r8d
|
||||||
and eax, 262136
|
and eax, 262136
|
||||||
imul r10, qword ptr [rsi+rax]
|
imul r10, qword ptr [rsi+rax]
|
||||||
; FPDIV_R e3, a2
|
; FPMUL_R e3, a2
|
||||||
divpd xmm7, xmm10
|
mulpd xmm7, xmm10
|
||||||
maxpd xmm7, xmm13
|
; IMUL_R r0, r6
|
||||||
; IMUL_M r0, L2[r6]
|
imul r8, r14
|
||||||
mov eax, r14d
|
|
||||||
and eax, 262136
|
|
||||||
imul r8, qword ptr [rsi+rax]
|
|
||||||
; ISTORE L1[r0], r7
|
; ISTORE L1[r0], r7
|
||||||
mov eax, r8d
|
mov eax, r8d
|
||||||
and eax, 16376
|
and eax, 16376
|
||||||
mov qword ptr [rsi+rax], r15
|
mov qword ptr [rsi+rax], r15
|
||||||
; FPMUL_R e0, a1
|
; FPNEG_R f0
|
||||||
mulpd xmm4, xmm9
|
xorps xmm0, xmm15
|
||||||
; FPSUB_R f3, a1
|
; FPADD_M f3, L1[r5]
|
||||||
subpd xmm3, xmm9
|
mov eax, r13d
|
||||||
|
and eax, 16376
|
||||||
|
cvtdq2pd xmm12, qword ptr [rsi+rax]
|
||||||
|
addpd xmm3, xmm12
|
||||||
; IROR_R r5, r4
|
; IROR_R r5, r4
|
||||||
mov ecx, r12d
|
mov ecx, r12d
|
||||||
ror r13, cl
|
ror r13, cl
|
||||||
|
@ -222,17 +218,20 @@
|
||||||
mov eax, r15d
|
mov eax, r15d
|
||||||
and eax, 262136
|
and eax, 262136
|
||||||
mov qword ptr [rsi+rax], r10
|
mov qword ptr [rsi+rax], r10
|
||||||
; FPSWAP_R e2
|
; FPADD_R f2, a3
|
||||||
shufpd xmm6, xmm6, 1
|
addpd xmm2, xmm11
|
||||||
; FPADD_M f3, L1[r2]
|
; FPADD_M f3, L1[r2]
|
||||||
mov eax, r10d
|
mov eax, r10d
|
||||||
and eax, 16376
|
and eax, 16376
|
||||||
cvtdq2pd xmm12, qword ptr [rsi+rax]
|
cvtdq2pd xmm12, qword ptr [rsi+rax]
|
||||||
addpd xmm3, xmm12
|
addpd xmm3, xmm12
|
||||||
; IDIV_C r5, 2218798981
|
; ISDIV_C r5, -2076168315
|
||||||
mov rax, 17853839665672790751
|
mov rax, -4770095103914078469
|
||||||
mul r13
|
imul r13
|
||||||
shr rdx, 31
|
xor eax, eax
|
||||||
|
sar rdx, 29
|
||||||
|
sets al
|
||||||
|
add rdx, rax
|
||||||
add r13, rdx
|
add r13, rdx
|
||||||
; IADD_RC r0, r4, -1321374359
|
; IADD_RC r0, r4, -1321374359
|
||||||
lea r8, [r8+r12-1321374359]
|
lea r8, [r8+r12-1321374359]
|
||||||
|
@ -250,28 +249,26 @@
|
||||||
rol r15, cl
|
rol r15, cl
|
||||||
; ISUB_R r2, r4
|
; ISUB_R r2, r4
|
||||||
sub r10, r12
|
sub r10, r12
|
||||||
; IMULH_M r0, L1[12400]
|
; ISMULH_R r0, -1500893068
|
||||||
mov rax, r8
|
mov rax, -1500893068
|
||||||
mul qword ptr [rsi+12400]
|
imul r8
|
||||||
mov r8, rdx
|
add r8, rdx
|
||||||
; IADD_R r2, r3
|
; IADD_R r2, r3
|
||||||
add r10, r11
|
add r10, r11
|
||||||
; COND_R r6, lt(r1, -1124202227)
|
; FPSQRT_R e2
|
||||||
xor ecx, ecx
|
sqrtpd xmm6, xmm6
|
||||||
cmp r9d, -1124202227
|
; IROL_R r7, r4
|
||||||
setl cl
|
|
||||||
add r14, rcx
|
|
||||||
; IROR_R r7, r4
|
|
||||||
mov ecx, r12d
|
mov ecx, r12d
|
||||||
ror r15, cl
|
rol r15, cl
|
||||||
; IMUL_R r4, r2
|
; IMUL_R r4, r2
|
||||||
imul r12, r10
|
imul r12, r10
|
||||||
; ISUB_R r3, r7
|
; ISUB_R r3, r7
|
||||||
sub r11, r15
|
sub r11, r15
|
||||||
; IADD_R r2, r7
|
; IADD_R r2, r7
|
||||||
add r10, r15
|
add r10, r15
|
||||||
; FPSQRT_R e3
|
; FPDIV_R e3, a0
|
||||||
sqrtpd xmm7, xmm7
|
divpd xmm7, xmm8
|
||||||
|
maxpd xmm7, xmm13
|
||||||
; ISUB_R r6, 540663146
|
; ISUB_R r6, 540663146
|
||||||
sub r14, 540663146
|
sub r14, 540663146
|
||||||
; IROL_R r5, 58
|
; IROL_R r5, 58
|
||||||
|
@ -280,67 +277,65 @@
|
||||||
addpd xmm2, xmm9
|
addpd xmm2, xmm9
|
||||||
; FPADD_R f2, a2
|
; FPADD_R f2, a2
|
||||||
addpd xmm2, xmm10
|
addpd xmm2, xmm10
|
||||||
; FPSQRT_R e1
|
; FPDIV_R e1, a2
|
||||||
sqrtpd xmm5, xmm5
|
divpd xmm5, xmm10
|
||||||
|
maxpd xmm5, xmm13
|
||||||
; FPADD_R f1, a2
|
; FPADD_R f1, a2
|
||||||
addpd xmm1, xmm10
|
addpd xmm1, xmm10
|
||||||
; IADD_R r5, r3
|
; IADD_R r5, r3
|
||||||
add r13, r11
|
add r13, r11
|
||||||
; IADD_M r7, L1[880]
|
; IADD_R r7, -1780268176
|
||||||
add r15, qword ptr [rsi+880]
|
add r15, -1780268176
|
||||||
; ISUB_R r7, r0
|
; ISUB_R r7, r0
|
||||||
sub r15, r8
|
sub r15, r8
|
||||||
; ISTORE L2[r0], r7
|
; ISTORE L2[r0], r7
|
||||||
mov eax, r8d
|
mov eax, r8d
|
||||||
and eax, 262136
|
and eax, 262136
|
||||||
mov qword ptr [rsi+rax], r15
|
mov qword ptr [rsi+rax], r15
|
||||||
; IDIV_C r2, 1014940364
|
; INEG_R r2
|
||||||
mov rax, r10
|
neg r10
|
||||||
shr rax, 2
|
; FPNEG_R f0
|
||||||
mov rcx, 1219717022984988185
|
xorps xmm0, xmm15
|
||||||
mul rcx
|
; INEG_R r2
|
||||||
shr rdx, 24
|
neg r10
|
||||||
add r10, rdx
|
|
||||||
; FPMUL_R e0, a2
|
|
||||||
mulpd xmm4, xmm10
|
|
||||||
; IDIV_C r2, 3059159304
|
|
||||||
mov rax, 12949335853590502915
|
|
||||||
mul r10
|
|
||||||
shr rdx, 31
|
|
||||||
add r10, rdx
|
|
||||||
; IADD_R r0, r3
|
; IADD_R r0, r3
|
||||||
add r8, r11
|
add r8, r11
|
||||||
; IMUL_9C r7, -2124093035
|
; IMUL_9C r7, -2124093035
|
||||||
lea r15, [r15+r15*8-2124093035]
|
lea r15, [r15+r15*8-2124093035]
|
||||||
; FPSUB_R f2, a0
|
; FPADD_M f2, L1[r0]
|
||||||
subpd xmm2, xmm8
|
mov eax, r8d
|
||||||
; FPDIV_R e0, a2
|
and eax, 16376
|
||||||
divpd xmm4, xmm10
|
cvtdq2pd xmm12, qword ptr [rsi+rax]
|
||||||
|
addpd xmm2, xmm12
|
||||||
|
; FPMUL_M e0, L1[r6]
|
||||||
|
mov eax, r14d
|
||||||
|
and eax, 16376
|
||||||
|
cvtdq2pd xmm12, qword ptr [rsi+rax]
|
||||||
|
mulpd xmm4, xmm12
|
||||||
maxpd xmm4, xmm13
|
maxpd xmm4, xmm13
|
||||||
; FPSUB_R f2, a3
|
; FPSUB_R f2, a3
|
||||||
subpd xmm2, xmm11
|
subpd xmm2, xmm11
|
||||||
; IMUL_R r1, r2
|
; IMUL_R r1, r2
|
||||||
imul r9, r10
|
imul r9, r10
|
||||||
; ISMULH_R r7, r5
|
; IDIV_C r7, 3214009572
|
||||||
mov rax, r15
|
mov rax, 12325439725582798855
|
||||||
imul r13
|
mul r15
|
||||||
mov r15, rdx
|
shr rdx, 31
|
||||||
|
add r15, rdx
|
||||||
; IMULH_R r3, r2
|
; IMULH_R r3, r2
|
||||||
mov rax, r11
|
mov rax, r11
|
||||||
mul r10
|
mul r10
|
||||||
mov r11, rdx
|
mov r11, rdx
|
||||||
; IXOR_M r1, L2[r0]
|
; IROR_R r1, r0
|
||||||
mov eax, r8d
|
mov ecx, r8d
|
||||||
and eax, 262136
|
ror r9, cl
|
||||||
xor r9, qword ptr [rsi+rax]
|
|
||||||
; FPMUL_R e0, a1
|
; FPMUL_R e0, a1
|
||||||
mulpd xmm4, xmm9
|
mulpd xmm4, xmm9
|
||||||
; ISUB_R r4, 1456841848
|
; IADD_RC r4, r4, 1456841848
|
||||||
sub r12, 1456841848
|
lea r12, [r12+r12+1456841848]
|
||||||
; IXOR_M r3, L2[r2]
|
; IROR_R r3, r2
|
||||||
mov eax, r10d
|
mov ecx, r10d
|
||||||
and eax, 262136
|
ror r11, cl
|
||||||
xor r11, qword ptr [rsi+rax]
|
|
||||||
; COND_M r0, of(L1[r4], 1678513610)
|
; COND_M r0, of(L1[r4], 1678513610)
|
||||||
xor ecx, ecx
|
xor ecx, ecx
|
||||||
mov eax, r12d
|
mov eax, r12d
|
||||||
|
@ -348,446 +343,39 @@
|
||||||
cmp dword ptr [rsi+rax], 1678513610
|
cmp dword ptr [rsi+rax], 1678513610
|
||||||
seto cl
|
seto cl
|
||||||
add r8, rcx
|
add r8, rcx
|
||||||
; IDIV_C r4, 2674394209
|
; INEG_R r4
|
||||||
mov rax, 925772300223658071
|
neg r12
|
||||||
mul r12
|
|
||||||
shr rdx, 27
|
|
||||||
add r12, rdx
|
|
||||||
; IMUL_R r4, r1
|
; IMUL_R r4, r1
|
||||||
imul r12, r9
|
imul r12, r9
|
||||||
; FPADD_R f1, a2
|
; FPADD_R f1, a2
|
||||||
addpd xmm1, xmm10
|
addpd xmm1, xmm10
|
||||||
; FPSUB_R f2, a0
|
; FPSUB_R f2, a0
|
||||||
subpd xmm2, xmm8
|
subpd xmm2, xmm8
|
||||||
; FPMUL_M e1, L2[r6]
|
; FPMUL_R e1, a2
|
||||||
mov eax, r14d
|
mulpd xmm5, xmm10
|
||||||
and eax, 262136
|
; FPSUB_R f0, a3
|
||||||
cvtdq2pd xmm12, qword ptr [rsi+rax]
|
subpd xmm0, xmm11
|
||||||
mulpd xmm5, xmm12
|
|
||||||
maxpd xmm5, xmm13
|
|
||||||
; FPSUB_M f0, L2[r3]
|
|
||||||
mov eax, r11d
|
|
||||||
and eax, 262136
|
|
||||||
cvtdq2pd xmm12, qword ptr [rsi+rax]
|
|
||||||
subpd xmm0, xmm12
|
|
||||||
; IROR_R r0, r7
|
; IROR_R r0, r7
|
||||||
mov ecx, r15d
|
mov ecx, r15d
|
||||||
ror r8, cl
|
ror r8, cl
|
||||||
; FSTORE L2[r1], e0
|
; ISTORE L2[r1], r4
|
||||||
mov eax, r9d
|
mov eax, r9d
|
||||||
and eax, 262128
|
and eax, 262136
|
||||||
movapd xmmword ptr [rsi+rax], xmm4
|
mov qword ptr [rsi+rax], r12
|
||||||
; IROR_R r7, r6
|
; IROL_R r7, r6
|
||||||
mov ecx, r14d
|
mov ecx, r14d
|
||||||
ror r15, cl
|
rol r15, cl
|
||||||
; IMUL_9C r2, 266593902
|
; IMUL_9C r2, 266593902
|
||||||
lea r10, [r10+r10*8+266593902]
|
lea r10, [r10+r10*8+266593902]
|
||||||
; IMUL_R r4, r6
|
; IMUL_R r4, r6
|
||||||
imul r12, r14
|
imul r12, r14
|
||||||
; FPSUB_R f2, a2
|
; FPSUB_R f2, a2
|
||||||
subpd xmm2, xmm10
|
subpd xmm2, xmm10
|
||||||
; FPMUL_R e3, a0
|
; FPNEG_R f3
|
||||||
mulpd xmm7, xmm8
|
xorps xmm3, xmm15
|
||||||
; IXOR_M r7, L1[r2]
|
; IROR_R r7, r2
|
||||||
mov eax, r10d
|
mov ecx, r10d
|
||||||
and eax, 16376
|
ror r15, cl
|
||||||
xor r15, qword ptr [rsi+rax]
|
|
||||||
; IROR_R r0, r5
|
; IROR_R r0, r5
|
||||||
mov ecx, r13d
|
mov ecx, r13d
|
||||||
ror r8, cl
|
ror r8, cl
|
||||||
; FPADD_R f1, a2
|
|
||||||
addpd xmm1, xmm10
|
|
||||||
; FPSQRT_R e3
|
|
||||||
sqrtpd xmm7, xmm7
|
|
||||||
; FPADD_R f3, a1
|
|
||||||
addpd xmm3, xmm9
|
|
||||||
; FPADD_R f1, a0
|
|
||||||
addpd xmm1, xmm8
|
|
||||||
; COND_M r2, ge(L2[r2], -226330940)
|
|
||||||
xor ecx, ecx
|
|
||||||
mov eax, r10d
|
|
||||||
and eax, 262136
|
|
||||||
cmp dword ptr [rsi+rax], -226330940
|
|
||||||
setge cl
|
|
||||||
add r10, rcx
|
|
||||||
; FPDIV_R e2, a3
|
|
||||||
divpd xmm6, xmm11
|
|
||||||
maxpd xmm6, xmm13
|
|
||||||
; FPMUL_R e2, a1
|
|
||||||
mulpd xmm6, xmm9
|
|
||||||
; FPSUB_R f1, a0
|
|
||||||
subpd xmm1, xmm8
|
|
||||||
; IMUL_R r7, r5
|
|
||||||
imul r15, r13
|
|
||||||
; IMUL_R r0, r1
|
|
||||||
imul r8, r9
|
|
||||||
; FPSUB_R f3, a1
|
|
||||||
subpd xmm3, xmm9
|
|
||||||
; IROL_R r3, r5
|
|
||||||
mov ecx, r13d
|
|
||||||
rol r11, cl
|
|
||||||
; IADD_RC r5, r2, 795784298
|
|
||||||
lea r13, [r13+r10+795784298]
|
|
||||||
; ISUB_R r0, r4
|
|
||||||
sub r8, r12
|
|
||||||
; IMUL_R r5, r4
|
|
||||||
imul r13, r12
|
|
||||||
; FPSUB_R f0, a2
|
|
||||||
subpd xmm0, xmm10
|
|
||||||
; FPMUL_R e3, a1
|
|
||||||
mulpd xmm7, xmm9
|
|
||||||
; ISDIV_C r3, 1662492575
|
|
||||||
mov rax, 2978515652703905219
|
|
||||||
imul r11
|
|
||||||
xor eax, eax
|
|
||||||
sar rdx, 28
|
|
||||||
sets al
|
|
||||||
add rdx, rax
|
|
||||||
add r11, rdx
|
|
||||||
; ISMULH_R r5, r0
|
|
||||||
mov rax, r13
|
|
||||||
imul r8
|
|
||||||
mov r13, rdx
|
|
||||||
; ISDIV_C r4, 1963597892
|
|
||||||
mov rax, -8359627607928540073
|
|
||||||
imul r12
|
|
||||||
xor eax, eax
|
|
||||||
add rdx, r12
|
|
||||||
sar rdx, 30
|
|
||||||
sets al
|
|
||||||
add rdx, rax
|
|
||||||
add r12, rdx
|
|
||||||
; IMUL_R r7, r0
|
|
||||||
imul r15, r8
|
|
||||||
; IMULH_M r0, L1[r3]
|
|
||||||
mov ecx, r11d
|
|
||||||
and ecx, 16376
|
|
||||||
mov rax, r8
|
|
||||||
mul qword ptr [rsi+rcx]
|
|
||||||
mov r8, rdx
|
|
||||||
; IXOR_R r3, r7
|
|
||||||
xor r11, r15
|
|
||||||
; IDIV_C r4, 1146125335
|
|
||||||
mov rax, 8640870253760721727
|
|
||||||
mul r12
|
|
||||||
shr rdx, 29
|
|
||||||
add r12, rdx
|
|
||||||
; FPSWAP_R f3
|
|
||||||
shufpd xmm3, xmm3, 1
|
|
||||||
; IXOR_M r2, L1[r0]
|
|
||||||
mov eax, r8d
|
|
||||||
and eax, 16376
|
|
||||||
xor r10, qword ptr [rsi+rax]
|
|
||||||
; IROR_R r0, r1
|
|
||||||
mov ecx, r9d
|
|
||||||
ror r8, cl
|
|
||||||
; IXOR_R r7, r4
|
|
||||||
xor r15, r12
|
|
||||||
; ISMULH_R r6, r2
|
|
||||||
mov rax, r14
|
|
||||||
imul r10
|
|
||||||
mov r14, rdx
|
|
||||||
; FPMUL_R e3, a2
|
|
||||||
mulpd xmm7, xmm10
|
|
||||||
; IADD_RC r4, r2, 1704868083
|
|
||||||
lea r12, [r12+r10+1704868083]
|
|
||||||
; FPSUB_R f2, a0
|
|
||||||
subpd xmm2, xmm8
|
|
||||||
; ISTORE L1[r0], r0
|
|
||||||
mov eax, r8d
|
|
||||||
and eax, 16376
|
|
||||||
mov qword ptr [rsi+rax], r8
|
|
||||||
; FPSUB_R f0, a3
|
|
||||||
subpd xmm0, xmm11
|
|
||||||
; FPDIV_R e0, a3
|
|
||||||
divpd xmm4, xmm11
|
|
||||||
maxpd xmm4, xmm13
|
|
||||||
; FPMUL_R e3, a2
|
|
||||||
mulpd xmm7, xmm10
|
|
||||||
; ISUB_R r7, 1302457878
|
|
||||||
sub r15, 1302457878
|
|
||||||
; IMUL_9C r1, 1330165941
|
|
||||||
lea r9, [r9+r9*8+1330165941]
|
|
||||||
; FPMUL_R e1, a3
|
|
||||||
mulpd xmm5, xmm11
|
|
||||||
; IROL_R r0, r4
|
|
||||||
mov ecx, r12d
|
|
||||||
rol r8, cl
|
|
||||||
; FPSUB_M f1, L1[r0]
|
|
||||||
mov eax, r8d
|
|
||||||
and eax, 16376
|
|
||||||
cvtdq2pd xmm12, qword ptr [rsi+rax]
|
|
||||||
subpd xmm1, xmm12
|
|
||||||
; IROL_R r5, r6
|
|
||||||
mov ecx, r14d
|
|
||||||
rol r13, cl
|
|
||||||
; COND_M r0, ab(L1[r1], -310933871)
|
|
||||||
xor ecx, ecx
|
|
||||||
mov eax, r9d
|
|
||||||
and eax, 16376
|
|
||||||
cmp dword ptr [rsi+rax], -310933871
|
|
||||||
seta cl
|
|
||||||
add r8, rcx
|
|
||||||
; CFROUND r7, 39
|
|
||||||
mov rax, r15
|
|
||||||
rol rax, 38
|
|
||||||
and eax, 24576
|
|
||||||
or eax, 40896
|
|
||||||
mov dword ptr [rsp-8], eax
|
|
||||||
ldmxcsr dword ptr [rsp-8]
|
|
||||||
; FPDIV_R e0, a1
|
|
||||||
divpd xmm4, xmm9
|
|
||||||
maxpd xmm4, xmm13
|
|
||||||
; IMUL_M r1, L1[r3]
|
|
||||||
mov eax, r11d
|
|
||||||
and eax, 16376
|
|
||||||
imul r9, qword ptr [rsi+rax]
|
|
||||||
; IMUL_9C r3, 1573236728
|
|
||||||
lea r11, [r11+r11*8+1573236728]
|
|
||||||
; FPNEG_R f3
|
|
||||||
xorps xmm3, xmm15
|
|
||||||
; COND_R r1, lt(r4, -1805702334)
|
|
||||||
xor ecx, ecx
|
|
||||||
cmp r12d, -1805702334
|
|
||||||
setl cl
|
|
||||||
add r9, rcx
|
|
||||||
; FPSWAP_R f1
|
|
||||||
shufpd xmm1, xmm1, 1
|
|
||||||
; IADD_R r7, -1421188024
|
|
||||||
add r15, -1421188024
|
|
||||||
; FPMUL_R e3, a2
|
|
||||||
mulpd xmm7, xmm10
|
|
||||||
; FPSUB_M f2, L2[r7]
|
|
||||||
mov eax, r15d
|
|
||||||
and eax, 262136
|
|
||||||
cvtdq2pd xmm12, qword ptr [rsi+rax]
|
|
||||||
subpd xmm2, xmm12
|
|
||||||
; FPSUB_R f3, a1
|
|
||||||
subpd xmm3, xmm9
|
|
||||||
; FPSQRT_R e1
|
|
||||||
sqrtpd xmm5, xmm5
|
|
||||||
; ISUB_R r2, r4
|
|
||||||
sub r10, r12
|
|
||||||
; ISMULH_R r4, r5
|
|
||||||
mov rax, r12
|
|
||||||
imul r13
|
|
||||||
mov r12, rdx
|
|
||||||
; COND_R r1, of(r7, 1294727006)
|
|
||||||
xor ecx, ecx
|
|
||||||
cmp r15d, 1294727006
|
|
||||||
seto cl
|
|
||||||
add r9, rcx
|
|
||||||
; IADD_M r5, L2[r2]
|
|
||||||
mov eax, r10d
|
|
||||||
and eax, 262136
|
|
||||||
add r13, qword ptr [rsi+rax]
|
|
||||||
; IMUL_9C r4, 401020510
|
|
||||||
lea r12, [r12+r12*8+401020510]
|
|
||||||
; IROL_R r3, r0
|
|
||||||
mov ecx, r8d
|
|
||||||
rol r11, cl
|
|
||||||
; ISTORE L1[r7], r0
|
|
||||||
mov eax, r15d
|
|
||||||
and eax, 16376
|
|
||||||
mov qword ptr [rsi+rax], r8
|
|
||||||
; FPSUB_R f2, a1
|
|
||||||
subpd xmm2, xmm9
|
|
||||||
; FPSQRT_R e3
|
|
||||||
sqrtpd xmm7, xmm7
|
|
||||||
; IMUL_R r3, 720965215
|
|
||||||
imul r11, 720965215
|
|
||||||
; IMUL_R r6, r2
|
|
||||||
imul r14, r10
|
|
||||||
; ISTORE L1[r7], r3
|
|
||||||
mov eax, r15d
|
|
||||||
and eax, 16376
|
|
||||||
mov qword ptr [rsi+rax], r11
|
|
||||||
; IROR_R r2, r6
|
|
||||||
mov ecx, r14d
|
|
||||||
ror r10, cl
|
|
||||||
; FPSQRT_R e3
|
|
||||||
sqrtpd xmm7, xmm7
|
|
||||||
; IMUL_9C r4, 788211341
|
|
||||||
lea r12, [r12+r12*8+788211341]
|
|
||||||
; IMUL_9C r3, -67993446
|
|
||||||
lea r11, [r11+r11*8-67993446]
|
|
||||||
; FPSWAP_R e3
|
|
||||||
shufpd xmm7, xmm7, 1
|
|
||||||
; IMUL_M r2, L1[r6]
|
|
||||||
mov eax, r14d
|
|
||||||
and eax, 16376
|
|
||||||
imul r10, qword ptr [rsi+rax]
|
|
||||||
; COND_M r2, ge(L1[r2], -1892157506)
|
|
||||||
xor ecx, ecx
|
|
||||||
mov eax, r10d
|
|
||||||
and eax, 16376
|
|
||||||
cmp dword ptr [rsi+rax], -1892157506
|
|
||||||
setge cl
|
|
||||||
add r10, rcx
|
|
||||||
; FPADD_M f1, L1[r3]
|
|
||||||
mov eax, r11d
|
|
||||||
and eax, 16376
|
|
||||||
cvtdq2pd xmm12, qword ptr [rsi+rax]
|
|
||||||
addpd xmm1, xmm12
|
|
||||||
; IADD_M r7, L1[r0]
|
|
||||||
mov eax, r8d
|
|
||||||
and eax, 16376
|
|
||||||
add r15, qword ptr [rsi+rax]
|
|
||||||
; ISDIV_C r1, 624867857
|
|
||||||
mov rax, 7924491717200811467
|
|
||||||
imul r9
|
|
||||||
xor eax, eax
|
|
||||||
sar rdx, 28
|
|
||||||
sets al
|
|
||||||
add rdx, rax
|
|
||||||
add r9, rdx
|
|
||||||
; FPADD_R f0, a1
|
|
||||||
addpd xmm0, xmm9
|
|
||||||
; ISUB_R r5, r7
|
|
||||||
sub r13, r15
|
|
||||||
; FPNEG_R f0
|
|
||||||
xorps xmm0, xmm15
|
|
||||||
; IMUL_R r6, r2
|
|
||||||
imul r14, r10
|
|
||||||
; FPMUL_M e3, L1[r1]
|
|
||||||
mov eax, r9d
|
|
||||||
and eax, 16376
|
|
||||||
cvtdq2pd xmm12, qword ptr [rsi+rax]
|
|
||||||
mulpd xmm7, xmm12
|
|
||||||
maxpd xmm7, xmm13
|
|
||||||
; IADD_R r0, r4
|
|
||||||
add r8, r12
|
|
||||||
; FPSUB_M f3, L1[r1]
|
|
||||||
mov eax, r9d
|
|
||||||
and eax, 16376
|
|
||||||
cvtdq2pd xmm12, qword ptr [rsi+rax]
|
|
||||||
subpd xmm3, xmm12
|
|
||||||
; FPMUL_R e2, a0
|
|
||||||
mulpd xmm6, xmm8
|
|
||||||
; INEG_R r2
|
|
||||||
neg r10
|
|
||||||
; FPMUL_R e2, a2
|
|
||||||
mulpd xmm6, xmm10
|
|
||||||
; FPSUB_M f3, L1[r6]
|
|
||||||
mov eax, r14d
|
|
||||||
and eax, 16376
|
|
||||||
cvtdq2pd xmm12, qword ptr [rsi+rax]
|
|
||||||
subpd xmm3, xmm12
|
|
||||||
; FPADD_R f1, a3
|
|
||||||
addpd xmm1, xmm11
|
|
||||||
; IMULH_R r3, r2
|
|
||||||
mov rax, r11
|
|
||||||
mul r10
|
|
||||||
mov r11, rdx
|
|
||||||
; FPSUB_R f0, a3
|
|
||||||
subpd xmm0, xmm11
|
|
||||||
; IDIV_C r5, 2887845607
|
|
||||||
mov rax, 13717520480010955377
|
|
||||||
mul r13
|
|
||||||
shr rdx, 31
|
|
||||||
add r13, rdx
|
|
||||||
; ISMULH_M r6, L1[r2]
|
|
||||||
mov ecx, r10d
|
|
||||||
and ecx, 16376
|
|
||||||
mov rax, r14
|
|
||||||
imul qword ptr [rsi+rcx]
|
|
||||||
mov r14, rdx
|
|
||||||
; FPSUB_R f3, a3
|
|
||||||
subpd xmm3, xmm11
|
|
||||||
; IMUL_M r6, L1[r7]
|
|
||||||
mov eax, r15d
|
|
||||||
and eax, 16376
|
|
||||||
imul r14, qword ptr [rsi+rax]
|
|
||||||
; FPNEG_R f0
|
|
||||||
xorps xmm0, xmm15
|
|
||||||
; FPMUL_R e2, a0
|
|
||||||
mulpd xmm6, xmm8
|
|
||||||
; IMUL_9C r6, 295130073
|
|
||||||
lea r14, [r14+r14*8+295130073]
|
|
||||||
; FPADD_R f1, a1
|
|
||||||
addpd xmm1, xmm9
|
|
||||||
; IXOR_R r0, r5
|
|
||||||
xor r8, r13
|
|
||||||
; FPADD_R f2, a1
|
|
||||||
addpd xmm2, xmm9
|
|
||||||
; FPSWAP_R e3
|
|
||||||
shufpd xmm7, xmm7, 1
|
|
||||||
; FPSQRT_R e3
|
|
||||||
sqrtpd xmm7, xmm7
|
|
||||||
; IADD_RC r3, r6, -1317630728
|
|
||||||
lea r11, [r11+r14-1317630728]
|
|
||||||
; IMUL_M r2, L1[r3]
|
|
||||||
mov eax, r11d
|
|
||||||
and eax, 16376
|
|
||||||
imul r10, qword ptr [rsi+rax]
|
|
||||||
; IADD_RC r1, r4, 894105694
|
|
||||||
lea r9, [r9+r12+894105694]
|
|
||||||
; IMUL_R r7, r0
|
|
||||||
imul r15, r8
|
|
||||||
; FPSUB_R f1, a0
|
|
||||||
subpd xmm1, xmm8
|
|
||||||
; IMUL_M r7, L1[r1]
|
|
||||||
mov eax, r9d
|
|
||||||
and eax, 16376
|
|
||||||
imul r15, qword ptr [rsi+rax]
|
|
||||||
; IXOR_R r2, r4
|
|
||||||
xor r10, r12
|
|
||||||
; ISUB_M r0, L1[r1]
|
|
||||||
mov eax, r9d
|
|
||||||
and eax, 16376
|
|
||||||
sub r8, qword ptr [rsi+rax]
|
|
||||||
; INEG_R r4
|
|
||||||
neg r12
|
|
||||||
; IMUL_9C r4, -285272388
|
|
||||||
lea r12, [r12+r12*8-285272388]
|
|
||||||
; IMUL_R r7, r4
|
|
||||||
imul r15, r12
|
|
||||||
; IMULH_M r5, L1[r7]
|
|
||||||
mov ecx, r15d
|
|
||||||
and ecx, 16376
|
|
||||||
mov rax, r13
|
|
||||||
mul qword ptr [rsi+rcx]
|
|
||||||
mov r13, rdx
|
|
||||||
; IROL_R r1, r7
|
|
||||||
mov ecx, r15d
|
|
||||||
rol r9, cl
|
|
||||||
; IXOR_R r4, -757532727
|
|
||||||
xor r12, -757532727
|
|
||||||
; IMUL_R r3, 1863959234
|
|
||||||
imul r11, 1863959234
|
|
||||||
; IROL_R r4, 59
|
|
||||||
rol r12, 59
|
|
||||||
; ISMULH_R r1, 2122681086
|
|
||||||
mov rax, 2122681086
|
|
||||||
imul r9
|
|
||||||
add r9, rdx
|
|
||||||
; ISTORE L2[r6], r7
|
|
||||||
mov eax, r14d
|
|
||||||
and eax, 262136
|
|
||||||
mov qword ptr [rsi+rax], r15
|
|
||||||
; ISTORE L1[r1], r5
|
|
||||||
mov eax, r9d
|
|
||||||
and eax, 16376
|
|
||||||
mov qword ptr [rsi+rax], r13
|
|
||||||
; FPMUL_R e0, a1
|
|
||||||
mulpd xmm4, xmm9
|
|
||||||
; COND_R r2, ns(r1, 486049737)
|
|
||||||
xor ecx, ecx
|
|
||||||
cmp r9d, 486049737
|
|
||||||
setns cl
|
|
||||||
add r10, rcx
|
|
||||||
; FPMUL_M e0, L2[r7]
|
|
||||||
mov eax, r15d
|
|
||||||
and eax, 262136
|
|
||||||
cvtdq2pd xmm12, qword ptr [rsi+rax]
|
|
||||||
mulpd xmm4, xmm12
|
|
||||||
maxpd xmm4, xmm13
|
|
||||||
; FPMUL_R e3, a2
|
|
||||||
mulpd xmm7, xmm10
|
|
||||||
; IROL_R r5, r2
|
|
||||||
mov ecx, r10d
|
|
||||||
rol r13, cl
|
|
||||||
; IADD_M r0, L1[r4]
|
|
||||||
mov eax, r12d
|
|
||||||
and eax, 16376
|
|
||||||
add r8, qword ptr [rsi+rax]
|
|
||||||
|
|
Loading…
Reference in a new issue