mirror of
				https://git.wownero.com/wownero/RandomWOW.git
				synced 2024-08-15 00:23:14 +00:00 
			
		
		
		
	NOP instruction
register load/store from L3
This commit is contained in:
		
							parent
							
								
									005c67f64c
								
							
						
					
					
						commit
						8f2abd6c05
					
				
					 15 changed files with 233 additions and 624 deletions
				
			
		|  | @ -491,6 +491,10 @@ namespace RandomX { | |||
| 		asmCode << "\tmovapd xmmword ptr [rsi+rax], " << regFE[instr.src] << std::endl; | ||||
| 	} | ||||
| 
 | ||||
| 	void AssemblyGeneratorX86::h_NOP(Instruction& instr, int i) { | ||||
| 		asmCode << "\tnop" << std::endl; | ||||
| 	} | ||||
| 
 | ||||
| #include "instructionWeights.hpp" | ||||
| #define INST_HANDLE(x) REPN(&AssemblyGeneratorX86::h_##x, WT(x)) | ||||
| 
 | ||||
|  | @ -540,5 +544,7 @@ namespace RandomX { | |||
| 
 | ||||
| 		INST_HANDLE(ISTORE) | ||||
| 		INST_HANDLE(FSTORE) | ||||
| 
 | ||||
| 		INST_HANDLE(NOP) | ||||
| 	}; | ||||
| } | ||||
|  | @ -79,5 +79,6 @@ namespace RandomX { | |||
| 		void  h_CFROUND(Instruction&, int); | ||||
| 		void  h_ISTORE(Instruction&, int); | ||||
| 		void  h_FSTORE(Instruction&, int); | ||||
| 		void  h_NOP(Instruction&, int); | ||||
| 	}; | ||||
| } | ||||
|  | @ -327,6 +327,10 @@ namespace RandomX { | |||
| 		os << ", " << reg << srcIndex << std::endl; | ||||
| 	} | ||||
| 
 | ||||
| 	void  Instruction::h_NOP(std::ostream& os) const { | ||||
| 		os << std::endl; | ||||
| 	} | ||||
| 
 | ||||
| #include "instructionWeights.hpp" | ||||
| #define INST_NAME(x) REPN(#x, WT(x)) | ||||
| #define INST_HANDLE(x) REPN(&Instruction::h_##x, WT(x)) | ||||
|  | @ -377,6 +381,8 @@ namespace RandomX { | |||
| 
 | ||||
| 		INST_NAME(ISTORE) | ||||
| 		INST_NAME(FSTORE) | ||||
| 
 | ||||
| 		INST_NAME(NOP) | ||||
| 	}; | ||||
| 
 | ||||
| 	InstructionVisualizer Instruction::engine[256] = { | ||||
|  | @ -425,6 +431,8 @@ namespace RandomX { | |||
| 
 | ||||
| 		INST_HANDLE(ISTORE) | ||||
| 		INST_HANDLE(FSTORE) | ||||
| 
 | ||||
| 		INST_HANDLE(NOP) | ||||
| 	}; | ||||
| 
 | ||||
| } | ||||
|  | @ -86,6 +86,7 @@ namespace RandomX { | |||
| 		void  h_CFROUND(std::ostream&) const; | ||||
| 		void  h_ISTORE(std::ostream&) const; | ||||
| 		void  h_FSTORE(std::ostream&) const; | ||||
| 		void  h_NOP(std::ostream&) const; | ||||
| 	}; | ||||
| 
 | ||||
| 	static_assert(sizeof(Instruction) == 8, "Invalid alignment of struct Instruction"); | ||||
|  |  | |||
|  | @ -181,7 +181,7 @@ namespace RandomX { | |||
| 	static const uint8_t JMP = 0xe9; | ||||
| 
 | ||||
| 	size_t JitCompilerX86::getCodeSize() { | ||||
| 		return codePos - prologueSize + readDatasetSize; | ||||
| 		return codePos - prologueSize; | ||||
| 	} | ||||
| 
 | ||||
| 	JitCompilerX86::JitCompilerX86() { | ||||
|  | @ -761,6 +761,10 @@ namespace RandomX { | |||
| 		emitByte(0x06); | ||||
| 	} | ||||
| 
 | ||||
| 	void JitCompilerX86::h_NOP(Instruction& instr) { | ||||
| 		emitByte(0x90); | ||||
| 	} | ||||
| 
 | ||||
| #include "instructionWeights.hpp" | ||||
| #define INST_HANDLE(x) REPN(&JitCompilerX86::h_##x, WT(x)) | ||||
| 
 | ||||
|  | @ -800,6 +804,7 @@ namespace RandomX { | |||
| 		INST_HANDLE(CFROUND) | ||||
| 		INST_HANDLE(ISTORE) | ||||
| 		INST_HANDLE(FSTORE) | ||||
| 		INST_HANDLE(NOP) | ||||
| 	}; | ||||
| 
 | ||||
| 
 | ||||
|  |  | |||
|  | @ -125,6 +125,7 @@ namespace RandomX { | |||
| 		void  h_CFROUND(Instruction&); | ||||
| 		void  h_ISTORE(Instruction&); | ||||
| 		void  h_FSTORE(Instruction&); | ||||
| 		void  h_NOP(Instruction&); | ||||
| 	}; | ||||
| 
 | ||||
| } | ||||
|  | @ -1,4 +1,4 @@ | |||
| 	and eax, 262080 | ||||
| 	and eax, 1048512 | ||||
| 	lea rcx, [rsi+rax] | ||||
| 	cvtdq2pd xmm0, qword ptr [rcx+0] | ||||
| 	cvtdq2pd xmm1, qword ptr [rcx+8] | ||||
|  |  | |||
|  | @ -1,4 +1,4 @@ | |||
| 	and eax, 262080 | ||||
| 	and eax, 1048512 | ||||
| 	lea rcx, [rsi+rax] | ||||
| 	xor r8,  qword ptr [rcx+0] | ||||
| 	xor r9,  qword ptr [rcx+8] | ||||
|  |  | |||
|  | @ -1,4 +1,4 @@ | |||
| 	and eax, 262080 | ||||
| 	and eax, 1048512 | ||||
| 	lea rcx, [rsi+rax] | ||||
| 	mulpd xmm0, xmm4 | ||||
| 	mulpd xmm1, xmm5 | ||||
|  |  | |||
|  | @ -1,4 +1,4 @@ | |||
| 	and eax, 262080 | ||||
| 	and eax, 1048512 | ||||
| 	lea rcx, [rsi+rax] | ||||
| 	mov qword ptr [rcx+0], r8 | ||||
| 	mov qword ptr [rcx+8], r9 | ||||
|  |  | |||
|  | @ -72,7 +72,7 @@ namespace RandomX { | |||
| 		convertible_t hi; | ||||
| 	}; | ||||
| 
 | ||||
| 	constexpr int ProgramLength = 256; | ||||
| 	constexpr int ProgramLength = 128; | ||||
| 	constexpr uint32_t InstructionCount = 1024; | ||||
| 	constexpr uint32_t ScratchpadSize = 1024 * 1024; | ||||
| 	constexpr uint32_t ScratchpadLength = ScratchpadSize / sizeof(convertible_t); | ||||
|  |  | |||
|  | @ -119,7 +119,7 @@ signMask: | |||
| ALIGN 64 | ||||
| program_begin: | ||||
| 	xor eax, r8d                      ;# read address register 1 | ||||
| 	and eax, 262080 | ||||
| 	and eax, 1048512 | ||||
| 	lea rcx, [rsi+rax] | ||||
| 	xor r8,  qword ptr [rcx+0] | ||||
| 	xor r9,  qword ptr [rcx+8] | ||||
|  | @ -130,7 +130,7 @@ program_begin: | |||
| 	xor r14, qword ptr [rcx+48] | ||||
| 	xor r15, qword ptr [rcx+56] | ||||
| 	xor eax, r9d                      ;# read address register 2 | ||||
| 	and eax, 262080 | ||||
| 	and eax, 1048512 | ||||
| 	lea rcx, [rsi+rax] | ||||
| 	cvtdq2pd xmm0, qword ptr [rcx+0] | ||||
| 	cvtdq2pd xmm1, qword ptr [rcx+8] | ||||
|  | @ -166,7 +166,7 @@ program_begin: | |||
| 	xor r14, qword ptr [rcx+48] | ||||
| 	xor r15, qword ptr [rcx+56]                  | ||||
| 	mov eax, r12d                      ;# write address register 1 | ||||
| 	and eax, 262080 | ||||
| 	and eax, 1048512 | ||||
| 	lea rcx, [rsi+rax] | ||||
| 	mov qword ptr [rcx+0], r8 | ||||
| 	mov qword ptr [rcx+8], r9 | ||||
|  | @ -177,7 +177,7 @@ program_begin: | |||
| 	mov qword ptr [rcx+48], r14 | ||||
| 	mov qword ptr [rcx+56], r15 | ||||
| 	xor eax, r13d                      ;# write address register 2 | ||||
| 	and eax, 262080 | ||||
| 	and eax, 1048512 | ||||
| 	lea rcx, [rsi+rax] | ||||
| 	mulpd xmm0, xmm4 | ||||
| 	mulpd xmm1, xmm5 | ||||
|  |  | |||
|  | @ -20,51 +20,51 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| #pragma once | ||||
| 
 | ||||
| //Integer
 | ||||
| #define WT_IADD_R 10 | ||||
| #define WT_IADD_R 12 | ||||
| #define WT_IADD_M 3 | ||||
| #define WT_IADD_RC 10 | ||||
| #define WT_ISUB_R 10 | ||||
| #define WT_IADD_RC 12 | ||||
| #define WT_ISUB_R 12 | ||||
| #define WT_ISUB_M 3 | ||||
| #define WT_IMUL_9C 10 | ||||
| #define WT_IMUL_R 20 | ||||
| #define WT_IMUL_M 6 | ||||
| #define WT_IMULH_R 6 | ||||
| #define WT_IMULH_M 2 | ||||
| #define WT_ISMULH_R 6 | ||||
| #define WT_ISMULH_M 2 | ||||
| #define WT_IMUL_R 16 | ||||
| #define WT_IMUL_M 4 | ||||
| #define WT_IMULH_R 4 | ||||
| #define WT_IMULH_M 1 | ||||
| #define WT_ISMULH_R 4 | ||||
| #define WT_ISMULH_M 1 | ||||
| #define WT_IDIV_C 4 | ||||
| #define WT_ISDIV_C 4 | ||||
| #define WT_INEG_R 2 | ||||
| #define WT_IXOR_R 12 | ||||
| #define WT_IXOR_M 4 | ||||
| #define WT_IROR_R 10 | ||||
| #define WT_IROL_R 10 | ||||
| #define WT_IXOR_M 3 | ||||
| #define WT_IROR_R 12 | ||||
| #define WT_IROL_R 12 | ||||
| 
 | ||||
| //Common floating point
 | ||||
| #define WT_FPSWAP_R 6 | ||||
| #define WT_FPSWAP_R 8 | ||||
| 
 | ||||
| //Floating point group F
 | ||||
| #define WT_FPADD_R 18 | ||||
| #define WT_FPADD_M 3 | ||||
| #define WT_FPSUB_R 18 | ||||
| #define WT_FPSUB_M 3 | ||||
| #define WT_FPNEG_R 5 | ||||
| #define WT_FPADD_R 20 | ||||
| #define WT_FPADD_M 5 | ||||
| #define WT_FPSUB_R 20 | ||||
| #define WT_FPSUB_M 5 | ||||
| #define WT_FPNEG_R 6 | ||||
| 
 | ||||
| //Floating point group E
 | ||||
| #define WT_FPMUL_R 18 | ||||
| #define WT_FPMUL_M 3 | ||||
| #define WT_FPDIV_R 6 | ||||
| #define WT_FPMUL_R 16 | ||||
| #define WT_FPMUL_M 4 | ||||
| #define WT_FPDIV_R 7 | ||||
| #define WT_FPDIV_M 1 | ||||
| #define WT_FPSQRT_R 6 | ||||
| 
 | ||||
| //Control
 | ||||
| #define WT_COND_R 12 | ||||
| #define WT_COND_M 4 | ||||
| #define WT_COND_R 7 | ||||
| #define WT_COND_M 1 | ||||
| #define WT_CFROUND 1 | ||||
| 
 | ||||
| //Store
 | ||||
| #define WT_ISTORE 12 | ||||
| #define WT_FSTORE 6 | ||||
| #define WT_ISTORE 18 | ||||
| #define WT_FSTORE 0 | ||||
| 
 | ||||
| #define WT_NOP 0 | ||||
| 
 | ||||
|  | @ -115,6 +115,7 @@ static_assert(wtSum == 256, | |||
| #define REP33(x) REP32(x) x, | ||||
| #define REP40(x) REP32(x) REP8(x) | ||||
| #define REP128(x) REP32(x) REP32(x) REP32(x) REP32(x) | ||||
| #define REP232(x) REP128(x) REP40(x) REP40(x) REP24(x) | ||||
| #define REP256(x) REP128(x) REP128(x) | ||||
| #define REPNX(x,N) REP##N(x) | ||||
| #define REPN(x,N) REPNX(x,N) | ||||
|  |  | |||
|  | @ -169,12 +169,10 @@ void mine(RandomX::VirtualMachine* vm, std::atomic<int>& atomicNonce, AtomicHash | |||
| 		blake2b(hash, sizeof(hash), blockTemplate, sizeof(blockTemplate), nullptr, 0); | ||||
| 		int spIndex = ((uint8_t*)hash)[24] | ((((uint8_t*)hash)[25] & 15) << 8); | ||||
| 		vm->initializeScratchpad(scratchpad, spIndex); | ||||
| 		//vm->initializeProgram(hash);
 | ||||
| 		vm->setScratchpad(scratchpad); | ||||
| 		//dump((char*)((RandomX::CompiledVirtualMachine*)vm)->getProgram(), RandomX::CodeSize, "code-1337-jmp.txt");
 | ||||
| 		for (int chain = 0; chain < 16; ++chain) { | ||||
| 			vm->initializeProgram(hash); | ||||
| 			int segment = hash[3] & 3; | ||||
| 			vm->setScratchpad(scratchpad + segment * RandomX::ScratchpadSize / 4); | ||||
| 			vm->execute(); | ||||
| 			vm->getResult(nullptr, 0, hash); | ||||
| 		} | ||||
|  |  | |||
							
								
								
									
										760
									
								
								src/program.inc
									
										
									
									
									
								
							
							
						
						
									
										760
									
								
								src/program.inc
									
										
									
									
									
								
							|  | @ -10,54 +10,54 @@ | |||
| 	mulpd xmm6, xmm10 | ||||
| 	; IMUL_R r6, r3 | ||||
| 	imul r14, r11 | ||||
| 	; FPMUL_R e1, a0 | ||||
| 	mulpd xmm5, xmm8 | ||||
| 	; IROR_R r5, r3 | ||||
| 	; FPSUB_M f1, L1[r4] | ||||
| 	mov eax, r12d | ||||
| 	and eax, 16376 | ||||
| 	cvtdq2pd xmm12, qword ptr [rsi+rax] | ||||
| 	subpd xmm1, xmm12 | ||||
| 	; IROL_R r5, r3 | ||||
| 	mov ecx, r11d | ||||
| 	ror r13, cl | ||||
| 	rol r13, cl | ||||
| 	; FPMUL_R e2, a0 | ||||
| 	mulpd xmm6, xmm8 | ||||
| 	; FPNEG_R f3 | ||||
| 	xorps xmm3, xmm15 | ||||
| 	; FPSUB_R f3, a0 | ||||
| 	subpd xmm3, xmm8 | ||||
| 	; IXOR_R r0, r4 | ||||
| 	xor r8, r12 | ||||
| 	; ISMULH_R r3, r7 | ||||
| 	; ISMULH_M r3, L1[r7] | ||||
| 	mov ecx, r15d | ||||
| 	and ecx, 16376 | ||||
| 	mov rax, r11 | ||||
| 	imul r15 | ||||
| 	imul qword ptr [rsi+rcx] | ||||
| 	mov r11, rdx | ||||
| 	; FPSWAP_R f2 | ||||
| 	shufpd xmm2, xmm2, 1 | ||||
| 	; ISMULH_R r6, r0 | ||||
| 	mov rax, r14 | ||||
| 	imul r8 | ||||
| 	mov r14, rdx | ||||
| 	; IDIV_C r6, 1248528248 | ||||
| 	mov rax, 15864311168205210203 | ||||
| 	mul r14 | ||||
| 	shr rdx, 30 | ||||
| 	add r14, rdx | ||||
| 	; FPMUL_R e0, a2 | ||||
| 	mulpd xmm4, xmm10 | ||||
| 	; ISUB_R r3, r4 | ||||
| 	sub r11, r12 | ||||
| 	; IADD_RC r3, r4, -52260428 | ||||
| 	lea r11, [r11+r12-52260428] | ||||
| 	; IADD_R r7, -1138617760 | ||||
| 	add r15, -1138617760 | ||||
| 	; IROR_R r2, r6 | ||||
| 	; IROL_R r2, r6 | ||||
| 	mov ecx, r14d | ||||
| 	ror r10, cl | ||||
| 	; FPMUL_R e2, a1 | ||||
| 	mulpd xmm6, xmm9 | ||||
| 	rol r10, cl | ||||
| 	; FPNEG_R f2 | ||||
| 	xorps xmm2, xmm15 | ||||
| 	; IROR_R r7, r1 | ||||
| 	mov ecx, r9d | ||||
| 	ror r15, cl | ||||
| 	; COND_M r2, lt(L1[r7], -41618808) | ||||
| 	; COND_R r2, lt(r7, -41618808) | ||||
| 	xor ecx, ecx | ||||
| 	mov eax, r15d | ||||
| 	and eax, 16376 | ||||
| 	cmp dword ptr [rsi+rax], -41618808 | ||||
| 	cmp r15d, -41618808 | ||||
| 	setl cl | ||||
| 	add r10, rcx | ||||
| 	; FPMUL_M e3, L1[r0] | ||||
| 	mov eax, r8d | ||||
| 	and eax, 16376 | ||||
| 	cvtdq2pd xmm12, qword ptr [rsi+rax] | ||||
| 	mulpd xmm7, xmm12 | ||||
| 	maxpd xmm7, xmm13 | ||||
| 	; FPMUL_R e3, a0 | ||||
| 	mulpd xmm7, xmm8 | ||||
| 	; CFROUND r1, 43 | ||||
| 	mov rax, r9 | ||||
| 	rol rax, 34 | ||||
|  | @ -67,14 +67,17 @@ | |||
| 	ldmxcsr dword ptr [rsp-8] | ||||
| 	; FPADD_R f2, a1 | ||||
| 	addpd xmm2, xmm9 | ||||
| 	; FPNEG_R f0 | ||||
| 	xorps xmm0, xmm15 | ||||
| 	; FSTORE L1[r6], f2 | ||||
| 	; FPSUB_M f0, L1[r7] | ||||
| 	mov eax, r15d | ||||
| 	and eax, 16376 | ||||
| 	cvtdq2pd xmm12, qword ptr [rsi+rax] | ||||
| 	subpd xmm0, xmm12 | ||||
| 	; ISTORE L1[r6], r2 | ||||
| 	mov eax, r14d | ||||
| 	and eax, 16368 | ||||
| 	movapd xmmword ptr [rsi+rax], xmm2 | ||||
| 	; IMUL_9C r6, -45112665 | ||||
| 	lea r14, [r14+r14*8-45112665] | ||||
| 	and eax, 16376 | ||||
| 	mov qword ptr [rsi+rax], r10 | ||||
| 	; ISUB_R r6, r5 | ||||
| 	sub r14, r13 | ||||
| 	; IADD_M r0, L1[r4] | ||||
| 	mov eax, r12d | ||||
| 	and eax, 16376 | ||||
|  | @ -87,41 +90,30 @@ | |||
| 	mov eax, r14d | ||||
| 	and eax, 16376 | ||||
| 	mov qword ptr [rsi+rax], r14 | ||||
| 	; COND_R r4, sg(r1, -1189096105) | ||||
| 	xor ecx, ecx | ||||
| 	cmp r9d, -1189096105 | ||||
| 	sets cl | ||||
| 	add r12, rcx | ||||
| 	; FPSQRT_R e0 | ||||
| 	sqrtpd xmm4, xmm4 | ||||
| 	; IXOR_R r2, r5 | ||||
| 	xor r10, r13 | ||||
| 	; COND_R r1, be(r5, -965180434) | ||||
| 	xor ecx, ecx | ||||
| 	cmp r13d, -965180434 | ||||
| 	setbe cl | ||||
| 	add r9, rcx | ||||
| 	; FPMUL_M e1, L2[r3] | ||||
| 	mov eax, r11d | ||||
| 	and eax, 262136 | ||||
| 	cvtdq2pd xmm12, qword ptr [rsi+rax] | ||||
| 	mulpd xmm5, xmm12 | ||||
| 	maxpd xmm5, xmm13 | ||||
| 	; FPSQRT_R e1 | ||||
| 	sqrtpd xmm5, xmm5 | ||||
| 	; FPMUL_R e1, a3 | ||||
| 	mulpd xmm5, xmm11 | ||||
| 	; IMULH_R r7, r6 | ||||
| 	mov rax, r15 | ||||
| 	mul r14 | ||||
| 	mov r15, rdx | ||||
| 	; ISMULH_M r0, L1[r4] | ||||
| 	mov ecx, r12d | ||||
| 	and ecx, 16376 | ||||
| 	mov rax, r8 | ||||
| 	imul qword ptr [rsi+rcx] | ||||
| 	mov r8, rdx | ||||
| 	; ISDIV_C r0, -1706892622 | ||||
| 	mov rax, -5802075764249827661 | ||||
| 	imul r8 | ||||
| 	xor eax, eax | ||||
| 	sar rdx, 29 | ||||
| 	sets al | ||||
| 	add rdx, rax | ||||
| 	add r8, rdx | ||||
| 	; IMUL_R r5, r3 | ||||
| 	imul r13, r11 | ||||
| 	; COND_R r2, of(r0, -1045938770) | ||||
| 	xor ecx, ecx | ||||
| 	cmp r8d, -1045938770 | ||||
| 	seto cl | ||||
| 	add r10, rcx | ||||
| 	; FPSQRT_R e2 | ||||
| 	sqrtpd xmm6, xmm6 | ||||
| 	; FPADD_M f3, L1[r4] | ||||
| 	mov eax, r12d | ||||
| 	and eax, 16376 | ||||
|  | @ -131,18 +123,19 @@ | |||
| 	add r11, r10 | ||||
| 	; FPADD_R f1, a0 | ||||
| 	addpd xmm1, xmm8 | ||||
| 	; FPSQRT_R e3 | ||||
| 	sqrtpd xmm7, xmm7 | ||||
| 	; FPDIV_R e3, a2 | ||||
| 	divpd xmm7, xmm10 | ||||
| 	maxpd xmm7, xmm13 | ||||
| 	; FPSUB_R f0, a1 | ||||
| 	subpd xmm0, xmm9 | ||||
| 	; IMUL_M r5, L1[r6] | ||||
| 	mov eax, r14d | ||||
| 	and eax, 16376 | ||||
| 	imul r13, qword ptr [rsi+rax] | ||||
| 	; ISUB_R r1, r2 | ||||
| 	sub r9, r10 | ||||
| 	; IMUL_R r4, r6 | ||||
| 	imul r12, r14 | ||||
| 	; IADD_RC r1, r2, -1263285243 | ||||
| 	lea r9, [r9+r10-1263285243] | ||||
| 	; IMUL_9C r4, 1994773931 | ||||
| 	lea r12, [r12+r12*8+1994773931] | ||||
| 	; FPSWAP_R e3 | ||||
| 	shufpd xmm7, xmm7, 1 | ||||
| 	; IMUL_M r0, L1[r7] | ||||
|  | @ -152,69 +145,72 @@ | |||
| 	; IROR_R r1, r6 | ||||
| 	mov ecx, r14d | ||||
| 	ror r9, cl | ||||
| 	; IROR_R r2, r4 | ||||
| 	; IROL_R r2, r4 | ||||
| 	mov ecx, r12d | ||||
| 	ror r10, cl | ||||
| 	rol r10, cl | ||||
| 	; FPSUB_R f3, a1 | ||||
| 	subpd xmm3, xmm9 | ||||
| 	; FSTORE L1[r0], e1 | ||||
| 	; ISTORE L1[r0], r5 | ||||
| 	mov eax, r8d | ||||
| 	and eax, 16368 | ||||
| 	movapd xmmword ptr [rsi+rax], xmm5 | ||||
| 	; COND_R r2, sg(r3, 1269153133) | ||||
| 	xor ecx, ecx | ||||
| 	cmp r11d, 1269153133 | ||||
| 	sets cl | ||||
| 	add r10, rcx | ||||
| 	and eax, 16376 | ||||
| 	mov qword ptr [rsi+rax], r13 | ||||
| 	; FPDIV_M e2, L2[r3] | ||||
| 	mov eax, r11d | ||||
| 	and eax, 262136 | ||||
| 	cvtdq2pd xmm12, qword ptr [rsi+rax] | ||||
| 	divpd xmm6, xmm12 | ||||
| 	maxpd xmm6, xmm13 | ||||
| 	; FPSWAP_R f2 | ||||
| 	shufpd xmm2, xmm2, 1 | ||||
| 	; IADD_R r7, r5 | ||||
| 	add r15, r13 | ||||
| 	; COND_R r0, be(r4, -1486502150) | ||||
| 	xor ecx, ecx | ||||
| 	cmp r12d, -1486502150 | ||||
| 	setbe cl | ||||
| 	add r8, rcx | ||||
| 	; FPSUB_R f3, a1 | ||||
| 	subpd xmm3, xmm9 | ||||
| 	; FPDIV_M e0, L1[r4] | ||||
| 	mov eax, r12d | ||||
| 	and eax, 16376 | ||||
| 	cvtdq2pd xmm12, qword ptr [rsi+rax] | ||||
| 	divpd xmm4, xmm12 | ||||
| 	maxpd xmm4, xmm13 | ||||
| 	; FPADD_M f3, L1[r5] | ||||
| 	mov eax, r13d | ||||
| 	and eax, 16376 | ||||
| 	cvtdq2pd xmm12, qword ptr [rsi+rax] | ||||
| 	addpd xmm3, xmm12 | ||||
| 	; FPADD_R f0, a3 | ||||
| 	addpd xmm0, xmm11 | ||||
| 	; IADD_R r2, r0 | ||||
| 	add r10, r8 | ||||
| 	; FSTORE L1[r3], e2 | ||||
| 	; ISTORE L1[r3], r6 | ||||
| 	mov eax, r11d | ||||
| 	and eax, 16368 | ||||
| 	movapd xmmword ptr [rsi+rax], xmm6 | ||||
| 	; IXOR_R r1, r7 | ||||
| 	xor r9, r15 | ||||
| 	; IMUL_R r5, r7 | ||||
| 	imul r13, r15 | ||||
| 	and eax, 16376 | ||||
| 	mov qword ptr [rsi+rax], r14 | ||||
| 	; IROR_R r1, r7 | ||||
| 	mov ecx, r15d | ||||
| 	ror r9, cl | ||||
| 	; IMUL_9C r5, 301671287 | ||||
| 	lea r13, [r13+r13*8+301671287] | ||||
| 	; IXOR_R r7, 266992378 | ||||
| 	xor r15, 266992378 | ||||
| 	; COND_R r7, no(r4, 1983804692) | ||||
| 	xor ecx, ecx | ||||
| 	cmp r12d, 1983804692 | ||||
| 	setno cl | ||||
| 	add r15, rcx | ||||
| 	; FPSQRT_R e3 | ||||
| 	sqrtpd xmm7, xmm7 | ||||
| 	; IMUL_M r2, L2[r0] | ||||
| 	mov eax, r8d | ||||
| 	and eax, 262136 | ||||
| 	imul r10, qword ptr [rsi+rax] | ||||
| 	; FPDIV_R e3, a2 | ||||
| 	divpd xmm7, xmm10 | ||||
| 	maxpd xmm7, xmm13 | ||||
| 	; IMUL_M r0, L2[r6] | ||||
| 	mov eax, r14d | ||||
| 	and eax, 262136 | ||||
| 	imul r8, qword ptr [rsi+rax] | ||||
| 	; FPMUL_R e3, a2 | ||||
| 	mulpd xmm7, xmm10 | ||||
| 	; IMUL_R r0, r6 | ||||
| 	imul r8, r14 | ||||
| 	; ISTORE L1[r0], r7 | ||||
| 	mov eax, r8d | ||||
| 	and eax, 16376 | ||||
| 	mov qword ptr [rsi+rax], r15 | ||||
| 	; FPMUL_R e0, a1 | ||||
| 	mulpd xmm4, xmm9 | ||||
| 	; FPSUB_R f3, a1 | ||||
| 	subpd xmm3, xmm9 | ||||
| 	; FPNEG_R f0 | ||||
| 	xorps xmm0, xmm15 | ||||
| 	; FPADD_M f3, L1[r5] | ||||
| 	mov eax, r13d | ||||
| 	and eax, 16376 | ||||
| 	cvtdq2pd xmm12, qword ptr [rsi+rax] | ||||
| 	addpd xmm3, xmm12 | ||||
| 	; IROR_R r5, r4 | ||||
| 	mov ecx, r12d | ||||
| 	ror r13, cl | ||||
|  | @ -222,17 +218,20 @@ | |||
| 	mov eax, r15d | ||||
| 	and eax, 262136 | ||||
| 	mov qword ptr [rsi+rax], r10 | ||||
| 	; FPSWAP_R e2 | ||||
| 	shufpd xmm6, xmm6, 1 | ||||
| 	; FPADD_R f2, a3 | ||||
| 	addpd xmm2, xmm11 | ||||
| 	; FPADD_M f3, L1[r2] | ||||
| 	mov eax, r10d | ||||
| 	and eax, 16376 | ||||
| 	cvtdq2pd xmm12, qword ptr [rsi+rax] | ||||
| 	addpd xmm3, xmm12 | ||||
| 	; IDIV_C r5, 2218798981 | ||||
| 	mov rax, 17853839665672790751 | ||||
| 	mul r13 | ||||
| 	shr rdx, 31 | ||||
| 	; ISDIV_C r5, -2076168315 | ||||
| 	mov rax, -4770095103914078469 | ||||
| 	imul r13 | ||||
| 	xor eax, eax | ||||
| 	sar rdx, 29 | ||||
| 	sets al | ||||
| 	add rdx, rax | ||||
| 	add r13, rdx | ||||
| 	; IADD_RC r0, r4, -1321374359 | ||||
| 	lea r8, [r8+r12-1321374359] | ||||
|  | @ -250,28 +249,26 @@ | |||
| 	rol r15, cl | ||||
| 	; ISUB_R r2, r4 | ||||
| 	sub r10, r12 | ||||
| 	; IMULH_M r0, L1[12400] | ||||
| 	mov rax, r8 | ||||
| 	mul qword ptr [rsi+12400] | ||||
| 	mov r8, rdx | ||||
| 	; ISMULH_R r0, -1500893068 | ||||
| 	mov rax, -1500893068 | ||||
| 	imul r8 | ||||
| 	add r8, rdx | ||||
| 	; IADD_R r2, r3 | ||||
| 	add r10, r11 | ||||
| 	; COND_R r6, lt(r1, -1124202227) | ||||
| 	xor ecx, ecx | ||||
| 	cmp r9d, -1124202227 | ||||
| 	setl cl | ||||
| 	add r14, rcx | ||||
| 	; IROR_R r7, r4 | ||||
| 	; FPSQRT_R e2 | ||||
| 	sqrtpd xmm6, xmm6 | ||||
| 	; IROL_R r7, r4 | ||||
| 	mov ecx, r12d | ||||
| 	ror r15, cl | ||||
| 	rol r15, cl | ||||
| 	; IMUL_R r4, r2 | ||||
| 	imul r12, r10 | ||||
| 	; ISUB_R r3, r7 | ||||
| 	sub r11, r15 | ||||
| 	; IADD_R r2, r7 | ||||
| 	add r10, r15 | ||||
| 	; FPSQRT_R e3 | ||||
| 	sqrtpd xmm7, xmm7 | ||||
| 	; FPDIV_R e3, a0 | ||||
| 	divpd xmm7, xmm8 | ||||
| 	maxpd xmm7, xmm13 | ||||
| 	; ISUB_R r6, 540663146 | ||||
| 	sub r14, 540663146 | ||||
| 	; IROL_R r5, 58 | ||||
|  | @ -280,67 +277,65 @@ | |||
| 	addpd xmm2, xmm9 | ||||
| 	; FPADD_R f2, a2 | ||||
| 	addpd xmm2, xmm10 | ||||
| 	; FPSQRT_R e1 | ||||
| 	sqrtpd xmm5, xmm5 | ||||
| 	; FPDIV_R e1, a2 | ||||
| 	divpd xmm5, xmm10 | ||||
| 	maxpd xmm5, xmm13 | ||||
| 	; FPADD_R f1, a2 | ||||
| 	addpd xmm1, xmm10 | ||||
| 	; IADD_R r5, r3 | ||||
| 	add r13, r11 | ||||
| 	; IADD_M r7, L1[880] | ||||
| 	add r15, qword ptr [rsi+880] | ||||
| 	; IADD_R r7, -1780268176 | ||||
| 	add r15, -1780268176 | ||||
| 	; ISUB_R r7, r0 | ||||
| 	sub r15, r8 | ||||
| 	; ISTORE L2[r0], r7 | ||||
| 	mov eax, r8d | ||||
| 	and eax, 262136 | ||||
| 	mov qword ptr [rsi+rax], r15 | ||||
| 	; IDIV_C r2, 1014940364 | ||||
| 	mov rax, r10 | ||||
| 	shr rax, 2 | ||||
| 	mov rcx, 1219717022984988185 | ||||
| 	mul rcx | ||||
| 	shr rdx, 24 | ||||
| 	add r10, rdx | ||||
| 	; FPMUL_R e0, a2 | ||||
| 	mulpd xmm4, xmm10 | ||||
| 	; IDIV_C r2, 3059159304 | ||||
| 	mov rax, 12949335853590502915 | ||||
| 	mul r10 | ||||
| 	shr rdx, 31 | ||||
| 	add r10, rdx | ||||
| 	; INEG_R r2 | ||||
| 	neg r10 | ||||
| 	; FPNEG_R f0 | ||||
| 	xorps xmm0, xmm15 | ||||
| 	; INEG_R r2 | ||||
| 	neg r10 | ||||
| 	; IADD_R r0, r3 | ||||
| 	add r8, r11 | ||||
| 	; IMUL_9C r7, -2124093035 | ||||
| 	lea r15, [r15+r15*8-2124093035] | ||||
| 	; FPSUB_R f2, a0 | ||||
| 	subpd xmm2, xmm8 | ||||
| 	; FPDIV_R e0, a2 | ||||
| 	divpd xmm4, xmm10 | ||||
| 	; FPADD_M f2, L1[r0] | ||||
| 	mov eax, r8d | ||||
| 	and eax, 16376 | ||||
| 	cvtdq2pd xmm12, qword ptr [rsi+rax] | ||||
| 	addpd xmm2, xmm12 | ||||
| 	; FPMUL_M e0, L1[r6] | ||||
| 	mov eax, r14d | ||||
| 	and eax, 16376 | ||||
| 	cvtdq2pd xmm12, qword ptr [rsi+rax] | ||||
| 	mulpd xmm4, xmm12 | ||||
| 	maxpd xmm4, xmm13 | ||||
| 	; FPSUB_R f2, a3 | ||||
| 	subpd xmm2, xmm11 | ||||
| 	; IMUL_R r1, r2 | ||||
| 	imul r9, r10 | ||||
| 	; ISMULH_R r7, r5 | ||||
| 	mov rax, r15 | ||||
| 	imul r13 | ||||
| 	mov r15, rdx | ||||
| 	; IDIV_C r7, 3214009572 | ||||
| 	mov rax, 12325439725582798855 | ||||
| 	mul r15 | ||||
| 	shr rdx, 31 | ||||
| 	add r15, rdx | ||||
| 	; IMULH_R r3, r2 | ||||
| 	mov rax, r11 | ||||
| 	mul r10 | ||||
| 	mov r11, rdx | ||||
| 	; IXOR_M r1, L2[r0] | ||||
| 	mov eax, r8d | ||||
| 	and eax, 262136 | ||||
| 	xor r9, qword ptr [rsi+rax] | ||||
| 	; IROR_R r1, r0 | ||||
| 	mov ecx, r8d | ||||
| 	ror r9, cl | ||||
| 	; FPMUL_R e0, a1 | ||||
| 	mulpd xmm4, xmm9 | ||||
| 	; ISUB_R r4, 1456841848 | ||||
| 	sub r12, 1456841848 | ||||
| 	; IXOR_M r3, L2[r2] | ||||
| 	mov eax, r10d | ||||
| 	and eax, 262136 | ||||
| 	xor r11, qword ptr [rsi+rax] | ||||
| 	; IADD_RC r4, r4, 1456841848 | ||||
| 	lea r12, [r12+r12+1456841848] | ||||
| 	; IROR_R r3, r2 | ||||
| 	mov ecx, r10d | ||||
| 	ror r11, cl | ||||
| 	; COND_M r0, of(L1[r4], 1678513610) | ||||
| 	xor ecx, ecx | ||||
| 	mov eax, r12d | ||||
|  | @ -348,446 +343,39 @@ | |||
| 	cmp dword ptr [rsi+rax], 1678513610 | ||||
| 	seto cl | ||||
| 	add r8, rcx | ||||
| 	; IDIV_C r4, 2674394209 | ||||
| 	mov rax, 925772300223658071 | ||||
| 	mul r12 | ||||
| 	shr rdx, 27 | ||||
| 	add r12, rdx | ||||
| 	; INEG_R r4 | ||||
| 	neg r12 | ||||
| 	; IMUL_R r4, r1 | ||||
| 	imul r12, r9 | ||||
| 	; FPADD_R f1, a2 | ||||
| 	addpd xmm1, xmm10 | ||||
| 	; FPSUB_R f2, a0 | ||||
| 	subpd xmm2, xmm8 | ||||
| 	; FPMUL_M e1, L2[r6] | ||||
| 	mov eax, r14d | ||||
| 	and eax, 262136 | ||||
| 	cvtdq2pd xmm12, qword ptr [rsi+rax] | ||||
| 	mulpd xmm5, xmm12 | ||||
| 	maxpd xmm5, xmm13 | ||||
| 	; FPSUB_M f0, L2[r3] | ||||
| 	mov eax, r11d | ||||
| 	and eax, 262136 | ||||
| 	cvtdq2pd xmm12, qword ptr [rsi+rax] | ||||
| 	subpd xmm0, xmm12 | ||||
| 	; FPMUL_R e1, a2 | ||||
| 	mulpd xmm5, xmm10 | ||||
| 	; FPSUB_R f0, a3 | ||||
| 	subpd xmm0, xmm11 | ||||
| 	; IROR_R r0, r7 | ||||
| 	mov ecx, r15d | ||||
| 	ror r8, cl | ||||
| 	; FSTORE L2[r1], e0 | ||||
| 	; ISTORE L2[r1], r4 | ||||
| 	mov eax, r9d | ||||
| 	and eax, 262128 | ||||
| 	movapd xmmword ptr [rsi+rax], xmm4 | ||||
| 	; IROR_R r7, r6 | ||||
| 	and eax, 262136 | ||||
| 	mov qword ptr [rsi+rax], r12 | ||||
| 	; IROL_R r7, r6 | ||||
| 	mov ecx, r14d | ||||
| 	ror r15, cl | ||||
| 	rol r15, cl | ||||
| 	; IMUL_9C r2, 266593902 | ||||
| 	lea r10, [r10+r10*8+266593902] | ||||
| 	; IMUL_R r4, r6 | ||||
| 	imul r12, r14 | ||||
| 	; FPSUB_R f2, a2 | ||||
| 	subpd xmm2, xmm10 | ||||
| 	; FPMUL_R e3, a0 | ||||
| 	mulpd xmm7, xmm8 | ||||
| 	; IXOR_M r7, L1[r2] | ||||
| 	mov eax, r10d | ||||
| 	and eax, 16376 | ||||
| 	xor r15, qword ptr [rsi+rax] | ||||
| 	; FPNEG_R f3 | ||||
| 	xorps xmm3, xmm15 | ||||
| 	; IROR_R r7, r2 | ||||
| 	mov ecx, r10d | ||||
| 	ror r15, cl | ||||
| 	; IROR_R r0, r5 | ||||
| 	mov ecx, r13d | ||||
| 	ror r8, cl | ||||
| 	; FPADD_R f1, a2 | ||||
| 	addpd xmm1, xmm10 | ||||
| 	; FPSQRT_R e3 | ||||
| 	sqrtpd xmm7, xmm7 | ||||
| 	; FPADD_R f3, a1 | ||||
| 	addpd xmm3, xmm9 | ||||
| 	; FPADD_R f1, a0 | ||||
| 	addpd xmm1, xmm8 | ||||
| 	; COND_M r2, ge(L2[r2], -226330940) | ||||
| 	xor ecx, ecx | ||||
| 	mov eax, r10d | ||||
| 	and eax, 262136 | ||||
| 	cmp dword ptr [rsi+rax], -226330940 | ||||
| 	setge cl | ||||
| 	add r10, rcx | ||||
| 	; FPDIV_R e2, a3 | ||||
| 	divpd xmm6, xmm11 | ||||
| 	maxpd xmm6, xmm13 | ||||
| 	; FPMUL_R e2, a1 | ||||
| 	mulpd xmm6, xmm9 | ||||
| 	; FPSUB_R f1, a0 | ||||
| 	subpd xmm1, xmm8 | ||||
| 	; IMUL_R r7, r5 | ||||
| 	imul r15, r13 | ||||
| 	; IMUL_R r0, r1 | ||||
| 	imul r8, r9 | ||||
| 	; FPSUB_R f3, a1 | ||||
| 	subpd xmm3, xmm9 | ||||
| 	; IROL_R r3, r5 | ||||
| 	mov ecx, r13d | ||||
| 	rol r11, cl | ||||
| 	; IADD_RC r5, r2, 795784298 | ||||
| 	lea r13, [r13+r10+795784298] | ||||
| 	; ISUB_R r0, r4 | ||||
| 	sub r8, r12 | ||||
| 	; IMUL_R r5, r4 | ||||
| 	imul r13, r12 | ||||
| 	; FPSUB_R f0, a2 | ||||
| 	subpd xmm0, xmm10 | ||||
| 	; FPMUL_R e3, a1 | ||||
| 	mulpd xmm7, xmm9 | ||||
| 	; ISDIV_C r3, 1662492575 | ||||
| 	mov rax, 2978515652703905219 | ||||
| 	imul r11 | ||||
| 	xor eax, eax | ||||
| 	sar rdx, 28 | ||||
| 	sets al | ||||
| 	add rdx, rax | ||||
| 	add r11, rdx | ||||
| 	; ISMULH_R r5, r0 | ||||
| 	mov rax, r13 | ||||
| 	imul r8 | ||||
| 	mov r13, rdx | ||||
| 	; ISDIV_C r4, 1963597892 | ||||
| 	mov rax, -8359627607928540073 | ||||
| 	imul r12 | ||||
| 	xor eax, eax | ||||
| 	add rdx, r12 | ||||
| 	sar rdx, 30 | ||||
| 	sets al | ||||
| 	add rdx, rax | ||||
| 	add r12, rdx | ||||
| 	; IMUL_R r7, r0 | ||||
| 	imul r15, r8 | ||||
| 	; IMULH_M r0, L1[r3] | ||||
| 	mov ecx, r11d | ||||
| 	and ecx, 16376 | ||||
| 	mov rax, r8 | ||||
| 	mul qword ptr [rsi+rcx] | ||||
| 	mov r8, rdx | ||||
| 	; IXOR_R r3, r7 | ||||
| 	xor r11, r15 | ||||
| 	; IDIV_C r4, 1146125335 | ||||
| 	mov rax, 8640870253760721727 | ||||
| 	mul r12 | ||||
| 	shr rdx, 29 | ||||
| 	add r12, rdx | ||||
| 	; FPSWAP_R f3 | ||||
| 	shufpd xmm3, xmm3, 1 | ||||
| 	; IXOR_M r2, L1[r0] | ||||
| 	mov eax, r8d | ||||
| 	and eax, 16376 | ||||
| 	xor r10, qword ptr [rsi+rax] | ||||
| 	; IROR_R r0, r1 | ||||
| 	mov ecx, r9d | ||||
| 	ror r8, cl | ||||
| 	; IXOR_R r7, r4 | ||||
| 	xor r15, r12 | ||||
| 	; ISMULH_R r6, r2 | ||||
| 	mov rax, r14 | ||||
| 	imul r10 | ||||
| 	mov r14, rdx | ||||
| 	; FPMUL_R e3, a2 | ||||
| 	mulpd xmm7, xmm10 | ||||
| 	; IADD_RC r4, r2, 1704868083 | ||||
| 	lea r12, [r12+r10+1704868083] | ||||
| 	; FPSUB_R f2, a0 | ||||
| 	subpd xmm2, xmm8 | ||||
| 	; ISTORE L1[r0], r0 | ||||
| 	mov eax, r8d | ||||
| 	and eax, 16376 | ||||
| 	mov qword ptr [rsi+rax], r8 | ||||
| 	; FPSUB_R f0, a3 | ||||
| 	subpd xmm0, xmm11 | ||||
| 	; FPDIV_R e0, a3 | ||||
| 	divpd xmm4, xmm11 | ||||
| 	maxpd xmm4, xmm13 | ||||
| 	; FPMUL_R e3, a2 | ||||
| 	mulpd xmm7, xmm10 | ||||
| 	; ISUB_R r7, 1302457878 | ||||
| 	sub r15, 1302457878 | ||||
| 	; IMUL_9C r1, 1330165941 | ||||
| 	lea r9, [r9+r9*8+1330165941] | ||||
| 	; FPMUL_R e1, a3 | ||||
| 	mulpd xmm5, xmm11 | ||||
| 	; IROL_R r0, r4 | ||||
| 	mov ecx, r12d | ||||
| 	rol r8, cl | ||||
| 	; FPSUB_M f1, L1[r0] | ||||
| 	mov eax, r8d | ||||
| 	and eax, 16376 | ||||
| 	cvtdq2pd xmm12, qword ptr [rsi+rax] | ||||
| 	subpd xmm1, xmm12 | ||||
| 	; IROL_R r5, r6 | ||||
| 	mov ecx, r14d | ||||
| 	rol r13, cl | ||||
| 	; COND_M r0, ab(L1[r1], -310933871) | ||||
| 	xor ecx, ecx | ||||
| 	mov eax, r9d | ||||
| 	and eax, 16376 | ||||
| 	cmp dword ptr [rsi+rax], -310933871 | ||||
| 	seta cl | ||||
| 	add r8, rcx | ||||
| 	; CFROUND r7, 39 | ||||
| 	mov rax, r15 | ||||
| 	rol rax, 38 | ||||
| 	and eax, 24576 | ||||
| 	or eax, 40896 | ||||
| 	mov dword ptr [rsp-8], eax | ||||
| 	ldmxcsr dword ptr [rsp-8] | ||||
| 	; FPDIV_R e0, a1 | ||||
| 	divpd xmm4, xmm9 | ||||
| 	maxpd xmm4, xmm13 | ||||
| 	; IMUL_M r1, L1[r3] | ||||
| 	mov eax, r11d | ||||
| 	and eax, 16376 | ||||
| 	imul r9, qword ptr [rsi+rax] | ||||
| 	; IMUL_9C r3, 1573236728 | ||||
| 	lea r11, [r11+r11*8+1573236728] | ||||
| 	; FPNEG_R f3 | ||||
| 	xorps xmm3, xmm15 | ||||
| 	; COND_R r1, lt(r4, -1805702334) | ||||
| 	xor ecx, ecx | ||||
| 	cmp r12d, -1805702334 | ||||
| 	setl cl | ||||
| 	add r9, rcx | ||||
| 	; FPSWAP_R f1 | ||||
| 	shufpd xmm1, xmm1, 1 | ||||
| 	; IADD_R r7, -1421188024 | ||||
| 	add r15, -1421188024 | ||||
| 	; FPMUL_R e3, a2 | ||||
| 	mulpd xmm7, xmm10 | ||||
| 	; FPSUB_M f2, L2[r7] | ||||
| 	mov eax, r15d | ||||
| 	and eax, 262136 | ||||
| 	cvtdq2pd xmm12, qword ptr [rsi+rax] | ||||
| 	subpd xmm2, xmm12 | ||||
| 	; FPSUB_R f3, a1 | ||||
| 	subpd xmm3, xmm9 | ||||
| 	; FPSQRT_R e1 | ||||
| 	sqrtpd xmm5, xmm5 | ||||
| 	; ISUB_R r2, r4 | ||||
| 	sub r10, r12 | ||||
| 	; ISMULH_R r4, r5 | ||||
| 	mov rax, r12 | ||||
| 	imul r13 | ||||
| 	mov r12, rdx | ||||
| 	; COND_R r1, of(r7, 1294727006) | ||||
| 	xor ecx, ecx | ||||
| 	cmp r15d, 1294727006 | ||||
| 	seto cl | ||||
| 	add r9, rcx | ||||
| 	; IADD_M r5, L2[r2] | ||||
| 	mov eax, r10d | ||||
| 	and eax, 262136 | ||||
| 	add r13, qword ptr [rsi+rax] | ||||
| 	; IMUL_9C r4, 401020510 | ||||
| 	lea r12, [r12+r12*8+401020510] | ||||
| 	; IROL_R r3, r0 | ||||
| 	mov ecx, r8d | ||||
| 	rol r11, cl | ||||
| 	; ISTORE L1[r7], r0 | ||||
| 	mov eax, r15d | ||||
| 	and eax, 16376 | ||||
| 	mov qword ptr [rsi+rax], r8 | ||||
| 	; FPSUB_R f2, a1 | ||||
| 	subpd xmm2, xmm9 | ||||
| 	; FPSQRT_R e3 | ||||
| 	sqrtpd xmm7, xmm7 | ||||
| 	; IMUL_R r3, 720965215 | ||||
| 	imul r11, 720965215 | ||||
| 	; IMUL_R r6, r2 | ||||
| 	imul r14, r10 | ||||
| 	; ISTORE L1[r7], r3 | ||||
| 	mov eax, r15d | ||||
| 	and eax, 16376 | ||||
| 	mov qword ptr [rsi+rax], r11 | ||||
| 	; IROR_R r2, r6 | ||||
| 	mov ecx, r14d | ||||
| 	ror r10, cl | ||||
| 	; FPSQRT_R e3 | ||||
| 	sqrtpd xmm7, xmm7 | ||||
| 	; IMUL_9C r4, 788211341 | ||||
| 	lea r12, [r12+r12*8+788211341] | ||||
| 	; IMUL_9C r3, -67993446 | ||||
| 	lea r11, [r11+r11*8-67993446] | ||||
| 	; FPSWAP_R e3 | ||||
| 	shufpd xmm7, xmm7, 1 | ||||
| 	; IMUL_M r2, L1[r6] | ||||
| 	mov eax, r14d | ||||
| 	and eax, 16376 | ||||
| 	imul r10, qword ptr [rsi+rax] | ||||
| 	; COND_M r2, ge(L1[r2], -1892157506) | ||||
| 	xor ecx, ecx | ||||
| 	mov eax, r10d | ||||
| 	and eax, 16376 | ||||
| 	cmp dword ptr [rsi+rax], -1892157506 | ||||
| 	setge cl | ||||
| 	add r10, rcx | ||||
| 	; FPADD_M f1, L1[r3] | ||||
| 	mov eax, r11d | ||||
| 	and eax, 16376 | ||||
| 	cvtdq2pd xmm12, qword ptr [rsi+rax] | ||||
| 	addpd xmm1, xmm12 | ||||
| 	; IADD_M r7, L1[r0] | ||||
| 	mov eax, r8d | ||||
| 	and eax, 16376 | ||||
| 	add r15, qword ptr [rsi+rax] | ||||
| 	; ISDIV_C r1, 624867857 | ||||
| 	mov rax, 7924491717200811467 | ||||
| 	imul r9 | ||||
| 	xor eax, eax | ||||
| 	sar rdx, 28 | ||||
| 	sets al | ||||
| 	add rdx, rax | ||||
| 	add r9, rdx | ||||
| 	; FPADD_R f0, a1 | ||||
| 	addpd xmm0, xmm9 | ||||
| 	; ISUB_R r5, r7 | ||||
| 	sub r13, r15 | ||||
| 	; FPNEG_R f0 | ||||
| 	xorps xmm0, xmm15 | ||||
| 	; IMUL_R r6, r2 | ||||
| 	imul r14, r10 | ||||
| 	; FPMUL_M e3, L1[r1] | ||||
| 	mov eax, r9d | ||||
| 	and eax, 16376 | ||||
| 	cvtdq2pd xmm12, qword ptr [rsi+rax] | ||||
| 	mulpd xmm7, xmm12 | ||||
| 	maxpd xmm7, xmm13 | ||||
| 	; IADD_R r0, r4 | ||||
| 	add r8, r12 | ||||
| 	; FPSUB_M f3, L1[r1] | ||||
| 	mov eax, r9d | ||||
| 	and eax, 16376 | ||||
| 	cvtdq2pd xmm12, qword ptr [rsi+rax] | ||||
| 	subpd xmm3, xmm12 | ||||
| 	; FPMUL_R e2, a0 | ||||
| 	mulpd xmm6, xmm8 | ||||
| 	; INEG_R r2 | ||||
| 	neg r10 | ||||
| 	; FPMUL_R e2, a2 | ||||
| 	mulpd xmm6, xmm10 | ||||
| 	; FPSUB_M f3, L1[r6] | ||||
| 	mov eax, r14d | ||||
| 	and eax, 16376 | ||||
| 	cvtdq2pd xmm12, qword ptr [rsi+rax] | ||||
| 	subpd xmm3, xmm12 | ||||
| 	; FPADD_R f1, a3 | ||||
| 	addpd xmm1, xmm11 | ||||
| 	; IMULH_R r3, r2 | ||||
| 	mov rax, r11 | ||||
| 	mul r10 | ||||
| 	mov r11, rdx | ||||
| 	; FPSUB_R f0, a3 | ||||
| 	subpd xmm0, xmm11 | ||||
| 	; IDIV_C r5, 2887845607 | ||||
| 	mov rax, 13717520480010955377 | ||||
| 	mul r13 | ||||
| 	shr rdx, 31 | ||||
| 	add r13, rdx | ||||
| 	; ISMULH_M r6, L1[r2] | ||||
| 	mov ecx, r10d | ||||
| 	and ecx, 16376 | ||||
| 	mov rax, r14 | ||||
| 	imul qword ptr [rsi+rcx] | ||||
| 	mov r14, rdx | ||||
| 	; FPSUB_R f3, a3 | ||||
| 	subpd xmm3, xmm11 | ||||
| 	; IMUL_M r6, L1[r7] | ||||
| 	mov eax, r15d | ||||
| 	and eax, 16376 | ||||
| 	imul r14, qword ptr [rsi+rax] | ||||
| 	; FPNEG_R f0 | ||||
| 	xorps xmm0, xmm15 | ||||
| 	; FPMUL_R e2, a0 | ||||
| 	mulpd xmm6, xmm8 | ||||
| 	; IMUL_9C r6, 295130073 | ||||
| 	lea r14, [r14+r14*8+295130073] | ||||
| 	; FPADD_R f1, a1 | ||||
| 	addpd xmm1, xmm9 | ||||
| 	; IXOR_R r0, r5 | ||||
| 	xor r8, r13 | ||||
| 	; FPADD_R f2, a1 | ||||
| 	addpd xmm2, xmm9 | ||||
| 	; FPSWAP_R e3 | ||||
| 	shufpd xmm7, xmm7, 1 | ||||
| 	; FPSQRT_R e3 | ||||
| 	sqrtpd xmm7, xmm7 | ||||
| 	; IADD_RC r3, r6, -1317630728 | ||||
| 	lea r11, [r11+r14-1317630728] | ||||
| 	; IMUL_M r2, L1[r3] | ||||
| 	mov eax, r11d | ||||
| 	and eax, 16376 | ||||
| 	imul r10, qword ptr [rsi+rax] | ||||
| 	; IADD_RC r1, r4, 894105694 | ||||
| 	lea r9, [r9+r12+894105694] | ||||
| 	; IMUL_R r7, r0 | ||||
| 	imul r15, r8 | ||||
| 	; FPSUB_R f1, a0 | ||||
| 	subpd xmm1, xmm8 | ||||
| 	; IMUL_M r7, L1[r1] | ||||
| 	mov eax, r9d | ||||
| 	and eax, 16376 | ||||
| 	imul r15, qword ptr [rsi+rax] | ||||
| 	; IXOR_R r2, r4 | ||||
| 	xor r10, r12 | ||||
| 	; ISUB_M r0, L1[r1] | ||||
| 	mov eax, r9d | ||||
| 	and eax, 16376 | ||||
| 	sub r8, qword ptr [rsi+rax] | ||||
| 	; INEG_R r4 | ||||
| 	neg r12 | ||||
| 	; IMUL_9C r4, -285272388 | ||||
| 	lea r12, [r12+r12*8-285272388] | ||||
| 	; IMUL_R r7, r4 | ||||
| 	imul r15, r12 | ||||
| 	; IMULH_M r5, L1[r7] | ||||
| 	mov ecx, r15d | ||||
| 	and ecx, 16376 | ||||
| 	mov rax, r13 | ||||
| 	mul qword ptr [rsi+rcx] | ||||
| 	mov r13, rdx | ||||
| 	; IROL_R r1, r7 | ||||
| 	mov ecx, r15d | ||||
| 	rol r9, cl | ||||
| 	; IXOR_R r4, -757532727 | ||||
| 	xor r12, -757532727 | ||||
| 	; IMUL_R r3, 1863959234 | ||||
| 	imul r11, 1863959234 | ||||
| 	; IROL_R r4, 59 | ||||
| 	rol r12, 59 | ||||
| 	; ISMULH_R r1, 2122681086 | ||||
| 	mov rax, 2122681086 | ||||
| 	imul r9 | ||||
| 	add r9, rdx | ||||
| 	; ISTORE L2[r6], r7 | ||||
| 	mov eax, r14d | ||||
| 	and eax, 262136 | ||||
| 	mov qword ptr [rsi+rax], r15 | ||||
| 	; ISTORE L1[r1], r5 | ||||
| 	mov eax, r9d | ||||
| 	and eax, 16376 | ||||
| 	mov qword ptr [rsi+rax], r13 | ||||
| 	; FPMUL_R e0, a1 | ||||
| 	mulpd xmm4, xmm9 | ||||
| 	; COND_R r2, ns(r1, 486049737) | ||||
| 	xor ecx, ecx | ||||
| 	cmp r9d, 486049737 | ||||
| 	setns cl | ||||
| 	add r10, rcx | ||||
| 	; FPMUL_M e0, L2[r7] | ||||
| 	mov eax, r15d | ||||
| 	and eax, 262136 | ||||
| 	cvtdq2pd xmm12, qword ptr [rsi+rax] | ||||
| 	mulpd xmm4, xmm12 | ||||
| 	maxpd xmm4, xmm13 | ||||
| 	; FPMUL_R e3, a2 | ||||
| 	mulpd xmm7, xmm10 | ||||
| 	; IROL_R r5, r2 | ||||
| 	mov ecx, r10d | ||||
| 	rol r13, cl | ||||
| 	; IADD_M r0, L1[r4] | ||||
| 	mov eax, r12d | ||||
| 	and eax, 16376 | ||||
| 	add r8, qword ptr [rsi+rax] | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue