mirror of
				https://git.wownero.com/wownero/RandomWOW.git
				synced 2024-08-15 00:23:14 +00:00 
			
		
		
		
	ARM64 JIT: don't use x18 register
				
					
				
			This commit is contained in:
		
							parent
							
								
									277791085c
								
							
						
					
					
						commit
						f72101aa2c
					
				
					 2 changed files with 75 additions and 77 deletions
				
			
		|  | @ -130,8 +130,8 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con | |||
| 	// and w16, w10, ScratchpadL3Mask64
 | ||||
| 	emit32(0x121A0000 | 16 | (10 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos); | ||||
| 
 | ||||
| 	// and w17, w18, ScratchpadL3Mask64
 | ||||
| 	emit32(0x121A0000 | 17 | (18 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos); | ||||
| 	// and w17, w20, ScratchpadL3Mask64
 | ||||
| 	emit32(0x121A0000 | 17 | (20 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos); | ||||
| 
 | ||||
| 	codePos = PrologueSize; | ||||
| 	literalPos = ImulRcpLiteralsEnd; | ||||
|  | @ -149,16 +149,16 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con | |||
| 	} | ||||
| 
 | ||||
| 	// Update spMix2
 | ||||
| 	// eor w18, config.readReg2, config.readReg3
 | ||||
| 	emit32(ARMV8A::EOR32 | 18 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos); | ||||
| 	// eor w20, config.readReg2, config.readReg3
 | ||||
| 	emit32(ARMV8A::EOR32 | 20 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos); | ||||
| 
 | ||||
| 	// Jump back to the main loop
 | ||||
| 	const uint32_t offset = (((uint8_t*)randomx_program_aarch64_vm_instructions_end) - ((uint8_t*)randomx_program_aarch64)) - codePos; | ||||
| 	emit32(ARMV8A::B | (offset / 4), code, codePos); | ||||
| 
 | ||||
| 	// and w18, w18, CacheLineAlignMask
 | ||||
| 	// and w20, w20, CacheLineAlignMask
 | ||||
| 	codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask1) - ((uint8_t*)randomx_program_aarch64)); | ||||
| 	emit32(0x121A0000 | 18 | (18 << 5) | ((Log2(RANDOMX_DATASET_BASE_SIZE) - 7) << 10), code, codePos); | ||||
| 	emit32(0x121A0000 | 20 | (20 << 5) | ((Log2(RANDOMX_DATASET_BASE_SIZE) - 7) << 10), code, codePos); | ||||
| 
 | ||||
| 	// and w10, w10, CacheLineAlignMask
 | ||||
| 	codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask2) - ((uint8_t*)randomx_program_aarch64)); | ||||
|  | @ -181,8 +181,8 @@ void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration | |||
| 	// and w16, w10, ScratchpadL3Mask64
 | ||||
| 	emit32(0x121A0000 | 16 | (10 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos); | ||||
| 
 | ||||
| 	// and w17, w18, ScratchpadL3Mask64
 | ||||
| 	emit32(0x121A0000 | 17 | (18 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos); | ||||
| 	// and w17, w20, ScratchpadL3Mask64
 | ||||
| 	emit32(0x121A0000 | 17 | (20 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos); | ||||
| 
 | ||||
| 	codePos = PrologueSize; | ||||
| 	literalPos = ImulRcpLiteralsEnd; | ||||
|  | @ -200,8 +200,8 @@ void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration | |||
| 	} | ||||
| 
 | ||||
| 	// Update spMix2
 | ||||
| 	// eor w18, config.readReg2, config.readReg3
 | ||||
| 	emit32(ARMV8A::EOR32 | 18 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos); | ||||
| 	// eor w20, config.readReg2, config.readReg3
 | ||||
| 	emit32(ARMV8A::EOR32 | 20 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos); | ||||
| 
 | ||||
| 	// Jump back to the main loop
 | ||||
| 	const uint32_t offset = (((uint8_t*)randomx_program_aarch64_vm_instructions_end_light) - ((uint8_t*)randomx_program_aarch64)) - codePos; | ||||
|  | @ -434,7 +434,7 @@ void JitCompilerA64::emitAddImmediate(uint32_t dst, uint32_t src, uint32_t imm, | |||
| 	} | ||||
| 	else | ||||
| 	{ | ||||
| 		constexpr uint32_t tmp_reg = 18; | ||||
| 		constexpr uint32_t tmp_reg = 20; | ||||
| 		emitMovImmediate(tmp_reg, imm, code, k); | ||||
| 
 | ||||
| 		// add dst, src, tmp_reg
 | ||||
|  | @ -483,7 +483,7 @@ void JitCompilerA64::emitMemLoadFP(uint32_t src, Instruction& instr, uint8_t* co | |||
| 	uint32_t k = codePos; | ||||
| 
 | ||||
| 	uint32_t imm = instr.getImm32(); | ||||
| 	constexpr uint32_t tmp_reg = 18; | ||||
| 	constexpr uint32_t tmp_reg = 19; | ||||
| 
 | ||||
| 	imm &= instr.getModMem() ? (RANDOMX_SCRATCHPAD_L1 - 1) : (RANDOMX_SCRATCHPAD_L2 - 1); | ||||
| 	emitAddImmediate(tmp_reg, src, imm, code, k); | ||||
|  | @ -537,7 +537,7 @@ void JitCompilerA64::h_IADD_M(Instruction& instr, uint32_t& codePos) | |||
| 	const uint32_t src = IntRegMap[instr.src]; | ||||
| 	const uint32_t dst = IntRegMap[instr.dst]; | ||||
| 
 | ||||
| 	constexpr uint32_t tmp_reg = 18; | ||||
| 	constexpr uint32_t tmp_reg = 20; | ||||
| 	emitMemLoad<tmp_reg>(dst, src, instr, code, k); | ||||
| 
 | ||||
| 	// add dst, dst, tmp_reg
 | ||||
|  | @ -575,7 +575,7 @@ void JitCompilerA64::h_ISUB_M(Instruction& instr, uint32_t& codePos) | |||
| 	const uint32_t src = IntRegMap[instr.src]; | ||||
| 	const uint32_t dst = IntRegMap[instr.dst]; | ||||
| 
 | ||||
| 	constexpr uint32_t tmp_reg = 18; | ||||
| 	constexpr uint32_t tmp_reg = 20; | ||||
| 	emitMemLoad<tmp_reg>(dst, src, instr, code, k); | ||||
| 
 | ||||
| 	// sub dst, dst, tmp_reg
 | ||||
|  | @ -594,7 +594,7 @@ void JitCompilerA64::h_IMUL_R(Instruction& instr, uint32_t& codePos) | |||
| 
 | ||||
| 	if (src == dst) | ||||
| 	{ | ||||
| 		src = 18; | ||||
| 		src = 20; | ||||
| 		emitMovImmediate(src, instr.getImm32(), code, k); | ||||
| 	} | ||||
| 
 | ||||
|  | @ -612,7 +612,7 @@ void JitCompilerA64::h_IMUL_M(Instruction& instr, uint32_t& codePos) | |||
| 	const uint32_t src = IntRegMap[instr.src]; | ||||
| 	const uint32_t dst = IntRegMap[instr.dst]; | ||||
| 
 | ||||
| 	constexpr uint32_t tmp_reg = 18; | ||||
| 	constexpr uint32_t tmp_reg = 20; | ||||
| 	emitMemLoad<tmp_reg>(dst, src, instr, code, k); | ||||
| 
 | ||||
| 	// mul dst, dst, tmp_reg
 | ||||
|  | @ -643,7 +643,7 @@ void JitCompilerA64::h_IMULH_M(Instruction& instr, uint32_t& codePos) | |||
| 	const uint32_t src = IntRegMap[instr.src]; | ||||
| 	const uint32_t dst = IntRegMap[instr.dst]; | ||||
| 
 | ||||
| 	constexpr uint32_t tmp_reg = 18; | ||||
| 	constexpr uint32_t tmp_reg = 20; | ||||
| 	emitMemLoad<tmp_reg>(dst, src, instr, code, k); | ||||
| 
 | ||||
| 	// umulh dst, dst, tmp_reg
 | ||||
|  | @ -674,7 +674,7 @@ void JitCompilerA64::h_ISMULH_M(Instruction& instr, uint32_t& codePos) | |||
| 	const uint32_t src = IntRegMap[instr.src]; | ||||
| 	const uint32_t dst = IntRegMap[instr.dst]; | ||||
| 
 | ||||
| 	constexpr uint32_t tmp_reg = 18; | ||||
| 	constexpr uint32_t tmp_reg = 20; | ||||
| 	emitMemLoad<tmp_reg>(dst, src, instr, code, k); | ||||
| 
 | ||||
| 	// smulh dst, dst, tmp_reg
 | ||||
|  | @ -692,7 +692,7 @@ void JitCompilerA64::h_IMUL_RCP(Instruction& instr, uint32_t& codePos) | |||
| 
 | ||||
| 	uint32_t k = codePos; | ||||
| 
 | ||||
| 	constexpr uint32_t tmp_reg = 18; | ||||
| 	constexpr uint32_t tmp_reg = 20; | ||||
| 	const uint32_t dst = IntRegMap[instr.dst]; | ||||
| 
 | ||||
| 	constexpr uint64_t N = 1ULL << 63; | ||||
|  | @ -711,9 +711,9 @@ void JitCompilerA64::h_IMUL_RCP(Instruction& instr, uint32_t& codePos) | |||
| 	literalPos -= sizeof(uint64_t); | ||||
| 	*(uint64_t*)(code + literalPos) = (q << shift) + ((r << shift) / divisor); | ||||
| 
 | ||||
| 	if (literal_id < 13) | ||||
| 	if (literal_id < 12) | ||||
| 	{ | ||||
| 		static constexpr uint32_t literal_regs[13] = { 30 << 16, 29 << 16, 28 << 16, 27 << 16, 26 << 16, 25 << 16, 24 << 16, 23 << 16, 22 << 16, 21 << 16, 20 << 16, 11 << 16, 0 }; | ||||
| 		static constexpr uint32_t literal_regs[12] = { 30 << 16, 29 << 16, 28 << 16, 27 << 16, 26 << 16, 25 << 16, 24 << 16, 23 << 16, 22 << 16, 21 << 16, 11 << 16, 0 }; | ||||
| 
 | ||||
| 		// mul dst, dst, literal_reg
 | ||||
| 		emit32(ARMV8A::MUL | dst | (dst << 5) | literal_regs[literal_id], code, k); | ||||
|  | @ -751,7 +751,7 @@ void JitCompilerA64::h_IXOR_R(Instruction& instr, uint32_t& codePos) | |||
| 
 | ||||
| 	if (src == dst) | ||||
| 	{ | ||||
| 		src = 18; | ||||
| 		src = 20; | ||||
| 		emitMovImmediate(src, instr.getImm32(), code, k); | ||||
| 	} | ||||
| 
 | ||||
|  | @ -769,7 +769,7 @@ void JitCompilerA64::h_IXOR_M(Instruction& instr, uint32_t& codePos) | |||
| 	const uint32_t src = IntRegMap[instr.src]; | ||||
| 	const uint32_t dst = IntRegMap[instr.dst]; | ||||
| 
 | ||||
| 	constexpr uint32_t tmp_reg = 18; | ||||
| 	constexpr uint32_t tmp_reg = 20; | ||||
| 	emitMemLoad<tmp_reg>(dst, src, instr, code, k); | ||||
| 
 | ||||
| 	// eor dst, dst, tmp_reg
 | ||||
|  | @ -807,7 +807,7 @@ void JitCompilerA64::h_IROL_R(Instruction& instr, uint32_t& codePos) | |||
| 
 | ||||
| 	if (src != dst) | ||||
| 	{ | ||||
| 		constexpr uint32_t tmp_reg = 18; | ||||
| 		constexpr uint32_t tmp_reg = 20; | ||||
| 
 | ||||
| 		// sub tmp_reg, xzr, src
 | ||||
| 		emit32(ARMV8A::SUB | tmp_reg | (31 << 5) | (src << 16), code, k); | ||||
|  | @ -835,7 +835,7 @@ void JitCompilerA64::h_ISWAP_R(Instruction& instr, uint32_t& codePos) | |||
| 
 | ||||
| 	uint32_t k = codePos; | ||||
| 
 | ||||
| 	constexpr uint32_t tmp_reg = 18; | ||||
| 	constexpr uint32_t tmp_reg = 20; | ||||
| 	emit32(ARMV8A::MOV_REG | tmp_reg | (dst << 16), code, k); | ||||
| 	emit32(ARMV8A::MOV_REG | dst | (src << 16), code, k); | ||||
| 	emit32(ARMV8A::MOV_REG | src | (tmp_reg << 16), code, k); | ||||
|  | @ -984,7 +984,7 @@ void JitCompilerA64::h_CFROUND(Instruction& instr, uint32_t& codePos) | |||
| 
 | ||||
| 	const uint32_t src = IntRegMap[instr.src]; | ||||
| 
 | ||||
| 	constexpr uint32_t tmp_reg = 18; | ||||
| 	constexpr uint32_t tmp_reg = 20; | ||||
| 	constexpr uint32_t fpcr_tmp_reg = 8; | ||||
| 
 | ||||
| 	// ror tmp_reg, src, imm
 | ||||
|  | @ -1008,7 +1008,7 @@ void JitCompilerA64::h_ISTORE(Instruction& instr, uint32_t& codePos) | |||
| 
 | ||||
| 	const uint32_t src = IntRegMap[instr.src]; | ||||
| 	const uint32_t dst = IntRegMap[instr.dst]; | ||||
| 	constexpr uint32_t tmp_reg = 18; | ||||
| 	constexpr uint32_t tmp_reg = 20; | ||||
| 
 | ||||
| 	uint32_t imm = instr.getImm32(); | ||||
| 
 | ||||
|  |  | |||
|  | @ -74,9 +74,9 @@ | |||
| # x15 -> "r7" | ||||
| # x16 -> spAddr0 | ||||
| # x17 -> spAddr1 | ||||
| # x18 -> temporary | ||||
| # x18 -> unused (platform register, don't touch it) | ||||
| # x19 -> temporary | ||||
| # x20 -> literal for IMUL_RCP | ||||
| # x20 -> temporary | ||||
| # x21 -> literal for IMUL_RCP | ||||
| # x22 -> literal for IMUL_RCP | ||||
| # x23 -> literal for IMUL_RCP | ||||
|  | @ -111,7 +111,7 @@ DECL(randomx_program_aarch64): | |||
| 	# Save callee-saved registers | ||||
| 	sub	sp, sp, 192 | ||||
| 	stp	x16, x17, [sp] | ||||
| 	stp	x18, x19, [sp, 16] | ||||
| 	str	x19, [sp, 16] | ||||
| 	stp	x20, x21, [sp, 32] | ||||
| 	stp	x22, x23, [sp, 48] | ||||
| 	stp	x24, x25, [sp, 64] | ||||
|  | @ -166,7 +166,6 @@ DECL(randomx_program_aarch64): | |||
| 	# Read literals | ||||
| 	ldr	x0, literal_x0 | ||||
| 	ldr	x11, literal_x11 | ||||
| 	ldr	x20, literal_x20 | ||||
| 	ldr	x21, literal_x21 | ||||
| 	ldr	x22, literal_x22 | ||||
| 	ldr	x23, literal_x23 | ||||
|  | @ -198,11 +197,11 @@ DECL(randomx_program_aarch64): | |||
| DECL(randomx_program_aarch64_main_loop): | ||||
| 	# spAddr0 = spMix1 & ScratchpadL3Mask64;
 | ||||
| 	# spAddr1 = (spMix1 >> 32) & ScratchpadL3Mask64;
 | ||||
| 	lsr	x18, x10, 32 | ||||
| 	lsr	x20, x10, 32 | ||||
| 
 | ||||
| 	# Actual mask will be inserted by JIT compiler | ||||
| 	and	w16, w10, 1 | ||||
| 	and	w17, w18, 1 | ||||
| 	and	w17, w20, 1 | ||||
| 
 | ||||
| 	# x16 = scratchpad + spAddr0 | ||||
| 	# x17 = scratchpad + spAddr1 | ||||
|  | @ -210,31 +209,31 @@ DECL(randomx_program_aarch64_main_loop): | |||
| 	add	x17, x17, x2 | ||||
| 
 | ||||
| 	# xor integer registers with scratchpad data (spAddr0) | ||||
| 	ldp	x18, x19, [x16] | ||||
| 	eor	x4, x4, x18 | ||||
| 	ldp	x20, x19, [x16] | ||||
| 	eor	x4, x4, x20 | ||||
| 	eor	x5, x5, x19 | ||||
| 	ldp	x18, x19, [x16, 16] | ||||
| 	eor	x6, x6, x18 | ||||
| 	ldp	x20, x19, [x16, 16] | ||||
| 	eor	x6, x6, x20 | ||||
| 	eor	x7, x7, x19 | ||||
| 	ldp	x18, x19, [x16, 32] | ||||
| 	eor	x12, x12, x18 | ||||
| 	ldp	x20, x19, [x16, 32] | ||||
| 	eor	x12, x12, x20 | ||||
| 	eor	x13, x13, x19 | ||||
| 	ldp	x18, x19, [x16, 48] | ||||
| 	eor	x14, x14, x18 | ||||
| 	ldp	x20, x19, [x16, 48] | ||||
| 	eor	x14, x14, x20 | ||||
| 	eor	x15, x15, x19 | ||||
| 
 | ||||
| 	# Load group F registers (spAddr1) | ||||
| 	ldpsw	x18, x19, [x17] | ||||
| 	ins	v16.d[0], x18 | ||||
| 	ldpsw	x20, x19, [x17] | ||||
| 	ins	v16.d[0], x20 | ||||
| 	ins	v16.d[1], x19 | ||||
| 	ldpsw	x18, x19, [x17, 8] | ||||
| 	ins	v17.d[0], x18 | ||||
| 	ldpsw	x20, x19, [x17, 8] | ||||
| 	ins	v17.d[0], x20 | ||||
| 	ins	v17.d[1], x19 | ||||
| 	ldpsw	x18, x19, [x17, 16] | ||||
| 	ins	v18.d[0], x18 | ||||
| 	ldpsw	x20, x19, [x17, 16] | ||||
| 	ins	v18.d[0], x20 | ||||
| 	ins	v18.d[1], x19 | ||||
| 	ldpsw	x18, x19, [x17, 24] | ||||
| 	ins	v19.d[0], x18 | ||||
| 	ldpsw	x20, x19, [x17, 24] | ||||
| 	ins	v19.d[0], x20 | ||||
| 	ins	v19.d[1], x19 | ||||
| 	scvtf	v16.2d, v16.2d | ||||
| 	scvtf	v17.2d, v17.2d | ||||
|  | @ -242,17 +241,17 @@ DECL(randomx_program_aarch64_main_loop): | |||
| 	scvtf	v19.2d, v19.2d | ||||
| 
 | ||||
| 	# Load group E registers (spAddr1) | ||||
| 	ldpsw	x18, x19, [x17, 32] | ||||
| 	ins	v20.d[0], x18 | ||||
| 	ldpsw	x20, x19, [x17, 32] | ||||
| 	ins	v20.d[0], x20 | ||||
| 	ins	v20.d[1], x19 | ||||
| 	ldpsw	x18, x19, [x17, 40] | ||||
| 	ins	v21.d[0], x18 | ||||
| 	ldpsw	x20, x19, [x17, 40] | ||||
| 	ins	v21.d[0], x20 | ||||
| 	ins	v21.d[1], x19 | ||||
| 	ldpsw	x18, x19, [x17, 48] | ||||
| 	ins	v22.d[0], x18 | ||||
| 	ldpsw	x20, x19, [x17, 48] | ||||
| 	ins	v22.d[0], x20 | ||||
| 	ins	v22.d[1], x19 | ||||
| 	ldpsw	x18, x19, [x17, 56] | ||||
| 	ins	v23.d[0], x18 | ||||
| 	ldpsw	x20, x19, [x17, 56] | ||||
| 	ins	v23.d[0], x20 | ||||
| 	ins	v23.d[1], x19 | ||||
| 	scvtf	v20.2d, v20.2d | ||||
| 	scvtf	v21.2d, v21.2d | ||||
|  | @ -276,7 +275,6 @@ DECL(randomx_program_aarch64_vm_instructions): | |||
| 
 | ||||
| literal_x0:  .fill 1,8,0 | ||||
| literal_x11: .fill 1,8,0 | ||||
| literal_x20: .fill 1,8,0 | ||||
| literal_x21: .fill 1,8,0 | ||||
| literal_x22: .fill 1,8,0 | ||||
| literal_x23: .fill 1,8,0 | ||||
|  | @ -312,17 +310,17 @@ DECL(randomx_program_aarch64_vm_instructions_end): | |||
| 	lsr	x10, x9, 32 | ||||
| 
 | ||||
| 	# mx ^= r[readReg2] ^ r[readReg3];
 | ||||
| 	eor	x9, x9, x18 | ||||
| 	eor	x9, x9, x20 | ||||
| 
 | ||||
| 	# Calculate dataset pointer for dataset prefetch | ||||
| 	mov	w18, w9 | ||||
| 	mov	w20, w9 | ||||
| DECL(randomx_program_aarch64_cacheline_align_mask1): | ||||
| 	# Actual mask will be inserted by JIT compiler | ||||
| 	and	x18, x18, 1 | ||||
| 	add	x18, x18, x1 | ||||
| 	and	x20, x20, 1 | ||||
| 	add	x20, x20, x1 | ||||
| 
 | ||||
| 	# Prefetch dataset data | ||||
| 	prfm	pldl2strm, [x18] | ||||
| 	prfm	pldl2strm, [x20] | ||||
| 
 | ||||
| 	# mx <-> ma | ||||
| 	ror	x9, x9, 32 | ||||
|  | @ -335,17 +333,17 @@ DECL(randomx_program_aarch64_cacheline_align_mask2): | |||
| DECL(randomx_program_aarch64_xor_with_dataset_line): | ||||
| rx_program_xor_with_dataset_line: | ||||
| 	# xor integer registers with dataset data | ||||
| 	ldp	x18, x19, [x10] | ||||
| 	eor	x4, x4, x18 | ||||
| 	ldp	x20, x19, [x10] | ||||
| 	eor	x4, x4, x20 | ||||
| 	eor	x5, x5, x19 | ||||
| 	ldp	x18, x19, [x10, 16] | ||||
| 	eor	x6, x6, x18 | ||||
| 	ldp	x20, x19, [x10, 16] | ||||
| 	eor	x6, x6, x20 | ||||
| 	eor	x7, x7, x19 | ||||
| 	ldp	x18, x19, [x10, 32] | ||||
| 	eor	x12, x12, x18 | ||||
| 	ldp	x20, x19, [x10, 32] | ||||
| 	eor	x12, x12, x20 | ||||
| 	eor	x13, x13, x19 | ||||
| 	ldp	x18, x19, [x10, 48] | ||||
| 	eor	x14, x14, x18 | ||||
| 	ldp	x20, x19, [x10, 48] | ||||
| 	eor	x14, x14, x20 | ||||
| 	eor	x15, x15, x19 | ||||
| 
 | ||||
| DECL(randomx_program_aarch64_update_spMix1): | ||||
|  | @ -388,7 +386,7 @@ DECL(randomx_program_aarch64_update_spMix1): | |||
| 
 | ||||
| 	# Restore callee-saved registers | ||||
| 	ldp	x16, x17, [sp] | ||||
| 	ldp	x18, x19, [sp, 16] | ||||
| 	ldr	x19, [sp, 16] | ||||
| 	ldp	x20, x21, [sp, 32] | ||||
| 	ldp	x22, x23, [sp, 48] | ||||
| 	ldp	x24, x25, [sp, 64] | ||||
|  | @ -409,7 +407,7 @@ DECL(randomx_program_aarch64_vm_instructions_end_light): | |||
| 	stp	x2, x30, [sp, 80] | ||||
| 
 | ||||
| 	# mx ^= r[readReg2] ^ r[readReg3];
 | ||||
| 	eor	x9, x9, x18 | ||||
| 	eor	x9, x9, x20 | ||||
| 
 | ||||
| 	# mx <-> ma | ||||
| 	ror	x9, x9, 32 | ||||
|  | @ -451,8 +449,8 @@ DECL(randomx_program_aarch64_light_dataset_offset): | |||
| # x3 -> end item | ||||
| 
 | ||||
| DECL(randomx_init_dataset_aarch64): | ||||
| 	# Save x30 (return address) | ||||
| 	str	x30, [sp, -16]! | ||||
| 	# Save x20 (used as temporary, but must be saved to not break ABI) and x30 (return address) | ||||
| 	stp	x20, x30, [sp, -16]! | ||||
| 
 | ||||
| 	# Load pointer to cache memory | ||||
| 	ldr	x0, [x0] | ||||
|  | @ -464,8 +462,8 @@ DECL(randomx_init_dataset_aarch64_main_loop): | |||
| 	cmp	x2, x3 | ||||
| 	bne	DECL(randomx_init_dataset_aarch64_main_loop) | ||||
| 
 | ||||
| 	# Restore x30 (return address) | ||||
| 	ldr	x30, [sp], 16 | ||||
| 	# Restore x20 and x30 | ||||
| 	ldp	x20, x30, [sp], 16 | ||||
| 
 | ||||
| 	ret | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue