2018-12-31 18:06:45 +00:00
|
|
|
;# Copyright (c) 2018 tevador
|
|
|
|
;#
|
|
|
|
;# This file is part of RandomX.
|
|
|
|
;#
|
|
|
|
;# RandomX is free software: you can redistribute it and/or modify
|
|
|
|
;# it under the terms of the GNU General Public License as published by
|
|
|
|
;# the Free Software Foundation, either version 3 of the License, or
|
|
|
|
;# (at your option) any later version.
|
|
|
|
;#
|
|
|
|
;# RandomX is distributed in the hope that it will be useful,
|
|
|
|
;# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
;# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
;# GNU General Public License for more details.
|
|
|
|
;#
|
|
|
|
;# You should have received a copy of the GNU General Public License
|
|
|
|
;# along with RandomX. If not, see <http://www.gnu.org/licenses/>.
|
2018-12-13 22:11:55 +00:00
|
|
|
|
2019-01-18 16:57:47 +00:00
|
|
|
IFDEF RAX
|
|
|
|
|
2019-01-04 18:44:15 +00:00
|
|
|
_RANDOMX_EXECUTE_PROGRAM SEGMENT PAGE READ EXECUTE
|
2018-12-13 22:11:55 +00:00
|
|
|
|
2019-01-04 18:44:15 +00:00
|
|
|
PUBLIC executeProgram
|
2018-12-13 22:11:55 +00:00
|
|
|
|
|
|
|
executeProgram PROC
	;# Arguments as received (see the annotated moves below):
	;#   rcx = RegisterFile& registerFile
	;#   rdx = pointer to two qwords: [rdx] = "mx"/"ma", [rdx+8] = dataset pointer
	;#   r8  = scratchpad pointer
	;#   r9  = loop counter
	;#
	;# REGISTER ALLOCATION:
	;#   rax   -> temporary
	;#   rbx   -> "ic"
	;#   rcx   -> temporary
	;#   rdx   -> temporary
	;#   rsi   -> scratchpad pointer
	;#   rdi   -> dataset pointer
	;#   rbp   -> "ma", "mx"
	;#   rsp   -> stack pointer
	;#   r8    -> "r0"
	;#   r9    -> "r1"
	;#   r10   -> "r2"
	;#   r11   -> "r3"
	;#   r12   -> "r4"
	;#   r13   -> "r5"
	;#   r14   -> "r6"
	;#   r15   -> "r7"
	;#   xmm0  -> "f0"
	;#   xmm1  -> "f1"
	;#   xmm2  -> "f2"
	;#   xmm3  -> "f3"
	;#   xmm4  -> "e0"
	;#   xmm5  -> "e1"
	;#   xmm6  -> "e2"
	;#   xmm7  -> "e3"
	;#   xmm8  -> "a0"
	;#   xmm9  -> "a1"
	;#   xmm10 -> "a2"
	;#   xmm11 -> "a3"
	;#   xmm12 -> temporary
	;#   xmm13 -> mantissa mask = 0x000fffffffffffff000fffffffffffff
	;#   xmm14 -> exponent 2**-240 = 0x30f000000000000030f0000000000000
	;#   xmm15 -> scale mask = 0x81f000000000000081f0000000000000

	;# store callee-saved general-purpose registers
	push	rbx
	push	rbp
	push	rdi
	push	rsi
	push	r12
	push	r13
	push	r14
	push	r15

	;# store xmm6-xmm15 in a single 160-byte frame; xmm6 ends up highest and
	;# xmm15 at [rsp]. movdqu makes no assumption about the alignment of rsp.
	sub	rsp, 160
	movdqu	xmmword ptr [rsp+144], xmm6
	movdqu	xmmword ptr [rsp+128], xmm7
	movdqu	xmmword ptr [rsp+112], xmm8
	movdqu	xmmword ptr [rsp+96], xmm9
	movdqu	xmmword ptr [rsp+80], xmm10
	movdqu	xmmword ptr [rsp+64], xmm11
	movdqu	xmmword ptr [rsp+48], xmm12
	movdqu	xmmword ptr [rsp+32], xmm13
	movdqu	xmmword ptr [rsp+16], xmm14
	movdqu	xmmword ptr [rsp+0], xmm15

	;# function arguments
	push	rcx				;# RegisterFile& registerFile (kept on the stack for rx_finish)
	mov	rbp, qword ptr [rdx]		;# "mx", "ma"
	mov	rdi, qword ptr [rdx+8]		;# uint8_t* dataset
	mov	rsi, r8				;# uint8_t* scratchpad
	mov	rbx, r9				;# loop counter

	mov	rax, rbp			;# rax feeds the first address computation at program_begin

	;# zero integer registers
	xor	r8, r8
	xor	r9, r9
	xor	r10, r10
	xor	r11, r11
	xor	r12, r12
	xor	r13, r13
	xor	r14, r14
	xor	r15, r15

	;# load constant registers
	;# rcx is advanced by 120 first, so the four loads below read
	;# registerFile+192, +208, +224 and +240 using displacements up to 120.
	lea	rcx, [rcx+120]
	movapd	xmm8, xmmword ptr [rcx+72]
	movapd	xmm9, xmmword ptr [rcx+88]
	movapd	xmm10, xmmword ptr [rcx+104]
	movapd	xmm11, xmmword ptr [rcx+120]
	movapd	xmm13, xmmword ptr [mantissaMask]
	movapd	xmm14, xmmword ptr [exp240]
	movapd	xmm15, xmmword ptr [scaleMask]

	;# skip over the constant data that sits between here and the loop
	jmp	program_begin
|
|
|
|
|
|
|
|
ALIGN 64
;# 16-byte constants read with movapd into xmm13, xmm14 and xmm15.
;# Each one repeats the same 64-bit pattern in both halves.
mantissaMask:
	dq	000fffffffffffffh, 000fffffffffffffh
exp240:
	dq	30f0000000000000h, 30f0000000000000h
scaleMask:
	dq	81f0000000000000h, 81f0000000000000h
|
2019-01-27 09:52:30 +00:00
|
|
|
|
|
|
|
ALIGN 64
program_begin:
	;# One loop iteration: read two scratchpad lines, run the included program,
	;# mix in one dataset line, write results back, then decrement ebx.

	;# first scratchpad line: XORed into r8-r15
	xor	rax, r8				;# read address register 1
	xor	rax, r10			;# second register mixed into the address
	mov	rdx, rax			;# copy: the upper 32 bits are used for the second line
	and	eax, 2097088			;# 2097088 = 1FFFC0h, a multiple of 64 below 2 MiB
	lea	rcx, [rsi+rax]
	push	rcx				;# pushed first, popped second (see write-back below)
	xor	r8, qword ptr [rcx+0]
	xor	r9, qword ptr [rcx+8]
	xor	r10, qword ptr [rcx+16]
	xor	r11, qword ptr [rcx+24]
	xor	r12, qword ptr [rcx+32]
	xor	r13, qword ptr [rcx+40]
	xor	r14, qword ptr [rcx+48]
	xor	r15, qword ptr [rcx+56]

	;# second scratchpad line: pairs of 32-bit integers converted to doubles in xmm0-xmm7
	ror	rdx, 32				;# move the upper 32 bits into edx
	and	edx, 2097088
	lea	rcx, [rsi+rdx]
	push	rcx				;# pushed second, popped first
	cvtdq2pd xmm0, qword ptr [rcx+0]
	cvtdq2pd xmm1, qword ptr [rcx+8]
	cvtdq2pd xmm2, qword ptr [rcx+16]
	cvtdq2pd xmm3, qword ptr [rcx+24]
	cvtdq2pd xmm4, qword ptr [rcx+32]
	cvtdq2pd xmm5, qword ptr [rcx+40]
	cvtdq2pd xmm6, qword ptr [rcx+48]
	cvtdq2pd xmm7, qword ptr [rcx+56]

	;# xmm4-xmm7: keep only the bits selected by xmm13, then OR in the xmm14 pattern
	andps	xmm4, xmm13
	orps	xmm4, xmm14
	andps	xmm5, xmm13
	orps	xmm5, xmm14
	andps	xmm6, xmm13
	orps	xmm6, xmm14
	andps	xmm7, xmm13
	orps	xmm7, xmm14

	;# 256 instructions
	include program.inc

IF 1
	mov	eax, r12d			;# read address register 1
	xor	eax, r15d			;# read address register 2
	xor	rbp, rax			;# modify "mx"
	xor	eax, eax			;# rax = 0 when the next iteration starts
	and	rbp, -64			;# align "mx" to the start of a cache line
	mov	edx, ebp			;# edx = mx
	prefetchnta byte ptr [rdi+rdx]
	ror	rbp, 32				;# swap "ma" and "mx"
	mov	edx, ebp			;# edx = ma
	lea	rcx, [rdi+rdx]			;# dataset cache line
	xor	r8, qword ptr [rcx+0]
	xor	r9, qword ptr [rcx+8]
	xor	r10, qword ptr [rcx+16]
	xor	r11, qword ptr [rcx+24]
	xor	r12, qword ptr [rcx+32]
	xor	r13, qword ptr [rcx+40]
	xor	r14, qword ptr [rcx+48]
	xor	r15, qword ptr [rcx+56]

	;# integer registers go to the address pushed second (the line read by cvtdq2pd)
	pop	rcx
	mov	qword ptr [rcx+0], r8
	mov	qword ptr [rcx+8], r9
	mov	qword ptr [rcx+16], r10
	mov	qword ptr [rcx+24], r11
	mov	qword ptr [rcx+32], r12
	mov	qword ptr [rcx+40], r13
	mov	qword ptr [rcx+48], r14
	mov	qword ptr [rcx+56], r15

	;# xmm0-xmm3 XOR xmm4-xmm7 go to the address pushed first
	pop	rcx
	xorpd	xmm0, xmm4
	movapd	xmmword ptr [rcx+0], xmm0
	xorpd	xmm1, xmm5
	movapd	xmmword ptr [rcx+16], xmm1
	xorpd	xmm2, xmm6
	movapd	xmmword ptr [rcx+32], xmm2
	xorpd	xmm3, xmm7
	movapd	xmmword ptr [rcx+48], xmm3
else
	;# not assembled while the condition above is "IF 1"
	;# memcpy trace from stack to scratchpad
	mov	rax, rsi			;# keep rsi/rdi in rax/rdx across the copy
	mov	rdx, rdi

	cld
	mov	rsi, rsp
	mov	rdi, rax
	mov	rcx, 1024			;# 1024 qwords = 8192 bytes

	rep movsq

	add	rsp, 8192

	pop	rcx				;# drop the two addresses pushed at program_begin
	pop	rcx

	mov	rsi, rax
	mov	rdi, rdx
endif

	sub	ebx, 1				;# only the low 32 bits of the loop counter are counted
	jnz	program_begin
|
|
|
|
|
2018-12-13 22:11:55 +00:00
|
|
|
rx_finish:
	;# Reached by falling out of the loop: store the VM registers, restore the
	;# callee-saved state and return.

	;# save VM register values
	pop	rcx				;# value pushed at function entry (registerFile)
	mov	qword ptr [rcx+0], r8
	mov	qword ptr [rcx+8], r9
	mov	qword ptr [rcx+16], r10
	mov	qword ptr [rcx+24], r11
	mov	qword ptr [rcx+32], r12
	mov	qword ptr [rcx+40], r13
	mov	qword ptr [rcx+48], r14
	mov	qword ptr [rcx+56], r15
	movdqa	xmmword ptr [rcx+64], xmm0
	movdqa	xmmword ptr [rcx+80], xmm1
	movdqa	xmmword ptr [rcx+96], xmm2
	movdqa	xmmword ptr [rcx+112], xmm3
	lea	rcx, [rcx+64]			;# the four stores below land at +128..+176 of the popped address
	movdqa	xmmword ptr [rcx+64], xmm4
	movdqa	xmmword ptr [rcx+80], xmm5
	movdqa	xmmword ptr [rcx+96], xmm6
	movdqa	xmmword ptr [rcx+112], xmm7

	;# load callee-saved registers
	;# xmm15 is at [rsp] and xmm6 at [rsp+144]; the whole 160-byte area is released at once
	movdqu	xmm15, xmmword ptr [rsp]
	movdqu	xmm14, xmmword ptr [rsp+16]
	movdqu	xmm13, xmmword ptr [rsp+32]
	movdqu	xmm12, xmmword ptr [rsp+48]
	movdqu	xmm11, xmmword ptr [rsp+64]
	movdqu	xmm10, xmmword ptr [rsp+80]
	movdqu	xmm9, xmmword ptr [rsp+96]
	movdqu	xmm8, xmmword ptr [rsp+112]
	movdqu	xmm7, xmmword ptr [rsp+128]
	movdqu	xmm6, xmmword ptr [rsp+144]
	add	rsp, 160

	;# general-purpose registers, in reverse order of the pushes at entry
	pop	r15
	pop	r14
	pop	r13
	pop	r12
	pop	rsi
	pop	rdi
	pop	rbp
	pop	rbx

	;# return
	ret
|
|
|
|
|
|
|
|
TransformAddress MACRO reg32, reg64
	;# Transforms the address in the register so that the transformed address
	;# lies in a different cache line than the original address (mod 2^N).
	;# This is done to prevent a load-store dependency.
	;# There are 3 different transformations that can be used: x -> 9*x+C, x -> x+C, x -> x^C
	;# Only the x -> x+C form is active; the other two are kept as commented-out
	;# alternatives. reg64 is referenced only by the commented-out lea.

	;lea reg32, [reg64+reg64*8+127]	;# C = -119 -110 -101 -92 -83 -74 -65 -55 -46 -37 -28 -19 -10 -1 9 18 27 36 45 54 63 73 82 91 100 109 118 127

	;# Emits the raw byte 40h (decimal 64) directly in front of the add.
	;# NOTE(review): 40h is an empty REX prefix in 64-bit mode; presumably used
	;# to lengthen the instruction by one byte -- confirm the intent.
	db	40h
	add	reg32, -39			;# C = all except -7 to +7

	;xor reg32, -8				;# C = all except 0 to 7
ENDM
|
|
|
|
|
2019-01-13 20:14:59 +00:00
|
|
|
ALIGN 64
rx_read:
	;# Dataset read helper. It is placed after the ret of executeProgram and
	;# ends with its own ret.
	;# NOTE(review): no caller is visible in this file; confirm whether
	;# program.inc still uses it.
	;#
	;# IN      eax = random 32-bit address
	;# GLOBAL  rdi = address of the dataset address
	;# GLOBAL  rsi = address of the scratchpad
	;# GLOBAL  rbp = low 32 bits = "mx", high 32 bits = "ma"
	;# MODIFY  rcx, rdx

	TransformAddress eax, rax		;# TransformAddress function
	mov	rcx, qword ptr [rdi]		;# load the dataset address
	xor	rbp, rax			;# modify "mx"

	;# prefetch cacheline "mx"
	and	rbp, -64			;# align "mx" to the start of a cache line
	mov	edx, ebp			;# edx = mx
	prefetchnta byte ptr [rcx+rdx]

	;# read cacheline "ma"
	ror	rbp, 32				;# swap "ma" and "mx"
	mov	edx, ebp			;# edx = ma
	lea	rcx, [rcx+rdx]			;# dataset cache line

	;# XOR the 64-byte line into r8-r15
	xor	r8, qword ptr [rcx+0]
	xor	r9, qword ptr [rcx+8]
	xor	r10, qword ptr [rcx+16]
	xor	r11, qword ptr [rcx+24]
	xor	r12, qword ptr [rcx+32]
	xor	r13, qword ptr [rcx+40]
	xor	r14, qword ptr [rcx+48]
	xor	r15, qword ptr [rcx+56]
	ret
|
2018-12-13 22:11:55 +00:00
|
|
|
executeProgram ENDP
|
|
|
|
|
2019-01-04 18:44:15 +00:00
|
|
|
_RANDOMX_EXECUTE_PROGRAM ENDS
|
|
|
|
|
2019-01-18 16:57:47 +00:00
|
|
|
ENDIF
|
|
|
|
|
2018-12-13 22:11:55 +00:00
|
|
|
END
|