Modified x86 register allocation

This commit is contained in:
tevador 2018-12-16 13:43:18 +01:00
parent 6332831ec1
commit 4f276541d2
3 changed files with 343 additions and 452 deletions

View file

@ -24,9 +24,9 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
namespace RandomX { namespace RandomX {
static const char* regR[8] = { "rbx", "r9", "r10", "r11", "r12", "r13", "r14", "r15" }; static const char* regR[8] = { "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" };
static const char* regR32[8] = { "ebx", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d" }; static const char* regR32[8] = { "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d" };
static const char* regF[8] = { "xmm8", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" }; static const char* regF[8] = { "xmm8", "xmm9", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" };
void AssemblyGeneratorX86::generateProgram(const void* seed) { void AssemblyGeneratorX86::generateProgram(const void* seed) {
asmCode.str(std::string()); //clear asmCode.str(std::string()); //clear
@ -149,8 +149,8 @@ namespace RandomX {
convertible_t bimm; convertible_t bimm;
bimm.f64 = (double)instr.imm1; bimm.f64 = (double)instr.imm1;
asmCode << "\tmov rax, " << bimm.i64 << std::endl; asmCode << "\tmov rax, " << bimm.i64 << std::endl;
asmCode << "\tmovd xmm9, rax" << std::endl; asmCode << "\tmovd xmm1, rax" << std::endl;
asmCode << "\t" << instrx86 << " xmm0, xmm9" << std::endl; asmCode << "\t" << instrx86 << " xmm0, xmm1" << std::endl;
return; return;
} }
} }
@ -262,12 +262,10 @@ namespace RandomX {
void AssemblyGeneratorX86::h_MULH_64(Instruction& instr, int i) { void AssemblyGeneratorX86::h_MULH_64(Instruction& instr, int i) {
gena(instr); gena(instr);
asmCode << "\tmov r8, rdx" << std::endl;
asmCode << "\tmov rcx, "; asmCode << "\tmov rcx, ";
genbr1(instr); genbr1(instr);
asmCode << "\tmul rcx" << std::endl; asmCode << "\tmul rcx" << std::endl;
asmCode << "\tmov rax, rdx" << std::endl; asmCode << "\tmov rax, rdx" << std::endl;
asmCode << "\tmov rdx, r8" << std::endl;
gencr(instr); gencr(instr);
} }
@ -295,18 +293,15 @@ namespace RandomX {
void AssemblyGeneratorX86::h_IMULH_64(Instruction& instr, int i) { void AssemblyGeneratorX86::h_IMULH_64(Instruction& instr, int i) {
gena(instr); gena(instr);
asmCode << "\tmov r8, rdx" << std::endl;
asmCode << "\tmov rcx, "; asmCode << "\tmov rcx, ";
genbr1(instr); genbr1(instr);
asmCode << "\timul rcx" << std::endl; asmCode << "\timul rcx" << std::endl;
asmCode << "\tmov rax, rdx" << std::endl; asmCode << "\tmov rax, rdx" << std::endl;
asmCode << "\tmov rdx, r8" << std::endl;
gencr(instr); gencr(instr);
} }
void AssemblyGeneratorX86::h_DIV_64(Instruction& instr, int i) { void AssemblyGeneratorX86::h_DIV_64(Instruction& instr, int i) {
gena(instr); gena(instr);
asmCode << "\tmov r8, rdx" << std::endl;
if ((instr.locb & 7) >= 6) { if ((instr.locb & 7) >= 6) {
if (instr.imm1 == 0) { if (instr.imm1 == 0) {
asmCode << "\tmov ecx, 1" << std::endl; asmCode << "\tmov ecx, 1" << std::endl;
@ -323,13 +318,11 @@ namespace RandomX {
} }
asmCode << "\txor edx, edx" << std::endl; asmCode << "\txor edx, edx" << std::endl;
asmCode << "\tdiv rcx" << std::endl; asmCode << "\tdiv rcx" << std::endl;
asmCode << "\tmov rdx, r8" << std::endl;
gencr(instr); gencr(instr);
} }
void AssemblyGeneratorX86::h_IDIV_64(Instruction& instr, int i) { void AssemblyGeneratorX86::h_IDIV_64(Instruction& instr, int i) {
gena(instr); gena(instr);
asmCode << "\tmov r8, rdx" << std::endl;
asmCode << "\tmov edx, "; asmCode << "\tmov edx, ";
genbr132(instr); genbr132(instr);
asmCode << "\tcmp edx, -1" << std::endl; asmCode << "\tcmp edx, -1" << std::endl;
@ -346,7 +339,6 @@ namespace RandomX {
asmCode << "\tcqo" << std::endl; asmCode << "\tcqo" << std::endl;
asmCode << "\tidiv rcx" << std::endl; asmCode << "\tidiv rcx" << std::endl;
asmCode << "result_idiv_" << i << ":" << std::endl; asmCode << "result_idiv_" << i << ":" << std::endl;
asmCode << "\tmov rdx, r8" << std::endl;
gencr(instr); gencr(instr);
} }

View file

@ -22,14 +22,14 @@ PUBLIC executeProgram
executeProgram PROC executeProgram PROC
; REGISTER ALLOCATION: ; REGISTER ALLOCATION:
; rax -> temporary ; rax -> temporary
; rbx -> "r0" ; rbx -> MemoryRegisters& memory
; rcx -> temporary ; rcx -> temporary
; rdx -> MemoryRegisters& memory ; rdx -> temporary
; rsi -> convertible_t& scratchpad ; rsi -> convertible_t& scratchpad
; rdi -> "ic" (instruction counter) ; rdi -> "ic" (instruction counter)
; rbp -> beginning of VM stack ; rbp -> beginning of VM stack
; rsp -> end of VM stack ; rsp -> end of VM stack
; r8 -> temporary ; r8 -> "r0"
; r9 -> "r1" ; r9 -> "r1"
; r10 -> "r2" ; r10 -> "r2"
; r11 -> "r3" ; r11 -> "r3"
@ -38,7 +38,7 @@ executeProgram PROC
; r14 -> "r6" ; r14 -> "r6"
; r15 -> "r7" ; r15 -> "r7"
; xmm0 -> temporary ; xmm0 -> temporary
; xmm1 -> "f1" ; xmm1 -> temporary
; xmm2 -> "f2" ; xmm2 -> "f2"
; xmm3 -> "f3" ; xmm3 -> "f3"
; xmm4 -> "f4" ; xmm4 -> "f4"
@ -46,7 +46,7 @@ executeProgram PROC
; xmm6 -> "f6" ; xmm6 -> "f6"
; xmm7 -> "f7" ; xmm7 -> "f7"
; xmm8 -> "f0" ; xmm8 -> "f0"
; xmm9 -> temporary ; xmm9 -> "f1"
; STACK STRUCTURE: ; STACK STRUCTURE:
; | ; |
@ -79,16 +79,16 @@ executeProgram PROC
movdqu xmmword ptr [rsp+0], xmm9 movdqu xmmword ptr [rsp+0], xmm9
; function arguments ; function arguments
push rcx ; RegisterFile& registerFile push rcx ; RegisterFile& registerFile
; mov rdx, rdx ; MemoryRegisters& memory mov rbx, rdx ; MemoryRegisters& memory
push r8 ; DatasetReadFunc readFunc push r8 ; DatasetReadFunc readFunc
mov rsi, r9 ; convertible_t& scratchpad mov rsi, r9 ; convertible_t& scratchpad
mov rbp, rsp ; beginning of VM stack mov rbp, rsp ; beginning of VM stack
mov rdi, 1048576 ; number of VM instructions to execute mov rdi, 1048576 ; number of VM instructions to execute
; load VM register values ; load VM register values
mov rbx, qword ptr [rcx+0] mov r8, qword ptr [rcx+0]
mov r9, qword ptr [rcx+8] mov r9, qword ptr [rcx+8]
mov r10, qword ptr [rcx+16] mov r10, qword ptr [rcx+16]
mov r11, qword ptr [rcx+24] mov r11, qword ptr [rcx+24]
@ -97,7 +97,7 @@ executeProgram PROC
mov r14, qword ptr [rcx+48] mov r14, qword ptr [rcx+48]
mov r15, qword ptr [rcx+56] mov r15, qword ptr [rcx+56]
movd xmm8, qword ptr [rcx+64] movd xmm8, qword ptr [rcx+64]
movd xmm1, qword ptr [rcx+72] movd xmm9, qword ptr [rcx+72]
movd xmm2, qword ptr [rcx+80] movd xmm2, qword ptr [rcx+80]
movd xmm3, qword ptr [rcx+88] movd xmm3, qword ptr [rcx+88]
movd xmm4, qword ptr [rcx+96] movd xmm4, qword ptr [rcx+96]
@ -116,7 +116,7 @@ rx_finish:
; save VM register values ; save VM register values
mov rcx, qword ptr [rbp+8] mov rcx, qword ptr [rbp+8]
mov qword ptr [rcx+0], rbx mov qword ptr [rcx+0], r8
mov qword ptr [rcx+8], r9 mov qword ptr [rcx+8], r9
mov qword ptr [rcx+16], r10 mov qword ptr [rcx+16], r10
mov qword ptr [rcx+24], r11 mov qword ptr [rcx+24], r11
@ -125,7 +125,7 @@ rx_finish:
mov qword ptr [rcx+48], r14 mov qword ptr [rcx+48], r14
mov qword ptr [rcx+56], r15 mov qword ptr [rcx+56], r15
movd qword ptr [rcx+64], xmm8 movd qword ptr [rcx+64], xmm8
movd qword ptr [rcx+72], xmm1 movd qword ptr [rcx+72], xmm9
movd qword ptr [rcx+80], xmm2 movd qword ptr [rcx+80], xmm2
movd qword ptr [rcx+88], xmm3 movd qword ptr [rcx+88], xmm3
movd qword ptr [rcx+96], xmm4 movd qword ptr [rcx+96], xmm4
@ -170,19 +170,18 @@ rx_read_dataset_light:
ret 0 ret 0
rx_read_dataset: rx_read_dataset:
mov r8d, dword ptr [rdx] ; ma mov edx, dword ptr [rbx] ; ma
mov rax, qword ptr [rdx+8] ; dataset mov rax, qword ptr [rbx+8] ; dataset
mov rax, qword ptr [rax+r8] mov rax, qword ptr [rax+rdx]
add dword ptr [rdx], 8 add dword ptr [rbx], 8
mov r8d, dword ptr [rdx+4] ; mx xor ecx, dword ptr [rbx+4] ; mx
xor ecx, r8d mov dword ptr [rbx+4], ecx
mov dword ptr [rdx+4], ecx
test ecx, 0FFF8h test ecx, 0FFF8h
jne short rx_read_dataset_full_ret jne short rx_read_dataset_full_ret
and ecx, -8 and ecx, -8
mov dword ptr [rdx], ecx mov dword ptr [rbx], ecx
mov r8, qword ptr [rdx+8] mov rdx, qword ptr [rbx+8]
prefetcht0 byte ptr [r8+rcx] prefetcht0 byte ptr [rdx+rcx]
rx_read_dataset_full_ret: rx_read_dataset_full_ret:
ret 0 ret 0
executeProgram ENDP executeProgram ENDP

File diff suppressed because it is too large. Load diff