mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2024-08-15 00:23:14 +00:00
Added magic division to JIT compiler
New B operand selection rules
This commit is contained in:
parent
451dfc5730
commit
2756bcdcfe
9 changed files with 1237 additions and 1136 deletions
|
@ -83,10 +83,10 @@ The `B.LOC.L` flag determines the B operand. It can be either a register or imme
|
|||
|
||||
|`B.LOC.L`|IA/DIV|IA/SHIFT|IA/MATH|FP|CL|
|
||||
|----|--------|----|------|----|---|
|
||||
|0|register|register|register|register|register|
|
||||
|0|register|`imm8`|`imm32`|register|register|
|
||||
|1|`imm32`|register|register|register|register|
|
||||
|2|`imm32`|`imm8`|register|register|register|
|
||||
|3|`imm32`|`imm8`|`imm32`|register|register|
|
||||
|3|`imm32`|register|register|register|register|
|
||||
|
||||
Integer instructions are split into 3 classes: integer division (IA/DIV), shift and rotate (IA/SHIFT) and other (IA/MATH). Floating point (FP) and control (CL) instructions always use a register operand.
|
||||
|
||||
|
|
|
@ -17,7 +17,7 @@ You should have received a copy of the GNU General Public License
|
|||
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
//#define TRACE
|
||||
//#define MAGIC_DIVISION
|
||||
#define MAGIC_DIVISION
|
||||
#include "AssemblyGeneratorX86.hpp"
|
||||
#include "Pcg32.hpp"
|
||||
#include "common.hpp"
|
||||
|
@ -64,108 +64,61 @@ namespace RandomX {
|
|||
(this->*generator)(instr, i);
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::genar(Instruction& instr, int i) {
|
||||
void AssemblyGeneratorX86::gena(Instruction& instr, int i) {
|
||||
asmCode << "\txor " << regR[instr.rega % RegistersCount] << ", 0" << std::hex << instr.addra << "h" << std::dec << std::endl;
|
||||
asmCode << "\tmov ecx, " << regR32[instr.rega % RegistersCount] << std::endl;
|
||||
asmCode << "\ttest " << regIc8 << ", 63" << std::endl;
|
||||
asmCode << "\tjnz short rx_body_" << i << std::endl;
|
||||
switch (instr.loca & 3)
|
||||
{
|
||||
case 0:
|
||||
case 1:
|
||||
case 2:
|
||||
asmCode << "\tcall rx_read_l1" << std::endl;
|
||||
asmCode << "rx_body_" << i << ":" << std::endl;
|
||||
if ((instr.loca & 192) == 0)
|
||||
asmCode << "\txor " << regMx << ", rcx" << std::endl;
|
||||
asmCode << "\tand ecx, " << (ScratchpadL1 - 1) << std::endl;
|
||||
break;
|
||||
default: //3
|
||||
asmCode << "\tcall rx_read_l2" << std::endl;
|
||||
asmCode << "rx_body_" << i << ":" << std::endl;
|
||||
if ((instr.loca & 192) == 0)
|
||||
asmCode << "\txor " << regMx << ", rcx" << std::endl;
|
||||
asmCode << "\tand ecx, " << (ScratchpadL2 - 1) << std::endl;
|
||||
break;
|
||||
if (instr.loca & 3) {
|
||||
asmCode << "\tcall rx_read_l1" << std::endl;
|
||||
asmCode << "rx_body_" << i << ":" << std::endl;
|
||||
if ((instr.loca & 192) == 0)
|
||||
asmCode << "\txor " << regMx << ", rcx" << std::endl;
|
||||
asmCode << "\tand ecx, " << (ScratchpadL1 - 1) << std::endl;
|
||||
}
|
||||
else {
|
||||
asmCode << "\tcall rx_read_l2" << std::endl;
|
||||
asmCode << "rx_body_" << i << ":" << std::endl;
|
||||
if ((instr.loca & 192) == 0)
|
||||
asmCode << "\txor " << regMx << ", rcx" << std::endl;
|
||||
asmCode << "\tand ecx, " << (ScratchpadL2 - 1) << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::genar(Instruction& instr, int i) {
|
||||
gena(instr, i);
|
||||
asmCode << "\tmov rax, qword ptr [" << regScratchpadAddr << "+rcx*8]" << std::endl;
|
||||
}
|
||||
|
||||
|
||||
void AssemblyGeneratorX86::genaf(Instruction& instr, int i) {
|
||||
asmCode << "\txor " << regR[instr.rega % RegistersCount] << ", 0" << std::hex << instr.addra << "h" << std::dec << std::endl;
|
||||
asmCode << "\tmov ecx, " << regR32[instr.rega % RegistersCount] << std::endl;
|
||||
asmCode << "\ttest " << regIc8 << ", 63" << std::endl;
|
||||
asmCode << "\tjnz short rx_body_" << i << std::endl;
|
||||
switch (instr.loca & 3)
|
||||
{
|
||||
case 0:
|
||||
case 1:
|
||||
case 2:
|
||||
asmCode << "\tcall rx_read_l1" << std::endl;
|
||||
asmCode << "rx_body_" << i << ":" << std::endl;
|
||||
if((instr.loca & 192) == 0)
|
||||
asmCode << "\txor " << regMx << ", rcx" << std::endl;
|
||||
asmCode << "\tand ecx, " << (ScratchpadL1 - 1) << std::endl;
|
||||
break;
|
||||
default: //3
|
||||
asmCode << "\tcall rx_read_l2" << std::endl;
|
||||
asmCode << "rx_body_" << i << ":" << std::endl;
|
||||
if ((instr.loca & 192) == 0)
|
||||
asmCode << "\txor " << regMx << ", rcx" << std::endl;
|
||||
asmCode << "\tand ecx, " << (ScratchpadL2 - 1) << std::endl;
|
||||
break;
|
||||
}
|
||||
gena(instr, i);
|
||||
asmCode << "\tcvtdq2pd xmm0, qword ptr [" << regScratchpadAddr << "+rcx*8]" << std::endl;
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::genbr0(Instruction& instr, const char* instrx86) {
|
||||
switch (instr.locb & 7)
|
||||
{
|
||||
case 0:
|
||||
case 1:
|
||||
case 2:
|
||||
case 3:
|
||||
void AssemblyGeneratorX86::genbiashift(Instruction& instr, const char* instrx86) {
|
||||
if (instr.locb & 1) {
|
||||
asmCode << "\tmov rcx, " << regR[instr.regb % RegistersCount] << std::endl;
|
||||
asmCode << "\t" << instrx86 << " rax, cl" << std::endl;
|
||||
return;
|
||||
default:
|
||||
} else {
|
||||
asmCode << "\t" << instrx86 << " rax, " << (instr.imm8 & 63) << std::endl;;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::genbr1(Instruction& instr) {
|
||||
switch (instr.locb & 7)
|
||||
{
|
||||
case 0:
|
||||
case 1:
|
||||
case 2:
|
||||
case 3:
|
||||
case 4:
|
||||
case 5:
|
||||
void AssemblyGeneratorX86::genbia(Instruction& instr) {
|
||||
if (instr.locb & 3) {
|
||||
asmCode << regR[instr.regb % RegistersCount] << std::endl;
|
||||
return;
|
||||
default:
|
||||
} else {
|
||||
asmCode << instr.imm32 << std::endl;;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::genbr132(Instruction& instr) {
|
||||
switch (instr.locb & 7)
|
||||
{
|
||||
case 0:
|
||||
case 1:
|
||||
case 2:
|
||||
case 3:
|
||||
case 4:
|
||||
case 5:
|
||||
void AssemblyGeneratorX86::genbia32(Instruction& instr) {
|
||||
if (instr.locb & 3) {
|
||||
asmCode << regR32[instr.regb % RegistersCount] << std::endl;
|
||||
return;
|
||||
default:
|
||||
}
|
||||
else {
|
||||
asmCode << instr.imm32 << std::endl;;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -241,28 +194,28 @@ namespace RandomX {
|
|||
void AssemblyGeneratorX86::h_ADD_64(Instruction& instr, int i) {
|
||||
genar(instr, i);
|
||||
asmCode << "\tadd rax, ";
|
||||
genbr1(instr);
|
||||
genbia(instr);
|
||||
gencr(instr);
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_ADD_32(Instruction& instr, int i) {
|
||||
genar(instr, i);
|
||||
asmCode << "\tadd eax, ";
|
||||
genbr132(instr);
|
||||
genbia32(instr);
|
||||
gencr(instr);
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_SUB_64(Instruction& instr, int i) {
|
||||
genar(instr, i);
|
||||
asmCode << "\tsub rax, ";
|
||||
genbr1(instr);
|
||||
genbia(instr);
|
||||
gencr(instr);
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_SUB_32(Instruction& instr, int i) {
|
||||
genar(instr, i);
|
||||
asmCode << "\tsub eax, ";
|
||||
genbr132(instr);
|
||||
genbia32(instr);
|
||||
gencr(instr);
|
||||
}
|
||||
|
||||
|
@ -272,14 +225,14 @@ namespace RandomX {
|
|||
if ((instr.locb & 7) >= 6) {
|
||||
asmCode << "rax, ";
|
||||
}
|
||||
genbr1(instr);
|
||||
genbia(instr);
|
||||
gencr(instr);
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_MULH_64(Instruction& instr, int i) {
|
||||
genar(instr, i);
|
||||
asmCode << "\tmov rcx, ";
|
||||
genbr1(instr);
|
||||
genbia(instr);
|
||||
asmCode << "\tmul rcx" << std::endl;
|
||||
asmCode << "\tmov rax, rdx" << std::endl;
|
||||
gencr(instr);
|
||||
|
@ -289,7 +242,7 @@ namespace RandomX {
|
|||
genar(instr, i);
|
||||
asmCode << "\tmov ecx, eax" << std::endl;
|
||||
asmCode << "\tmov eax, ";
|
||||
genbr132(instr);
|
||||
genbia32(instr);
|
||||
asmCode << "\timul rax, rcx" << std::endl;
|
||||
gencr(instr);
|
||||
}
|
||||
|
@ -310,7 +263,7 @@ namespace RandomX {
|
|||
void AssemblyGeneratorX86::h_IMULH_64(Instruction& instr, int i) {
|
||||
genar(instr, i);
|
||||
asmCode << "\tmov rcx, ";
|
||||
genbr1(instr);
|
||||
genbia(instr);
|
||||
asmCode << "\timul rcx" << std::endl;
|
||||
asmCode << "\tmov rax, rdx" << std::endl;
|
||||
gencr(instr);
|
||||
|
@ -318,7 +271,7 @@ namespace RandomX {
|
|||
|
||||
void AssemblyGeneratorX86::h_DIV_64(Instruction& instr, int i) {
|
||||
genar(instr, i);
|
||||
if ((instr.locb & 7) >= 6) {
|
||||
if (instr.locb & 3) {
|
||||
#ifdef MAGIC_DIVISION
|
||||
if (instr.imm32 != 0) {
|
||||
uint32_t divisor = instr.imm32;
|
||||
|
@ -373,8 +326,8 @@ namespace RandomX {
|
|||
|
||||
void AssemblyGeneratorX86::h_IDIV_64(Instruction& instr, int i) {
|
||||
genar(instr, i);
|
||||
if (instr.locb & 3) {
|
||||
#ifdef MAGIC_DIVISION
|
||||
if ((instr.locb & 7) >= 6) {
|
||||
int64_t divisor = instr.imm32;
|
||||
asmCode << "\t; magic divide by " << divisor << std::endl;
|
||||
if ((divisor & -divisor) == divisor || (divisor & -divisor) == -divisor) {
|
||||
|
@ -394,9 +347,10 @@ namespace RandomX {
|
|||
asmCode << "\tadd rax, rcx" << std::endl;
|
||||
asmCode << "\tsar rax, " << shift << std::endl;
|
||||
}
|
||||
if(negative)
|
||||
if (negative)
|
||||
asmCode << "\tneg rax" << std::endl;
|
||||
} else if(divisor != 0) {
|
||||
}
|
||||
else if (divisor != 0) {
|
||||
magics_info mi = compute_signed_magic_info(divisor);
|
||||
if ((divisor >= 0) != (mi.multiplier >= 0))
|
||||
asmCode << "\tmov rcx, rax" << std::endl;
|
||||
|
@ -422,25 +376,29 @@ namespace RandomX {
|
|||
asmCode << "\tsets dl" << std::endl;
|
||||
asmCode << "\tadd rax, rdx" << std::endl;
|
||||
}
|
||||
#else
|
||||
asmCode << "\tmov edx, " << instr.imm32 << std::endl;
|
||||
#endif
|
||||
}
|
||||
else {
|
||||
#endif
|
||||
asmCode << "\tmov edx, ";
|
||||
genbr132(instr);
|
||||
asmCode << "\tcmp edx, -1" << std::endl;
|
||||
asmCode << "\tjne short safe_idiv_" << i << std::endl;
|
||||
asmCode << "\tneg rax" << std::endl;
|
||||
asmCode << "\tjmp short result_idiv_" << i << std::endl;
|
||||
asmCode << "safe_idiv_" << i << ":" << std::endl;
|
||||
asmCode << "\tmov ecx, 1" << std::endl;
|
||||
asmCode << "\ttest edx, edx" << std::endl;
|
||||
asmCode << "\tcmovne ecx, edx" << std::endl;
|
||||
asmCode << "\tmovsxd rcx, ecx" << std::endl;
|
||||
asmCode << "\tcqo" << std::endl;
|
||||
asmCode << "\tidiv rcx" << std::endl;
|
||||
asmCode << "result_idiv_" << i << ":" << std::endl;
|
||||
#ifdef MAGIC_DIVISION
|
||||
asmCode << "\tmov edx, " << regR32[instr.regb % RegistersCount] << std::endl;
|
||||
#ifndef MAGIC_DIVISION
|
||||
}
|
||||
#endif
|
||||
asmCode << "\tcmp edx, -1" << std::endl;
|
||||
asmCode << "\tjne short body_idiv_" << i << std::endl;
|
||||
asmCode << "\tneg rax" << std::endl;
|
||||
asmCode << "\tjmp short result_idiv_" << i << std::endl;
|
||||
asmCode << "body_idiv_" << i << ":" << std::endl;
|
||||
asmCode << "\tmov ecx, 1" << std::endl;
|
||||
asmCode << "\ttest edx, edx" << std::endl;
|
||||
asmCode << "\tcmovne ecx, edx" << std::endl;
|
||||
asmCode << "\tmovsxd rcx, ecx" << std::endl;
|
||||
asmCode << "\tcqo" << std::endl;
|
||||
asmCode << "\tidiv rcx" << std::endl;
|
||||
asmCode << "result_idiv_" << i << ":" << std::endl;
|
||||
#ifdef MAGIC_DIVISION
|
||||
}
|
||||
#endif
|
||||
gencr(instr);
|
||||
}
|
||||
|
@ -448,72 +406,72 @@ namespace RandomX {
|
|||
void AssemblyGeneratorX86::h_AND_64(Instruction& instr, int i) {
|
||||
genar(instr, i);
|
||||
asmCode << "\tand rax, ";
|
||||
genbr1(instr);
|
||||
genbia(instr);
|
||||
gencr(instr);
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_AND_32(Instruction& instr, int i) {
|
||||
genar(instr, i);
|
||||
asmCode << "\tand eax, ";
|
||||
genbr132(instr);
|
||||
genbia32(instr);
|
||||
gencr(instr);
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_OR_64(Instruction& instr, int i) {
|
||||
genar(instr, i);
|
||||
asmCode << "\tor rax, ";
|
||||
genbr1(instr);
|
||||
genbia(instr);
|
||||
gencr(instr);
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_OR_32(Instruction& instr, int i) {
|
||||
genar(instr, i);
|
||||
asmCode << "\tor eax, ";
|
||||
genbr132(instr);
|
||||
genbia32(instr);
|
||||
gencr(instr);
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_XOR_64(Instruction& instr, int i) {
|
||||
genar(instr, i);
|
||||
asmCode << "\txor rax, ";
|
||||
genbr1(instr);
|
||||
genbia(instr);
|
||||
gencr(instr);
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_XOR_32(Instruction& instr, int i) {
|
||||
genar(instr, i);
|
||||
asmCode << "\txor eax, ";
|
||||
genbr132(instr);
|
||||
genbia32(instr);
|
||||
gencr(instr);
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_SHL_64(Instruction& instr, int i) {
|
||||
genar(instr, i);
|
||||
genbr0(instr, "shl");
|
||||
genbiashift(instr, "shl");
|
||||
gencr(instr);
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_SHR_64(Instruction& instr, int i) {
|
||||
genar(instr, i);
|
||||
genbr0(instr, "shr");
|
||||
genbiashift(instr, "shr");
|
||||
gencr(instr);
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_SAR_64(Instruction& instr, int i) {
|
||||
genar(instr, i);
|
||||
genbr0(instr, "sar");
|
||||
genbiashift(instr, "sar");
|
||||
gencr(instr);
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_ROL_64(Instruction& instr, int i) {
|
||||
genar(instr, i);
|
||||
genbr0(instr, "rol");
|
||||
genbiashift(instr, "rol");
|
||||
gencr(instr);
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::h_ROR_64(Instruction& instr, int i) {
|
||||
genar(instr, i);
|
||||
genbr0(instr, "ror");
|
||||
genbiashift(instr, "ror");
|
||||
gencr(instr);
|
||||
}
|
||||
|
||||
|
|
|
@ -38,11 +38,12 @@ namespace RandomX {
|
|||
static InstructionGenerator engine[256];
|
||||
std::stringstream asmCode;
|
||||
|
||||
void gena(Instruction&, int);
|
||||
void genar(Instruction&, int);
|
||||
void genaf(Instruction&, int);
|
||||
void genbr0(Instruction&, const char*);
|
||||
void genbr1(Instruction&);
|
||||
void genbr132(Instruction&);
|
||||
void genbiashift(Instruction&, const char*);
|
||||
void genbia(Instruction&);
|
||||
void genbia32(Instruction&);
|
||||
void genbf(Instruction&, const char*);
|
||||
void gencr(Instruction&, bool);
|
||||
void gencf(Instruction&, bool);
|
||||
|
|
|
@ -17,10 +17,14 @@ You should have received a copy of the GNU General Public License
|
|||
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
//#define MAGIC_DIVISION
|
||||
#include "JitCompilerX86.hpp"
|
||||
#include "Pcg32.hpp"
|
||||
#include <cstring>
|
||||
#include <stdexcept>
|
||||
#ifdef MAGIC_DIVISION
|
||||
#include "divideByConstantCodegen.h"
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
|
@ -152,6 +156,17 @@ namespace RandomX {
|
|||
instructionOffsets.push_back(codePos);
|
||||
emit(0x840fcbff); //dec ebx; jz <epilogue>
|
||||
emit(epilogueOffset - (codePos + 4)); //jump offset (RIP-relative)
|
||||
auto generator = engine[instr.opcode];
|
||||
(this->*generator)(instr, i);
|
||||
}
|
||||
|
||||
void JitCompilerX86::fixCallOffsets() {
|
||||
for (CallOffset& co : callOffsets) {
|
||||
*reinterpret_cast<int32_t*>(code + co.pos) = instructionOffsets[co.index] - (co.pos + 4);
|
||||
}
|
||||
}
|
||||
|
||||
void JitCompilerX86::gena(Instruction& instr) {
|
||||
emit(uint16_t(0x8149)); //xor
|
||||
emitByte(0xf0 + (instr.rega % RegistersCount));
|
||||
emit(instr.addra);
|
||||
|
@ -169,41 +184,28 @@ namespace RandomX {
|
|||
emit(uint16_t(0x3348));
|
||||
emitByte(0xe9); //xor rbp, rcx
|
||||
}
|
||||
auto generator = engine[instr.opcode];
|
||||
(this->*generator)(instr, i);
|
||||
}
|
||||
|
||||
void JitCompilerX86::fixCallOffsets() {
|
||||
for (CallOffset& co : callOffsets) {
|
||||
*reinterpret_cast<int32_t*>(code + co.pos) = instructionOffsets[co.index] - (co.pos + 4);
|
||||
emit(uint16_t(0xe181)); //and ecx,
|
||||
if (instr.loca & 3) {
|
||||
emit(ScratchpadL1 - 1); //first 16 KiB of scratchpad
|
||||
}
|
||||
else {
|
||||
emit(ScratchpadL2 - 1); //whole scratchpad
|
||||
}
|
||||
}
|
||||
|
||||
void JitCompilerX86::genar(Instruction& instr) {
|
||||
emit(uint16_t(0xe181)); //and ecx,
|
||||
if (instr.loca & 3) {
|
||||
emit(ScratchpadL1 - 1); //first 16 KiB of scratchpad
|
||||
}
|
||||
else {
|
||||
emit(ScratchpadL2 - 1); //whole scratchpad
|
||||
}
|
||||
gena(instr);
|
||||
emit(0xce048b48); //mov rax,QWORD PTR [rsi+rcx*8]
|
||||
}
|
||||
|
||||
void JitCompilerX86::genaf(Instruction& instr) {
|
||||
emit(uint16_t(0xe181)); //and ecx,
|
||||
if (instr.loca & 3) {
|
||||
emit(ScratchpadL1 - 1); //first 16 KiB of scratchpad
|
||||
}
|
||||
else {
|
||||
emit(ScratchpadL2 - 1); //whole scratchpad
|
||||
}
|
||||
gena(instr);
|
||||
emitByte(0xf3);
|
||||
emit(0xce04e60f); //cvtdq2pd xmm0,QWORD PTR [rsi+rcx*8]
|
||||
}
|
||||
|
||||
void JitCompilerX86::genbr0(Instruction& instr, uint16_t opcodeReg, uint16_t opcodeImm) {
|
||||
if ((instr.locb & 7) <= 3) {
|
||||
void JitCompilerX86::genbiashift(Instruction& instr, uint16_t opcodeReg, uint16_t opcodeImm) {
|
||||
if (instr.locb & 1) {
|
||||
emit(uint16_t(0x8b49)); //mov
|
||||
emitByte(0xc8 + (instr.regb % RegistersCount)); //rcx, regb
|
||||
emitByte(0x48); //REX.W
|
||||
|
@ -216,8 +218,8 @@ namespace RandomX {
|
|||
}
|
||||
}
|
||||
|
||||
void JitCompilerX86::genbr1(Instruction& instr, uint16_t opcodeReg, uint16_t opcodeImm) {
|
||||
if ((instr.locb & 7) <= 5) {
|
||||
void JitCompilerX86::genbia(Instruction& instr, uint16_t opcodeReg, uint16_t opcodeImm) {
|
||||
if (instr.locb & 3) {
|
||||
emit(opcodeReg); // xxx rax, r64
|
||||
emitByte(0xc0 + (instr.regb % RegistersCount));
|
||||
}
|
||||
|
@ -227,8 +229,8 @@ namespace RandomX {
|
|||
}
|
||||
}
|
||||
|
||||
void JitCompilerX86::genbr132(Instruction& instr, uint16_t opcodeReg, uint8_t opcodeImm) {
|
||||
if ((instr.locb & 7) <= 5) {
|
||||
void JitCompilerX86::genbia32(Instruction& instr, uint16_t opcodeReg, uint8_t opcodeImm) {
|
||||
if (instr.locb & 3) {
|
||||
emit(opcodeReg); // xxx eax, r32
|
||||
emitByte(0xc0 + (instr.regb % RegistersCount));
|
||||
}
|
||||
|
@ -328,25 +330,25 @@ namespace RandomX {
|
|||
|
||||
void JitCompilerX86::h_ADD_64(Instruction& instr, int i) {
|
||||
genar(instr);
|
||||
genbr1(instr, 0x0349, 0x0548);
|
||||
genbia(instr, 0x0349, 0x0548);
|
||||
gencr(instr);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_ADD_32(Instruction& instr, int i) {
|
||||
genar(instr);
|
||||
genbr132(instr, 0x0341, 0x05);
|
||||
genbia32(instr, 0x0341, 0x05);
|
||||
gencr(instr);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_SUB_64(Instruction& instr, int i) {
|
||||
genar(instr);
|
||||
genbr1(instr, 0x2b49, 0x2d48);
|
||||
genbia(instr, 0x2b49, 0x2d48);
|
||||
gencr(instr);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_SUB_32(Instruction& instr, int i) {
|
||||
genar(instr);
|
||||
genbr132(instr, 0x2b41, 0x2d);
|
||||
genbia32(instr, 0x2b41, 0x2d);
|
||||
gencr(instr);
|
||||
}
|
||||
|
||||
|
@ -435,104 +437,209 @@ namespace RandomX {
|
|||
|
||||
void JitCompilerX86::h_DIV_64(Instruction& instr, int i) {
|
||||
genar(instr);
|
||||
if ((instr.locb & 7) <= 5) {
|
||||
if (instr.locb & 3) {
|
||||
#ifdef MAGIC_DIVISION
|
||||
if (instr.imm32 != 0) {
|
||||
uint32_t divisor = instr.imm32;
|
||||
if (divisor & (divisor - 1)) {
|
||||
magicu_info mi = compute_unsigned_magic_info(divisor, sizeof(uint64_t) * 8);
|
||||
if (mi.pre_shift > 0) {
|
||||
if (mi.pre_shift == 1) {
|
||||
emitByte(0x48);
|
||||
emit(uint16_t(0xe8d1)); //shr rax,1
|
||||
}
|
||||
else {
|
||||
emit(0x00e8c148 | (mi.pre_shift << 24)); //shr rax, pre_shift
|
||||
}
|
||||
}
|
||||
if (mi.increment) {
|
||||
emit(0x00d8834801c08348); //add rax,1; sbb rax,0
|
||||
}
|
||||
emit(uint16_t(0xb948)); //movabs rcx, multiplier
|
||||
emit(mi.multiplier);
|
||||
emit(0x48e1f748); //mul rcx; REX
|
||||
emit(uint16_t(0xc28b)); //mov rax,rdx
|
||||
if (mi.post_shift > 0)
|
||||
emit(0x00e8c148 | (mi.post_shift << 24)); //shr rax, post_shift
|
||||
}
|
||||
else { //divisor is a power of two
|
||||
int shift = 0;
|
||||
while (divisor >>= 1)
|
||||
++shift;
|
||||
if (shift > 0)
|
||||
emit(0x00e8c148 | (shift << 24)); //shr rax, shift
|
||||
}
|
||||
}
|
||||
#else
|
||||
emitByte(0xb9); //mov ecx, imm32
|
||||
emit(instr.imm32 != 0 ? instr.imm32 : 1);
|
||||
#endif
|
||||
}
|
||||
else {
|
||||
emitByte(0xb9); //mov ecx, 1
|
||||
emit(1);
|
||||
emit(uint16_t(0x8b41)); //mov edx, r32
|
||||
emitByte(0xd0 + (instr.regb % RegistersCount));
|
||||
emit(0x450fd285); //test edx, edx; cmovne ecx,edx
|
||||
emitByte(0xca);
|
||||
#ifdef MAGIC_DIVISION
|
||||
emit(0xf748d233); //xor edx,edx; div rcx
|
||||
emitByte(0xf1);
|
||||
#endif
|
||||
}
|
||||
else {
|
||||
emitByte(0xb9); //mov ecx, imm32
|
||||
emit(instr.imm32 != 0 ? instr.imm32 : 1);
|
||||
}
|
||||
#ifndef MAGIC_DIVISION
|
||||
emit(0xf748d233); //xor edx,edx; div rcx
|
||||
emitByte(0xf1);
|
||||
#endif
|
||||
gencr(instr);
|
||||
}
|
||||
|
||||
// Emits x86-64 machine code for IDIV_64: signed division of rax (loaded by genar)
// by the B operand, followed by the code emitted by gencr(instr).
//   (instr.locb & 3) != 0 : B is the constant instr.imm32
//   (instr.locb & 3) == 0 : B is the 32-bit register selected by instr.regb % RegistersCount
// With MAGIC_DIVISION defined, a constant divisor is compiled into a shift/multiply
// sequence instead of a hardware idiv. The parameter i is not used here.
// NOTE(review): the sign handling of the constant path relies on instr.imm32 being a
// signed 32-bit field (so that it sign-extends into int64_t) - confirm in Instruction.
void JitCompilerX86::h_IDIV_64(Instruction& instr, int i) {
    genar(instr);
    if (instr.locb & 3) {
#ifdef MAGIC_DIVISION
        int64_t divisor = instr.imm32;
        if ((divisor & -divisor) == divisor || (divisor & -divisor) == -divisor) {
            // +/- power of two. This test also holds for 0 (nothing is emitted, rax is
            // left unchanged) and for +/-1 (at most "neg rax" is emitted).
            bool negative = divisor < 0;
            if (negative)
                divisor = -divisor;
            int shift = 0;
            uint64_t unsignedDivisor = divisor;
            while (unsignedDivisor >>= 1)
                ++shift;
            if (shift > 0) {
                // Add (2^shift - 1) to negative dividends so that the arithmetic
                // shift below rounds toward zero like idiv does.
                emitByte(0x48);
                emit(uint16_t(0xc88b)); //mov rcx, rax
                emit(0x3ff9c148); //sar rcx, 63
                uint32_t mask = (1ULL << shift) - 1;
                emit(uint16_t(0xe181)); //and ecx, mask
                emit(mask);
                emitByte(0x48);
                emit(uint16_t(0xc103)); //add rax, rcx
                emit(0x00f8c148 | (shift << 24)); //sar rax, shift
            }
            if (negative) {
                emitByte(0x48);
                emit(uint16_t(0xd8f7)); //neg rax
            }
        }
        else if (divisor != 0) {
            magics_info mi = compute_signed_magic_info(divisor);
            // The dividend is needed again after the multiplication when the
            // multiplier's sign differs from the divisor's sign.
            if ((divisor >= 0) != (mi.multiplier >= 0)) {
                emitByte(0x48);
                emit(uint16_t(0xc88b)); //mov rcx, rax
            }
            emit(uint16_t(0xba48)); //movabs rdx, multiplier
            emit(mi.multiplier);
            emit(0xd233c28b48eaf748); //imul rdx; mov rax,rdx; xor edx,edx
            // "xor edx,edx" overwrote the flags, so SF must be re-established from
            // rax before the "sets dl" below. haveSF tracks whether one of the
            // following instructions has already done that.
            bool haveSF = false;
            if (divisor > 0 && mi.multiplier < 0) {
                emitByte(0x48);
                emit(uint16_t(0xc103)); //add rax, rcx
                haveSF = true;
            }
            if (divisor < 0 && mi.multiplier > 0) {
                emitByte(0x48);
                emit(uint16_t(0xc12b)); //sub rax, rcx
                haveSF = true;
            }
            if (mi.shift > 0) {
                emit(0x00f8c148 | (mi.shift << 24)); //sar rax, shift
                haveSF = true;
            }
            if (!haveSF) {
                // emit() stores values little-endian, so 0xc085 yields the bytes
                // 85 c0; together with the REX.W prefix this is 48 85 c0.
                // (The previous constant 0x85c0 produced 48 c0 85, which is not
                // "test rax, rax" and corrupted the instruction stream.)
                emitByte(0x48);
                emit(uint16_t(0xc085)); //test rax, rax
            }
            // Add 1 to a negative quotient so the result is rounded toward zero.
            emit(0x48c2980f); //sets dl; add rax, rdx
            emit(uint16_t(0xc203));
        }
#else
        emitByte(0xba); // mov edx, imm32
        emit(instr.imm32);
#endif
    }
    else {
        emit(uint16_t(0x8b41)); //mov edx, r32
        emitByte(0xd0 + (instr.regb % RegistersCount));
// Without MAGIC_DIVISION the "else" ends here and the idiv sequence below is shared
// by both operand kinds; with MAGIC_DIVISION the sequence stays inside the "else"
// because the constant case has already been fully handled above.
#ifndef MAGIC_DIVISION
    }
#endif
        // Hardware division with the divisor in edx:
        emit(0xc88b480b75fffa83); //cmp edx,-1; jne +11; mov rcx,rax
        emit(0x1274c9ff48c1d148); //rol rcx,1; dec rcx; jz +18 (rax == INT64_MIN with divisor -1: skip the idiv)
        emit(0x0fd28500000001b9); //mov ecx,1; test edx,edx; first byte of cmovne
        emit(0x489948c96348ca45); //cmovne ecx,edx; movsxd rcx,ecx; cqo; REX.W prefix of idiv
        emit(uint16_t(0xf9f7)); //idiv rcx
#ifdef MAGIC_DIVISION
    }
#endif
    gencr(instr);
}
|
||||
|
||||
void JitCompilerX86::h_AND_64(Instruction& instr, int i) {
|
||||
genar(instr);
|
||||
genbr1(instr, 0x2349, 0x2548);
|
||||
genbia(instr, 0x2349, 0x2548);
|
||||
gencr(instr);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_AND_32(Instruction& instr, int i) {
|
||||
genar(instr);
|
||||
genbr132(instr, 0x2341, 0x25);
|
||||
genbia32(instr, 0x2341, 0x25);
|
||||
gencr(instr);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_OR_64(Instruction& instr, int i) {
|
||||
genar(instr);
|
||||
genbr1(instr, 0x0b49, 0x0d48);
|
||||
genbia(instr, 0x0b49, 0x0d48);
|
||||
gencr(instr);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_OR_32(Instruction& instr, int i) {
|
||||
genar(instr);
|
||||
genbr132(instr, 0x0b41, 0x0d);
|
||||
genbia32(instr, 0x0b41, 0x0d);
|
||||
gencr(instr);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_XOR_64(Instruction& instr, int i) {
|
||||
genar(instr);
|
||||
genbr1(instr, 0x3349, 0x3548);
|
||||
genbia(instr, 0x3349, 0x3548);
|
||||
gencr(instr);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_XOR_32(Instruction& instr, int i) {
|
||||
genar(instr);
|
||||
genbr132(instr, 0x3341, 0x35);
|
||||
genbia32(instr, 0x3341, 0x35);
|
||||
gencr(instr);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_SHL_64(Instruction& instr, int i) {
|
||||
genar(instr);
|
||||
genbr0(instr, 0xe0d3, 0xe0c1);
|
||||
genbiashift(instr, 0xe0d3, 0xe0c1);
|
||||
gencr(instr);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_SHR_64(Instruction& instr, int i) {
|
||||
genar(instr);
|
||||
genbr0(instr, 0xe8d3, 0xe8c1);
|
||||
genbiashift(instr, 0xe8d3, 0xe8c1);
|
||||
gencr(instr);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_SAR_64(Instruction& instr, int i) {
|
||||
genar(instr);
|
||||
genbr0(instr, 0xf8d3, 0xf8c1);
|
||||
genbiashift(instr, 0xf8d3, 0xf8c1);
|
||||
gencr(instr);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_ROL_64(Instruction& instr, int i) {
|
||||
genar(instr);
|
||||
genbr0(instr, 0xc0d3, 0xc0c1);
|
||||
genbiashift(instr, 0xc0d3, 0xc0c1);
|
||||
gencr(instr);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_ROR_64(Instruction& instr, int i) {
|
||||
genar(instr);
|
||||
genbr0(instr, 0xc8d3, 0xc8c1);
|
||||
genbiashift(instr, 0xc8d3, 0xc8c1);
|
||||
gencr(instr);
|
||||
}
|
||||
|
||||
|
|
|
@ -58,11 +58,12 @@ namespace RandomX {
|
|||
std::vector<int32_t> instructionOffsets;
|
||||
std::vector<CallOffset> callOffsets;
|
||||
|
||||
void gena(Instruction&);
|
||||
void genar(Instruction&);
|
||||
void genaf(Instruction&);
|
||||
void genbr0(Instruction&, uint16_t, uint16_t);
|
||||
void genbr1(Instruction&, uint16_t, uint16_t);
|
||||
void genbr132(Instruction&, uint16_t, uint8_t);
|
||||
void genbiashift(Instruction&, uint16_t, uint16_t);
|
||||
void genbia(Instruction&, uint16_t, uint16_t);
|
||||
void genbia32(Instruction&, uint16_t, uint8_t);
|
||||
void genbf(Instruction&, uint8_t);
|
||||
void scratchpadStoreR(Instruction&, uint32_t, bool);
|
||||
void scratchpadStoreF(Instruction&, int, uint32_t, bool);
|
||||
|
|
|
@ -11,10 +11,10 @@
|
|||
|
||||
#include "divideByConstantCodegen.h"
|
||||
|
||||
struct magicu_info compute_unsigned_magic_info(uint D, unsigned num_bits) {
|
||||
struct magicu_info compute_unsigned_magic_info(unsigned_type D, unsigned num_bits) {
|
||||
|
||||
//The numerator must fit in a uint
|
||||
assert(num_bits > 0 && num_bits <= sizeof(uint) * CHAR_BIT);
|
||||
//The numerator must fit in an unsigned_type
|
||||
assert(num_bits > 0 && num_bits <= sizeof(unsigned_type) * CHAR_BIT);
|
||||
|
||||
// D must be larger than zero and not a power of 2
|
||||
assert(D & (D - 1));
|
||||
|
@ -22,29 +22,29 @@ struct magicu_info compute_unsigned_magic_info(uint D, unsigned num_bits) {
|
|||
// The eventual result
|
||||
struct magicu_info result;
|
||||
|
||||
// Bits in a uint
|
||||
const unsigned UINT_BITS = sizeof(uint) * CHAR_BIT;
|
||||
// Bits in an unsigned_type
|
||||
const unsigned UINT_BITS = sizeof(unsigned_type) * CHAR_BIT;
|
||||
|
||||
// The extra shift implicit in the difference between UINT_BITS and num_bits
|
||||
const unsigned extra_shift = UINT_BITS - num_bits;
|
||||
|
||||
// The initial power of 2 is one less than the first one that can possibly work
|
||||
const uint initial_power_of_2 = (uint)1 << (UINT_BITS - 1);
|
||||
const unsigned_type initial_power_of_2 = (unsigned_type)1 << (UINT_BITS - 1);
|
||||
|
||||
// The remainder and quotient of our power of 2 divided by d
|
||||
uint quotient = initial_power_of_2 / D, remainder = initial_power_of_2 % D;
|
||||
unsigned_type quotient = initial_power_of_2 / D, remainder = initial_power_of_2 % D;
|
||||
|
||||
// ceil(log_2 D)
|
||||
unsigned ceil_log_2_D;
|
||||
|
||||
// The magic info for the variant "round down" algorithm
|
||||
uint down_multiplier = 0;
|
||||
unsigned_type down_multiplier = 0;
|
||||
unsigned down_exponent = 0;
|
||||
int has_magic_down = 0;
|
||||
|
||||
// Compute ceil(log_2 D)
|
||||
ceil_log_2_D = 0;
|
||||
uint tmp;
|
||||
unsigned_type tmp;
|
||||
for (tmp = D; tmp > 0; tmp >>= 1)
|
||||
ceil_log_2_D += 1;
|
||||
|
||||
|
@ -67,11 +67,11 @@ struct magicu_info compute_unsigned_magic_info(uint D, unsigned num_bits) {
|
|||
// We're done if this exponent works for the round_up algorithm.
|
||||
// Note that exponent may be larger than the maximum shift supported,
|
||||
// so the check for >= ceil_log_2_D is critical.
|
||||
if ((exponent + extra_shift >= ceil_log_2_D) || (D - remainder) <= ((uint)1 << (exponent + extra_shift)))
|
||||
if ((exponent + extra_shift >= ceil_log_2_D) || (D - remainder) <= ((unsigned_type)1 << (exponent + extra_shift)))
|
||||
break;
|
||||
|
||||
// Set magic_down if we have not set it yet and this exponent works for the round_down algorithm
|
||||
if (!has_magic_down && remainder <= ((uint)1 << (exponent + extra_shift))) {
|
||||
if (!has_magic_down && remainder <= ((unsigned_type)1 << (exponent + extra_shift))) {
|
||||
has_magic_down = 1;
|
||||
down_multiplier = quotient;
|
||||
down_exponent = exponent;
|
||||
|
@ -96,7 +96,7 @@ struct magicu_info compute_unsigned_magic_info(uint D, unsigned num_bits) {
|
|||
else {
|
||||
// Even divisor, so use a prefix-shifted dividend
|
||||
unsigned pre_shift = 0;
|
||||
uint shifted_D = D;
|
||||
unsigned_type shifted_D = D;
|
||||
while ((shifted_D & 1) == 0) {
|
||||
shifted_D >>= 1;
|
||||
pre_shift += 1;
|
||||
|
@ -108,34 +108,34 @@ struct magicu_info compute_unsigned_magic_info(uint D, unsigned num_bits) {
|
|||
return result;
|
||||
}
|
||||
|
||||
struct magics_info compute_signed_magic_info(sint D) {
|
||||
struct magics_info compute_signed_magic_info(signed_type D) {
|
||||
// D must not be zero and must not be a power of 2 (or its negative)
|
||||
assert(D != 0 && (D & -D) != D && (D & -D) != -D);
|
||||
|
||||
// Our result
|
||||
struct magics_info result;
|
||||
|
||||
// Bits in an sint
|
||||
const unsigned SINT_BITS = sizeof(sint) * CHAR_BIT;
|
||||
// Bits in a signed_type
|
||||
const unsigned SINT_BITS = sizeof(signed_type) * CHAR_BIT;
|
||||
|
||||
// Absolute value of D (we know D is not the most negative value since that's a power of 2)
|
||||
const uint abs_d = (D < 0 ? -D : D);
|
||||
const unsigned_type abs_d = (D < 0 ? -D : D);
|
||||
|
||||
// The initial power of 2 is one less than the first one that can possibly work
|
||||
// "two31" in Warren
|
||||
unsigned exponent = SINT_BITS - 1;
|
||||
const uint initial_power_of_2 = (uint)1 << exponent;
|
||||
const unsigned_type initial_power_of_2 = (unsigned_type)1 << exponent;
|
||||
|
||||
// Compute the absolute value of our "test numerator,"
|
||||
// which is the largest dividend whose remainder with d is d-1.
|
||||
// This is called anc in Warren.
|
||||
const uint tmp = initial_power_of_2 + (D < 0);
|
||||
const uint abs_test_numer = tmp - 1 - tmp % abs_d;
|
||||
const unsigned_type tmp = initial_power_of_2 + (D < 0);
|
||||
const unsigned_type abs_test_numer = tmp - 1 - tmp % abs_d;
|
||||
|
||||
// Initialize our quotients and remainders (q1, r1, q2, r2 in Warren)
|
||||
uint quotient1 = initial_power_of_2 / abs_test_numer, remainder1 = initial_power_of_2 % abs_test_numer;
|
||||
uint quotient2 = initial_power_of_2 / abs_d, remainder2 = initial_power_of_2 % abs_d;
|
||||
uint delta;
|
||||
unsigned_type quotient1 = initial_power_of_2 / abs_test_numer, remainder1 = initial_power_of_2 % abs_test_numer;
|
||||
unsigned_type quotient2 = initial_power_of_2 / abs_d, remainder2 = initial_power_of_2 % abs_d;
|
||||
unsigned_type delta;
|
||||
|
||||
// Begin our loop
|
||||
do {
|
||||
|
|
|
@ -24,11 +24,11 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef uint64_t uint;
|
||||
typedef int64_t sint;
|
||||
typedef uint64_t unsigned_type;
|
||||
typedef int64_t signed_type;
|
||||
|
||||
/* Computes "magic info" for performing signed division by a fixed integer D.
|
||||
The type 'sint' is assumed to be defined as a signed integer type large enough
|
||||
The type 'signed_type' is assumed to be defined as a signed integer type large enough
|
||||
to hold both the dividend and the divisor.
|
||||
Here >> is arithmetic (signed) shift, and >>> is logical shift.
|
||||
|
||||
|
@ -55,17 +55,17 @@ extern "C" {
|
|||
*/
|
||||
|
||||
struct magics_info {
|
||||
sint multiplier; // the "magic number" multiplier
|
||||
signed_type multiplier; // the "magic number" multiplier
|
||||
unsigned shift; // shift for the dividend after multiplying
|
||||
};
|
||||
struct magics_info compute_signed_magic_info(sint D);
|
||||
struct magics_info compute_signed_magic_info(signed_type D);
|
||||
|
||||
|
||||
/* Computes "magic info" for performing unsigned division by a fixed positive integer D.
|
||||
The type 'uint' is assumed to be defined as an unsigned integer type large enough
|
||||
The type 'unsigned_type' is assumed to be defined as an unsigned integer type large enough
|
||||
to hold both the dividend and the divisor. num_bits can be set appropriately if n is
|
||||
known to be smaller than the largest uint; if this is not known then pass
|
||||
(sizeof(uint) * CHAR_BIT) for num_bits.
|
||||
known to be smaller than the largest unsigned_type; if this is not known then pass
|
||||
(sizeof(unsigned_type) * CHAR_BIT) for num_bits.
|
||||
|
||||
Assume we have a hardware register of width UINT_BITS, a known constant D which is
|
||||
not zero and not a power of 2, and a variable n of width num_bits (which may be
|
||||
|
@ -105,12 +105,12 @@ extern "C" {
|
|||
*/
|
||||
|
||||
struct magicu_info {
|
||||
uint multiplier; // the "magic number" multiplier
|
||||
unsigned_type multiplier; // the "magic number" multiplier
|
||||
unsigned pre_shift; // shift for the dividend before multiplying
|
||||
unsigned post_shift; //shift for the dividend after multiplying
|
||||
int increment; // 0 or 1; if set then increment the numerator, using one of the two strategies
|
||||
};
|
||||
struct magicu_info compute_unsigned_magic_info(uint D, unsigned num_bits);
|
||||
struct magicu_info compute_unsigned_magic_info(unsigned_type D, unsigned num_bits);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
|
|
|
@ -19,17 +19,17 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
|||
|
||||
#pragma once
|
||||
|
||||
#define WT_ADD_64 15
|
||||
#define WT_ADD_64 12
|
||||
#define WT_ADD_32 2
|
||||
#define WT_SUB_64 15
|
||||
#define WT_SUB_64 12
|
||||
#define WT_SUB_32 2
|
||||
#define WT_MUL_64 23
|
||||
#define WT_MULH_64 10
|
||||
#define WT_MUL_32 15
|
||||
#define WT_IMUL_32 15
|
||||
#define WT_IMULH_64 6
|
||||
#define WT_DIV_64 1
|
||||
#define WT_IDIV_64 1
|
||||
#define WT_DIV_64 4
|
||||
#define WT_IDIV_64 4
|
||||
#define WT_AND_64 4
|
||||
#define WT_AND_32 2
|
||||
#define WT_OR_64 4
|
||||
|
|
1882
src/program.inc
1882
src/program.inc
File diff suppressed because it is too large
Load diff
Loading…
Reference in a new issue