Added magic division to JIT compiler

New B operand selection rules
This commit is contained in:
tevador 2019-01-11 16:53:52 +01:00
parent 451dfc5730
commit 2756bcdcfe
9 changed files with 1237 additions and 1136 deletions

View file

@ -83,10 +83,10 @@ The `B.LOC.L` flag determines the B operand. It can be either a register or imme
|`B.LOC.L`|IA/DIV|IA/SHIFT|IA/MATH|FP|CL|
|----|--------|----|------|----|---|
|0|register|register|register|register|register|
|0|register|`imm8`|`imm32`|register|register|
|1|`imm32`|register|register|register|register|
|2|`imm32`|`imm8`|register|register|register|
|3|`imm32`|`imm8`|`imm32`|register|register|
|3|`imm32`|register|register|register|register|
Integer instructions are split into 3 classes: integer division (IA/DIV), shift and rotate (IA/SHIFT) and other (IA/MATH). Floating point (FP) and control (CL) instructions always use a register operand.

View file

@ -17,7 +17,7 @@ You should have received a copy of the GNU General Public License
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
*/
//#define TRACE
//#define MAGIC_DIVISION
#define MAGIC_DIVISION
#include "AssemblyGeneratorX86.hpp"
#include "Pcg32.hpp"
#include "common.hpp"
@ -64,108 +64,61 @@ namespace RandomX {
(this->*generator)(instr, i);
}
void AssemblyGeneratorX86::genar(Instruction& instr, int i) {
void AssemblyGeneratorX86::gena(Instruction& instr, int i) {
asmCode << "\txor " << regR[instr.rega % RegistersCount] << ", 0" << std::hex << instr.addra << "h" << std::dec << std::endl;
asmCode << "\tmov ecx, " << regR32[instr.rega % RegistersCount] << std::endl;
asmCode << "\ttest " << regIc8 << ", 63" << std::endl;
asmCode << "\tjnz short rx_body_" << i << std::endl;
switch (instr.loca & 3)
{
case 0:
case 1:
case 2:
asmCode << "\tcall rx_read_l1" << std::endl;
asmCode << "rx_body_" << i << ":" << std::endl;
if ((instr.loca & 192) == 0)
asmCode << "\txor " << regMx << ", rcx" << std::endl;
asmCode << "\tand ecx, " << (ScratchpadL1 - 1) << std::endl;
break;
default: //3
asmCode << "\tcall rx_read_l2" << std::endl;
asmCode << "rx_body_" << i << ":" << std::endl;
if ((instr.loca & 192) == 0)
asmCode << "\txor " << regMx << ", rcx" << std::endl;
asmCode << "\tand ecx, " << (ScratchpadL2 - 1) << std::endl;
break;
if (instr.loca & 3) {
asmCode << "\tcall rx_read_l1" << std::endl;
asmCode << "rx_body_" << i << ":" << std::endl;
if ((instr.loca & 192) == 0)
asmCode << "\txor " << regMx << ", rcx" << std::endl;
asmCode << "\tand ecx, " << (ScratchpadL1 - 1) << std::endl;
}
else {
asmCode << "\tcall rx_read_l2" << std::endl;
asmCode << "rx_body_" << i << ":" << std::endl;
if ((instr.loca & 192) == 0)
asmCode << "\txor " << regMx << ", rcx" << std::endl;
asmCode << "\tand ecx, " << (ScratchpadL2 - 1) << std::endl;
}
}
// Writes the assembly text that loads integer operand A into rax.
// gena() emits the address computation, whose last instruction masks ecx to
// the selected scratchpad size; this function then appends a 64-bit load from
// [regScratchpadAddr + rcx*8].
//   instr - instruction being translated; forwarded unchanged to gena()
//   i     - number forwarded to gena(), which uses it to build the rx_body_<i> label
void AssemblyGeneratorX86::genar(Instruction& instr, int i) {
gena(instr, i);
asmCode << "\tmov rax, qword ptr [" << regScratchpadAddr << "+rcx*8]" << std::endl;
}
void AssemblyGeneratorX86::genaf(Instruction& instr, int i) {
asmCode << "\txor " << regR[instr.rega % RegistersCount] << ", 0" << std::hex << instr.addra << "h" << std::dec << std::endl;
asmCode << "\tmov ecx, " << regR32[instr.rega % RegistersCount] << std::endl;
asmCode << "\ttest " << regIc8 << ", 63" << std::endl;
asmCode << "\tjnz short rx_body_" << i << std::endl;
switch (instr.loca & 3)
{
case 0:
case 1:
case 2:
asmCode << "\tcall rx_read_l1" << std::endl;
asmCode << "rx_body_" << i << ":" << std::endl;
if((instr.loca & 192) == 0)
asmCode << "\txor " << regMx << ", rcx" << std::endl;
asmCode << "\tand ecx, " << (ScratchpadL1 - 1) << std::endl;
break;
default: //3
asmCode << "\tcall rx_read_l2" << std::endl;
asmCode << "rx_body_" << i << ":" << std::endl;
if ((instr.loca & 192) == 0)
asmCode << "\txor " << regMx << ", rcx" << std::endl;
asmCode << "\tand ecx, " << (ScratchpadL2 - 1) << std::endl;
break;
}
gena(instr, i);
asmCode << "\tcvtdq2pd xmm0, qword ptr [" << regScratchpadAddr << "+rcx*8]" << std::endl;
}
void AssemblyGeneratorX86::genbr0(Instruction& instr, const char* instrx86) {
switch (instr.locb & 7)
{
case 0:
case 1:
case 2:
case 3:
void AssemblyGeneratorX86::genbiashift(Instruction& instr, const char* instrx86) {
if (instr.locb & 1) {
asmCode << "\tmov rcx, " << regR[instr.regb % RegistersCount] << std::endl;
asmCode << "\t" << instrx86 << " rax, cl" << std::endl;
return;
default:
} else {
asmCode << "\t" << instrx86 << " rax, " << (instr.imm8 & 63) << std::endl;;
return;
}
}
void AssemblyGeneratorX86::genbr1(Instruction& instr) {
switch (instr.locb & 7)
{
case 0:
case 1:
case 2:
case 3:
case 4:
case 5:
void AssemblyGeneratorX86::genbia(Instruction& instr) {
if (instr.locb & 3) {
asmCode << regR[instr.regb % RegistersCount] << std::endl;
return;
default:
} else {
asmCode << instr.imm32 << std::endl;;
return;
}
}
void AssemblyGeneratorX86::genbr132(Instruction& instr) {
switch (instr.locb & 7)
{
case 0:
case 1:
case 2:
case 3:
case 4:
case 5:
void AssemblyGeneratorX86::genbia32(Instruction& instr) {
if (instr.locb & 3) {
asmCode << regR32[instr.regb % RegistersCount] << std::endl;
return;
default:
}
else {
asmCode << instr.imm32 << std::endl;;
return;
}
}
@ -241,28 +194,28 @@ namespace RandomX {
void AssemblyGeneratorX86::h_ADD_64(Instruction& instr, int i) {
genar(instr, i);
asmCode << "\tadd rax, ";
genbr1(instr);
genbia(instr);
gencr(instr);
}
void AssemblyGeneratorX86::h_ADD_32(Instruction& instr, int i) {
genar(instr, i);
asmCode << "\tadd eax, ";
genbr132(instr);
genbia32(instr);
gencr(instr);
}
void AssemblyGeneratorX86::h_SUB_64(Instruction& instr, int i) {
genar(instr, i);
asmCode << "\tsub rax, ";
genbr1(instr);
genbia(instr);
gencr(instr);
}
void AssemblyGeneratorX86::h_SUB_32(Instruction& instr, int i) {
genar(instr, i);
asmCode << "\tsub eax, ";
genbr132(instr);
genbia32(instr);
gencr(instr);
}
@ -272,14 +225,14 @@ namespace RandomX {
if ((instr.locb & 7) >= 6) {
asmCode << "rax, ";
}
genbr1(instr);
genbia(instr);
gencr(instr);
}
void AssemblyGeneratorX86::h_MULH_64(Instruction& instr, int i) {
genar(instr, i);
asmCode << "\tmov rcx, ";
genbr1(instr);
genbia(instr);
asmCode << "\tmul rcx" << std::endl;
asmCode << "\tmov rax, rdx" << std::endl;
gencr(instr);
@ -289,7 +242,7 @@ namespace RandomX {
genar(instr, i);
asmCode << "\tmov ecx, eax" << std::endl;
asmCode << "\tmov eax, ";
genbr132(instr);
genbia32(instr);
asmCode << "\timul rax, rcx" << std::endl;
gencr(instr);
}
@ -310,7 +263,7 @@ namespace RandomX {
void AssemblyGeneratorX86::h_IMULH_64(Instruction& instr, int i) {
genar(instr, i);
asmCode << "\tmov rcx, ";
genbr1(instr);
genbia(instr);
asmCode << "\timul rcx" << std::endl;
asmCode << "\tmov rax, rdx" << std::endl;
gencr(instr);
@ -318,7 +271,7 @@ namespace RandomX {
void AssemblyGeneratorX86::h_DIV_64(Instruction& instr, int i) {
genar(instr, i);
if ((instr.locb & 7) >= 6) {
if (instr.locb & 3) {
#ifdef MAGIC_DIVISION
if (instr.imm32 != 0) {
uint32_t divisor = instr.imm32;
@ -373,8 +326,8 @@ namespace RandomX {
void AssemblyGeneratorX86::h_IDIV_64(Instruction& instr, int i) {
genar(instr, i);
if (instr.locb & 3) {
#ifdef MAGIC_DIVISION
if ((instr.locb & 7) >= 6) {
int64_t divisor = instr.imm32;
asmCode << "\t; magic divide by " << divisor << std::endl;
if ((divisor & -divisor) == divisor || (divisor & -divisor) == -divisor) {
@ -394,9 +347,10 @@ namespace RandomX {
asmCode << "\tadd rax, rcx" << std::endl;
asmCode << "\tsar rax, " << shift << std::endl;
}
if(negative)
if (negative)
asmCode << "\tneg rax" << std::endl;
} else if(divisor != 0) {
}
else if (divisor != 0) {
magics_info mi = compute_signed_magic_info(divisor);
if ((divisor >= 0) != (mi.multiplier >= 0))
asmCode << "\tmov rcx, rax" << std::endl;
@ -422,25 +376,29 @@ namespace RandomX {
asmCode << "\tsets dl" << std::endl;
asmCode << "\tadd rax, rdx" << std::endl;
}
#else
asmCode << "\tmov edx, " << instr.imm32 << std::endl;
#endif
}
else {
#endif
asmCode << "\tmov edx, ";
genbr132(instr);
asmCode << "\tcmp edx, -1" << std::endl;
asmCode << "\tjne short safe_idiv_" << i << std::endl;
asmCode << "\tneg rax" << std::endl;
asmCode << "\tjmp short result_idiv_" << i << std::endl;
asmCode << "safe_idiv_" << i << ":" << std::endl;
asmCode << "\tmov ecx, 1" << std::endl;
asmCode << "\ttest edx, edx" << std::endl;
asmCode << "\tcmovne ecx, edx" << std::endl;
asmCode << "\tmovsxd rcx, ecx" << std::endl;
asmCode << "\tcqo" << std::endl;
asmCode << "\tidiv rcx" << std::endl;
asmCode << "result_idiv_" << i << ":" << std::endl;
#ifdef MAGIC_DIVISION
asmCode << "\tmov edx, " << regR32[instr.regb % RegistersCount] << std::endl;
#ifndef MAGIC_DIVISION
}
#endif
asmCode << "\tcmp edx, -1" << std::endl;
asmCode << "\tjne short body_idiv_" << i << std::endl;
asmCode << "\tneg rax" << std::endl;
asmCode << "\tjmp short result_idiv_" << i << std::endl;
asmCode << "body_idiv_" << i << ":" << std::endl;
asmCode << "\tmov ecx, 1" << std::endl;
asmCode << "\ttest edx, edx" << std::endl;
asmCode << "\tcmovne ecx, edx" << std::endl;
asmCode << "\tmovsxd rcx, ecx" << std::endl;
asmCode << "\tcqo" << std::endl;
asmCode << "\tidiv rcx" << std::endl;
asmCode << "result_idiv_" << i << ":" << std::endl;
#ifdef MAGIC_DIVISION
}
#endif
gencr(instr);
}
@ -448,72 +406,72 @@ namespace RandomX {
void AssemblyGeneratorX86::h_AND_64(Instruction& instr, int i) {
genar(instr, i);
asmCode << "\tand rax, ";
genbr1(instr);
genbia(instr);
gencr(instr);
}
void AssemblyGeneratorX86::h_AND_32(Instruction& instr, int i) {
genar(instr, i);
asmCode << "\tand eax, ";
genbr132(instr);
genbia32(instr);
gencr(instr);
}
void AssemblyGeneratorX86::h_OR_64(Instruction& instr, int i) {
genar(instr, i);
asmCode << "\tor rax, ";
genbr1(instr);
genbia(instr);
gencr(instr);
}
void AssemblyGeneratorX86::h_OR_32(Instruction& instr, int i) {
genar(instr, i);
asmCode << "\tor eax, ";
genbr132(instr);
genbia32(instr);
gencr(instr);
}
void AssemblyGeneratorX86::h_XOR_64(Instruction& instr, int i) {
genar(instr, i);
asmCode << "\txor rax, ";
genbr1(instr);
genbia(instr);
gencr(instr);
}
void AssemblyGeneratorX86::h_XOR_32(Instruction& instr, int i) {
genar(instr, i);
asmCode << "\txor eax, ";
genbr132(instr);
genbia32(instr);
gencr(instr);
}
void AssemblyGeneratorX86::h_SHL_64(Instruction& instr, int i) {
genar(instr, i);
genbr0(instr, "shl");
genbiashift(instr, "shl");
gencr(instr);
}
void AssemblyGeneratorX86::h_SHR_64(Instruction& instr, int i) {
genar(instr, i);
genbr0(instr, "shr");
genbiashift(instr, "shr");
gencr(instr);
}
void AssemblyGeneratorX86::h_SAR_64(Instruction& instr, int i) {
genar(instr, i);
genbr0(instr, "sar");
genbiashift(instr, "sar");
gencr(instr);
}
void AssemblyGeneratorX86::h_ROL_64(Instruction& instr, int i) {
genar(instr, i);
genbr0(instr, "rol");
genbiashift(instr, "rol");
gencr(instr);
}
void AssemblyGeneratorX86::h_ROR_64(Instruction& instr, int i) {
genar(instr, i);
genbr0(instr, "ror");
genbiashift(instr, "ror");
gencr(instr);
}

View file

@ -38,11 +38,12 @@ namespace RandomX {
static InstructionGenerator engine[256];
std::stringstream asmCode;
void gena(Instruction&, int);
void genar(Instruction&, int);
void genaf(Instruction&, int);
void genbr0(Instruction&, const char*);
void genbr1(Instruction&);
void genbr132(Instruction&);
void genbiashift(Instruction&, const char*);
void genbia(Instruction&);
void genbia32(Instruction&);
void genbf(Instruction&, const char*);
void gencr(Instruction&, bool);
void gencf(Instruction&, bool);

View file

@ -17,10 +17,14 @@ You should have received a copy of the GNU General Public License
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
*/
//#define MAGIC_DIVISION
#include "JitCompilerX86.hpp"
#include "Pcg32.hpp"
#include <cstring>
#include <stdexcept>
#ifdef MAGIC_DIVISION
#include "divideByConstantCodegen.h"
#endif
#ifdef _WIN32
#include <windows.h>
@ -152,6 +156,17 @@ namespace RandomX {
instructionOffsets.push_back(codePos);
emit(0x840fcbff); //dec ebx; jz <epilogue>
emit(epilogueOffset - (codePos + 4)); //jump offset (RIP-relative)
auto generator = engine[instr.opcode];
(this->*generator)(instr, i);
}
void JitCompilerX86::fixCallOffsets() {
for (CallOffset& co : callOffsets) {
*reinterpret_cast<int32_t*>(code + co.pos) = instructionOffsets[co.index] - (co.pos + 4);
}
}
void JitCompilerX86::gena(Instruction& instr) {
emit(uint16_t(0x8149)); //xor
emitByte(0xf0 + (instr.rega % RegistersCount));
emit(instr.addra);
@ -169,41 +184,28 @@ namespace RandomX {
emit(uint16_t(0x3348));
emitByte(0xe9); //xor rbp, rcx
}
auto generator = engine[instr.opcode];
(this->*generator)(instr, i);
}
void JitCompilerX86::fixCallOffsets() {
for (CallOffset& co : callOffsets) {
*reinterpret_cast<int32_t*>(code + co.pos) = instructionOffsets[co.index] - (co.pos + 4);
emit(uint16_t(0xe181)); //and ecx,
if (instr.loca & 3) {
emit(ScratchpadL1 - 1); //first 16 KiB of scratchpad
}
else {
emit(ScratchpadL2 - 1); //whole scratchpad
}
}
void JitCompilerX86::genar(Instruction& instr) {
emit(uint16_t(0xe181)); //and ecx,
if (instr.loca & 3) {
emit(ScratchpadL1 - 1); //first 16 KiB of scratchpad
}
else {
emit(ScratchpadL2 - 1); //whole scratchpad
}
gena(instr);
emit(0xce048b48); //mov rax,QWORD PTR [rsi+rcx*8]
}
void JitCompilerX86::genaf(Instruction& instr) {
emit(uint16_t(0xe181)); //and ecx,
if (instr.loca & 3) {
emit(ScratchpadL1 - 1); //first 16 KiB of scratchpad
}
else {
emit(ScratchpadL2 - 1); //whole scratchpad
}
gena(instr);
emitByte(0xf3);
emit(0xce04e60f); //cvtdq2pd xmm0,QWORD PTR [rsi+rcx*8]
}
void JitCompilerX86::genbr0(Instruction& instr, uint16_t opcodeReg, uint16_t opcodeImm) {
if ((instr.locb & 7) <= 3) {
void JitCompilerX86::genbiashift(Instruction& instr, uint16_t opcodeReg, uint16_t opcodeImm) {
if (instr.locb & 1) {
emit(uint16_t(0x8b49)); //mov
emitByte(0xc8 + (instr.regb % RegistersCount)); //rcx, regb
emitByte(0x48); //REX.W
@ -216,8 +218,8 @@ namespace RandomX {
}
}
void JitCompilerX86::genbr1(Instruction& instr, uint16_t opcodeReg, uint16_t opcodeImm) {
if ((instr.locb & 7) <= 5) {
void JitCompilerX86::genbia(Instruction& instr, uint16_t opcodeReg, uint16_t opcodeImm) {
if (instr.locb & 3) {
emit(opcodeReg); // xxx rax, r64
emitByte(0xc0 + (instr.regb % RegistersCount));
}
@ -227,8 +229,8 @@ namespace RandomX {
}
}
void JitCompilerX86::genbr132(Instruction& instr, uint16_t opcodeReg, uint8_t opcodeImm) {
if ((instr.locb & 7) <= 5) {
void JitCompilerX86::genbia32(Instruction& instr, uint16_t opcodeReg, uint8_t opcodeImm) {
if (instr.locb & 3) {
emit(opcodeReg); // xxx eax, r32
emitByte(0xc0 + (instr.regb % RegistersCount));
}
@ -328,25 +330,25 @@ namespace RandomX {
void JitCompilerX86::h_ADD_64(Instruction& instr, int i) {
genar(instr);
genbr1(instr, 0x0349, 0x0548);
genbia(instr, 0x0349, 0x0548);
gencr(instr);
}
void JitCompilerX86::h_ADD_32(Instruction& instr, int i) {
genar(instr);
genbr132(instr, 0x0341, 0x05);
genbia32(instr, 0x0341, 0x05);
gencr(instr);
}
void JitCompilerX86::h_SUB_64(Instruction& instr, int i) {
genar(instr);
genbr1(instr, 0x2b49, 0x2d48);
genbia(instr, 0x2b49, 0x2d48);
gencr(instr);
}
void JitCompilerX86::h_SUB_32(Instruction& instr, int i) {
genar(instr);
genbr132(instr, 0x2b41, 0x2d);
genbia32(instr, 0x2b41, 0x2d);
gencr(instr);
}
@ -435,104 +437,209 @@ namespace RandomX {
// Emits machine code for DIV_64: genar() loads operand A into rax, rax is then
// divided (unsigned) by operand B, and gencr() is called once the quotient
// code has been emitted. The parameter i is not used in this function.
void JitCompilerX86::h_DIV_64(Instruction& instr, int i) {
genar(instr);
if ((instr.locb & 7) <= 5) {
if (instr.locb & 3) {
#ifdef MAGIC_DIVISION
// Immediate divisor with magic division: the div instruction is replaced by a
// shift or a multiply-and-shift sequence computed here. When imm32 is zero
// nothing is emitted for the division, so rax is left unchanged.
if (instr.imm32 != 0) {
uint32_t divisor = instr.imm32;
// Non-zero exactly when divisor is not a power of two.
if (divisor & (divisor - 1)) {
magicu_info mi = compute_unsigned_magic_info(divisor, sizeof(uint64_t) * 8);
if (mi.pre_shift > 0) {
if (mi.pre_shift == 1) {
emitByte(0x48);
emit(uint16_t(0xe8d1)); //shr rax,1
}
else {
emit(0x00e8c148 | (mi.pre_shift << 24)); //shr rax, pre_shift
}
}
if (mi.increment) {
emit(0x00d8834801c08348); //add rax,1; sbb rax,0
}
emit(uint16_t(0xb948)); //movabs rcx, multiplier
emit(mi.multiplier);
// The trailing 0x48 is the REX.W prefix of the "mov rax,rdx" emitted next.
emit(0x48e1f748); //mul rcx; REX
emit(uint16_t(0xc28b)); //mov rax,rdx
if (mi.post_shift > 0)
emit(0x00e8c148 | (mi.post_shift << 24)); //shr rax, post_shift
}
else { //divisor is a power of two
int shift = 0;
while (divisor >>= 1)
++shift;
if (shift > 0)
emit(0x00e8c148 | (shift << 24)); //shr rax, shift
}
}
#else
// Without magic division the immediate is loaded into ecx, with 1 substituted
// for a zero divisor.
emitByte(0xb9); //mov ecx, imm32
emit(instr.imm32 != 0 ? instr.imm32 : 1);
#endif
}
else {
// Register divisor: ecx = (r32 != 0) ? r32 : 1, so the emitted div never
// divides by zero.
emitByte(0xb9); //mov ecx, 1
emit(1);
emit(uint16_t(0x8b41)); //mov edx, r32
emitByte(0xd0 + (instr.regb % RegistersCount));
emit(0x450fd285); //test edx, edx; cmovne ecx,edx
emitByte(0xca);
#ifdef MAGIC_DIVISION
emit(0xf748d233); //xor edx,edx; div rcx
emitByte(0xf1);
#endif
}
else {
emitByte(0xb9); //mov ecx, imm32
emit(instr.imm32 != 0 ? instr.imm32 : 1);
}
#ifndef MAGIC_DIVISION
emit(0xf748d233); //xor edx,edx; div rcx
emitByte(0xf1);
#endif
gencr(instr);
}
// Emits machine code for IDIV_64 (signed division of rax by operand B).
// genar() loads operand A into rax; gencr() is called once the quotient code
// has been emitted. With MAGIC_DIVISION an immediate divisor is compiled to a
// shift sequence (for +/- powers of two) or to a multiply-and-shift sequence;
// otherwise the divisor is placed in edx and the guarded idiv sequence near
// the end of the function is emitted. The parameter i is not used here.
void JitCompilerX86::h_IDIV_64(Instruction& instr, int i) {
genar(instr);
if ((instr.locb & 7) <= 5) {
emit(uint16_t(0x8b41)); //mov edx, r32
emitByte(0xd0 + (instr.regb % RegistersCount));
if (instr.locb & 3) {
#ifdef MAGIC_DIVISION
int64_t divisor = instr.imm32;
// True for 0 and for any divisor whose magnitude is a power of two.
if ((divisor & -divisor) == divisor || (divisor & -divisor) == -divisor) {
// +/- power of two
bool negative = divisor < 0;
if (negative)
divisor = -divisor;
int shift = 0;
uint64_t unsignedDivisor = divisor;
while (unsignedDivisor >>= 1)
++shift;
if (shift > 0) {
// Add (2^shift - 1) to negative dividends before the arithmetic shift so
// the quotient rounds toward zero.
emitByte(0x48);
emit(uint16_t(0xc88b)); //mov rcx, rax
emit(0x3ff9c148); //sar rcx, 63
uint32_t mask = (1ULL << shift) - 1;
emit(uint16_t(0xe181)); //and ecx, mask
emit(mask);
emitByte(0x48);
emit(uint16_t(0xc103)); //add rax, rcx
emit(0x00f8c148 | (shift << 24)); //sar rax, shift
}
if (negative) {
emitByte(0x48);
emit(uint16_t(0xd8f7)); //neg rax
}
}
else if (divisor != 0) {
magics_info mi = compute_signed_magic_info(divisor);
// The dividend is saved in rcx only when a correcting add/sub follows.
if ((divisor >= 0) != (mi.multiplier >= 0)) {
emitByte(0x48);
emit(uint16_t(0xc88b)); //mov rcx, rax
}
emit(uint16_t(0xba48)); //movabs rdx, multiplier
emit(mi.multiplier);
emit(0xd233c28b48eaf748); //imul rdx; mov rax,rdx; xor edx,edx
// haveSF tracks whether an emitted instruction has set the sign flag from
// rax after the xor above; the "sets dl" below depends on it.
bool haveSF = false;
if (divisor > 0 && mi.multiplier < 0) {
emitByte(0x48);
emit(uint16_t(0xc103)); //add rax, rcx
haveSF = true;
}
if (divisor < 0 && mi.multiplier > 0) {
emitByte(0x48);
emit(uint16_t(0xc12b)); //sub rax, rcx
haveSF = true;
}
if (mi.shift > 0) {
emit(0x00f8c148 | (mi.shift << 24)); //sar rax, shift
haveSF = true;
}
if (!haveSF) {
// Two-byte opcodes are passed to emit() with the first byte in the low
// half (0xc88b above produces 8b c8), so "test rax, rax" (48 85 c0) must be
// written as 0xc085, not 0x85c0.
emitByte(0x48);
emit(uint16_t(0xc085)); //test rax, rax
}
// The trailing 0x48 is the REX.W prefix of the "add rax, rdx" completed by
// the next emit.
emit(0x48c2980f); //sets dl; add rax, rdx
emit(uint16_t(0xc203));
}
#else
emitByte(0xba); // mov edx, imm32
emit(instr.imm32);
#endif
}
else {
emitByte(0xba); // xxx edx, imm32
emit(instr.imm32);
emit(uint16_t(0x8b41)); //mov edx, r32
emitByte(0xd0 + (instr.regb % RegistersCount));
#ifndef MAGIC_DIVISION
}
#endif
// Guarded signed division, emitted as raw bytes. In emission order they decode to:
//   cmp edx,-1; jne +11; mov rcx,rax; rol rcx,1; dec rcx; jz +18;
//   mov ecx,1; test edx,edx; cmovne ecx,edx; movsxd rcx,ecx; cqo; REX.W
// The jz skips the idiv when the divisor is -1 and rax holds only the sign
// bit; a zero divisor is replaced by 1.
emit(0xc88b480b75fffa83);
emit(0x1274c9ff48c1d148);
emit(0x0fd28500000001b9);
emit(0x489948c96348ca45);
emit(uint16_t(0xf9f7)); //idiv rcx
#ifdef MAGIC_DIVISION
}
#endif
gencr(instr);
}
void JitCompilerX86::h_AND_64(Instruction& instr, int i) {
genar(instr);
genbr1(instr, 0x2349, 0x2548);
genbia(instr, 0x2349, 0x2548);
gencr(instr);
}
void JitCompilerX86::h_AND_32(Instruction& instr, int i) {
genar(instr);
genbr132(instr, 0x2341, 0x25);
genbia32(instr, 0x2341, 0x25);
gencr(instr);
}
void JitCompilerX86::h_OR_64(Instruction& instr, int i) {
genar(instr);
genbr1(instr, 0x0b49, 0x0d48);
genbia(instr, 0x0b49, 0x0d48);
gencr(instr);
}
void JitCompilerX86::h_OR_32(Instruction& instr, int i) {
genar(instr);
genbr132(instr, 0x0b41, 0x0d);
genbia32(instr, 0x0b41, 0x0d);
gencr(instr);
}
void JitCompilerX86::h_XOR_64(Instruction& instr, int i) {
genar(instr);
genbr1(instr, 0x3349, 0x3548);
genbia(instr, 0x3349, 0x3548);
gencr(instr);
}
void JitCompilerX86::h_XOR_32(Instruction& instr, int i) {
genar(instr);
genbr132(instr, 0x3341, 0x35);
genbia32(instr, 0x3341, 0x35);
gencr(instr);
}
void JitCompilerX86::h_SHL_64(Instruction& instr, int i) {
genar(instr);
genbr0(instr, 0xe0d3, 0xe0c1);
genbiashift(instr, 0xe0d3, 0xe0c1);
gencr(instr);
}
void JitCompilerX86::h_SHR_64(Instruction& instr, int i) {
genar(instr);
genbr0(instr, 0xe8d3, 0xe8c1);
genbiashift(instr, 0xe8d3, 0xe8c1);
gencr(instr);
}
void JitCompilerX86::h_SAR_64(Instruction& instr, int i) {
genar(instr);
genbr0(instr, 0xf8d3, 0xf8c1);
genbiashift(instr, 0xf8d3, 0xf8c1);
gencr(instr);
}
void JitCompilerX86::h_ROL_64(Instruction& instr, int i) {
genar(instr);
genbr0(instr, 0xc0d3, 0xc0c1);
genbiashift(instr, 0xc0d3, 0xc0c1);
gencr(instr);
}
void JitCompilerX86::h_ROR_64(Instruction& instr, int i) {
genar(instr);
genbr0(instr, 0xc8d3, 0xc8c1);
genbiashift(instr, 0xc8d3, 0xc8c1);
gencr(instr);
}

View file

@ -58,11 +58,12 @@ namespace RandomX {
std::vector<int32_t> instructionOffsets;
std::vector<CallOffset> callOffsets;
void gena(Instruction&);
void genar(Instruction&);
void genaf(Instruction&);
void genbr0(Instruction&, uint16_t, uint16_t);
void genbr1(Instruction&, uint16_t, uint16_t);
void genbr132(Instruction&, uint16_t, uint8_t);
void genbiashift(Instruction&, uint16_t, uint16_t);
void genbia(Instruction&, uint16_t, uint16_t);
void genbia32(Instruction&, uint16_t, uint8_t);
void genbf(Instruction&, uint8_t);
void scratchpadStoreR(Instruction&, uint32_t, bool);
void scratchpadStoreF(Instruction&, int, uint32_t, bool);

View file

@ -11,10 +11,10 @@
#include "divideByConstantCodegen.h"
struct magicu_info compute_unsigned_magic_info(uint D, unsigned num_bits) {
struct magicu_info compute_unsigned_magic_info(unsigned_type D, unsigned num_bits) {
//The numerator must fit in a uint
assert(num_bits > 0 && num_bits <= sizeof(uint) * CHAR_BIT);
//The numerator must fit in an unsigned_type
assert(num_bits > 0 && num_bits <= sizeof(unsigned_type) * CHAR_BIT);
// D must be larger than zero and not a power of 2
assert(D & (D - 1));
@ -22,29 +22,29 @@ struct magicu_info compute_unsigned_magic_info(uint D, unsigned num_bits) {
// The eventual result
struct magicu_info result;
// Bits in a uint
const unsigned UINT_BITS = sizeof(uint) * CHAR_BIT;
// Bits in an unsigned_type
const unsigned UINT_BITS = sizeof(unsigned_type) * CHAR_BIT;
// The extra shift implicit in the difference between UINT_BITS and num_bits
const unsigned extra_shift = UINT_BITS - num_bits;
// The initial power of 2 is one less than the first one that can possibly work
const uint initial_power_of_2 = (uint)1 << (UINT_BITS - 1);
const unsigned_type initial_power_of_2 = (unsigned_type)1 << (UINT_BITS - 1);
// The remainder and quotient of our power of 2 divided by d
uint quotient = initial_power_of_2 / D, remainder = initial_power_of_2 % D;
unsigned_type quotient = initial_power_of_2 / D, remainder = initial_power_of_2 % D;
// ceil(log_2 D)
unsigned ceil_log_2_D;
// The magic info for the variant "round down" algorithm
uint down_multiplier = 0;
unsigned_type down_multiplier = 0;
unsigned down_exponent = 0;
int has_magic_down = 0;
// Compute ceil(log_2 D)
ceil_log_2_D = 0;
uint tmp;
unsigned_type tmp;
for (tmp = D; tmp > 0; tmp >>= 1)
ceil_log_2_D += 1;
@ -67,11 +67,11 @@ struct magicu_info compute_unsigned_magic_info(uint D, unsigned num_bits) {
// We're done if this exponent works for the round_up algorithm.
// Note that exponent may be larger than the maximum shift supported,
// so the check for >= ceil_log_2_D is critical.
if ((exponent + extra_shift >= ceil_log_2_D) || (D - remainder) <= ((uint)1 << (exponent + extra_shift)))
if ((exponent + extra_shift >= ceil_log_2_D) || (D - remainder) <= ((unsigned_type)1 << (exponent + extra_shift)))
break;
// Set magic_down if we have not set it yet and this exponent works for the round_down algorithm
if (!has_magic_down && remainder <= ((uint)1 << (exponent + extra_shift))) {
if (!has_magic_down && remainder <= ((unsigned_type)1 << (exponent + extra_shift))) {
has_magic_down = 1;
down_multiplier = quotient;
down_exponent = exponent;
@ -96,7 +96,7 @@ struct magicu_info compute_unsigned_magic_info(uint D, unsigned num_bits) {
else {
// Even divisor, so use a prefix-shifted dividend
unsigned pre_shift = 0;
uint shifted_D = D;
unsigned_type shifted_D = D;
while ((shifted_D & 1) == 0) {
shifted_D >>= 1;
pre_shift += 1;
@ -108,34 +108,34 @@ struct magicu_info compute_unsigned_magic_info(uint D, unsigned num_bits) {
return result;
}
struct magics_info compute_signed_magic_info(sint D) {
struct magics_info compute_signed_magic_info(signed_type D) {
// D must not be zero and must not be a power of 2 (or its negative)
assert(D != 0 && (D & -D) != D && (D & -D) != -D);
// Our result
struct magics_info result;
// Bits in an sint
const unsigned SINT_BITS = sizeof(sint) * CHAR_BIT;
// Bits in a signed_type
const unsigned SINT_BITS = sizeof(signed_type) * CHAR_BIT;
// Absolute value of D (we know D is not the most negative value since that's a power of 2)
const uint abs_d = (D < 0 ? -D : D);
const unsigned_type abs_d = (D < 0 ? -D : D);
// The initial power of 2 is one less than the first one that can possibly work
// "two31" in Warren
unsigned exponent = SINT_BITS - 1;
const uint initial_power_of_2 = (uint)1 << exponent;
const unsigned_type initial_power_of_2 = (unsigned_type)1 << exponent;
// Compute the absolute value of our "test numerator,"
// which is the largest dividend whose remainder with d is d-1.
// This is called anc in Warren.
const uint tmp = initial_power_of_2 + (D < 0);
const uint abs_test_numer = tmp - 1 - tmp % abs_d;
const unsigned_type tmp = initial_power_of_2 + (D < 0);
const unsigned_type abs_test_numer = tmp - 1 - tmp % abs_d;
// Initialize our quotients and remainders (q1, r1, q2, r2 in Warren)
uint quotient1 = initial_power_of_2 / abs_test_numer, remainder1 = initial_power_of_2 % abs_test_numer;
uint quotient2 = initial_power_of_2 / abs_d, remainder2 = initial_power_of_2 % abs_d;
uint delta;
unsigned_type quotient1 = initial_power_of_2 / abs_test_numer, remainder1 = initial_power_of_2 % abs_test_numer;
unsigned_type quotient2 = initial_power_of_2 / abs_d, remainder2 = initial_power_of_2 % abs_d;
unsigned_type delta;
// Begin our loop
do {

View file

@ -24,11 +24,11 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
extern "C" {
#endif
typedef uint64_t uint;
typedef int64_t sint;
typedef uint64_t unsigned_type;
typedef int64_t signed_type;
/* Computes "magic info" for performing signed division by a fixed integer D.
The type 'sint' is assumed to be defined as a signed integer type large enough
The type 'signed_type' is assumed to be defined as a signed integer type large enough
to hold both the dividend and the divisor.
Here >> is arithmetic (signed) shift, and >>> is logical shift.
@ -55,17 +55,17 @@ extern "C" {
*/
struct magics_info {
sint multiplier; // the "magic number" multiplier
signed_type multiplier; // the "magic number" multiplier
unsigned shift; // shift for the dividend after multiplying
};
struct magics_info compute_signed_magic_info(sint D);
struct magics_info compute_signed_magic_info(signed_type D);
/* Computes "magic info" for performing unsigned division by a fixed positive integer D.
The type 'uint' is assumed to be defined as an unsigned integer type large enough
The type 'unsigned_type' is assumed to be defined as an unsigned integer type large enough
to hold both the dividend and the divisor. num_bits can be set appropriately if n is
known to be smaller than the largest uint; if this is not known then pass
(sizeof(uint) * CHAR_BIT) for num_bits.
known to be smaller than the largest unsigned_type; if this is not known then pass
(sizeof(unsigned_type) * CHAR_BIT) for num_bits.
Assume we have a hardware register of width UINT_BITS, a known constant D which is
not zero and not a power of 2, and a variable n of width num_bits (which may be
@ -105,12 +105,12 @@ extern "C" {
*/
struct magicu_info {
uint multiplier; // the "magic number" multiplier
unsigned_type multiplier; // the "magic number" multiplier
unsigned pre_shift; // shift for the dividend before multiplying
unsigned post_shift; //shift for the dividend after multiplying
int increment; // 0 or 1; if set then increment the numerator, using one of the two strategies
};
struct magicu_info compute_unsigned_magic_info(uint D, unsigned num_bits);
struct magicu_info compute_unsigned_magic_info(unsigned_type D, unsigned num_bits);
#if defined(__cplusplus)
}

View file

@ -19,17 +19,17 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
#pragma once
#define WT_ADD_64 15
#define WT_ADD_64 12
#define WT_ADD_32 2
#define WT_SUB_64 15
#define WT_SUB_64 12
#define WT_SUB_32 2
#define WT_MUL_64 23
#define WT_MULH_64 10
#define WT_MUL_32 15
#define WT_IMUL_32 15
#define WT_IMULH_64 6
#define WT_DIV_64 1
#define WT_IDIV_64 1
#define WT_DIV_64 4
#define WT_IDIV_64 4
#define WT_AND_64 4
#define WT_AND_32 2
#define WT_OR_64 4

File diff suppressed because it is too large Load diff