mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2024-08-15 00:23:14 +00:00
8 branch conditions for CALL/RET
This commit is contained in:
parent
55afe9646f
commit
740c40b218
11 changed files with 1396 additions and 1051 deletions
51
doc/isa.md
51
doc/isa.md
|
@ -14,9 +14,9 @@ There are 256 opcodes, which are distributed between various operations dependin
|
||||||
|
|
||||||
|operation|number of opcodes||
|
|operation|number of opcodes||
|
||||||
|---------|-----------------|----|
|
|---------|-----------------|----|
|
||||||
|ALU operations|146|57.0%|
|
|ALU operations|136|53.1%|
|
||||||
|FPU operations|78|30.5%|
|
|FPU operations|78|30.5%|
|
||||||
|Control flow |32|12.5%|
|
|Control flow |42|16.4%|
|
||||||
|
|
||||||
#### Operand A
|
#### Operand A
|
||||||
The first operand is read from memory. The location is determined by the `loc(a)` flag:
|
The first operand is read from memory. The location is determined by the `loc(a)` flag:
|
||||||
|
@ -90,15 +90,15 @@ A 32-bit address mask that is used to calculate the write address for the C oper
|
||||||
|
|
||||||
|weight|instruction|signed|A width|B width|C|C width|
|
|weight|instruction|signed|A width|B width|C|C width|
|
||||||
|-|-|-|-|-|-|-|
|
|-|-|-|-|-|-|-|
|
||||||
|16|ADD_64|no|64|64|A + B|64|
|
|10|ADD_64|no|64|64|A + B|64|
|
||||||
|4|ADD_32|no|32|32|A + B|32|
|
|2|ADD_32|no|32|32|A + B|32|
|
||||||
|16|SUB_64|no|64|64|A - B|64|
|
|10|SUB_64|no|64|64|A - B|64|
|
||||||
|4|SUB_32|no|32|32|A - B|32|
|
|2|SUB_32|no|32|32|A - B|32|
|
||||||
|15|MUL_64|no|64|64|A * B|64|
|
|21|MUL_64|no|64|64|A * B|64|
|
||||||
|11|MULH_64|no|64|64|A * B|64|
|
|10|MULH_64|no|64|64|A * B|64|
|
||||||
|11|MUL_32|no|32|32|A * B|64|
|
|15|MUL_32|no|32|32|A * B|64|
|
||||||
|11|IMUL_32|yes|32|32|A * B|64|
|
|15|IMUL_32|yes|32|32|A * B|64|
|
||||||
|11|IMULH_64|yes|64|64|A * B|64|
|
|10|IMULH_64|yes|64|64|A * B|64|
|
||||||
|1|DIV_64|no|64|32|A / B|32|
|
|1|DIV_64|no|64|32|A / B|32|
|
||||||
|1|IDIV_64|yes|64|32|A / B|32|
|
|1|IDIV_64|yes|64|32|A / B|32|
|
||||||
|4|AND_64|no|64|64|A & B|64|
|
|4|AND_64|no|64|64|A & B|64|
|
||||||
|
@ -110,8 +110,8 @@ A 32-bit address mask that is used to calculate the write address for the C oper
|
||||||
|3|SHL_64|no|64|6|A << B|64|
|
|3|SHL_64|no|64|6|A << B|64|
|
||||||
|3|SHR_64|no|64|6|A >> B|64|
|
|3|SHR_64|no|64|6|A >> B|64|
|
||||||
|3|SAR_64|yes|64|6|A >> B|64|
|
|3|SAR_64|yes|64|6|A >> B|64|
|
||||||
|9|ROL_64|no|64|6|A <<< B|64|
|
|6|ROL_64|no|64|6|A <<< B|64|
|
||||||
|9|ROR_64|no|64|6|A >>> B|64|
|
|6|ROR_64|no|64|6|A >>> B|64|
|
||||||
|
|
||||||
##### 32-bit operations
|
##### 32-bit operations
|
||||||
Instructions ADD_32, SUB_32, AND_32, OR_32, XOR_32 only use the low-order 32 bits of the input operands. The result of these operations is 32 bits long and bits 32-63 of C are zero.
|
Instructions ADD_32, SUB_32, AND_32, OR_32, XOR_32 only use the low-order 32 bits of the input operands. The result of these operations is 32 bits long and bits 32-63 of C are zero.
|
||||||
|
@ -162,15 +162,30 @@ The rounding modes are defined by the IEEE-754 standard.
|
||||||
|
|
||||||
*The two-bit flag value exactly corresponds to bits 13-14 of the x86 `MXCSR` register and bits 23 and 22 (reversed) of the ARM `FPSCR` register.*
|
*The two-bit flag value exactly corresponds to bits 13-14 of the x86 `MXCSR` register and bits 23 and 22 (reversed) of the ARM `FPSCR` register.*
|
||||||
|
|
||||||
### Control flow instructions
|
### Control instructions
|
||||||
The following 2 control flow instructions are supported:
|
The following 2 control instructions are supported:
|
||||||
|
|
||||||
|weight|instruction|function|
|
|weight|instruction|function|
|
||||||
|-|-|-|
|
|-|-|-|
|
||||||
|17|CALL|near procedure call|
|
|24|CALL|near procedure call|
|
||||||
|15|RET|return from procedure|
|
|18|RET|return from procedure|
|
||||||
|
|
||||||
Both instructions are conditional in 75% of cases. The jump is taken only if `B <= imm32`. For the 25% of cases when `B` is equal to `imm32`, the jump is unconditional. In case the branch is not taken, both instructions become "arithmetic no-op" `C = A`.
|
Both instructions are conditional. The condition function takes the lower 32 bits of integer register `reg(b)` and the value `imm32` and evaluates a condition based on the `loc(b)` flag:
|
||||||
|
|
||||||
|
|loc(b)[2:0]|signed|jump condition|probability|*x86*|*ARM*
|
||||||
|
|---|---|----------|-----|--|----|
|
||||||
|
|000|no|`reg(b)[31:0] <= imm32`|0% - 100%|`JBE`|`BLS`
|
||||||
|
|001|no|`reg(b)[31:0] > imm32`|0% - 100%|`JA`|`BHI`
|
||||||
|
|010|yes|`reg(b)[31:0] - imm32 < 0`|50%|`JS`|`BMI`
|
||||||
|
|011|yes|`reg(b)[31:0] - imm32 >= 0`|50%|`JNS`|`BPL`
|
||||||
|
|100|yes|`reg(b)[31:0] - imm32` overflows|0% - 50%|`JO`|`BVS`
|
||||||
|
|101|yes|`reg(b)[31:0] - imm32` doesn't overflow|50% - 100%|`JNO`|`BVC`
|
||||||
|
|110|yes|`reg(b)[31:0] < imm32`|0% - 100%|`JL`|`BLT`
|
||||||
|
|111|yes|`reg(b)[31:0] >= imm32`|0% - 100%|`JGE`|`BGE`
|
||||||
|
|
||||||
|
The 'signed' column specifies if the operands are interpreted as signed or unsigned 32-bit numbers. Column 'probability' lists the expected jump probability (range means that the actual value for a specific instruction depends on `imm32`). *Columns 'x86' and 'ARM' list the corresponding hardware instructions (following a `CMP` instruction).*
|
||||||
|
|
||||||
|
In case the branch is not taken, both CALL and RET become "arithmetic no-op" `C = A`.
|
||||||
|
|
||||||
##### CALL
|
##### CALL
|
||||||
Taken CALL instruction pushes the values `A` and `pc` (program counter) onto the stack and then performs a forward jump relative to the value of `pc`. The forward offset is equal to `16 * (imm8[6:0] + 1)`. Maximum jump distance is therefore 128 instructions forward (this means that at least 4 correctly spaced CALL instructions are needed to form a loop in the program).
|
Taken CALL instruction pushes the values `A` and `pc` (program counter) onto the stack and then performs a forward jump relative to the value of `pc`. The forward offset is equal to `16 * (imm8[6:0] + 1)`. Maximum jump distance is therefore 128 instructions forward (this means that at least 4 correctly spaced CALL instructions are needed to form a loop in the program).
|
||||||
|
|
8
makefile
8
makefile
|
@ -42,7 +42,7 @@ $(OBJDIR)/argon2_core.o: $(addprefix $(SRCDIR)/,argon2_core.c argon2_core.h blak
|
||||||
$(OBJDIR)/argon2_ref.o: $(addprefix $(SRCDIR)/,argon2_ref.c argon2.h argon2_core.h blake2/blake2.h blake2/blake2-impl.h blake2/blamka-round-ref.h) | $(OBJDIR)
|
$(OBJDIR)/argon2_ref.o: $(addprefix $(SRCDIR)/,argon2_ref.c argon2.h argon2_core.h blake2/blake2.h blake2/blake2-impl.h blake2/blamka-round-ref.h) | $(OBJDIR)
|
||||||
$(CC) $(CCFLAGS) -c $(SRCDIR)/argon2_ref.c -o $@
|
$(CC) $(CCFLAGS) -c $(SRCDIR)/argon2_ref.c -o $@
|
||||||
|
|
||||||
$(OBJDIR)/AssemblyGeneratorX86.o: $(addprefix $(SRCDIR)/,AssemblyGeneratorX86.cpp AssemblyGeneratorX86.hpp Instruction.hpp Pcg32.hpp common.hpp instructions.hpp) | $(OBJDIR)
|
$(OBJDIR)/AssemblyGeneratorX86.o: $(addprefix $(SRCDIR)/,AssemblyGeneratorX86.cpp AssemblyGeneratorX86.hpp Instruction.hpp Pcg32.hpp common.hpp instructions.hpp instructionWeights.hpp) | $(OBJDIR)
|
||||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/AssemblyGeneratorX86.cpp -o $@
|
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/AssemblyGeneratorX86.cpp -o $@
|
||||||
|
|
||||||
$(OBJDIR)/blake2b.o: $(addprefix $(SRCDIR)/blake2/,blake2b.c blake2.h blake2-impl.h) | $(OBJDIR)
|
$(OBJDIR)/blake2b.o: $(addprefix $(SRCDIR)/blake2/,blake2b.c blake2.h blake2-impl.h) | $(OBJDIR)
|
||||||
|
@ -54,16 +54,16 @@ $(OBJDIR)/CompiledVirtualMachine.o: $(addprefix $(SRCDIR)/,CompiledVirtualMachin
|
||||||
$(OBJDIR)/dataset.o: $(addprefix $(SRCDIR)/,dataset.cpp common.hpp Pcg32.hpp) | $(OBJDIR)
|
$(OBJDIR)/dataset.o: $(addprefix $(SRCDIR)/,dataset.cpp common.hpp Pcg32.hpp) | $(OBJDIR)
|
||||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/dataset.cpp -o $@
|
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/dataset.cpp -o $@
|
||||||
|
|
||||||
$(OBJDIR)/JitCompilerX86.o: $(addprefix $(SRCDIR)/,JitCompilerX86.cpp JitCompilerX86.hpp Instruction.hpp) | $(OBJDIR)
|
$(OBJDIR)/JitCompilerX86.o: $(addprefix $(SRCDIR)/,JitCompilerX86.cpp JitCompilerX86.hpp Instruction.hpp instructionWeights.hpp) | $(OBJDIR)
|
||||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/JitCompilerX86.cpp -o $@
|
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/JitCompilerX86.cpp -o $@
|
||||||
|
|
||||||
$(OBJDIR)/instructionsPortable.o: $(addprefix $(SRCDIR)/,instructionsPortable.cpp instructions.hpp intrinPortable.h) | $(OBJDIR)
|
$(OBJDIR)/instructionsPortable.o: $(addprefix $(SRCDIR)/,instructionsPortable.cpp instructions.hpp intrinPortable.h) | $(OBJDIR)
|
||||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/instructionsPortable.cpp -o $@
|
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/instructionsPortable.cpp -o $@
|
||||||
|
|
||||||
$(OBJDIR)/Instruction.o: $(addprefix $(SRCDIR)/,Instruction.cpp Instruction.hpp) | $(OBJDIR)
|
$(OBJDIR)/Instruction.o: $(addprefix $(SRCDIR)/,Instruction.cpp Instruction.hpp instructionWeights.hpp) | $(OBJDIR)
|
||||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/Instruction.cpp -o $@
|
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/Instruction.cpp -o $@
|
||||||
|
|
||||||
$(OBJDIR)/InterpretedVirtualMachine.o: $(addprefix $(SRCDIR)/,InterpretedVirtualMachine.cpp InterpretedVirtualMachine.hpp Pcg32.hpp instructions.hpp) | $(OBJDIR)
|
$(OBJDIR)/InterpretedVirtualMachine.o: $(addprefix $(SRCDIR)/,InterpretedVirtualMachine.cpp InterpretedVirtualMachine.hpp Pcg32.hpp instructions.hpp instructionWeights.hpp) | $(OBJDIR)
|
||||||
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/InterpretedVirtualMachine.cpp -o $@
|
$(CXX) $(CXXFLAGS) -c $(SRCDIR)/InterpretedVirtualMachine.cpp -o $@
|
||||||
|
|
||||||
$(OBJDIR)/main.o: $(addprefix $(SRCDIR)/,main.cpp InterpretedVirtualMachine.hpp Stopwatch.hpp blake2/blake2.h) | $(OBJDIR)
|
$(OBJDIR)/main.o: $(addprefix $(SRCDIR)/,main.cpp InterpretedVirtualMachine.hpp Stopwatch.hpp blake2/blake2.h) | $(OBJDIR)
|
||||||
|
|
|
@ -307,7 +307,7 @@ namespace RandomX {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
asmCode << "mov ecx, 1" << std::endl;
|
asmCode << "\tmov ecx, 1" << std::endl;
|
||||||
asmCode << "\tmov edx, " << regR32[instr.regb % RegistersCount] << std::endl;
|
asmCode << "\tmov edx, " << regR32[instr.regb % RegistersCount] << std::endl;
|
||||||
asmCode << "\ttest edx, edx" << std::endl;
|
asmCode << "\ttest edx, edx" << std::endl;
|
||||||
asmCode << "\tcmovne ecx, edx" << std::endl;
|
asmCode << "\tcmovne ecx, edx" << std::endl;
|
||||||
|
@ -458,15 +458,36 @@ namespace RandomX {
|
||||||
gencf(instr);
|
gencf(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns the x86 conditional-jump mnemonic for the branch condition selected
// by the low 3 bits of instr.locb.
//
// instr  - instruction whose locb field encodes the condition (only bits 0-2 are used)
// invert - when true, the lowest bit of the condition code is flipped. The codes
//          are laid out in complementary pairs (jbe/ja, js/jns, jo/jno, jl/jge),
//          so flipping bit 0 yields the mnemonic of the opposite condition.
//
// A lookup table is used instead of a switch so that every control path
// returns a value (the switch form had no default branch).
static inline const char* jumpCondition(Instruction& instr, bool invert = false) {
	static const char* const mnemonics[8] = {
		"jbe", // 0
		"ja",  // 1
		"js",  // 2
		"jns", // 3
		"jo",  // 4
		"jno", // 5
		"jl",  // 6
		"jge"  // 7
	};
	// The final mask keeps the index provably inside the table bounds.
	return mnemonics[((instr.locb & 7) ^ invert) & 7];
}
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_CALL(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_CALL(Instruction& instr, int i) {
|
||||||
gena(instr);
|
gena(instr);
|
||||||
if ((instr.locb & 7) < 6) {
|
|
||||||
asmCode << "\tcmp " << regR32[instr.regb % RegistersCount] << ", " << instr.imm32 << std::endl;
|
asmCode << "\tcmp " << regR32[instr.regb % RegistersCount] << ", " << instr.imm32 << std::endl;
|
||||||
asmCode << "\tjbe short taken_call_" << i << std::endl;
|
asmCode << "\t" << jumpCondition(instr);
|
||||||
|
asmCode << " short taken_call_" << i << std::endl;
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
asmCode << "\tjmp rx_i_" << wrapInstr(i + 1) << std::endl;
|
asmCode << "\tjmp rx_i_" << wrapInstr(i + 1) << std::endl;
|
||||||
asmCode << "taken_call_" << i << ":" << std::endl;
|
asmCode << "taken_call_" << i << ":" << std::endl;
|
||||||
}
|
|
||||||
if (trace) {
|
if (trace) {
|
||||||
asmCode << "\tmov qword ptr [rsi + rdi * 8 + 262144], rax" << std::endl;
|
asmCode << "\tmov qword ptr [rsi + rdi * 8 + 262144], rax" << std::endl;
|
||||||
}
|
}
|
||||||
|
@ -478,10 +499,9 @@ namespace RandomX {
|
||||||
gena(instr);
|
gena(instr);
|
||||||
asmCode << "\tcmp rsp, rbp" << std::endl;
|
asmCode << "\tcmp rsp, rbp" << std::endl;
|
||||||
asmCode << "\tje short not_taken_ret_" << i << std::endl;
|
asmCode << "\tje short not_taken_ret_" << i << std::endl;
|
||||||
if ((instr.locb & 7) < 6) {
|
|
||||||
asmCode << "\tcmp " << regR32[instr.regb % RegistersCount] << ", " << instr.imm32 << std::endl;
|
asmCode << "\tcmp " << regR32[instr.regb % RegistersCount] << ", " << instr.imm32 << std::endl;
|
||||||
asmCode << "\tja short not_taken_ret_" << i << std::endl;
|
asmCode << "\t" << jumpCondition(instr, true);
|
||||||
}
|
asmCode << " short not_taken_ret_" << i << std::endl;
|
||||||
asmCode << "\txor rax, qword ptr [rsp + 8]" << std::endl;
|
asmCode << "\txor rax, qword ptr [rsp + 8]" << std::endl;
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
asmCode << "\tret 8" << std::endl;
|
asmCode << "\tret 8" << std::endl;
|
||||||
|
|
|
@ -280,13 +280,10 @@ namespace RandomX {
|
||||||
|
|
||||||
void InterpretedVirtualMachine::h_CALL(Instruction& inst) {
|
void InterpretedVirtualMachine::h_CALL(Instruction& inst) {
|
||||||
convertible_t a = loada(inst);
|
convertible_t a = loada(inst);
|
||||||
convertible_t b = loadbr1(inst);
|
if (JMP_COND(inst.locb, reg.r[inst.regb % RegistersCount], inst.imm32)) {
|
||||||
if (b.u32 <= (uint32_t)inst.imm32) {
|
|
||||||
#ifdef STATS
|
#ifdef STATS
|
||||||
if ((inst.locb & 7) <= 5)
|
|
||||||
count_CALL_taken++;
|
count_CALL_taken++;
|
||||||
else
|
count_jump_taken[inst.locb & 7]++;
|
||||||
count_CALL_uncond++;
|
|
||||||
#endif
|
#endif
|
||||||
stackPush(a);
|
stackPush(a);
|
||||||
stackPush(pc);
|
stackPush(pc);
|
||||||
|
@ -298,6 +295,7 @@ namespace RandomX {
|
||||||
convertible_t& c = getcr(inst);
|
convertible_t& c = getcr(inst);
|
||||||
#ifdef STATS
|
#ifdef STATS
|
||||||
count_CALL_not_taken++;
|
count_CALL_not_taken++;
|
||||||
|
count_jump_not_taken[inst.locb & 7]++;
|
||||||
#endif
|
#endif
|
||||||
c.u64 = a.u64;
|
c.u64 = a.u64;
|
||||||
if (trace) std::cout << std::hex << /*a.u64 << " " <<*/ c.u64 << std::endl;
|
if (trace) std::cout << std::hex << /*a.u64 << " " <<*/ c.u64 << std::endl;
|
||||||
|
@ -308,12 +306,10 @@ namespace RandomX {
|
||||||
convertible_t a = loada(inst);
|
convertible_t a = loada(inst);
|
||||||
convertible_t b = loadbr1(inst);
|
convertible_t b = loadbr1(inst);
|
||||||
convertible_t& c = getcr(inst);
|
convertible_t& c = getcr(inst);
|
||||||
if (stack.size() > 0 && b.u32 <= (uint32_t)inst.imm32) {
|
if (stack.size() > 0 && JMP_COND(inst.locb, reg.r[inst.regb % RegistersCount], inst.imm32)) {
|
||||||
#ifdef STATS
|
#ifdef STATS
|
||||||
if ((inst.locb & 7) <= 5)
|
|
||||||
count_RET_taken++;
|
count_RET_taken++;
|
||||||
else
|
count_jump_taken[inst.locb & 7]++;
|
||||||
count_RET_uncond++;
|
|
||||||
#endif
|
#endif
|
||||||
auto raddr = stackPopAddress();
|
auto raddr = stackPopAddress();
|
||||||
auto retval = stackPopValue();
|
auto retval = stackPopValue();
|
||||||
|
@ -324,8 +320,10 @@ namespace RandomX {
|
||||||
#ifdef STATS
|
#ifdef STATS
|
||||||
if (stack.size() == 0)
|
if (stack.size() == 0)
|
||||||
count_RET_stack_empty++;
|
count_RET_stack_empty++;
|
||||||
else
|
else {
|
||||||
count_RET_not_taken++;
|
count_RET_not_taken++;
|
||||||
|
count_jump_not_taken[inst.locb & 7]++;
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
c.u64 = a.u64;
|
c.u64 = a.u64;
|
||||||
}
|
}
|
||||||
|
|
|
@ -71,13 +71,13 @@ namespace RandomX {
|
||||||
int count_FPDIV;
|
int count_FPDIV;
|
||||||
int count_FPSQRT;
|
int count_FPSQRT;
|
||||||
int count_FPROUND;
|
int count_FPROUND;
|
||||||
int count_CALL_uncond;
|
|
||||||
int count_CALL_taken;
|
int count_CALL_taken;
|
||||||
int count_CALL_not_taken;
|
int count_CALL_not_taken;
|
||||||
int count_RET_stack_empty;
|
int count_RET_stack_empty;
|
||||||
int count_RET_uncond;
|
|
||||||
int count_RET_taken;
|
int count_RET_taken;
|
||||||
int count_RET_not_taken;
|
int count_RET_not_taken;
|
||||||
|
int count_jump_taken[8] = { 0 };
|
||||||
|
int count_jump_not_taken[8] = { 0 };
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
convertible_t loada(Instruction&);
|
convertible_t loada(Instruction&);
|
||||||
|
|
|
@ -657,20 +657,41 @@ namespace RandomX {
|
||||||
gencf(instr);
|
gencf(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns the one-byte opcode of the x86 short conditional jump for the branch
// condition selected by the low 3 bits of instr.locb.
//
// instr  - instruction whose locb field encodes the condition (only bits 0-2 are used)
// invert - when true, the lowest bit of the condition code is flipped. The codes
//          are laid out in complementary pairs (jbe/ja, js/jns, jo/jno, jl/jge),
//          so flipping bit 0 yields the opcode of the opposite condition.
//
// A lookup table is used instead of a switch so that every control path
// returns a value (the switch form had no default branch).
static inline uint8_t jumpCondition(Instruction& instr, bool invert = false) {
	static const uint8_t opcodes[8] = {
		0x76, //jbe
		0x77, //ja
		0x78, //js
		0x79, //jns
		0x70, //jo
		0x71, //jno
		0x7c, //jl
		0x7d  //jge
	};
	// The final mask keeps the index provably inside the table bounds.
	return opcodes[((instr.locb & 7) ^ invert) & 7];
}
|
||||||
|
|
||||||
void JitCompilerX86::h_CALL(Instruction& instr, int i) {
|
void JitCompilerX86::h_CALL(Instruction& instr, int i) {
|
||||||
if ((instr.locb & 7) <= 5) {
|
|
||||||
emit(uint16_t(0x8141)); //cmp regb, imm32
|
emit(uint16_t(0x8141)); //cmp regb, imm32
|
||||||
emitByte(0xf8 + (instr.regb % RegistersCount));
|
emitByte(0xf8 + (instr.regb % RegistersCount));
|
||||||
emit(instr.imm32);
|
emit(instr.imm32);
|
||||||
|
emitByte(jumpCondition(instr));
|
||||||
if ((instr.locc & 7) <= 3) {
|
if ((instr.locc & 7) <= 3) {
|
||||||
emit(uint16_t(0x1676)); //jmp
|
emitByte(0x16);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
emit(uint16_t(0x0576)); //jmp
|
emitByte(0x05);
|
||||||
}
|
}
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
emit(uint16_t(0x06eb)); //jmp to next
|
emit(uint16_t(0x06eb)); //jmp to next
|
||||||
}
|
|
||||||
emitByte(0x50); //push rax
|
emitByte(0x50); //push rax
|
||||||
emitByte(0xe8); //call
|
emitByte(0xe8); //call
|
||||||
i = wrapInstr(i + (instr.imm8 & 127) + 2);
|
i = wrapInstr(i + (instr.imm8 & 127) + 2);
|
||||||
|
@ -685,22 +706,16 @@ namespace RandomX {
|
||||||
|
|
||||||
void JitCompilerX86::h_RET(Instruction& instr, int i) {
|
void JitCompilerX86::h_RET(Instruction& instr, int i) {
|
||||||
int crlen = 0;
|
int crlen = 0;
|
||||||
int blen = 0;
|
|
||||||
if ((instr.locc & 7) <= 3) {
|
if ((instr.locc & 7) <= 3) {
|
||||||
crlen = 17;
|
crlen = 17;
|
||||||
}
|
}
|
||||||
if ((instr.locb & 7) <= 5) {
|
|
||||||
blen = 9;
|
|
||||||
}
|
|
||||||
emit(0x74e53b48); //cmp rsp, rbp; je
|
emit(0x74e53b48); //cmp rsp, rbp; je
|
||||||
emitByte(11 + blen + crlen);
|
emitByte(20 + crlen);
|
||||||
if ((instr.locb & 7) <= 5) {
|
|
||||||
emit(uint16_t(0x8141)); //cmp regb, imm32
|
emit(uint16_t(0x8141)); //cmp regb, imm32
|
||||||
emitByte(0xf8 + (instr.regb % RegistersCount));
|
emitByte(0xf8 + (instr.regb % RegistersCount));
|
||||||
emit(instr.imm32);
|
emit(instr.imm32);
|
||||||
emitByte(0x77); //jmp
|
emitByte(jumpCondition(instr, true));
|
||||||
emitByte(11 + crlen);
|
emitByte(11 + crlen);
|
||||||
}
|
|
||||||
emitByte(0x48);
|
emitByte(0x48);
|
||||||
emit(0x08244433); //xor rax,QWORD PTR [rsp+0x8]
|
emit(0x08244433); //xor rax,QWORD PTR [rsp+0x8]
|
||||||
gencr(instr);
|
gencr(instr);
|
||||||
|
|
|
@ -19,15 +19,15 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#define WT_ADD_64 16
|
#define WT_ADD_64 10
|
||||||
#define WT_ADD_32 4
|
#define WT_ADD_32 2
|
||||||
#define WT_SUB_64 16
|
#define WT_SUB_64 10
|
||||||
#define WT_SUB_32 4
|
#define WT_SUB_32 2
|
||||||
#define WT_MUL_64 15
|
#define WT_MUL_64 21
|
||||||
#define WT_MULH_64 11
|
#define WT_MULH_64 10
|
||||||
#define WT_MUL_32 11
|
#define WT_MUL_32 15
|
||||||
#define WT_IMUL_32 11
|
#define WT_IMUL_32 15
|
||||||
#define WT_IMULH_64 11
|
#define WT_IMULH_64 10
|
||||||
#define WT_DIV_64 1
|
#define WT_DIV_64 1
|
||||||
#define WT_IDIV_64 1
|
#define WT_IDIV_64 1
|
||||||
#define WT_AND_64 4
|
#define WT_AND_64 4
|
||||||
|
@ -39,16 +39,16 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||||
#define WT_SHL_64 3
|
#define WT_SHL_64 3
|
||||||
#define WT_SHR_64 3
|
#define WT_SHR_64 3
|
||||||
#define WT_SAR_64 3
|
#define WT_SAR_64 3
|
||||||
#define WT_ROL_64 9
|
#define WT_ROL_64 6
|
||||||
#define WT_ROR_64 9
|
#define WT_ROR_64 6
|
||||||
#define WT_FPADD 20
|
#define WT_FPADD 20
|
||||||
#define WT_FPSUB 20
|
#define WT_FPSUB 20
|
||||||
#define WT_FPMUL 22
|
#define WT_FPMUL 22
|
||||||
#define WT_FPDIV 8
|
#define WT_FPDIV 8
|
||||||
#define WT_FPSQRT 6
|
#define WT_FPSQRT 6
|
||||||
#define WT_FPROUND 2
|
#define WT_FPROUND 2
|
||||||
#define WT_CALL 17
|
#define WT_CALL 24
|
||||||
#define WT_RET 15
|
#define WT_RET 18
|
||||||
|
|
||||||
constexpr int wtSum = WT_ADD_64 + WT_ADD_32 + WT_SUB_64 + WT_SUB_32 + \
|
constexpr int wtSum = WT_ADD_64 + WT_ADD_32 + WT_SUB_64 + WT_SUB_32 + \
|
||||||
WT_MUL_64 + WT_MULH_64 + WT_MUL_32 + WT_IMUL_32 + WT_IMULH_64 + \
|
WT_MUL_64 + WT_MULH_64 + WT_MUL_32 + WT_IMUL_32 + WT_IMULH_64 + \
|
||||||
|
|
|
@ -57,6 +57,7 @@ namespace RandomX {
|
||||||
void SAR_64(convertible_t& a, convertible_t& b, convertible_t& c);
|
void SAR_64(convertible_t& a, convertible_t& b, convertible_t& c);
|
||||||
void ROL_64(convertible_t& a, convertible_t& b, convertible_t& c);
|
void ROL_64(convertible_t& a, convertible_t& b, convertible_t& c);
|
||||||
void ROR_64(convertible_t& a, convertible_t& b, convertible_t& c);
|
void ROR_64(convertible_t& a, convertible_t& b, convertible_t& c);
|
||||||
|
bool JMP_COND(uint8_t, convertible_t&, int32_t);
|
||||||
void FPINIT();
|
void FPINIT();
|
||||||
void FPADD(convertible_t& a, double b, convertible_t& c);
|
void FPADD(convertible_t& a, double b, convertible_t& c);
|
||||||
void FPSUB(convertible_t& a, double b, convertible_t& c);
|
void FPSUB(convertible_t& a, double b, convertible_t& c);
|
||||||
|
|
|
@ -126,6 +126,34 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||||
#define imulhi64 __imulhi64
|
#define imulhi64 __imulhi64
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// Subtracts b from a with two's-complement wrap-around.
// The subtraction is performed on unsigned values to avoid the undefined
// behavior of signed overflow.
static inline int32_t safeSub(int32_t a, int32_t b) {
	return int32_t(uint32_t(a) - uint32_t(b));
}

// Decide whether __builtin_sub_overflow can be used.
// A private feature macro is used here; the compiler-reserved __has_builtin
// macro is deliberately NOT redefined, because forcing it to 1 would make every
// later __has_builtin(...) query in the translation unit report "available".
#if defined(__GNUC__) && __GNUC__ >= 5
#define RANDOMX_HAS_BUILTIN_SUB_OVERFLOW 1
#elif defined(__has_builtin)
#if __has_builtin(__builtin_sub_overflow)
#define RANDOMX_HAS_BUILTIN_SUB_OVERFLOW 1
#endif
#endif

#if defined(RANDOMX_HAS_BUILTIN_SUB_OVERFLOW)
// Returns true if the signed 32-bit subtraction a - b overflows
// (compiler builtin version; the wrapped difference itself is discarded).
static inline bool __subOverflow(int32_t a, int32_t b) {
	int32_t temp;
	return __builtin_sub_overflow(a, b, &temp);
}
#define subOverflow __subOverflow
#endif

#ifndef subOverflow
// Returns true if the signed 32-bit subtraction a - b overflows
// (portable fallback). Without overflow the wrapped difference c is smaller
// than a exactly when b is positive; any disagreement means overflow.
static inline bool __subOverflow(int32_t a, int32_t b) {
	auto c = safeSub(a, b);
	return (c < a) != (b > 0);
}
#define subOverflow __subOverflow
#endif
|
||||||
|
|
||||||
static double FlushDenormal(double x) {
|
static double FlushDenormal(double x) {
|
||||||
if (std::fpclassify(x) == FP_SUBNORMAL) {
|
if (std::fpclassify(x) == FP_SUBNORMAL) {
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -235,6 +263,28 @@ namespace RandomX {
|
||||||
c.u64 = ror64(a.u64, (b.u64 & 63));
|
c.u64 = ror64(a.u64, (b.u64 & 63));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool JMP_COND(uint8_t type, convertible_t& regb, int32_t imm32) {
|
||||||
|
switch (type & 7)
|
||||||
|
{
|
||||||
|
case 0:
|
||||||
|
return regb.u32 <= (uint32_t)imm32;
|
||||||
|
case 1:
|
||||||
|
return regb.u32 > (uint32_t)imm32;
|
||||||
|
case 2:
|
||||||
|
return safeSub(regb.i32, imm32) < 0;
|
||||||
|
case 3:
|
||||||
|
return safeSub(regb.i32, imm32) >= 0;
|
||||||
|
case 4:
|
||||||
|
return subOverflow(regb.i32, imm32);
|
||||||
|
case 5:
|
||||||
|
return !subOverflow(regb.i32, imm32);
|
||||||
|
case 6:
|
||||||
|
return regb.i32 < imm32;
|
||||||
|
case 7:
|
||||||
|
return regb.i32 >= imm32;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void FPINIT() {
|
void FPINIT() {
|
||||||
setRoundMode(FE_TONEAREST);
|
setRoundMode(FE_TONEAREST);
|
||||||
}
|
}
|
||||||
|
|
|
@ -248,8 +248,12 @@ int main(int argc, char** argv) {
|
||||||
std::cout << "Calculated result: ";
|
std::cout << "Calculated result: ";
|
||||||
result.print(std::cout);
|
result.print(std::cout);
|
||||||
if(programCount == 1000)
|
if(programCount == 1000)
|
||||||
std::cout << "Reference result: d62ed85c39030cd2c5704fca3a23019f1244f2b03447c9a6b39dea5390ed1d10" << std::endl;
|
std::cout << "Reference result: f6bf06465d5fa1b1dc919140b9e9f9e210b07ae6d662988458a172e9a267eb3f" << std::endl;
|
||||||
std::cout << "Performance: " << programCount / elapsed << " programs per second" << std::endl;
|
std::cout << "Performance: " << programCount / elapsed << " programs per second" << std::endl;
|
||||||
|
/*if (threadCount == 1 && !compiled) {
|
||||||
|
auto ivm = (RandomX::InterpretedVirtualMachine*)vms[0];
|
||||||
|
std::cout << ivm->getProgam();
|
||||||
|
}*/
|
||||||
}
|
}
|
||||||
catch (std::exception& e) {
|
catch (std::exception& e) {
|
||||||
std::cout << "ERROR: " << e.what() << std::endl;
|
std::cout << "ERROR: " << e.what() << std::endl;
|
||||||
|
|
2168
src/program.inc
2168
src/program.inc
File diff suppressed because it is too large
Load diff
Loading…
Reference in a new issue