mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2024-08-15 00:23:14 +00:00
FPROUND - variable flag offset
This commit is contained in:
parent
e487092f07
commit
c02ee4291d
5 changed files with 20 additions and 8 deletions
|
@ -12,6 +12,7 @@ There are 31 unique instructions divided into 3 groups:
|
||||||
|
|
||||||
## Integer instructions
|
## Integer instructions
|
||||||
There are 22 integer instructions. They are divided into 3 classes (MATH, DIV, SHIFT) with different B operand selection rules.
|
There are 22 integer instructions. They are divided into 3 classes (MATH, DIV, SHIFT) with different B operand selection rules.
|
||||||
|
|
||||||
|# opcodes|instruction|class|signed|A width|B width|C|C width|
|
|# opcodes|instruction|class|signed|A width|B width|C|C width|
|
||||||
|-|-|-|-|-|-|-|-|
|
|-|-|-|-|-|-|-|-|
|
||||||
|12|ADD_64|MATH|no|64|64|`A + B`|64|
|
|12|ADD_64|MATH|no|64|64|`A + B`|64|
|
||||||
|
@ -55,7 +56,7 @@ The shift/rotate instructions use just the bottom 6 bits of the `B` operand (`im
|
||||||
There are 5 floating point instructions. All floating point instructions are vector instructions that operate on two packed double precision floating point values.
|
There are 5 floating point instructions. All floating point instructions are vector instructions that operate on two packed double precision floating point values.
|
||||||
|
|
||||||
|# opcodes|instruction|C|
|
|# opcodes|instruction|C|
|
||||||
|-|-|-|-|
|
|-|-|-|
|
||||||
|20|FPADD|`A + B`|
|
|20|FPADD|`A + B`|
|
||||||
|20|FPSUB|`A - B`|
|
|20|FPSUB|`A - B`|
|
||||||
|22|FPMUL|`A * B`|
|
|22|FPMUL|`A * B`|
|
||||||
|
|
|
@ -9,6 +9,7 @@ The encoding of each 128-bit instruction word is as follows:
|
||||||
There are 256 opcodes, which are distributed between 3 groups of instructions. There are 31 distinct operations (each operation can be encoded using multiple opcodes - for example opcodes `0x00` to `0x0d` correspond to integer addition).
|
There are 256 opcodes, which are distributed between 3 groups of instructions. There are 31 distinct operations (each operation can be encoded using multiple opcodes - for example opcodes `0x00` to `0x0d` correspond to integer addition).
|
||||||
|
|
||||||
**Table 1: Instruction groups**
|
**Table 1: Instruction groups**
|
||||||
|
|
||||||
|group|# operations|# opcodes||
|
|group|# operations|# opcodes||
|
||||||
|---------|-----------------|----|-|
|
|---------|-----------------|----|-|
|
||||||
|integer (IA)|22|144|56.3%|
|
|integer (IA)|22|144|56.3%|
|
||||||
|
@ -31,8 +32,8 @@ The `A.LOC.W` flag determines the address width when reading operand A from the
|
||||||
|
|
||||||
**Table 3: Operand A read address width**
|
**Table 3: Operand A read address width**
|
||||||
|
|
||||||
|`A.LOC.W`|address width (W)
|
|`A.LOC.W`|address width (W)|
|
||||||
|---------|-|-|
|
|---------|-|
|
||||||
|0|15 bits (256 KiB)|
|
|0|15 bits (256 KiB)|
|
||||||
|1-3|11 bits (16 KiB)|
|
|1-3|11 bits (16 KiB)|
|
||||||
|
|
||||||
|
@ -125,8 +126,8 @@ The `C.LOC.W` flag determines the address width when writing operand C to the sc
|
||||||
|
|
||||||
**Table 10: Operand C write address width**
|
**Table 10: Operand C write address width**
|
||||||
|
|
||||||
|`C.LOC.W`|address width (W)
|
|`C.LOC.W`|address width (W)|
|
||||||
|---------|-|-|
|
|---------|-|
|
||||||
|0|15 bits (256 KiB)|
|
|0|15 bits (256 KiB)|
|
||||||
|1-3|11 bits (16 KiB)|
|
|1-3|11 bits (16 KiB)|
|
||||||
|
|
||||||
|
|
|
@ -466,7 +466,9 @@ namespace RandomX {
|
||||||
void AssemblyGeneratorX86::h_FPROUND(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_FPROUND(Instruction& instr, int i) {
|
||||||
genar(instr, i);
|
genar(instr, i);
|
||||||
asmCode << "\tmov rcx, rax" << std::endl;
|
asmCode << "\tmov rcx, rax" << std::endl;
|
||||||
asmCode << "\tshl eax, 13" << std::endl;
|
int rotate = (13 - (instr.imm8 & 63)) & 63;
|
||||||
|
if (rotate != 0)
|
||||||
|
asmCode << "\trol rax, " << rotate << std::endl;
|
||||||
asmCode << "\tand eax, 24576" << std::endl;
|
asmCode << "\tand eax, 24576" << std::endl;
|
||||||
asmCode << "\tor eax, 40896" << std::endl;
|
asmCode << "\tor eax, 40896" << std::endl;
|
||||||
asmCode << "\tmov dword ptr [rsp - 8], eax" << std::endl;
|
asmCode << "\tmov dword ptr [rsp - 8], eax" << std::endl;
|
||||||
|
|
|
@ -574,7 +574,15 @@ namespace RandomX {
|
||||||
|
|
||||||
void JitCompilerX86::h_FPROUND(Instruction& instr, int i) {
|
void JitCompilerX86::h_FPROUND(Instruction& instr, int i) {
|
||||||
genar(instr);
|
genar(instr);
|
||||||
emit(0x00250de0c1c88b48); //mov rcx,rax; shl eax,0xd
|
emitByte(0x48);
|
||||||
|
emit(uint16_t(0xc88b)); //mov rcx,rax
|
||||||
|
int rotate = (13 - (instr.imm8 & 63)) & 63;
|
||||||
|
if (rotate != 0) {
|
||||||
|
emitByte(0x48);
|
||||||
|
emit(uint16_t(0xc0c1)); //rol rax
|
||||||
|
emitByte(rotate);
|
||||||
|
}
|
||||||
|
emit(uint16_t(0x0025));
|
||||||
emit(0x00009fc00d000060); //and eax,0x6000; or eax,0x9fc0
|
emit(0x00009fc00d000060); //and eax,0x6000; or eax,0x9fc0
|
||||||
emit(0x2454ae0ff8244489); //ldmxcsr DWORD PTR [rsp-0x8]
|
emit(0x2454ae0ff8244489); //ldmxcsr DWORD PTR [rsp-0x8]
|
||||||
emitByte(0xf8);
|
emitByte(0xf8);
|
||||||
|
|
|
@ -8859,7 +8859,7 @@ rx_body_509:
|
||||||
and ecx, 2047
|
and ecx, 2047
|
||||||
mov rax, qword ptr [rsi+rcx*8]
|
mov rax, qword ptr [rsi+rcx*8]
|
||||||
mov rcx, rax
|
mov rcx, rax
|
||||||
shl eax, 13
|
rol rax, 34
|
||||||
and eax, 24576
|
and eax, 24576
|
||||||
or eax, 40896
|
or eax, 40896
|
||||||
mov dword ptr [rsp - 8], eax
|
mov dword ptr [rsp - 8], eax
|
||||||
|
|
Loading…
Reference in a new issue