mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2024-08-15 00:23:14 +00:00
Reworked "FNEG" instruction to make ASIC optimizations more difficult
This commit is contained in:
parent
376c868ca0
commit
f76e8c2e20
11 changed files with 29 additions and 24 deletions
|
@ -48,11 +48,16 @@ Memory operands are loaded as 8-byte values from the address indicated by `src`.
|
||||||
|5/256|FADD_M|F|mem|`(dst0, dst1) = (dst0 + [src][0], dst1 + [src][1])`|
|
|5/256|FADD_M|F|mem|`(dst0, dst1) = (dst0 + [src][0], dst1 + [src][1])`|
|
||||||
|20/256|FSUB_R|F|A|`(dst0, dst1) = (dst0 - src0, dst1 - src1)`|
|
|20/256|FSUB_R|F|A|`(dst0, dst1) = (dst0 - src0, dst1 - src1)`|
|
||||||
|5/256|FSUB_M|F|mem|`(dst0, dst1) = (dst0 - [src][0], dst1 - [src][1])`|
|
|5/256|FSUB_M|F|mem|`(dst0, dst1) = (dst0 - [src][0], dst1 - [src][1])`|
|
||||||
|6/256|FNEG_R|F|-|`(dst0, dst1) = (-dst0, -dst1)`|
|
|6/256|FSCAL_R|F|-|<code>(dst0, dst1) = (-2<sup>x0</sup> * dst0, -2<sup>x1</sup> * dst1)</code>|
|
||||||
|20/256|FMUL_R|E|A|`(dst0, dst1) = (dst0 * src0, dst1 * src1)`|
|
|20/256|FMUL_R|E|A|`(dst0, dst1) = (dst0 * src0, dst1 * src1)`|
|
||||||
|4/256|FDIV_M|E|mem|`(dst0, dst1) = (dst0 / [src][0], dst1 / [src][1])`|
|
|4/256|FDIV_M|E|mem|`(dst0, dst1) = (dst0 / [src][0], dst1 / [src][1])`|
|
||||||
|6/256|FSQRT_R|E|-|`(dst0, dst1) = (√dst0, √dst1)`|
|
|6/256|FSQRT_R|E|-|`(dst0, dst1) = (√dst0, √dst1)`|
|
||||||
|
|
||||||
|
#### FSCAL_R
|
||||||
|
This instruction negates the number and multiplies it by <code>2<sup>x</sup></code>. `x` is calculated by taking the 5 least significant bits of the biased exponent and interpreting them as a binary number using the digit set `{+1, -1}` as opposed to the traditional `{0, 1}`: a `0` bit contributes <code>+2<sup>i</sup></code> and a `1` bit contributes <code>-2<sup>i</sup></code>, where `i` is the bit position (0 to 4). The possible values of `x` are all odd numbers from -31 to +31.
|
||||||
|
|
||||||
|
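The mathematical operation described above is equivalent to a bitwise XOR of the 64-bit binary representation of the number with the constant `0x81F0000000000000`, which flips the sign bit (bit 63) and the 5 least significant bits of the exponent (bits 52-56).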
|
||||||
|
|
||||||
#### Denormal and NaN values
|
#### Denormal and NaN values
|
||||||
Due to restrictions on the values of the floating point registers, no operation results in `NaN`.
|
Due to restrictions on the values of the floating point registers, no operation results in `NaN`.
|
||||||
`FDIV_M` can produce a denormal result. In that case, the result is set to `DBL_MIN = 2.22507385850720138309e-308`, which is the smallest positive normal number.
|
`FDIV_M` can produce a denormal result. In that case, the result is set to `DBL_MIN = 2.22507385850720138309e-308`, which is the smallest positive normal number.
|
||||||
|
|
|
@ -373,7 +373,7 @@ namespace RandomX {
|
||||||
}
|
}
|
||||||
|
|
||||||
//1 uOP
|
//1 uOP
|
||||||
void AssemblyGeneratorX86::h_FNEG_R(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_FSCAL_R(Instruction& instr, int i) {
|
||||||
instr.dst %= 4;
|
instr.dst %= 4;
|
||||||
asmCode << "\txorps " << regF[instr.dst] << ", " << signMask << std::endl;
|
asmCode << "\txorps " << regF[instr.dst] << ", " << signMask << std::endl;
|
||||||
}
|
}
|
||||||
|
@ -522,7 +522,7 @@ namespace RandomX {
|
||||||
INST_HANDLE(FADD_M)
|
INST_HANDLE(FADD_M)
|
||||||
INST_HANDLE(FSUB_R)
|
INST_HANDLE(FSUB_R)
|
||||||
INST_HANDLE(FSUB_M)
|
INST_HANDLE(FSUB_M)
|
||||||
INST_HANDLE(FNEG_R)
|
INST_HANDLE(FSCAL_R)
|
||||||
|
|
||||||
//Floating point group E
|
//Floating point group E
|
||||||
INST_HANDLE(FMUL_R)
|
INST_HANDLE(FMUL_R)
|
||||||
|
|
|
@ -70,7 +70,7 @@ namespace RandomX {
|
||||||
void h_FADD_M(Instruction&, int);
|
void h_FADD_M(Instruction&, int);
|
||||||
void h_FSUB_R(Instruction&, int);
|
void h_FSUB_R(Instruction&, int);
|
||||||
void h_FSUB_M(Instruction&, int);
|
void h_FSUB_M(Instruction&, int);
|
||||||
void h_FNEG_R(Instruction&, int);
|
void h_FSCAL_R(Instruction&, int);
|
||||||
void h_FMUL_R(Instruction&, int);
|
void h_FMUL_R(Instruction&, int);
|
||||||
void h_FMUL_M(Instruction&, int);
|
void h_FMUL_M(Instruction&, int);
|
||||||
void h_FDIV_R(Instruction&, int);
|
void h_FDIV_R(Instruction&, int);
|
||||||
|
|
|
@ -237,7 +237,7 @@ namespace RandomX {
|
||||||
os << std::endl;
|
os << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Instruction::h_FNEG_R(std::ostream& os) const {
|
void Instruction::h_FSCAL_R(std::ostream& os) const {
|
||||||
auto dstIndex = dst % 4;
|
auto dstIndex = dst % 4;
|
||||||
os << "f" << dstIndex << std::endl;
|
os << "f" << dstIndex << std::endl;
|
||||||
}
|
}
|
||||||
|
@ -362,7 +362,7 @@ namespace RandomX {
|
||||||
INST_NAME(FADD_M)
|
INST_NAME(FADD_M)
|
||||||
INST_NAME(FSUB_R)
|
INST_NAME(FSUB_R)
|
||||||
INST_NAME(FSUB_M)
|
INST_NAME(FSUB_M)
|
||||||
INST_NAME(FNEG_R)
|
INST_NAME(FSCAL_R)
|
||||||
|
|
||||||
//Floating point group E
|
//Floating point group E
|
||||||
INST_NAME(FMUL_R)
|
INST_NAME(FMUL_R)
|
||||||
|
@ -413,7 +413,7 @@ namespace RandomX {
|
||||||
INST_HANDLE(FADD_M)
|
INST_HANDLE(FADD_M)
|
||||||
INST_HANDLE(FSUB_R)
|
INST_HANDLE(FSUB_R)
|
||||||
INST_HANDLE(FSUB_M)
|
INST_HANDLE(FSUB_M)
|
||||||
INST_HANDLE(FNEG_R)
|
INST_HANDLE(FSCAL_R)
|
||||||
|
|
||||||
//Floating point group E
|
//Floating point group E
|
||||||
INST_HANDLE(FMUL_R)
|
INST_HANDLE(FMUL_R)
|
||||||
|
|
|
@ -54,7 +54,7 @@ namespace RandomX {
|
||||||
constexpr int FADD_M = 22;
|
constexpr int FADD_M = 22;
|
||||||
constexpr int FSUB_R = 23;
|
constexpr int FSUB_R = 23;
|
||||||
constexpr int FSUB_M = 24;
|
constexpr int FSUB_M = 24;
|
||||||
constexpr int FNEG_R = 25;
|
constexpr int FSCAL_R = 25;
|
||||||
constexpr int FMUL_R = 26;
|
constexpr int FMUL_R = 26;
|
||||||
constexpr int FMUL_M = 27;
|
constexpr int FMUL_M = 27;
|
||||||
constexpr int FDIV_R = 28;
|
constexpr int FDIV_R = 28;
|
||||||
|
@ -116,7 +116,7 @@ namespace RandomX {
|
||||||
void h_FADD_M(std::ostream&) const;
|
void h_FADD_M(std::ostream&) const;
|
||||||
void h_FSUB_R(std::ostream&) const;
|
void h_FSUB_R(std::ostream&) const;
|
||||||
void h_FSUB_M(std::ostream&) const;
|
void h_FSUB_M(std::ostream&) const;
|
||||||
void h_FNEG_R(std::ostream&) const;
|
void h_FSCAL_R(std::ostream&) const;
|
||||||
void h_FMUL_R(std::ostream&) const;
|
void h_FMUL_R(std::ostream&) const;
|
||||||
void h_FMUL_M(std::ostream&) const;
|
void h_FMUL_M(std::ostream&) const;
|
||||||
void h_FDIV_R(std::ostream&) const;
|
void h_FDIV_R(std::ostream&) const;
|
||||||
|
|
|
@ -203,8 +203,8 @@ namespace RandomX {
|
||||||
*ibc.fdst = _mm_sub_pd(*ibc.fdst, fsrc);
|
*ibc.fdst = _mm_sub_pd(*ibc.fdst, fsrc);
|
||||||
} break;
|
} break;
|
||||||
|
|
||||||
case InstructionType::FNEG_R: {
|
case InstructionType::FSCAL_R: {
|
||||||
const __m128d signMask = _mm_castsi128_pd(_mm_set1_epi64x(1ULL << 63));
|
const __m128d signMask = _mm_castsi128_pd(_mm_set1_epi64x(0x81F0000000000000));
|
||||||
*ibc.fdst = _mm_xor_pd(*ibc.fdst, signMask);
|
*ibc.fdst = _mm_xor_pd(*ibc.fdst, signMask);
|
||||||
} break;
|
} break;
|
||||||
|
|
||||||
|
@ -657,10 +657,10 @@ namespace RandomX {
|
||||||
ibc.memMask = ((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask);
|
ibc.memMask = ((instr.mod % 4) ? ScratchpadL1Mask : ScratchpadL2Mask);
|
||||||
} break;
|
} break;
|
||||||
|
|
||||||
CASE_REP(FNEG_R) {
|
CASE_REP(FSCAL_R) {
|
||||||
auto dst = instr.dst % 4;
|
auto dst = instr.dst % 4;
|
||||||
ibc.fdst = &f[dst];
|
ibc.fdst = &f[dst];
|
||||||
ibc.type = InstructionType::FNEG_R;
|
ibc.type = InstructionType::FSCAL_R;
|
||||||
} break;
|
} break;
|
||||||
|
|
||||||
CASE_REP(FMUL_R) {
|
CASE_REP(FMUL_R) {
|
||||||
|
|
|
@ -605,7 +605,7 @@ namespace RandomX {
|
||||||
emitByte(0xc4 + 8 * instr.dst);
|
emitByte(0xc4 + 8 * instr.dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitCompilerX86::h_FNEG_R(Instruction& instr) {
|
void JitCompilerX86::h_FSCAL_R(Instruction& instr) {
|
||||||
instr.dst %= 4;
|
instr.dst %= 4;
|
||||||
emit(REX_XORPS);
|
emit(REX_XORPS);
|
||||||
emitByte(0xc7 + 8 * instr.dst);
|
emitByte(0xc7 + 8 * instr.dst);
|
||||||
|
@ -761,7 +761,7 @@ namespace RandomX {
|
||||||
INST_HANDLE(FADD_M)
|
INST_HANDLE(FADD_M)
|
||||||
INST_HANDLE(FSUB_R)
|
INST_HANDLE(FSUB_R)
|
||||||
INST_HANDLE(FSUB_M)
|
INST_HANDLE(FSUB_M)
|
||||||
INST_HANDLE(FNEG_R)
|
INST_HANDLE(FSCAL_R)
|
||||||
INST_HANDLE(FMUL_R)
|
INST_HANDLE(FMUL_R)
|
||||||
INST_HANDLE(FMUL_M)
|
INST_HANDLE(FMUL_M)
|
||||||
INST_HANDLE(FDIV_R)
|
INST_HANDLE(FDIV_R)
|
||||||
|
|
|
@ -114,7 +114,7 @@ namespace RandomX {
|
||||||
void h_FADD_M(Instruction&);
|
void h_FADD_M(Instruction&);
|
||||||
void h_FSUB_R(Instruction&);
|
void h_FSUB_R(Instruction&);
|
||||||
void h_FSUB_M(Instruction&);
|
void h_FSUB_M(Instruction&);
|
||||||
void h_FNEG_R(Instruction&);
|
void h_FSCAL_R(Instruction&);
|
||||||
void h_FMUL_R(Instruction&);
|
void h_FMUL_R(Instruction&);
|
||||||
void h_FMUL_M(Instruction&);
|
void h_FMUL_M(Instruction&);
|
||||||
void h_FDIV_R(Instruction&);
|
void h_FDIV_R(Instruction&);
|
||||||
|
|
|
@ -3,4 +3,4 @@ minDbl:
|
||||||
absMask:
|
absMask:
|
||||||
db 255, 255, 255, 255, 255, 255, 255, 127, 255, 255, 255, 255, 255, 255, 255, 127
|
db 255, 255, 255, 255, 255, 255, 255, 127, 255, 255, 255, 255, 255, 255, 255, 127
|
||||||
signMask:
|
signMask:
|
||||||
db 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0, 0, 128
|
db 0, 0, 0, 0, 0, 0, 240, 129, 0, 0, 0, 0, 0, 0, 240, 129
|
|
@ -49,7 +49,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
|
||||||
#define WT_FADD_M 5
|
#define WT_FADD_M 5
|
||||||
#define WT_FSUB_R 20
|
#define WT_FSUB_R 20
|
||||||
#define WT_FSUB_M 5
|
#define WT_FSUB_M 5
|
||||||
#define WT_FNEG_R 6
|
#define WT_FSCAL_R 6
|
||||||
|
|
||||||
//Floating point group E
|
//Floating point group E
|
||||||
#define WT_FMUL_R 20
|
#define WT_FMUL_R 20
|
||||||
|
@ -74,7 +74,7 @@ WT_ISUB_M + WT_IMUL_9C + WT_IMUL_R + WT_IMUL_M + WT_IMULH_R + \
|
||||||
WT_IMULH_M + WT_ISMULH_R + WT_ISMULH_M + WT_IDIV_C + WT_ISDIV_C + \
|
WT_IMULH_M + WT_ISMULH_R + WT_ISMULH_M + WT_IDIV_C + WT_ISDIV_C + \
|
||||||
WT_INEG_R + WT_IXOR_R + WT_IXOR_M + WT_IROR_R + WT_IROL_R + \
|
WT_INEG_R + WT_IXOR_R + WT_IXOR_M + WT_IROR_R + WT_IROL_R + \
|
||||||
WT_ISWAP_R + WT_FSWAP_R + WT_FADD_R + WT_FADD_M + WT_FSUB_R + WT_FSUB_M + \
|
WT_ISWAP_R + WT_FSWAP_R + WT_FADD_R + WT_FADD_M + WT_FSUB_R + WT_FSUB_M + \
|
||||||
WT_FNEG_R + WT_FMUL_R + WT_FMUL_M + WT_FDIV_R + WT_FDIV_M + \
|
WT_FSCAL_R + WT_FMUL_R + WT_FMUL_M + WT_FDIV_R + WT_FDIV_M + \
|
||||||
WT_FSQRT_R + WT_COND_R + WT_COND_M + WT_CFROUND + WT_ISTORE + WT_FSTORE + WT_NOP;
|
WT_FSQRT_R + WT_COND_R + WT_COND_M + WT_CFROUND + WT_ISTORE + WT_FSTORE + WT_NOP;
|
||||||
|
|
||||||
static_assert(wtSum == 256,
|
static_assert(wtSum == 256,
|
||||||
|
|
|
@ -229,7 +229,7 @@
|
||||||
mov eax, r13d
|
mov eax, r13d
|
||||||
and eax, 16376
|
and eax, 16376
|
||||||
xor r8, qword ptr [rsi+rax]
|
xor r8, qword ptr [rsi+rax]
|
||||||
; FNEG_R f2
|
; FSCAL_R f2
|
||||||
xorps xmm2, xmm15
|
xorps xmm2, xmm15
|
||||||
; IDIV_C r5, 2577129788
|
; IDIV_C r5, 2577129788
|
||||||
mov rax, 15371395512010654233
|
mov rax, 15371395512010654233
|
||||||
|
@ -429,7 +429,7 @@
|
||||||
ror r10, cl
|
ror r10, cl
|
||||||
; ISUB_R r4, -1079131550
|
; ISUB_R r4, -1079131550
|
||||||
sub r12, -1079131550
|
sub r12, -1079131550
|
||||||
; FNEG_R f3
|
; FSCAL_R f3
|
||||||
xorps xmm3, xmm15
|
xorps xmm3, xmm15
|
||||||
; COND_R r4, ns(r5, -362284631)
|
; COND_R r4, ns(r5, -362284631)
|
||||||
xor ecx, ecx
|
xor ecx, ecx
|
||||||
|
@ -440,7 +440,7 @@
|
||||||
subpd xmm2, xmm8
|
subpd xmm2, xmm8
|
||||||
; IXOR_R r4, r5
|
; IXOR_R r4, r5
|
||||||
xor r12, r13
|
xor r12, r13
|
||||||
; FNEG_R f1
|
; FSCAL_R f1
|
||||||
xorps xmm1, xmm15
|
xorps xmm1, xmm15
|
||||||
; FADD_R f0, a0
|
; FADD_R f0, a0
|
||||||
addpd xmm0, xmm8
|
addpd xmm0, xmm8
|
||||||
|
@ -605,7 +605,7 @@
|
||||||
mov eax, r9d
|
mov eax, r9d
|
||||||
and eax, 262136
|
and eax, 262136
|
||||||
mov qword ptr [rsi+rax], r8
|
mov qword ptr [rsi+rax], r8
|
||||||
; FNEG_R f0
|
; FSCAL_R f0
|
||||||
xorps xmm0, xmm15
|
xorps xmm0, xmm15
|
||||||
; FMUL_R e0, a3
|
; FMUL_R e0, a3
|
||||||
mulpd xmm4, xmm11
|
mulpd xmm4, xmm11
|
||||||
|
@ -620,7 +620,7 @@
|
||||||
addpd xmm0, xmm8
|
addpd xmm0, xmm8
|
||||||
; FMUL_R e1, a2
|
; FMUL_R e1, a2
|
||||||
mulpd xmm5, xmm10
|
mulpd xmm5, xmm10
|
||||||
; FNEG_R f3
|
; FSCAL_R f3
|
||||||
xorps xmm3, xmm15
|
xorps xmm3, xmm15
|
||||||
; FADD_R f1, a1
|
; FADD_R f1, a1
|
||||||
addpd xmm1, xmm9
|
addpd xmm1, xmm9
|
||||||
|
|
Loading…
Reference in a new issue