mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2024-08-15 00:23:14 +00:00
Group E exponent changed from a static value (-240) to dynamic
This commit is contained in:
parent
be21ba767c
commit
ca96270509
8 changed files with 56 additions and 46 deletions
|
@ -1,6 +1,6 @@
|
||||||
mantissaMask:
|
mantissaMask:
|
||||||
db 255, 255, 255, 255, 255, 255, 15, 0, 255, 255, 255, 255, 255, 255, 15, 0
|
db 255, 255, 255, 255, 255, 255, 255, 0, 255, 255, 255, 255, 255, 255, 255, 0
|
||||||
exp240:
|
exp240:
|
||||||
db 0, 0, 0, 0, 0, 0, 240, 48, 0, 0, 0, 0, 0, 0, 240, 48
|
db 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||||
scaleMask:
|
scaleMask:
|
||||||
db 0, 0, 0, 0, 0, 0, 240, 129, 0, 0, 0, 0, 0, 0, 240, 129
|
db 0, 0, 0, 0, 0, 0, 240, 129, 0, 0, 0, 0, 0, 0, 240, 129
|
|
@ -35,9 +35,9 @@ namespace randomx {
|
||||||
static const char* regA[] = { "xmm8", "xmm9", "xmm10", "xmm11" };
|
static const char* regA[] = { "xmm8", "xmm9", "xmm10", "xmm11" };
|
||||||
|
|
||||||
static const char* tempRegx = "xmm12";
|
static const char* tempRegx = "xmm12";
|
||||||
static const char* mantissaMask = "xmm13";
|
static const char* mantissaMaskReg = "xmm13";
|
||||||
static const char* exponentMask = "xmm14";
|
static const char* exponentMaskReg = "xmm14";
|
||||||
static const char* scaleMask = "xmm15";
|
static const char* scaleMaskReg = "xmm15";
|
||||||
static const char* regIc = "rbx";
|
static const char* regIc = "rbx";
|
||||||
static const char* regIc32 = "ebx";
|
static const char* regIc32 = "ebx";
|
||||||
static const char* regIc8 = "bl";
|
static const char* regIc8 = "bl";
|
||||||
|
@ -328,7 +328,6 @@ namespace randomx {
|
||||||
traceint(instr);
|
traceint(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
//4 uOPs
|
|
||||||
void AssemblyGeneratorX86::h_IMULH_R(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_IMULH_R(Instruction& instr, int i) {
|
||||||
registerUsage[instr.dst].lastUsed = i;
|
registerUsage[instr.dst].lastUsed = i;
|
||||||
asmCode << "\tmov rax, " << regR[instr.dst] << std::endl;
|
asmCode << "\tmov rax, " << regR[instr.dst] << std::endl;
|
||||||
|
@ -489,7 +488,7 @@ namespace randomx {
|
||||||
|
|
||||||
void AssemblyGeneratorX86::h_FSCAL_R(Instruction& instr, int i) {
|
void AssemblyGeneratorX86::h_FSCAL_R(Instruction& instr, int i) {
|
||||||
instr.dst %= RegisterCountFlt;
|
instr.dst %= RegisterCountFlt;
|
||||||
asmCode << "\txorps " << regF[instr.dst] << ", " << scaleMask << std::endl;
|
asmCode << "\txorps " << regF[instr.dst] << ", " << scaleMaskReg << std::endl;
|
||||||
traceflt(instr);
|
traceflt(instr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -504,8 +503,8 @@ namespace randomx {
|
||||||
instr.dst %= RegisterCountFlt;
|
instr.dst %= RegisterCountFlt;
|
||||||
genAddressReg(instr);
|
genAddressReg(instr);
|
||||||
asmCode << "\tcvtdq2pd " << tempRegx << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl;
|
asmCode << "\tcvtdq2pd " << tempRegx << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl;
|
||||||
asmCode << "\tandps " << tempRegx << ", " << mantissaMask << std::endl;
|
asmCode << "\tandps " << tempRegx << ", " << mantissaMaskReg << std::endl;
|
||||||
asmCode << "\torps " << tempRegx << ", " << exponentMask << std::endl;
|
asmCode << "\torps " << tempRegx << ", " << exponentMaskReg << std::endl;
|
||||||
asmCode << "\tdivpd " << regE[instr.dst] << ", " << tempRegx << std::endl;
|
asmCode << "\tdivpd " << regE[instr.dst] << ", " << tempRegx << std::endl;
|
||||||
traceflt(instr);
|
traceflt(instr);
|
||||||
}
|
}
|
||||||
|
|
|
@ -122,6 +122,16 @@ namespace randomx {
|
||||||
return minIndex;
|
return minIndex;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
constexpr int mantissaSize = 52;
|
||||||
|
constexpr int exponentSize = 11;
|
||||||
|
constexpr uint64_t mantissaMask = (1ULL << mantissaSize) - 1;
|
||||||
|
constexpr uint64_t exponentMask = (1ULL << exponentSize) - 1;
|
||||||
|
constexpr int exponentBias = 1023;
|
||||||
|
constexpr int dynamicExponentBits = 4;
|
||||||
|
constexpr int staticExponentBits = 4;
|
||||||
|
constexpr uint64_t constExponentBits = 0x300;
|
||||||
|
constexpr uint64_t dynamicMantissaMask = (1ULL << (mantissaSize + dynamicExponentBits)) - 1;
|
||||||
|
|
||||||
struct MemoryRegisters {
|
struct MemoryRegisters {
|
||||||
addr_t mx, ma;
|
addr_t mx, ma;
|
||||||
uint8_t* memory = nullptr;
|
uint8_t* memory = nullptr;
|
||||||
|
|
|
@ -312,12 +312,6 @@ inline __m128d load_cvt_i32x2(const void* addr) {
|
||||||
return _mm_cvtepi32_pd(ix);
|
return _mm_cvtepi32_pd(ix);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<int E>
|
|
||||||
constexpr uint64_t ieee_get_exponent_mask() {
|
|
||||||
static_assert(E > -1023, "Invalid exponent value");
|
|
||||||
return (uint64_t)(E + 1023U) << 52;
|
|
||||||
}
|
|
||||||
|
|
||||||
double loadDoublePortable(const void* addr);
|
double loadDoublePortable(const void* addr);
|
||||||
uint64_t mulh(uint64_t, uint64_t);
|
uint64_t mulh(uint64_t, uint64_t);
|
||||||
int64_t smulh(int64_t, int64_t);
|
int64_t smulh(int64_t, int64_t);
|
||||||
|
|
|
@ -229,7 +229,7 @@ int main(int argc, char** argv) {
|
||||||
std::cout << "Calculated result: ";
|
std::cout << "Calculated result: ";
|
||||||
result.print(std::cout);
|
result.print(std::cout);
|
||||||
if (noncesCount == 1000 && seedValue == 0)
|
if (noncesCount == 1000 && seedValue == 0)
|
||||||
std::cout << "Reference result: 092868e4cee629a5b3848b97a52199d8a158e5b56ab9064764cda7ff656f3741" << std::endl;
|
std::cout << "Reference result: 6d95d8d07fa3a80771f33d1b20452b61ab2d0bf21058b5e586fad38bf3e1e0ca" << std::endl;
|
||||||
if (!miningMode) {
|
if (!miningMode) {
|
||||||
std::cout << "Performance: " << 1000 * elapsed / noncesCount << " ms per hash" << std::endl;
|
std::cout << "Performance: " << 1000 * elapsed / noncesCount << " ms per hash" << std::endl;
|
||||||
}
|
}
|
||||||
|
|
|
@ -35,30 +35,40 @@ void randomx_vm::resetRoundingMode() {
|
||||||
initFpu();
|
initFpu();
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr int mantissaSize = 52;
|
namespace randomx {
|
||||||
constexpr int exponentSize = 11;
|
|
||||||
constexpr uint64_t mantissaMask = (1ULL << mantissaSize) - 1;
|
static inline uint64_t getSmallPositiveFloatBits(uint64_t entropy) {
|
||||||
constexpr uint64_t exponentMask = (1ULL << exponentSize) - 1;
|
auto exponent = entropy >> 59; //0..31
|
||||||
constexpr int exponentBias = 1023;
|
auto mantissa = entropy & mantissaMask;
|
||||||
|
exponent += exponentBias;
|
||||||
|
exponent &= exponentMask;
|
||||||
|
exponent <<= mantissaSize;
|
||||||
|
return exponent | mantissa;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline uint64_t getStaticExponent(uint64_t entropy) {
|
||||||
|
auto exponent = constExponentBits;
|
||||||
|
exponent |= (entropy >> (64 - staticExponentBits)) << dynamicExponentBits;
|
||||||
|
exponent <<= mantissaSize;
|
||||||
|
return exponent;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline uint64_t getFloatMask(uint64_t entropy) {
|
||||||
|
constexpr uint64_t mask22bit = (1ULL << 22) - 1;
|
||||||
|
return (entropy & mask22bit) | getStaticExponent(entropy);
|
||||||
|
}
|
||||||
|
|
||||||
static inline uint64_t getSmallPositiveFloatBits(uint64_t entropy) {
|
|
||||||
auto exponent = entropy >> 59; //0..31
|
|
||||||
auto mantissa = entropy & mantissaMask;
|
|
||||||
exponent += exponentBias;
|
|
||||||
exponent &= exponentMask;
|
|
||||||
exponent <<= mantissaSize;
|
|
||||||
return exponent | mantissa;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void randomx_vm::initialize() {
|
void randomx_vm::initialize() {
|
||||||
store64(&reg.a[0].lo, getSmallPositiveFloatBits(program.getEntropy(0)));
|
store64(&reg.a[0].lo, randomx::getSmallPositiveFloatBits(program.getEntropy(0)));
|
||||||
store64(&reg.a[0].hi, getSmallPositiveFloatBits(program.getEntropy(1)));
|
store64(&reg.a[0].hi, randomx::getSmallPositiveFloatBits(program.getEntropy(1)));
|
||||||
store64(&reg.a[1].lo, getSmallPositiveFloatBits(program.getEntropy(2)));
|
store64(&reg.a[1].lo, randomx::getSmallPositiveFloatBits(program.getEntropy(2)));
|
||||||
store64(&reg.a[1].hi, getSmallPositiveFloatBits(program.getEntropy(3)));
|
store64(&reg.a[1].hi, randomx::getSmallPositiveFloatBits(program.getEntropy(3)));
|
||||||
store64(&reg.a[2].lo, getSmallPositiveFloatBits(program.getEntropy(4)));
|
store64(&reg.a[2].lo, randomx::getSmallPositiveFloatBits(program.getEntropy(4)));
|
||||||
store64(&reg.a[2].hi, getSmallPositiveFloatBits(program.getEntropy(5)));
|
store64(&reg.a[2].hi, randomx::getSmallPositiveFloatBits(program.getEntropy(5)));
|
||||||
store64(&reg.a[3].lo, getSmallPositiveFloatBits(program.getEntropy(6)));
|
store64(&reg.a[3].lo, randomx::getSmallPositiveFloatBits(program.getEntropy(6)));
|
||||||
store64(&reg.a[3].hi, getSmallPositiveFloatBits(program.getEntropy(7)));
|
store64(&reg.a[3].hi, randomx::getSmallPositiveFloatBits(program.getEntropy(7)));
|
||||||
mem.ma = program.getEntropy(8) & randomx::CacheLineAlignMask;
|
mem.ma = program.getEntropy(8) & randomx::CacheLineAlignMask;
|
||||||
mem.mx = program.getEntropy(10);
|
mem.mx = program.getEntropy(10);
|
||||||
auto addressRegisters = program.getEntropy(12);
|
auto addressRegisters = program.getEntropy(12);
|
||||||
|
@ -70,10 +80,8 @@ void randomx_vm::initialize() {
|
||||||
addressRegisters >>= 1;
|
addressRegisters >>= 1;
|
||||||
config.readReg3 = 6 + (addressRegisters & 1);
|
config.readReg3 = 6 + (addressRegisters & 1);
|
||||||
datasetOffset = (program.getEntropy(13) & randomx::DatasetExtraItems) * randomx::CacheLineSize;
|
datasetOffset = (program.getEntropy(13) & randomx::DatasetExtraItems) * randomx::CacheLineSize;
|
||||||
constexpr uint64_t mask22bit = (1ULL << 22) - 1;
|
store64(&config.eMask[0], randomx::getFloatMask(program.getEntropy(14)));
|
||||||
constexpr uint64_t maskExp240 = ieee_get_exponent_mask<-240>();
|
store64(&config.eMask[1], randomx::getFloatMask(program.getEntropy(15)));
|
||||||
store64(&config.eMask[0], (program.getEntropy(14) & mask22bit) | maskExp240);
|
|
||||||
store64(&config.eMask[1], (program.getEntropy(15) & mask22bit) | maskExp240);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace randomx {
|
namespace randomx {
|
||||||
|
|
|
@ -61,11 +61,10 @@ namespace randomx {
|
||||||
|
|
||||||
template<class Allocator, bool softAes>
|
template<class Allocator, bool softAes>
|
||||||
FORCE_INLINE __m128d InterpretedVm<Allocator, softAes>::maskRegisterExponentMantissa(__m128d x) {
|
FORCE_INLINE __m128d InterpretedVm<Allocator, softAes>::maskRegisterExponentMantissa(__m128d x) {
|
||||||
constexpr uint64_t mantissaMask64 = (1ULL << 52) - 1;
|
const __m128d xmantissaMask = _mm_castsi128_pd(_mm_set_epi64x(dynamicMantissaMask, dynamicMantissaMask));
|
||||||
const __m128d mantissaMask = _mm_castsi128_pd(_mm_set_epi64x(mantissaMask64, mantissaMask64));
|
const __m128d xexponentMask = _mm_load_pd((const double*)&config.eMask);
|
||||||
const __m128d exponentMask = _mm_load_pd((const double*)&config.eMask);
|
x = _mm_and_pd(x, xmantissaMask);
|
||||||
x = _mm_and_pd(x, mantissaMask);
|
x = _mm_or_pd(x, xexponentMask);
|
||||||
x = _mm_or_pd(x, exponentMask);
|
|
||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -96,7 +96,7 @@
|
||||||
<ClCompile>
|
<ClCompile>
|
||||||
<WarningLevel>Level3</WarningLevel>
|
<WarningLevel>Level3</WarningLevel>
|
||||||
<Optimization>Disabled</Optimization>
|
<Optimization>Disabled</Optimization>
|
||||||
<SDLCheck>true</SDLCheck>
|
<SDLCheck>false</SDLCheck>
|
||||||
<ConformanceMode>true</ConformanceMode>
|
<ConformanceMode>true</ConformanceMode>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
</ItemDefinitionGroup>
|
</ItemDefinitionGroup>
|
||||||
|
|
Loading…
Reference in a new issue