Big endian bug fixes

This commit is contained in:
tevador 2019-04-24 18:37:58 +02:00
parent 1c3666aa98
commit 018c1a5222
7 changed files with 47 additions and 43 deletions

View File

@ -41,11 +41,15 @@ static FORCE_INLINE uint32_t load32(const void *src) {
#endif
}
static FORCE_INLINE uint64_t load64(const void *src) {
#if defined(NATIVE_LITTLE_ENDIAN)
static FORCE_INLINE uint64_t load64_native(const void *src) {
uint64_t w;
memcpy(&w, src, sizeof w);
return w;
}
static FORCE_INLINE uint64_t load64(const void *src) {
#if defined(NATIVE_LITTLE_ENDIAN)
return load64_native(src);
#else
const uint8_t *p = (const uint8_t *)src;
uint64_t w = *p++;
@ -75,9 +79,13 @@ static FORCE_INLINE void store32(void *dst, uint32_t w) {
#endif
}
static FORCE_INLINE void store64_native(void *dst, uint64_t w) {
memcpy(dst, &w, sizeof w);
}
static FORCE_INLINE void store64(void *dst, uint64_t w) {
#if defined(NATIVE_LITTLE_ENDIAN)
memcpy(dst, &w, sizeof w);
store64_native(dst, w);
#else
uint8_t *p = (uint8_t *)dst;
*p++ = (uint8_t)w;

View File

@ -192,7 +192,7 @@ namespace randomx {
executeSuperscalar(rl, prog, &cache->reciprocalCache);
for (unsigned q = 0; q < 8; ++q)
rl[q] ^= load64(mixBlock + 8 * q);
rl[q] ^= load64_native(mixBlock + 8 * q);
registerValue = rl[prog.getAddressRegister()];
}

View File

@ -295,7 +295,8 @@ inline __m128i _mm_slli_si128(__m128i _A, int _Imm) {
inline __m128i _mm_loadl_epi64(__m128i const* mem_addr) {
__m128i x;
x.u64[0] = load64(mem_addr);
x.u32[0] = load32((uint8_t*)mem_addr + 0);
x.u32[1] = load32((uint8_t*)mem_addr + 4);
return x;
}

View File

@ -573,14 +573,6 @@ namespace randomx {
constexpr int LOOK_FORWARD_CYCLES = 4;
constexpr int MAX_THROWAWAY_COUNT = 256;
#ifndef _DEBUG
constexpr bool TRACE = false;
constexpr bool INFO = false;
#else
constexpr bool TRACE = true;
constexpr bool INFO = true;
#endif
template<bool commit>
static int scheduleUop(ExecutionPort::type uop, ExecutionPort::type(&portBusy)[CYCLE_MAP_SIZE][3], int cycle) {
//The scheduling here is done optimistically by checking port availability in order P5 -> P0 -> P1 to not overload
@ -588,21 +580,21 @@ namespace randomx {
for (; cycle < CYCLE_MAP_SIZE; ++cycle) {
if ((uop & ExecutionPort::P5) != 0 && !portBusy[cycle][2]) {
if (commit) {
if (TRACE) std::cout << "; P5 at cycle " << cycle << std::endl;
if (trace) std::cout << "; P5 at cycle " << cycle << std::endl;
portBusy[cycle][2] = uop;
}
return cycle;
}
if ((uop & ExecutionPort::P0) != 0 && !portBusy[cycle][0]) {
if (commit) {
if (TRACE) std::cout << "; P0 at cycle " << cycle << std::endl;
if (trace) std::cout << "; P0 at cycle " << cycle << std::endl;
portBusy[cycle][0] = uop;
}
return cycle;
}
if ((uop & ExecutionPort::P1) != 0 && !portBusy[cycle][1]) {
if (commit) {
if (TRACE) std::cout << "; P1 at cycle " << cycle << std::endl;
if (trace) std::cout << "; P1 at cycle " << cycle << std::endl;
portBusy[cycle][1] = uop;
}
return cycle;
@ -621,7 +613,7 @@ namespace randomx {
//move instructions are eliminated and don't need an execution unit
if (mop.isEliminated()) {
if (commit)
if (TRACE) std::cout << "; (eliminated)" << std::endl;
if (trace) std::cout << "; (eliminated)" << std::endl;
return cycle;
}
else if (mop.isSimple()) {
@ -677,7 +669,7 @@ namespace randomx {
//select a decode configuration
decodeBuffer = decodeBuffer->fetchNext(currentInstruction.getType(), decodeCycle, mulCount, gen);
if (TRACE) std::cout << "; ------------- fetch cycle " << cycle << " (" << decodeBuffer->getName() << ")" << std::endl;
if (trace) std::cout << "; ------------- fetch cycle " << cycle << " (" << decodeBuffer->getName() << ")" << std::endl;
int bufferIndex = 0;
@ -692,15 +684,15 @@ namespace randomx {
//select an instruction so that the first macro-op fits into the current slot
currentInstruction.createForSlot(gen, decodeBuffer->getCounts()[bufferIndex], decodeBuffer->getIndex(), decodeBuffer->getSize() == bufferIndex + 1, bufferIndex == 0);
macroOpIndex = 0;
if (TRACE) std::cout << "; " << currentInstruction.getInfo().getName() << std::endl;
if (trace) std::cout << "; " << currentInstruction.getInfo().getName() << std::endl;
}
const MacroOp& mop = currentInstruction.getInfo().getOp(macroOpIndex);
if (TRACE) std::cout << mop.getName() << " ";
if (trace) std::cout << mop.getName() << " ";
//calculate the earliest cycle when this macro-op (all of its uOPs) can be scheduled for execution
int scheduleCycle = scheduleMop<false>(mop, portBusy, cycle, depCycle);
if (scheduleCycle < 0) {
if (TRACE) std::cout << "Unable to map operation '" << mop.getName() << "' to execution port (cycle " << cycle << ")" << std::endl;
if (trace) std::cout << "Unable to map operation '" << mop.getName() << "' to execution port (cycle " << cycle << ")" << std::endl;
//__debugbreak();
portsSaturated = true;
break;
@ -711,7 +703,7 @@ namespace randomx {
int forward;
//if no suitable operand is ready, look up to LOOK_FORWARD_CYCLES forward
for (forward = 0; forward < LOOK_FORWARD_CYCLES && !currentInstruction.selectSource(scheduleCycle, registers, gen); ++forward) {
if (TRACE) std::cout << "; src STALL at cycle " << cycle << std::endl;
if (trace) std::cout << "; src STALL at cycle " << cycle << std::endl;
++scheduleCycle;
++cycle;
}
@ -720,22 +712,22 @@ namespace randomx {
if (throwAwayCount < MAX_THROWAWAY_COUNT) {
throwAwayCount++;
macroOpIndex = currentInstruction.getInfo().getSize();
if (TRACE) std::cout << "; THROW away " << currentInstruction.getInfo().getName() << std::endl;
if (trace) std::cout << "; THROW away " << currentInstruction.getInfo().getName() << std::endl;
//cycle = topCycle;
continue;
}
//abort this decode buffer
if (TRACE) std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - source registers not available for operation " << currentInstruction.getInfo().getName() << std::endl;
if (trace) std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - source registers not available for operation " << currentInstruction.getInfo().getName() << std::endl;
currentInstruction = SuperscalarInstruction::Null;
break;
}
if (TRACE) std::cout << "; src = r" << currentInstruction.getSource() << std::endl;
if (trace) std::cout << "; src = r" << currentInstruction.getSource() << std::endl;
}
//find a destination register that will be ready when this instruction executes
if (macroOpIndex == currentInstruction.getInfo().getDstOp()) {
int forward;
for (forward = 0; forward < LOOK_FORWARD_CYCLES && !currentInstruction.selectDestination(scheduleCycle, throwAwayCount > 0, registers, gen); ++forward) {
if (TRACE) std::cout << "; dst STALL at cycle " << cycle << std::endl;
if (trace) std::cout << "; dst STALL at cycle " << cycle << std::endl;
++scheduleCycle;
++cycle;
}
@ -743,16 +735,16 @@ namespace randomx {
if (throwAwayCount < MAX_THROWAWAY_COUNT) {
throwAwayCount++;
macroOpIndex = currentInstruction.getInfo().getSize();
if (TRACE) std::cout << "; THROW away " << currentInstruction.getInfo().getName() << std::endl;
if (trace) std::cout << "; THROW away " << currentInstruction.getInfo().getName() << std::endl;
//cycle = topCycle;
continue;
}
//abort this decode buffer
if (TRACE) std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - destination registers not available" << std::endl;
if (trace) std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - destination registers not available" << std::endl;
currentInstruction = SuperscalarInstruction::Null;
break;
}
if (TRACE) std::cout << "; dst = r" << currentInstruction.getDestination() << std::endl;
if (trace) std::cout << "; dst = r" << currentInstruction.getDestination() << std::endl;
}
throwAwayCount = 0;
@ -773,7 +765,7 @@ namespace randomx {
ri.latency = retireCycle;
ri.lastOpGroup = currentInstruction.getGroup();
ri.lastOpPar = currentInstruction.getGroupPar();
if (TRACE) std::cout << "; RETIRED at cycle " << retireCycle << std::endl;
if (trace) std::cout << "; RETIRED at cycle " << retireCycle << std::endl;
}
codeSize += mop.getSize();
bufferIndex++;

View File

@ -37,14 +37,6 @@ const uint8_t blockTemplate_[] = {
0xc3, 0x8b, 0xde, 0xd3, 0x4d, 0x2d, 0xcd, 0xee, 0xf9, 0x5c, 0xd2, 0x0c, 0xef, 0xc1, 0x2f, 0x61, 0xd5, 0x61, 0x09
};
constexpr char hexmap[] = "0123456789abcdef";
void outputHex(std::ostream& os, const char* data, int length) {
for (int i = 0; i < length; ++i) {
os << hexmap[(data[i] & 0xF0) >> 4];
os << hexmap[data[i] & 0x0F];
}
}
class AtomicHash {
public:
AtomicHash() {
@ -101,7 +93,8 @@ void mine(randomx_vm* vm, std::atomic<uint32_t>& atomicNonce, AtomicHash& result
int main(int argc, char** argv) {
bool softAes, miningMode, verificationMode, help, largePages, jit;
int noncesCount, threadCount, initThreadCount;
int32_t seed;
int32_t seedValue;
char seed[4];
readOption("--softAes", argc, argv, softAes);
readOption("--mine", argc, argv, miningMode);
@ -109,11 +102,13 @@ int main(int argc, char** argv) {
readIntOption("--threads", argc, argv, threadCount, 1);
readIntOption("--nonces", argc, argv, noncesCount, 1000);
readIntOption("--init", argc, argv, initThreadCount, 1);
readIntOption("--seed", argc, argv, seed, 0);
readIntOption("--seed", argc, argv, seedValue, 0);
readOption("--largePages", argc, argv, largePages);
readOption("--jit", argc, argv, jit);
readOption("--help", argc, argv, help);
store32(&seed, seedValue);
std::cout << "RandomX benchmark" << std::endl;
if (help || (!miningMode && !verificationMode)) {
@ -229,7 +224,7 @@ int main(int argc, char** argv) {
double elapsed = sw.getElapsed();
std::cout << "Calculated result: ";
result.print(std::cout);
if (noncesCount == 1000 && seed == 0)
if (noncesCount == 1000 && seedValue == 0)
std::cout << "Reference result: b69741719152625854031c2337ceae68c3030f2b9581a73acebaa69fc9b555fc" << std::endl;
if (!miningMode) {
std::cout << "Performance: " << 1000 * elapsed / noncesCount << " ms per hash" << std::endl;

View File

@ -24,6 +24,14 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
#include <iostream>
#include <fstream>
constexpr char hexmap[] = "0123456789abcdef";
inline void outputHex(std::ostream& os, const char* data, int length) {
for (int i = 0; i < length; ++i) {
os << hexmap[(data[i] & 0xF0) >> 4];
os << hexmap[data[i] & 0x0F];
}
}
inline void dump(const char* buffer, uint64_t count, const char* name) {
std::ofstream fout(name, std::ios::out | std::ios::binary);
fout.write(buffer, count);

View File

@ -114,7 +114,7 @@ namespace randomx {
template<class Allocator, bool softAes>
void InterpretedVm<Allocator, softAes>::executeBytecode(int& ic, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) {
auto& ibc = byteCode[ic];
if (trace) std::cout << std::dec << std::setw(3) << ic << " " << program(ic);
if (trace && ibc.type != InstructionType::NOP) std::cout << std::dec << std::setw(3) << ic << " " << program(ic);
switch (ibc.type)
{
case InstructionType::IADD_RS: {
@ -270,7 +270,7 @@ namespace randomx {
default:
UNREACHABLE;
}
if (trace) {
if (trace && ibc.type != InstructionType::NOP) {
if(ibc.type < 20 || ibc.type == 31 || ibc.type == 32)
print(*ibc.idst);
else //if(ibc.type >= 20 && ibc.type <= 30)