mirror of
				https://git.wownero.com/wownero/RandomWOW.git
				synced 2024-08-15 00:23:14 +00:00 
			
		
		
		
	Big endian bug fixes
This commit is contained in:
		
							parent
							
								
									1c3666aa98
								
							
						
					
					
						commit
						018c1a5222
					
				
					 7 changed files with 47 additions and 43 deletions
				
			
		|  | @ -41,11 +41,15 @@ static FORCE_INLINE uint32_t load32(const void *src) { | |||
| #endif | ||||
| } | ||||
| 
 | ||||
| static FORCE_INLINE uint64_t load64(const void *src) { | ||||
| #if defined(NATIVE_LITTLE_ENDIAN) | ||||
| static FORCE_INLINE uint64_t load64_native(const void *src) { | ||||
| 	uint64_t w; | ||||
| 	memcpy(&w, src, sizeof w); | ||||
| 	return w; | ||||
| } | ||||
| 
 | ||||
| static FORCE_INLINE uint64_t load64(const void *src) { | ||||
| #if defined(NATIVE_LITTLE_ENDIAN) | ||||
| 	return load64_native(src); | ||||
| #else | ||||
| 	const uint8_t *p = (const uint8_t *)src; | ||||
| 	uint64_t w = *p++; | ||||
|  | @ -75,9 +79,13 @@ static FORCE_INLINE void store32(void *dst, uint32_t w) { | |||
| #endif | ||||
| } | ||||
| 
 | ||||
| static FORCE_INLINE void store64_native(void *dst, uint64_t w) { | ||||
| 	memcpy(dst, &w, sizeof w); | ||||
| } | ||||
| 
 | ||||
| static FORCE_INLINE void store64(void *dst, uint64_t w) { | ||||
| #if defined(NATIVE_LITTLE_ENDIAN) | ||||
| 	memcpy(dst, &w, sizeof w); | ||||
| 	store64_native(dst, w); | ||||
| #else | ||||
| 	uint8_t *p = (uint8_t *)dst; | ||||
| 	*p++ = (uint8_t)w; | ||||
|  |  | |||
|  | @ -192,7 +192,7 @@ namespace randomx { | |||
| 			executeSuperscalar(rl, prog, &cache->reciprocalCache); | ||||
| 
 | ||||
| 			for (unsigned q = 0; q < 8; ++q) | ||||
| 				rl[q] ^= load64(mixBlock + 8 * q); | ||||
| 				rl[q] ^= load64_native(mixBlock + 8 * q); | ||||
| 
 | ||||
| 			registerValue = rl[prog.getAddressRegister()]; | ||||
| 		} | ||||
|  |  | |||
|  | @ -295,7 +295,8 @@ inline __m128i _mm_slli_si128(__m128i _A, int _Imm) { | |||
| 
 | ||||
| inline __m128i _mm_loadl_epi64(__m128i const* mem_addr) { | ||||
| 	__m128i x; | ||||
| 	x.u64[0] = load64(mem_addr); | ||||
| 	x.u32[0] = load32((uint8_t*)mem_addr + 0); | ||||
| 	x.u32[1] = load32((uint8_t*)mem_addr + 4); | ||||
| 	return x; | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -573,14 +573,6 @@ namespace randomx { | |||
| 	constexpr int LOOK_FORWARD_CYCLES = 4; | ||||
| 	constexpr int MAX_THROWAWAY_COUNT = 256; | ||||
| 
 | ||||
| #ifndef _DEBUG | ||||
| 	constexpr bool TRACE = false; | ||||
| 	constexpr bool INFO = false; | ||||
| #else | ||||
| 	constexpr bool TRACE = true; | ||||
| 	constexpr bool INFO = true; | ||||
| #endif | ||||
| 
 | ||||
| 	template<bool commit> | ||||
| 	static int scheduleUop(ExecutionPort::type uop, ExecutionPort::type(&portBusy)[CYCLE_MAP_SIZE][3], int cycle) { | ||||
| 		//The scheduling here is done optimistically by checking port availability in order P5 -> P0 -> P1 to not overload
 | ||||
|  | @ -588,21 +580,21 @@ namespace randomx { | |||
| 		for (; cycle < CYCLE_MAP_SIZE; ++cycle) { | ||||
| 			if ((uop & ExecutionPort::P5) != 0 && !portBusy[cycle][2]) { | ||||
| 				if (commit) { | ||||
| 					if (TRACE) std::cout << "; P5 at cycle " << cycle << std::endl; | ||||
| 					if (trace) std::cout << "; P5 at cycle " << cycle << std::endl; | ||||
| 					portBusy[cycle][2] = uop; | ||||
| 				} | ||||
| 				return cycle; | ||||
| 			} | ||||
| 			if ((uop & ExecutionPort::P0) != 0 && !portBusy[cycle][0]) { | ||||
| 				if (commit) { | ||||
| 					if (TRACE) std::cout << "; P0 at cycle " << cycle << std::endl; | ||||
| 					if (trace) std::cout << "; P0 at cycle " << cycle << std::endl; | ||||
| 					portBusy[cycle][0] = uop; | ||||
| 				} | ||||
| 				return cycle; | ||||
| 			} | ||||
| 			if ((uop & ExecutionPort::P1) != 0 && !portBusy[cycle][1]) { | ||||
| 				if (commit) { | ||||
| 					if (TRACE) std::cout << "; P1 at cycle " << cycle << std::endl; | ||||
| 					if (trace) std::cout << "; P1 at cycle " << cycle << std::endl; | ||||
| 					portBusy[cycle][1] = uop; | ||||
| 				} | ||||
| 				return cycle; | ||||
|  | @ -621,7 +613,7 @@ namespace randomx { | |||
| 		//move instructions are eliminated and don't need an execution unit
 | ||||
| 		if (mop.isEliminated()) { | ||||
| 			if (commit) | ||||
| 				if (TRACE) std::cout << "; (eliminated)" << std::endl; | ||||
| 				if (trace) std::cout << "; (eliminated)" << std::endl; | ||||
| 			return cycle; | ||||
| 		}  | ||||
| 		else if (mop.isSimple()) { | ||||
|  | @ -677,7 +669,7 @@ namespace randomx { | |||
| 
 | ||||
| 			//select a decode configuration
 | ||||
| 			decodeBuffer = decodeBuffer->fetchNext(currentInstruction.getType(), decodeCycle, mulCount, gen); | ||||
| 			if (TRACE) std::cout << "; ------------- fetch cycle " << cycle << " (" << decodeBuffer->getName() << ")" << std::endl; | ||||
| 			if (trace) std::cout << "; ------------- fetch cycle " << cycle << " (" << decodeBuffer->getName() << ")" << std::endl; | ||||
| 
 | ||||
| 			int bufferIndex = 0; | ||||
| 			 | ||||
|  | @ -692,15 +684,15 @@ namespace randomx { | |||
| 					//select an instruction so that the first macro-op fits into the current slot
 | ||||
| 					currentInstruction.createForSlot(gen, decodeBuffer->getCounts()[bufferIndex], decodeBuffer->getIndex(), decodeBuffer->getSize() == bufferIndex + 1, bufferIndex == 0); | ||||
| 					macroOpIndex = 0; | ||||
| 					if (TRACE) std::cout << "; " << currentInstruction.getInfo().getName() << std::endl; | ||||
| 					if (trace) std::cout << "; " << currentInstruction.getInfo().getName() << std::endl; | ||||
| 				} | ||||
| 				const MacroOp& mop = currentInstruction.getInfo().getOp(macroOpIndex); | ||||
| 				if (TRACE) std::cout << mop.getName() << " "; | ||||
| 				if (trace) std::cout << mop.getName() << " "; | ||||
| 
 | ||||
| 				//calculate the earliest cycle when this macro-op (all of its uOPs) can be scheduled for execution
 | ||||
| 				int scheduleCycle = scheduleMop<false>(mop, portBusy, cycle, depCycle); | ||||
| 				if (scheduleCycle < 0) { | ||||
| 					if (TRACE) std::cout << "Unable to map operation '" << mop.getName() << "' to execution port (cycle " << cycle << ")" << std::endl; | ||||
| 					if (trace) std::cout << "Unable to map operation '" << mop.getName() << "' to execution port (cycle " << cycle << ")" << std::endl; | ||||
| 					//__debugbreak();
 | ||||
| 					portsSaturated = true; | ||||
| 					break; | ||||
|  | @ -711,7 +703,7 @@ namespace randomx { | |||
| 					int forward; | ||||
| 					//if no suitable operand is ready, look up to LOOK_FORWARD_CYCLES forward
 | ||||
| 					for (forward = 0; forward < LOOK_FORWARD_CYCLES && !currentInstruction.selectSource(scheduleCycle, registers, gen); ++forward) { | ||||
| 						if (TRACE) std::cout << "; src STALL at cycle " << cycle << std::endl; | ||||
| 						if (trace) std::cout << "; src STALL at cycle " << cycle << std::endl; | ||||
| 						++scheduleCycle; | ||||
| 						++cycle; | ||||
| 					} | ||||
|  | @ -720,22 +712,22 @@ namespace randomx { | |||
| 						if (throwAwayCount < MAX_THROWAWAY_COUNT) { | ||||
| 							throwAwayCount++; | ||||
| 							macroOpIndex = currentInstruction.getInfo().getSize(); | ||||
| 							if (TRACE) std::cout << "; THROW away " << currentInstruction.getInfo().getName() << std::endl; | ||||
| 							if (trace) std::cout << "; THROW away " << currentInstruction.getInfo().getName() << std::endl; | ||||
| 							//cycle = topCycle;
 | ||||
| 							continue; | ||||
| 						} | ||||
| 						//abort this decode buffer
 | ||||
| 						if (TRACE) std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - source registers not available for operation " << currentInstruction.getInfo().getName() << std::endl; | ||||
| 						if (trace) std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - source registers not available for operation " << currentInstruction.getInfo().getName() << std::endl; | ||||
| 						currentInstruction = SuperscalarInstruction::Null; | ||||
| 						break; | ||||
| 					} | ||||
| 					if (TRACE) std::cout << "; src = r" << currentInstruction.getSource() << std::endl; | ||||
| 					if (trace) std::cout << "; src = r" << currentInstruction.getSource() << std::endl; | ||||
| 				} | ||||
| 				//find a destination register that will be ready when this instruction executes
 | ||||
| 				if (macroOpIndex == currentInstruction.getInfo().getDstOp()) { | ||||
| 					int forward; | ||||
| 					for (forward = 0; forward < LOOK_FORWARD_CYCLES && !currentInstruction.selectDestination(scheduleCycle, throwAwayCount > 0, registers, gen); ++forward) { | ||||
| 						if (TRACE) std::cout << "; dst STALL at cycle " << cycle << std::endl; | ||||
| 						if (trace) std::cout << "; dst STALL at cycle " << cycle << std::endl; | ||||
| 						++scheduleCycle; | ||||
| 						++cycle; | ||||
| 					} | ||||
|  | @ -743,16 +735,16 @@ namespace randomx { | |||
| 						if (throwAwayCount < MAX_THROWAWAY_COUNT) { | ||||
| 							throwAwayCount++; | ||||
| 							macroOpIndex = currentInstruction.getInfo().getSize(); | ||||
| 							if (TRACE) std::cout << "; THROW away " << currentInstruction.getInfo().getName() << std::endl; | ||||
| 							if (trace) std::cout << "; THROW away " << currentInstruction.getInfo().getName() << std::endl; | ||||
| 							//cycle = topCycle;
 | ||||
| 							continue; | ||||
| 						} | ||||
| 						//abort this decode buffer
 | ||||
| 						if (TRACE) std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - destination registers not available" << std::endl; | ||||
| 						if (trace) std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - destination registers not available" << std::endl; | ||||
| 						currentInstruction = SuperscalarInstruction::Null; | ||||
| 						break; | ||||
| 					} | ||||
| 					if (TRACE) std::cout << "; dst = r" << currentInstruction.getDestination() << std::endl; | ||||
| 					if (trace) std::cout << "; dst = r" << currentInstruction.getDestination() << std::endl; | ||||
| 				} | ||||
| 				throwAwayCount = 0; | ||||
| 
 | ||||
|  | @ -773,7 +765,7 @@ namespace randomx { | |||
| 					ri.latency = retireCycle; | ||||
| 					ri.lastOpGroup = currentInstruction.getGroup(); | ||||
| 					ri.lastOpPar = currentInstruction.getGroupPar(); | ||||
| 					if (TRACE) std::cout << "; RETIRED at cycle " << retireCycle << std::endl; | ||||
| 					if (trace) std::cout << "; RETIRED at cycle " << retireCycle << std::endl; | ||||
| 				} | ||||
| 				codeSize += mop.getSize(); | ||||
| 				bufferIndex++; | ||||
|  |  | |||
|  | @ -37,14 +37,6 @@ const uint8_t blockTemplate_[] = { | |||
| 		0xc3, 0x8b, 0xde, 0xd3, 0x4d, 0x2d, 0xcd, 0xee, 0xf9, 0x5c, 0xd2, 0x0c, 0xef, 0xc1, 0x2f, 0x61, 0xd5, 0x61, 0x09 | ||||
| }; | ||||
| 
 | ||||
| constexpr char hexmap[] = "0123456789abcdef"; | ||||
| void outputHex(std::ostream& os, const char* data, int length) { | ||||
| 	for (int i = 0; i < length; ++i) { | ||||
| 		os << hexmap[(data[i] & 0xF0) >> 4]; | ||||
| 		os << hexmap[data[i] & 0x0F]; | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| class AtomicHash { | ||||
| public: | ||||
| 	AtomicHash() { | ||||
|  | @ -101,7 +93,8 @@ void mine(randomx_vm* vm, std::atomic<uint32_t>& atomicNonce, AtomicHash& result | |||
| int main(int argc, char** argv) { | ||||
| 	bool softAes, miningMode, verificationMode, help, largePages, jit; | ||||
| 	int noncesCount, threadCount, initThreadCount; | ||||
| 	int32_t seed; | ||||
| 	int32_t seedValue; | ||||
| 	char seed[4]; | ||||
| 
 | ||||
| 	readOption("--softAes", argc, argv, softAes); | ||||
| 	readOption("--mine", argc, argv, miningMode); | ||||
|  | @ -109,11 +102,13 @@ int main(int argc, char** argv) { | |||
| 	readIntOption("--threads", argc, argv, threadCount, 1); | ||||
| 	readIntOption("--nonces", argc, argv, noncesCount, 1000); | ||||
| 	readIntOption("--init", argc, argv, initThreadCount, 1); | ||||
| 	readIntOption("--seed", argc, argv, seed, 0); | ||||
| 	readIntOption("--seed", argc, argv, seedValue, 0); | ||||
| 	readOption("--largePages", argc, argv, largePages); | ||||
| 	readOption("--jit", argc, argv, jit); | ||||
| 	readOption("--help", argc, argv, help); | ||||
| 
 | ||||
| 	store32(&seed, seedValue); | ||||
| 
 | ||||
| 	std::cout << "RandomX benchmark" << std::endl; | ||||
| 
 | ||||
| 	if (help || (!miningMode && !verificationMode)) { | ||||
|  | @ -229,7 +224,7 @@ int main(int argc, char** argv) { | |||
| 		double elapsed = sw.getElapsed(); | ||||
| 		std::cout << "Calculated result: "; | ||||
| 		result.print(std::cout); | ||||
| 		if (noncesCount == 1000 && seed == 0) | ||||
| 		if (noncesCount == 1000 && seedValue == 0) | ||||
| 			std::cout << "Reference result:  b69741719152625854031c2337ceae68c3030f2b9581a73acebaa69fc9b555fc" << std::endl; | ||||
| 		if (!miningMode) { | ||||
| 			std::cout << "Performance: " << 1000 * elapsed / noncesCount << " ms per hash" << std::endl; | ||||
|  |  | |||
|  | @ -24,6 +24,14 @@ along with RandomX.  If not, see<http://www.gnu.org/licenses/>. | |||
| #include <iostream> | ||||
| #include <fstream> | ||||
| 
 | ||||
| constexpr char hexmap[] = "0123456789abcdef"; | ||||
| inline void outputHex(std::ostream& os, const char* data, int length) { | ||||
| 	for (int i = 0; i < length; ++i) { | ||||
| 		os << hexmap[(data[i] & 0xF0) >> 4]; | ||||
| 		os << hexmap[data[i] & 0x0F]; | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| inline void dump(const char* buffer, uint64_t count, const char* name) { | ||||
| 	std::ofstream fout(name, std::ios::out | std::ios::binary); | ||||
| 	fout.write(buffer, count); | ||||
|  |  | |||
|  | @ -114,7 +114,7 @@ namespace randomx { | |||
| 	template<class Allocator, bool softAes> | ||||
| 	void InterpretedVm<Allocator, softAes>::executeBytecode(int& ic, int_reg_t(&r)[8], __m128d (&f)[4], __m128d (&e)[4], __m128d (&a)[4]) { | ||||
| 		auto& ibc = byteCode[ic]; | ||||
| 		if (trace) std::cout << std::dec << std::setw(3) << ic << " " << program(ic); | ||||
| 		if (trace && ibc.type != InstructionType::NOP) std::cout << std::dec << std::setw(3) << ic << " " << program(ic); | ||||
| 		switch (ibc.type) | ||||
| 		{ | ||||
| 			case InstructionType::IADD_RS: { | ||||
|  | @ -270,7 +270,7 @@ namespace randomx { | |||
| 			default: | ||||
| 				UNREACHABLE; | ||||
| 		} | ||||
| 		if (trace) { | ||||
| 		if (trace && ibc.type != InstructionType::NOP) { | ||||
| 			if(ibc.type < 20 || ibc.type == 31 || ibc.type == 32) | ||||
| 				print(*ibc.idst); | ||||
| 			else //if(ibc.type >= 20 && ibc.type <= 30)
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue