RandomWOW/src/instructionsPortable.cpp

/*
Copyright (c) 2018 tevador

This file is part of RandomX.

RandomX is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

RandomX is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with RandomX.  If not, see<http://www.gnu.org/licenses/>.
*/
//#define DEBUG
//#define FTZ
#include "instructions.hpp"
#include "intrinPortable.h"
#pragma STDC FENV_ACCESS on
#include <cfenv>
#include <cmath>
#ifdef DEBUG
#include <iostream>
#endif

#if defined(__SIZEOF_INT128__)
	typedef unsigned __int128 uint128_t;
	typedef __int128 int128_t;
	static inline uint64_t __umulhi64(uint64_t a, uint64_t b) {
		return ((uint128_t)a * b) >> 64;
	}
	static inline uint64_t __imulhi64(int64_t a, int64_t b) {
		return ((int128_t)a * b) >> 64;
	}
	#define umulhi64 __umulhi64
	#define imulhi64 __imulhi64
#endif

#if defined(_MSC_VER)
	#define HAS_VALUE(X) X ## 0
	#define EVAL_DEFINE(X) HAS_VALUE(X)
	#include <intrin.h>
	#include <stdlib.h>
	#define ror64 _rotr64
	#define rol64 _rotl64
	#if EVAL_DEFINE(__MACHINEARM64_X64(1))
		#define umulhi64 __umulh
	#endif
	#if EVAL_DEFINE(__MACHINEX64(1))
		static inline uint64_t __imulhi64(int64_t a, int64_t b) {
			int64_t hi;
			_mul128(a, b, &hi);
			return hi;
		}
		#define imulhi64 __imulhi64
	#endif
	static inline uint32_t _setRoundMode(uint32_t mode) {
		return _controlfp(mode, _MCW_RC);
	}
	#define setRoundMode _setRoundMode
#endif

#ifndef setRoundMode
	#define setRoundMode fesetround
#endif

#ifndef ror64
	static inline uint64_t __ror64(uint64_t a, int b) {
		return (a >> b) | (a << (64 - b));
	}
	#define ror64 __ror64
#endif

#ifndef rol64
	static inline uint64_t __rol64(uint64_t a, int b) {
		return (a << b) | (a >> (64 - b));
	}
	#define rol64 __rol64
#endif

#ifndef sar64
	#include <type_traits>
	constexpr int64_t builtintShr64(int64_t value, int shift) noexcept {
		return value >> shift;
	}

	struct UsesArithmeticShift : std::integral_constant<bool, builtintShr64(-1LL, 1) == -1LL> {
	};

	static inline int64_t __sar64(int64_t a, int b) {
		return UsesArithmeticShift::value ? builtintShr64(a, b) : (a < 0 ? ~(~a >> b) : a >> b);
	}
	#define sar64 __sar64
#endif

#ifndef umulhi64
	#define LO(x) ((x)&0xffffffff)
	#define HI(x) ((x)>>32)
	static inline uint64_t __umulhi64(uint64_t a, uint64_t b) {
		uint64_t ah = HI(a), al = LO(a);
		uint64_t bh = HI(b), bl = LO(b);
		uint64_t x00 = al * bl;
		uint64_t x01 = al * bh;
		uint64_t x10 = ah * bl;
		uint64_t x11 = ah * bh;
		uint64_t m1 = LO(x10) + LO(x01) + HI(x00);
		uint64_t m2 = HI(x10) + HI(x01) + LO(x11) + HI(m1);
		uint64_t m3 = HI(x11) + HI(m2);

		return (m3 << 32) + LO(m2);
	}
	#define umulhi64 __umulhi64
#endif

#ifndef imulhi64
	static inline int64_t __imulhi64(int64_t a, int64_t b) {
		int64_t hi = umulhi64(a, b);
		if (a < 0LL) hi -= b;
		if (b < 0LL) hi -= a;
		return hi;
	}
	#define imulhi64 __imulhi64
#endif

static double FlushDenormal(double x) {
	if (std::fpclassify(x) == FP_SUBNORMAL) {
		return 0;
	}
	return x;
}

#ifdef FTZ
#undef FTZ
#define FTZ(x) FlushDenormal(x)
#else
#define FTZ(x) x
#endif

namespace RandomX {

	extern "C" {

		void ADD_64(convertible_t& a, convertible_t& b, convertible_t& c) {
			c.u64 = a.u64 + b.u64;
		}

		void ADD_32(convertible_t& a, convertible_t& b, convertible_t& c) {
			c.u64 = a.u32 + b.u32;
		}

		void SUB_64(convertible_t& a, convertible_t& b, convertible_t& c) {
			c.u64 = a.u64 - b.u64;
		}

		void SUB_32(convertible_t& a, convertible_t& b, convertible_t& c) {
			c.u64 = a.u32 - b.u32;
		}

		void MUL_64(convertible_t& a, convertible_t& b, convertible_t& c) {
			c.u64 = a.u64 * b.u64;
		}

		void MULH_64(convertible_t& a, convertible_t& b, convertible_t& c) {
			c.u64 = umulhi64(a.u64, b.u64);
		}

		void MUL_32(convertible_t& a, convertible_t& b, convertible_t& c) {
			c.u64 = (uint64_t)a.u32 * b.u32;
		}

		void IMUL_32(convertible_t& a, convertible_t& b, convertible_t& c) {
			c.i64 = (int64_t)a.i32 * b.i32;
		}

		void IMULH_64(convertible_t& a, convertible_t& b, convertible_t& c) {
			c.i64 = imulhi64(a.i64, b.i64);
		}

		void DIV_64(convertible_t& a, convertible_t& b, convertible_t& c) {
			c.u64 = a.u64 / (b.u32 != 0 ? b.u32 : 1U);
		}

		void IDIV_64(convertible_t& a, convertible_t& b, convertible_t& c) {
			if (a.i64 == INT64_MIN && b.i32 == -1)
				c.i64 = INT64_MIN;
			else
				c.i64 = a.i64 / (b.i32 != 0 ? b.i32 : 1);
		}

		void AND_64(convertible_t& a, convertible_t& b, convertible_t& c) {
			c.u64 = a.u64 & b.u64;
		}

		void AND_32(convertible_t& a, convertible_t& b, convertible_t& c) {
			c.u64 = a.u32 & b.u32;
		}

		void OR_64(convertible_t& a, convertible_t& b, convertible_t& c) {
			c.u64 = a.u64 | b.u64;
		}

		void OR_32(convertible_t& a, convertible_t& b, convertible_t& c) {
			c.u64 = a.u32 | b.u32;
		}

		void XOR_64(convertible_t& a, convertible_t& b, convertible_t& c) {
			c.u64 = a.u64 ^ b.u64;
		}

		void XOR_32(convertible_t& a, convertible_t& b, convertible_t& c) {
			c.u64 = a.u32 ^ b.u32;
		}

		void SHL_64(convertible_t& a, convertible_t& b, convertible_t& c) {
			c.u64 = a.u64 << (b.u64 & 63);
		}

		void SHR_64(convertible_t& a, convertible_t& b, convertible_t& c) {
			c.u64 = a.u64 >> (b.u64 & 63);
		}

		void SAR_64(convertible_t& a, convertible_t& b, convertible_t& c) {
			c.u64 = sar64(a.i64, b.u64 & 63);
		}

		void ROL_64(convertible_t& a, convertible_t& b, convertible_t& c) {
			c.u64 = rol64(a.u64, (b.u64 & 63));
		}

		void ROR_64(convertible_t& a, convertible_t& b, convertible_t& c) {
			c.u64 = ror64(a.u64, (b.u64 & 63));
		}

		void FPINIT() {
			setRoundMode(FE_TONEAREST);
		}

		void FPADD(convertible_t& a, double b, convertible_t& c) {
			c.f64 = FTZ(convertToDouble(a.i64) + b);
		}

		void FPSUB(convertible_t& a, double b, convertible_t& c) {
			c.f64 = FTZ(convertToDouble(a.i64) - b);
		}

		void FPMUL(convertible_t& a, double b, convertible_t& c) {
			c.f64 = FTZ(convertToDoubleNonZero(a.i64) * b);
		}

		void FPDIV(convertible_t& a, double b, convertible_t& c) {
			c.f64 = FTZ(convertToDoubleNonZero(a.i64) / b);
		}

		void FPSQRT(convertible_t& a, convertible_t& b, convertible_t& c) {
#ifdef __SSE2__
			double d = convertToDoubleNonNegative(a.i64);
			c.f64 = _mm_cvtsd_f64(_mm_sqrt_sd(_mm_setzero_pd(), _mm_load_pd(&d)));
#else
			c.f64 = FTZ(sqrt(convertToDoubleNonNegative(a.i64)));
#endif

		}

		void FPROUND(convertible_t& a, convertible_t& b, convertible_t& c) {
			c.f64 = convertToDouble(a.i64);
			switch (a.u64 & 3) {
				case RoundDown:
#ifdef DEBUG
					std::cout << "Round FE_DOWNWARD (" << FE_DOWNWARD << ") = " <<
#endif
					setRoundMode(FE_DOWNWARD);
#ifdef DEBUG
					std::cout << std::endl;
#endif
					break;
				case RoundUp:
#ifdef DEBUG
					std::cout << "Round FE_UPWARD (" << FE_UPWARD << ") = " <<
#endif
					setRoundMode(FE_UPWARD);
#ifdef DEBUG
					std::cout << std::endl;
#endif
					break;
				case RoundToZero:
#ifdef DEBUG
					std::cout << "Round FE_TOWARDZERO (" << FE_TOWARDZERO << ") = " <<
#endif
					setRoundMode(FE_TOWARDZERO);
#ifdef DEBUG
					std::cout << std::endl;
#endif
					break;
				default:
#ifdef DEBUG
					std::cout << "Round FE_TONEAREST (" << FE_TONEAREST << ") = " <<
#endif
					setRoundMode(FE_TONEAREST);
#ifdef DEBUG
					std::cout << std::endl;
#endif
					break;
			}
		}
	}
}
RandomX portable interpreter 2018-12-11 20:00:30 +00:00			`/*`
			`Copyright (c) 2018 tevador`

			`This file is part of RandomX.`

			`RandomX is free software: you can redistribute it and/or modify`
			`it under the terms of the GNU General Public License as published by`
			`the Free Software Foundation, either version 3 of the License, or`
			`(at your option) any later version.`

			`RandomX is distributed in the hope that it will be useful,`
			`but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`GNU General Public License for more details.`

			`You should have received a copy of the GNU General Public License`
			`along with RandomX. If not, see<http://www.gnu.org/licenses/>.`
			`*/`
			`//#define DEBUG`
			`//#define FTZ`
			`#include "instructions.hpp"`
			`#include "intrinPortable.h"`
			`#pragma STDC FENV_ACCESS on`
			`#include <cfenv>`
			`#include <cmath>`
			`#ifdef DEBUG`
			`#include <iostream>`
			`#endif`

			`#if defined(__SIZEOF_INT128__)`
			`typedef unsigned __int128 uint128_t;`
			`typedef __int128 int128_t;`
			`static inline uint64_t __umulhi64(uint64_t a, uint64_t b) {`
			`return ((uint128_t)a * b) >> 64;`
			`}`
			`static inline uint64_t __imulhi64(int64_t a, int64_t b) {`
			`return ((int128_t)a * b) >> 64;`
			`}`
			`#define umulhi64 __umulhi64`
			`#define imulhi64 __imulhi64`
			`#endif`

			`#if defined(_MSC_VER)`
			`#define HAS_VALUE(X) X ## 0`
			`#define EVAL_DEFINE(X) HAS_VALUE(X)`
			`#include <intrin.h>`
			`#include <stdlib.h>`
			`#define ror64 _rotr64`
			`#define rol64 _rotl64`
			`#if EVAL_DEFINE(__MACHINEARM64_X64(1))`
			`#define umulhi64 __umulh`
			`#endif`
			`#if EVAL_DEFINE(__MACHINEX64(1))`
			`static inline uint64_t __imulhi64(int64_t a, int64_t b) {`
			`int64_t hi;`
			`_mul128(a, b, &hi);`
			`return hi;`
			`}`
			`#define imulhi64 __imulhi64`
			`#endif`
			`static inline uint32_t _setRoundMode(uint32_t mode) {`
			`return _controlfp(mode, _MCW_RC);`
			`}`
			`#define setRoundMode _setRoundMode`
			`#endif`

			`#ifndef setRoundMode`
			`#define setRoundMode fesetround`
			`#endif`

			`#ifndef ror64`
			`static inline uint64_t __ror64(uint64_t a, int b) {`
			`return (a >> b) \| (a << (64 - b));`
			`}`
			`#define ror64 __ror64`
			`#endif`

			`#ifndef rol64`
			`static inline uint64_t __rol64(uint64_t a, int b) {`
			`return (a << b) \| (a >> (64 - b));`
			`}`
			`#define rol64 __rol64`
			`#endif`

			`#ifndef sar64`
			`#include <type_traits>`
			`constexpr int64_t builtintShr64(int64_t value, int shift) noexcept {`
			`return value >> shift;`
			`}`

			`struct UsesArithmeticShift : std::integral_constant<bool, builtintShr64(-1LL, 1) == -1LL> {`
			`};`

			`static inline int64_t __sar64(int64_t a, int b) {`
			`return UsesArithmeticShift::value ? builtintShr64(a, b) : (a < 0 ? ~(~a >> b) : a >> b);`
			`}`
			`#define sar64 __sar64`
			`#endif`

			`#ifndef umulhi64`
			`#define LO(x) ((x)&0xffffffff)`
			`#define HI(x) ((x)>>32)`
			`static inline uint64_t __umulhi64(uint64_t a, uint64_t b) {`
			`uint64_t ah = HI(a), al = LO(a);`
			`uint64_t bh = HI(b), bl = LO(b);`
			`uint64_t x00 = al * bl;`
			`uint64_t x01 = al * bh;`
			`uint64_t x10 = ah * bl;`
			`uint64_t x11 = ah * bh;`
			`uint64_t m1 = LO(x10) + LO(x01) + HI(x00);`
			`uint64_t m2 = HI(x10) + HI(x01) + LO(x11) + HI(m1);`
			`uint64_t m3 = HI(x11) + HI(m2);`

			`return (m3 << 32) + LO(m2);`
			`}`
			`#define umulhi64 __umulhi64`
			`#endif`

			`#ifndef imulhi64`
			`static inline int64_t __imulhi64(int64_t a, int64_t b) {`
			`int64_t hi = umulhi64(a, b);`
			`if (a < 0LL) hi -= b;`
			`if (b < 0LL) hi -= a;`
			`return hi;`
			`}`
			`#define imulhi64 __imulhi64`
			`#endif`

			`static double FlushDenormal(double x) {`
			`if (std::fpclassify(x) == FP_SUBNORMAL) {`
			`return 0;`
			`}`
			`return x;`
			`}`

			`#ifdef FTZ`
			`#undef FTZ`
			`#define FTZ(x) FlushDenormal(x)`
			`#else`
			`#define FTZ(x) x`
			`#endif`

			`namespace RandomX {`

			`extern "C" {`

			`void ADD_64(convertible_t& a, convertible_t& b, convertible_t& c) {`
			`c.u64 = a.u64 + b.u64;`
			`}`

			`void ADD_32(convertible_t& a, convertible_t& b, convertible_t& c) {`
			`c.u64 = a.u32 + b.u32;`
			`}`

			`void SUB_64(convertible_t& a, convertible_t& b, convertible_t& c) {`
			`c.u64 = a.u64 - b.u64;`
			`}`

			`void SUB_32(convertible_t& a, convertible_t& b, convertible_t& c) {`
			`c.u64 = a.u32 - b.u32;`
			`}`

			`void MUL_64(convertible_t& a, convertible_t& b, convertible_t& c) {`
			`c.u64 = a.u64 * b.u64;`
			`}`

			`void MULH_64(convertible_t& a, convertible_t& b, convertible_t& c) {`
			`c.u64 = umulhi64(a.u64, b.u64);`
			`}`

			`void MUL_32(convertible_t& a, convertible_t& b, convertible_t& c) {`
			`c.u64 = (uint64_t)a.u32 * b.u32;`
			`}`

			`void IMUL_32(convertible_t& a, convertible_t& b, convertible_t& c) {`
			`c.i64 = (int64_t)a.i32 * b.i32;`
			`}`

			`void IMULH_64(convertible_t& a, convertible_t& b, convertible_t& c) {`
			`c.i64 = imulhi64(a.i64, b.i64);`
			`}`

			`void DIV_64(convertible_t& a, convertible_t& b, convertible_t& c) {`
			`c.u64 = a.u64 / (b.u32 != 0 ? b.u32 : 1U);`
			`}`

			`void IDIV_64(convertible_t& a, convertible_t& b, convertible_t& c) {`
			`if (a.i64 == INT64_MIN && b.i32 == -1)`
			`c.i64 = INT64_MIN;`
			`else`
			`c.i64 = a.i64 / (b.i32 != 0 ? b.i32 : 1);`
			`}`

			`void AND_64(convertible_t& a, convertible_t& b, convertible_t& c) {`
			`c.u64 = a.u64 & b.u64;`
			`}`

			`void AND_32(convertible_t& a, convertible_t& b, convertible_t& c) {`
			`c.u64 = a.u32 & b.u32;`
			`}`

			`void OR_64(convertible_t& a, convertible_t& b, convertible_t& c) {`
			`c.u64 = a.u64 \| b.u64;`
			`}`

			`void OR_32(convertible_t& a, convertible_t& b, convertible_t& c) {`
			`c.u64 = a.u32 \| b.u32;`
			`}`

			`void XOR_64(convertible_t& a, convertible_t& b, convertible_t& c) {`
			`c.u64 = a.u64 ^ b.u64;`
			`}`

			`void XOR_32(convertible_t& a, convertible_t& b, convertible_t& c) {`
			`c.u64 = a.u32 ^ b.u32;`
			`}`

			`void SHL_64(convertible_t& a, convertible_t& b, convertible_t& c) {`
			`c.u64 = a.u64 << (b.u64 & 63);`
			`}`

			`void SHR_64(convertible_t& a, convertible_t& b, convertible_t& c) {`
			`c.u64 = a.u64 >> (b.u64 & 63);`
			`}`

			`void SAR_64(convertible_t& a, convertible_t& b, convertible_t& c) {`
			`c.u64 = sar64(a.i64, b.u64 & 63);`
			`}`

			`void ROL_64(convertible_t& a, convertible_t& b, convertible_t& c) {`
			`c.u64 = rol64(a.u64, (b.u64 & 63));`
			`}`

			`void ROR_64(convertible_t& a, convertible_t& b, convertible_t& c) {`
			`c.u64 = ror64(a.u64, (b.u64 & 63));`
			`}`

			`void FPINIT() {`
			`setRoundMode(FE_TONEAREST);`
			`}`

			`void FPADD(convertible_t& a, double b, convertible_t& c) {`
			`c.f64 = FTZ(convertToDouble(a.i64) + b);`
			`}`

			`void FPSUB(convertible_t& a, double b, convertible_t& c) {`
			`c.f64 = FTZ(convertToDouble(a.i64) - b);`
			`}`

			`void FPMUL(convertible_t& a, double b, convertible_t& c) {`
			`c.f64 = FTZ(convertToDoubleNonZero(a.i64) * b);`
			`}`

			`void FPDIV(convertible_t& a, double b, convertible_t& c) {`
			`c.f64 = FTZ(convertToDoubleNonZero(a.i64) / b);`
			`}`

			`void FPSQRT(convertible_t& a, convertible_t& b, convertible_t& c) {`
			`#ifdef __SSE2__`
			`double d = convertToDoubleNonNegative(a.i64);`
			`c.f64 = _mm_cvtsd_f64(_mm_sqrt_sd(_mm_setzero_pd(), _mm_load_pd(&d)));`
			`#else`
			`c.f64 = FTZ(sqrt(convertToDoubleNonNegative(a.i64)));`
			`#endif`

			`}`

			`void FPROUND(convertible_t& a, convertible_t& b, convertible_t& c) {`
			`c.f64 = convertToDouble(a.i64);`
			`switch (a.u64 & 3) {`
			`case RoundDown:`
			`#ifdef DEBUG`
			`std::cout << "Round FE_DOWNWARD (" << FE_DOWNWARD << ") = " <<`
			`#endif`
			`setRoundMode(FE_DOWNWARD);`
			`#ifdef DEBUG`
			`std::cout << std::endl;`
			`#endif`
			`break;`
			`case RoundUp:`
			`#ifdef DEBUG`
			`std::cout << "Round FE_UPWARD (" << FE_UPWARD << ") = " <<`
			`#endif`
			`setRoundMode(FE_UPWARD);`
			`#ifdef DEBUG`
			`std::cout << std::endl;`
			`#endif`
			`break;`
			`case RoundToZero:`
			`#ifdef DEBUG`
			`std::cout << "Round FE_TOWARDZERO (" << FE_TOWARDZERO << ") = " <<`
			`#endif`
			`setRoundMode(FE_TOWARDZERO);`
			`#ifdef DEBUG`
			`std::cout << std::endl;`
			`#endif`
			`break;`
			`default:`
			`#ifdef DEBUG`
			`std::cout << "Round FE_TONEAREST (" << FE_TONEAREST << ") = " <<`
			`#endif`
			`setRoundMode(FE_TONEAREST);`
			`#ifdef DEBUG`
			`std::cout << std::endl;`
			`#endif`
			`break;`
			`}`
			`}`
			`}`
			`}`