mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2024-08-15 00:23:14 +00:00
Removed some legacy code
This commit is contained in:
parent
9e5eac8645
commit
1037cc0139
16 changed files with 0 additions and 27702 deletions
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
@ -1,16 +0,0 @@
|
|||
# Branch-prediction micro-benchmarks. Each program is built with -O0 so the
# compiler does not optimise the measured branches away.

# `all` and `clean` do not produce files of those names.
.PHONY: all clean

all: branch_always branch_predictably branch_randomly branch_mixed

branch_always: branch_always.c
	gcc -O0 branch_always.c -o branch_always

branch_predictably: branch_predictably.c
	gcc -O0 branch_predictably.c -o branch_predictably

branch_randomly: branch_randomly.c
	gcc -O0 branch_randomly.c -o branch_randomly

branch_mixed: branch_mixed.c
	gcc -O0 branch_mixed.c -o branch_mixed

# -f: do not fail when some (or all) of the binaries were never built.
clean:
	rm -f branch_always branch_predictably branch_randomly branch_mixed
|
|
@ -1,333 +0,0 @@
|
|||
/*
|
||||
* Prof
|
||||
* ====
|
||||
*
|
||||
* Self-contained C/C++ profiler library for Linux.
|
||||
*
|
||||
* Prof offers a quick way to measure performance events (CPU clock cycles,
|
||||
* cache misses, branch mispredictions, etc.) of C/C++ code snippets. Prof is
|
||||
* just a wrapper around the `perf_event_open` system call, its main goal is to
|
||||
* be easy to setup and painless to use for targeted optimizations, namely, when
|
||||
* the hot spot has already been identified. In no way is Prof a replacement for
|
||||
* a fully-fledged profiler like perf, gprof, callgrind, etc.
|
||||
*
|
||||
* Please be aware that Prof uses `__attribute__((constructor))` to be as
|
||||
* straightforward to set up as possible, so it cannot be included more than
|
||||
* once.
|
||||
*
|
||||
* Examples
|
||||
* --------
|
||||
*
|
||||
* ### Minimal
|
||||
*
|
||||
* The following snippet prints the rough number of CPU clock cycles spent in
|
||||
* executing the code between the two Prof calls:
|
||||
*
|
||||
* ```c
|
||||
* #include "prof.h"
|
||||
*
|
||||
* int main()
|
||||
* {
|
||||
* PROF_START();
|
||||
* // slow code goes here...
|
||||
* PROF_STDOUT();
|
||||
* }
|
||||
* ```
|
||||
*
|
||||
* ### Custom options
|
||||
*
|
||||
* The following snippet instead counts both read and write faults of the level
|
||||
* 1 data cache that occur in the userland code between the two Prof calls:
|
||||
*
|
||||
* ```c
|
||||
* #include <stdio.h>
|
||||
*
|
||||
* #define PROF_USER_EVENTS_ONLY
|
||||
* #define PROF_EVENT_LIST \
|
||||
* PROF_EVENT_CACHE(L1D, READ, MISS) \
|
||||
* PROF_EVENT_CACHE(L1D, WRITE, MISS)
|
||||
* #include "prof.h"
|
||||
*
|
||||
* int main()
|
||||
* {
|
||||
* uint64_t faults[2] = { 0 };
|
||||
*
|
||||
* PROF_START();
|
||||
* // slow code goes here...
|
||||
* PROF_DO(faults[index] += counter);
|
||||
*
|
||||
* // fast or uninteresting code goes here...
|
||||
*
|
||||
* PROF_START();
|
||||
* // slow code goes here...
|
||||
* PROF_DO(faults[index] += counter);
|
||||
*
|
||||
* printf("Total L1 faults: R = %lu; W = %lu\n", faults[0], faults[1]);
|
||||
* }
|
||||
* ```
|
||||
*
|
||||
* Installation
|
||||
* ------------
|
||||
*
|
||||
* Just include `prof.h`. Here is a quick way to fetch the latest version:
|
||||
*
|
||||
* wget -q https://raw.githubusercontent.com/cyrus-and/prof/master/prof.h
|
||||
*/
|
||||
#ifndef PROF_H
|
||||
#define PROF_H
|
||||
|
||||
#include <errno.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <unistd.h>
|
||||
|
||||
/*
|
||||
* API
|
||||
* ---
|
||||
*/
|
||||
|
||||
/*
 * (Re)start a measurement: enable the event group, then zero its counters.
 *
 * The monitored events are chosen at include time by defining
 * `PROF_EVENT_LIST` as a sequence of `PROF_EVENT_*` invocations before
 * including this file; when it is left undefined, CPU clock cycles are
 * counted.
 *
 * Defining `PROF_USER_EVENTS_ONLY` before including this file excludes kernel
 * and hypervisor events from the count.
 */
#define PROF_START() \
    do { \
        PROF_IOCTL_(ENABLE); \
        PROF_IOCTL_(RESET); \
    } while (0)
|
||||
|
||||
/*
 * Describe one event to monitor. `type` and `config` carry the meaning given
 * to them by the documentation of the `perf_event_open` system call.
 *
 * The expansion is two comma-terminated expressions (a 32 bit type followed by
 * a 64 bit config), so consecutive invocations can be concatenated into an
 * argument or initializer list.
 */
#define PROF_EVENT(type, config) \
    (uint32_t)(type), (uint64_t)(config),
|
||||
|
||||
/*
 * Hardware-event shorthand for `PROF_EVENT`: pass `config` without its
 * `PERF_COUNT_HW_` prefix, which is pasted back on here.
 */
#define PROF_EVENT_HW(config) \
    PROF_EVENT(PERF_TYPE_HARDWARE, PERF_COUNT_HW_ ## config)
|
||||
|
||||
/*
 * Software-event shorthand for `PROF_EVENT`: pass `config` without its
 * `PERF_COUNT_SW_` prefix, which is pasted back on here.
 */
#define PROF_EVENT_SW(config) \
    PROF_EVENT(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ ## config)
|
||||
|
||||
/*
 * Cache-event shorthand for `PROF_EVENT`. The arguments are given without
 * their prefixes: `PERF_COUNT_HW_CACHE_` for `cache`,
 * `PERF_COUNT_HW_CACHE_OP_` for `op` and `PERF_COUNT_HW_CACHE_RESULT_` for
 * `result`. Their meaning is defined in the documentation of the
 * `perf_event_open` system call.
 *
 * The config value is built as: cache id | (op << 8) | (result << 16).
 */
#define PROF_EVENT_CACHE(cache, op, result) \
    PROF_EVENT(PERF_TYPE_HW_CACHE, \
               (PERF_COUNT_HW_CACHE_ ## cache) | \
               (PERF_COUNT_HW_CACHE_OP_ ## op << 8) | \
               (PERF_COUNT_HW_CACHE_RESULT_ ## result << 16))
|
||||
|
||||
/*
 * Stop counting: disable the event group and read the counters into the
 * internal buffer. Afterwards the values are available through
 * `PROF_COUNTERS`.
 */
#define PROF_STOP() \
    do { \
        PROF_IOCTL_(DISABLE); \
        PROF_READ_COUNTERS_(prof_event_buf_); \
    } while (0)
|
||||
|
||||
/*
 * The counter array, as a pointer to 64 bit unsigned integers. Counters
 * appear in the same order as the events in `PROF_EVENT_LIST`.
 *
 * The first slot of the internal read buffer is skipped; the counters start
 * at the second one.
 */
#define PROF_COUNTERS \
    (prof_event_buf_ + 1)
|
||||
|
||||
/*
 * Stop counting and run `block` once per event. Inside `block` two names are
 * available: `index`, the position of the event in the `PROF_COUNTERS` array,
 * and `counter`, the value read for that event. Both are 64 bit unsigned
 * integers.
 */
#define PROF_DO(block) \
    do { \
        uint64_t i_; \
        PROF_STOP(); \
        for (i_ = 0; i_ < prof_event_cnt_; i_++) { \
            uint64_t index = i_; \
            uint64_t counter = PROF_COUNTERS[i_]; \
            /* Silence unused-variable warnings when `block` ignores them. */ \
            (void)index; \
            (void)counter; \
            block; \
        } \
    } while (0)
|
||||
|
||||
/*
 * Variant of `PROF_DO` that takes the name of a *callable* object (e.g. a
 * function) instead of a code block; for each event it is called as
 * `callback(index, counter)`.
 */
#define PROF_CALL(callback) \
    PROF_DO(callback(index, counter))
|
||||
|
||||
/*
 * Stop counting the events and write to `file` (a stdio.h `FILE *`) one line
 * per event in `PROF_EVENT_LIST`. Each line contains `index` and `counter`
 * (as defined by `PROF_DO`) separated by a tabulation character. If there is
 * only one event then `index` is omitted.
 *
 * The values are 64 bit; they are cast to `unsigned long long` and printed
 * with `%llu` so the format matches the argument type even where `long` is
 * 32 bit.
 */
#define PROF_FILE(file) \
    PROF_DO(if (prof_event_cnt_ > 1) { \
                fprintf((file), "%llu\t%llu\n", \
                        (unsigned long long)index, \
                        (unsigned long long)counter); \
            } else { \
                fprintf((file), "%llu\n", (unsigned long long)counter); \
            } \
           )
|
||||
|
||||
/*
 * Same as `PROF_FILE` with `stdout` as the destination.
 */
#define PROF_STDOUT() \
    PROF_FILE(stdout)
|
||||
|
||||
/*
 * Same as `PROF_FILE` with `stderr` as the destination.
 */
#define PROF_STDERR() \
    PROF_FILE(stderr)
|
||||
|
||||
/* DEFAULTS ----------------------------------------------------------------- */
|
||||
|
||||
/*
 * Default event list, used only when the includer did not define one.
 *
 * NOTE(review): the test below only succeeds if PERF_COUNT_HW_REF_CPU_CYCLES
 * is a preprocessor macro. If <linux/perf_event.h> declares it as an
 * enumerator instead, the CPU_CYCLES fallback is always selected -- confirm
 * against the installed kernel headers.
 */
#ifndef PROF_EVENT_LIST
#  ifdef PERF_COUNT_HW_REF_CPU_CYCLES /* since Linux 3.3 */
#    define PROF_EVENT_LIST PROF_EVENT_HW(REF_CPU_CYCLES)
#  else
#    define PROF_EVENT_LIST PROF_EVENT_HW(CPU_CYCLES)
#  endif
#endif
|
||||
|
||||
/* UTILITY ------------------------------------------------------------------ */
|
||||
|
||||
/*
 * Abort the program with a diagnostic on stderr when `x` is false.
 *
 * The message has the form "# file:line: PROF error", followed by the
 * `strerror` text in parentheses when `errno` is non-zero, and a newline.
 * The whole message, including the newline, goes to stderr.
 */
#define PROF_ASSERT_(x) \
    do { \
        if (!(x)) { \
            /* Snapshot errno first: the fprintf below may overwrite it. */ \
            const int prof_errno_ = errno; \
            fprintf(stderr, "# %s:%d: PROF error", __FILE__, __LINE__); \
            if (prof_errno_) { \
                fprintf(stderr, " (%s)", strerror(prof_errno_)); \
            } \
            fprintf(stderr, "\n"); \
            abort(); \
        } \
    } while (0)
|
||||
|
||||
/*
 * Issue the `PERF_EVENT_IOC_<mode>` ioctl on the leader descriptor with
 * `PERF_IOC_FLAG_GROUP`, aborting through `PROF_ASSERT_` if the call
 * returns -1.
 */
#define PROF_IOCTL_(mode) \
    do { \
        PROF_ASSERT_(ioctl(prof_fd_, PERF_EVENT_IOC_ ## mode, \
                           PERF_IOC_FLAG_GROUP) != -1); \
    } while (0)
|
||||
|
||||
/*
 * Read `prof_event_cnt_ + 1` 64 bit words from the leader descriptor into
 * `buffer`, aborting through `PROF_ASSERT_` unless exactly that many bytes
 * are returned by `read`.
 */
#define PROF_READ_COUNTERS_(buffer) \
    do { \
        const ssize_t prof_nbytes_ = \
            sizeof(uint64_t) * (prof_event_cnt_ + 1); \
        PROF_ASSERT_(read(prof_fd_, buffer, prof_nbytes_) == prof_nbytes_); \
    } while (0)
|
||||
|
||||
/* SETUP -------------------------------------------------------------------- */
|
||||
|
||||
/* Descriptor of the first event opened by prof_init_(); it is passed as the
 * group descriptor when opening the following events and is the target of
 * every ioctl/read issued by the macros above. */
static int prof_fd_;

/* Number of events opened by prof_init_(). */
static uint64_t prof_event_cnt_;

/* Read buffer of prof_event_cnt_ + 1 words, allocated by prof_init_() and
 * released by prof_fini(). */
static uint64_t *prof_event_buf_;
|
||||
|
||||
static void prof_init_(uint64_t dummy, ...) {
|
||||
uint32_t type;
|
||||
va_list ap;
|
||||
|
||||
prof_fd_ = -1;
|
||||
prof_event_cnt_ = 0;
|
||||
va_start(ap, dummy);
|
||||
while (type = va_arg(ap, uint32_t), type != (uint32_t)-1) {
|
||||
struct perf_event_attr pe;
|
||||
uint64_t config;
|
||||
int fd;
|
||||
|
||||
config = va_arg(ap, uint64_t);
|
||||
|
||||
memset(&pe, 0, sizeof(struct perf_event_attr));
|
||||
pe.size = sizeof(struct perf_event_attr);
|
||||
pe.read_format = PERF_FORMAT_GROUP;
|
||||
pe.type = type;
|
||||
pe.config = config;
|
||||
#ifdef PROF_USER_EVENTS_ONLY
|
||||
pe.exclude_kernel = 1;
|
||||
pe.exclude_hv = 1;
|
||||
#endif
|
||||
|
||||
fd = syscall(__NR_perf_event_open, &pe, 0, -1, prof_fd_, 0);
|
||||
PROF_ASSERT_(fd != -1);
|
||||
if (prof_fd_ == -1) {
|
||||
prof_fd_ = fd;
|
||||
}
|
||||
|
||||
prof_event_cnt_++;
|
||||
}
|
||||
va_end(ap);
|
||||
|
||||
prof_event_buf_ = (uint64_t *)malloc((prof_event_cnt_ + 1) *
|
||||
sizeof(uint64_t));
|
||||
}
|
||||
|
||||
void __attribute__((constructor)) prof_init()
|
||||
{
|
||||
prof_init_(0, PROF_EVENT_LIST /*,*/ (uint32_t)-1);
|
||||
}
|
||||
|
||||
void __attribute__((destructor)) prof_fini()
|
||||
{
|
||||
PROF_ASSERT_(close(prof_fd_) != -1);
|
||||
free(prof_event_buf_);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* License
|
||||
* -------
|
||||
*
|
||||
* Copyright (c) 2017 Andrea Cardaci <cyrus.and@gmail.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
|
@ -1,314 +0,0 @@
|
|||
//RandomX performance test for x86
|
||||
//https://github.com/tevador/RandomX
|
||||
//License: GPL v3
|
||||
|
||||
#include <cstdint>
|
||||
#include <random>
|
||||
#include <iostream>
|
||||
#include <chrono>
|
||||
#include <sstream>
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
|
||||
#if defined(_WIN32) || defined(__MINGW32__) || defined(__CYGWIN__) || defined(__CYGWIN32__)
|
||||
#define WINDOWS
|
||||
#include <io.h>
|
||||
#include <fcntl.h>
|
||||
#endif
|
||||
|
||||
#if defined(__GNUC__) && defined(__x86_64__)
|
||||
#include <x86intrin.h>
|
||||
typedef unsigned __int128 uint128_t;
|
||||
typedef __int128 int128_t;
|
||||
static inline uint64_t umulhi64(uint64_t a, uint64_t b) {
|
||||
return ((uint128_t)a * b) >> 64;
|
||||
}
|
||||
static inline uint64_t imulhi64(int64_t a, int64_t b) {
|
||||
return ((int128_t)a * b) >> 64;
|
||||
}
|
||||
#define ror64 __rorq
|
||||
#define rol64 __rolq
|
||||
#define forceinline inline
|
||||
#ifdef __clang__
|
||||
// Rotate `a` left by `b` bit positions (taken modulo 64).
// Both shift counts are masked to 0..63: without the mask a rotation by 0
// would shift right by 64, which is undefined behaviour for a 64-bit operand.
static inline uint64_t __rolq(uint64_t a, int b) {
	const int left = b & 63;
	const int right = (64 - left) & 63;
	return (a << left) | (a >> right);
}
|
||||
// Rotate `a` right by `b` bit positions (taken modulo 64).
// Both shift counts are masked to 0..63: without the mask a rotation by 0
// would shift left by 64, which is undefined behaviour for a 64-bit operand.
static inline uint64_t __rorq(uint64_t a, int b) {
	const int right = b & 63;
	const int left = (64 - right) & 63;
	return (a >> right) | (a << left);
}
|
||||
#endif
|
||||
#elif defined(_MSC_VER) && defined(_M_X64)
|
||||
#include <intrin.h>
|
||||
#include <stdlib.h>
|
||||
#define umulhi64 __umulh
|
||||
// Upper 64 bits of the signed product a * b, obtained through the high-part
// output parameter of _mul128.
static inline uint64_t imulhi64(int64_t a, int64_t b) {
	int64_t high;
	(void)_mul128(a, b, &high);
	return high;
}
|
||||
#define ror64 _rotr64
|
||||
#define rol64 _rotl64
|
||||
#define forceinline __forceinline
|
||||
#else
|
||||
#error "Unsupported platform"
|
||||
#endif
|
||||
|
||||
// One 64-bit scratchpad/register cell that every operation below reads or
// writes through whichever view matches its operand type.
typedef union {
	double f64;    // floating-point view
	int64_t i64;   // signed 64-bit view
	uint64_t u64;  // unsigned 64-bit view
	int32_t i32;   // signed 32-bit view
	uint32_t u32;  // unsigned 32-bit view
} convertible_t;
|
||||
|
||||
// NOOP: copies the 64-bit contents of `a` into `c`; `b` is ignored.
forceinline void NOOP(convertible_t& a, convertible_t& b, convertible_t& c) {
	const uint64_t passthrough = a.u64;
	c.u64 = passthrough;
}
|
||||
|
||||
// FNOOP: converts the signed 64-bit integer in `a` to double and stores it in
// `c`; `b` is ignored.
forceinline void FNOOP(convertible_t& a, convertible_t& b, convertible_t& c) {
	const double converted = (double)a.i64;
	c.f64 = converted;
}
|
||||
|
||||
// ADD_64: c = a + b on the unsigned 64-bit views.
forceinline void ADD_64(convertible_t& a, convertible_t& b, convertible_t& c) {
	const uint64_t lhs = a.u64;
	const uint64_t rhs = b.u64;
	c.u64 = lhs + rhs;
}
|
||||
|
||||
// ADD_32: adds the unsigned 32-bit views; the 32-bit sum is stored
// zero-extended in c.u64.
forceinline void ADD_32(convertible_t& a, convertible_t& b, convertible_t& c) {
	const uint32_t sum = a.u32 + b.u32;
	c.u64 = sum;
}
|
||||
|
||||
// SUB_64: c = a - b on the unsigned 64-bit views.
forceinline void SUB_64(convertible_t& a, convertible_t& b, convertible_t& c) {
	const uint64_t lhs = a.u64;
	const uint64_t rhs = b.u64;
	c.u64 = lhs - rhs;
}
|
||||
|
||||
// SUB_32: subtracts the unsigned 32-bit views; the 32-bit difference is
// stored zero-extended in c.u64.
forceinline void SUB_32(convertible_t& a, convertible_t& b, convertible_t& c) {
	const uint32_t difference = a.u32 - b.u32;
	c.u64 = difference;
}
|
||||
|
||||
// MUL_64: low 64 bits of the product of the unsigned 64-bit views.
forceinline void MUL_64(convertible_t& a, convertible_t& b, convertible_t& c) {
	const uint64_t lhs = a.u64;
	const uint64_t rhs = b.u64;
	c.u64 = lhs * rhs;
}
|
||||
|
||||
// MULH_64: stores the result of umulhi64 on the unsigned 64-bit views.
forceinline void MULH_64(convertible_t& a, convertible_t& b, convertible_t& c) {
	const uint64_t high = umulhi64(a.u64, b.u64);
	c.u64 = high;
}
|
||||
|
||||
// MUL_32: full 64-bit product of the two unsigned 32-bit views.
forceinline void MUL_32(convertible_t& a, convertible_t& b, convertible_t& c) {
	const uint64_t lhs = a.u32;
	c.u64 = lhs * b.u32;
}
|
||||
|
||||
// IMUL_32: full 64-bit product of the two signed 32-bit views.
forceinline void IMUL_32(convertible_t& a, convertible_t& b, convertible_t& c) {
	const int64_t lhs = a.i32;
	c.i64 = lhs * b.i32;
}
|
||||
|
||||
// IMULH_64: stores the result of imulhi64 on the signed 64-bit views.
forceinline void IMULH_64(convertible_t& a, convertible_t& b, convertible_t& c) {
	const int64_t lhs = a.i64;
	const int64_t rhs = b.i64;
	c.i64 = imulhi64(lhs, rhs);
}
|
||||
|
||||
// DIV_64: unsigned 64-bit dividend divided by the unsigned 32-bit view of
// `b`; a zero divisor is replaced by 1.
forceinline void DIV_64(convertible_t& a, convertible_t& b, convertible_t& c) {
	uint32_t divisor = b.u32;
	if (divisor == 0) {
		divisor = 1U;
	}
	c.u64 = a.u64 / divisor;
}
|
||||
|
||||
// IDIV_64: signed 64-bit dividend divided by the signed 32-bit view of `b`;
// a zero divisor is replaced by 1.
//
// INT64_MIN / -1 is not representable: in C++ it is undefined behaviour and
// on x86 the idiv instruction raises a divide error. That single case is
// answered with the dividend itself, i.e. the two's-complement wrap-around
// of the true quotient.
forceinline void IDIV_64(convertible_t& a, convertible_t& b, convertible_t& c) {
	const int64_t dividend = a.i64;
	const int32_t divisor = b.i32 != 0 ? b.i32 : 1;
	if (dividend == INT64_MIN && divisor == -1) {
		c.i64 = dividend;
	}
	else {
		c.i64 = dividend / divisor;
	}
}
|
||||
|
||||
// AND_64: bitwise AND of the unsigned 64-bit views.
forceinline void AND_64(convertible_t& a, convertible_t& b, convertible_t& c) {
	const uint64_t lhs = a.u64;
	const uint64_t rhs = b.u64;
	c.u64 = lhs & rhs;
}
|
||||
|
||||
// AND_32: bitwise AND of the unsigned 32-bit views, stored zero-extended in
// c.u64.
forceinline void AND_32(convertible_t& a, convertible_t& b, convertible_t& c) {
	const uint32_t masked = a.u32 & b.u32;
	c.u64 = masked;
}
|
||||
|
||||
// OR_64: bitwise OR of the unsigned 64-bit views.
forceinline void OR_64(convertible_t& a, convertible_t& b, convertible_t& c) {
	const uint64_t lhs = a.u64;
	const uint64_t rhs = b.u64;
	c.u64 = lhs | rhs;
}
|
||||
|
||||
// OR_32: bitwise OR of the unsigned 32-bit views, stored zero-extended in
// c.u64.
forceinline void OR_32(convertible_t& a, convertible_t& b, convertible_t& c) {
	const uint32_t merged = a.u32 | b.u32;
	c.u64 = merged;
}
|
||||
|
||||
// XOR_64: bitwise XOR of the unsigned 64-bit views.
forceinline void XOR_64(convertible_t& a, convertible_t& b, convertible_t& c) {
	const uint64_t lhs = a.u64;
	const uint64_t rhs = b.u64;
	c.u64 = lhs ^ rhs;
}
|
||||
|
||||
// XOR_32: bitwise XOR of the unsigned 32-bit views, stored zero-extended in
// c.u64.
forceinline void XOR_32(convertible_t& a, convertible_t& b, convertible_t& c) {
	const uint32_t toggled = a.u32 ^ b.u32;
	c.u64 = toggled;
}
|
||||
|
||||
// SHL_64: logical left shift of a.u64 by the low 6 bits of b.u64.
forceinline void SHL_64(convertible_t& a, convertible_t& b, convertible_t& c) {
	const uint64_t count = b.u64 & 63;
	c.u64 = a.u64 << count;
}
|
||||
|
||||
// SHR_64: logical right shift of a.u64 by the low 6 bits of b.u64.
forceinline void SHR_64(convertible_t& a, convertible_t& b, convertible_t& c) {
	const uint64_t count = b.u64 & 63;
	c.u64 = a.u64 >> count;
}
|
||||
|
||||
// SAR_64: right shift of the signed view a.i64 by the low 6 bits of b.u64.
forceinline void SAR_64(convertible_t& a, convertible_t& b, convertible_t& c) {
	const uint64_t count = b.u64 & 63;
	c.i64 = a.i64 >> count;
}
|
||||
|
||||
// ROL_64: rotates a.u64 left by the low 6 bits of b.u64 using rol64.
forceinline void ROL_64(convertible_t& a, convertible_t& b, convertible_t& c) {
	const uint64_t rotated = rol64(a.u64, (b.u64 & 63));
	c.u64 = rotated;
}
|
||||
|
||||
// ROR_64: rotates a.u64 right by the low 6 bits of b.u64 using ror64.
forceinline void ROR_64(convertible_t& a, convertible_t& b, convertible_t& c) {
	const uint64_t rotated = ror64(a.u64, (b.u64 & 63));
	c.u64 = rotated;
}
|
||||
|
||||
// FADD: converts both signed 64-bit integers to double and stores their sum.
forceinline void FADD(convertible_t& a, convertible_t& b, convertible_t& c) {
	const double lhs = (double)a.i64;
	const double rhs = (double)b.i64;
	c.f64 = lhs + rhs;
}
|
||||
|
||||
// FSUB: converts both signed 64-bit integers to double and stores a - b.
forceinline void FSUB(convertible_t& a, convertible_t& b, convertible_t& c) {
	const double lhs = (double)a.i64;
	const double rhs = (double)b.i64;
	c.f64 = lhs - rhs;
}
|
||||
|
||||
// FMUL: converts both signed 64-bit integers to double and stores their
// product.
forceinline void FMUL(convertible_t& a, convertible_t& b, convertible_t& c) {
	const double lhs = (double)a.i64;
	const double rhs = (double)b.i64;
	c.f64 = lhs * rhs;
}
|
||||
|
||||
// FDIV: converts both signed 64-bit integers to double and stores a / b.
forceinline void FDIV(convertible_t& a, convertible_t& b, convertible_t& c) {
	const double numerator = (double)a.i64;
	const double denominator = (double)b.i64;
	c.f64 = numerator / denominator;
}
|
||||
|
||||
// FSQRT: square root of the absolute value of a.i64 (converted to double),
// computed with the scalar SSE2 square-root instruction; `b` is ignored.
forceinline void FSQRT(convertible_t& a, convertible_t& b, convertible_t& c) {
	const double magnitude = fabs((double)a.i64);
	// _mm_set_sd puts exactly one double into the low lane. A packed,
	// aligned 16-byte load (_mm_load_pd) from the address of this 8-byte
	// local would read past the variable and needs 16-byte alignment the
	// local does not guarantee.
	const __m128d operand = _mm_set_sd(magnitude);
	c.f64 = _mm_cvtsd_f64(_mm_sqrt_sd(_mm_setzero_pd(), operand));
}
|
||||
|
||||
static uint32_t mxcsr;
|
||||
|
||||
// FROUND: stores a.i64 converted to double in `c`, then loads MXCSR with the
// base value `mxcsr` plus rounding-control bits taken from a.u64 (shifted
// left by 13 and masked with _MM_ROUND_MASK); `b` is ignored.
forceinline void FROUND(convertible_t& a, convertible_t& b, convertible_t& c) {
	c.f64 = (double)a.i64;
	const uint32_t roundingBits = (uint32_t)(a.u64 << 13) & _MM_ROUND_MASK;
	_mm_setcsr(mxcsr | roundingBits);
}
|
||||
|
||||
// Computes the base MXCSR value used by FROUND -- the current register with
// flush-to-zero switched on and the rounding-control bits cleared -- keeps it
// in `mxcsr` and loads it into the register.
inline void init_FPU() {
	const uint32_t current = _mm_getcsr();
	mxcsr = (current | _MM_FLUSH_ZERO_ON) & ~_MM_ROUND_MASK;
	_mm_setcsr(mxcsr);
}
|
||||
|
||||
// Parses `buffer` into `out` using stream extraction.
// Returns true on success; on failure prints "Invalid value '<buffer>'" to
// stdout and returns false.
template<typename T>
bool tryParse(char* buffer, T& out) {
	std::istringstream parser(buffer);
	parser >> out;
	if (parser.fail()) {
		std::cout << "Invalid value '" << buffer << "'" << std::endl;
		return false;
	}
	return true;
}
|
||||
|
||||
//#define ITERATIONS 10000000
|
||||
#define SCRATCHPAD_SIZE (16 * 1024)
|
||||
#define SCRATCHPAD_LENGTH (SCRATCHPAD_SIZE / sizeof(convertible_t))
|
||||
#define SCRATCHPAD_MASK (SCRATCHPAD_SIZE / sizeof(convertible_t) - 1)
|
||||
#define SCRATCHPAD_16K(x) scratchpad[(x) & SCRATCHPAD_MASK]
|
||||
|
||||
// Times `iterations` rounds of FUNC and prints one markdown table row:
// operation name, elapsed seconds and a checksum of the scratchpad.
//
// Expects these names in the enclosing scope: `scratchpad`, `input`,
// `iterations`, `tstart`, `tend` and the registers `r0`..`r7`.
// Each round applies FUNC to eight consecutive cells: the first operand is
// read eight cells ahead of the destination, the second operand is a
// register, and the destination is then XORed with the mirrored register
// (r7 for lane 0 down to r0 for lane 7).
// TYPE is accepted but not used by the expansion.
#define BENCHMARK(FUNC,TYPE) do { \
		memcpy((void*)scratchpad, input, SCRATCHPAD_SIZE); \
		tstart = std::chrono::high_resolution_clock::now(); \
		for (uint64_t i = 0; i < iterations; ++i) { \
			FUNC(SCRATCHPAD_16K(i + 8 + 0), r0, SCRATCHPAD_16K(i + 0)); \
			SCRATCHPAD_16K(i + 0).u64 ^= r7.u64;\
			FUNC(SCRATCHPAD_16K(i + 8 + 1), r1, SCRATCHPAD_16K(i + 1)); \
			SCRATCHPAD_16K(i + 1).u64 ^= r6.u64;\
			FUNC(SCRATCHPAD_16K(i + 8 + 2), r2, SCRATCHPAD_16K(i + 2)); \
			SCRATCHPAD_16K(i + 2).u64 ^= r5.u64;\
			FUNC(SCRATCHPAD_16K(i + 8 + 3), r3, SCRATCHPAD_16K(i + 3)); \
			SCRATCHPAD_16K(i + 3).u64 ^= r4.u64;\
			FUNC(SCRATCHPAD_16K(i + 8 + 4), r4, SCRATCHPAD_16K(i + 4)); \
			SCRATCHPAD_16K(i + 4).u64 ^= r3.u64;\
			FUNC(SCRATCHPAD_16K(i + 8 + 5), r5, SCRATCHPAD_16K(i + 5)); \
			SCRATCHPAD_16K(i + 5).u64 ^= r2.u64;\
			FUNC(SCRATCHPAD_16K(i + 8 + 6), r6, SCRATCHPAD_16K(i + 6)); \
			SCRATCHPAD_16K(i + 6).u64 ^= r1.u64;\
			FUNC(SCRATCHPAD_16K(i + 8 + 7), r7, SCRATCHPAD_16K(i + 7)); \
			SCRATCHPAD_16K(i + 7).u64 ^= r0.u64;\
		} \
		tend = std::chrono::high_resolution_clock::now(); \
		uint64_t acum = 0; \
		/* size_t index: SCRATCHPAD_LENGTH is an unsigned size_t expression. */ \
		for (size_t i = 0; i < SCRATCHPAD_LENGTH; ++i) \
			acum += scratchpad[i].u64; \
		std::cout << "| " << #FUNC << " | " << std::chrono::duration<double>(tend - tstart).count() << " | " << acum << " |" << std::endl; \
	} while(false)
|
||||
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
uint64_t iterations;
|
||||
if (argc > 1) {
|
||||
if (!tryParse(argv[1], iterations))
|
||||
return 1;
|
||||
}
|
||||
else {
|
||||
iterations = 100000000;
|
||||
}
|
||||
#ifdef WINDOWS
|
||||
_setmode(_fileno(stdin), O_BINARY);
|
||||
#endif
|
||||
convertible_t input[SCRATCHPAD_LENGTH];
|
||||
|
||||
std::cout << "Reading " << sizeof(input) << " bytes from STDIN..." << std::endl;
|
||||
std::cin.read((char*)input, sizeof(input));
|
||||
|
||||
if (!std::cin) {
|
||||
std::cerr << "Insufficient input" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
convertible_t scratchpad[SCRATCHPAD_LENGTH];
|
||||
convertible_t r0, r1, r2, r3, r4, r5, r6, r7;
|
||||
|
||||
r0.u64 = input[0].u64;
|
||||
r1.u64 = input[1].u64;
|
||||
r2.u64 = input[2].u64;
|
||||
r3.u64 = input[3].u64;
|
||||
r4.u64 = input[4].u64;
|
||||
r5.u64 = input[5].u64;
|
||||
r6.u64 = input[6].u64;
|
||||
r7.u64 = input[7].u64;
|
||||
|
||||
std::chrono::high_resolution_clock::time_point tstart, tend;
|
||||
|
||||
std::cout << iterations << " iterations:" << std::endl << std::endl;
|
||||
|
||||
std::cout << "| operation | time [s] | (result) |" << std::endl;
|
||||
std::cout << "|-----------|----------|----------|" << std::endl;
|
||||
|
||||
BENCHMARK(NOOP, u64);
|
||||
BENCHMARK(ADD_64, u64);
|
||||
BENCHMARK(ADD_32, u64);
|
||||
BENCHMARK(SUB_64, u64);
|
||||
BENCHMARK(SUB_32, u64);
|
||||
BENCHMARK(MUL_64, u64);
|
||||
BENCHMARK(MULH_64, u64);
|
||||
BENCHMARK(MUL_32, u64);
|
||||
BENCHMARK(IMUL_32, u64);
|
||||
BENCHMARK(IMULH_64, u64);
|
||||
BENCHMARK(DIV_64, u64);
|
||||
BENCHMARK(IDIV_64, u64);
|
||||
BENCHMARK(AND_64, u64);
|
||||
BENCHMARK(AND_32, u64);
|
||||
BENCHMARK(OR_64, u64);
|
||||
BENCHMARK(OR_32, u64);
|
||||
BENCHMARK(XOR_64, u64);
|
||||
BENCHMARK(XOR_32, u64);
|
||||
BENCHMARK(SHL_64, u64);
|
||||
BENCHMARK(SHR_64, u64);
|
||||
BENCHMARK(SAR_64, u64);
|
||||
BENCHMARK(ROR_64, u64);
|
||||
BENCHMARK(ROL_64, u64);
|
||||
|
||||
init_FPU();
|
||||
|
||||
BENCHMARK(FNOOP, f64);
|
||||
BENCHMARK(FADD, f64);
|
||||
BENCHMARK(FSUB, f64);
|
||||
BENCHMARK(FMUL, f64);
|
||||
BENCHMARK(FDIV, f64);
|
||||
BENCHMARK(FSQRT, f64);
|
||||
BENCHMARK(FROUND, f64);
|
||||
|
||||
return 0;
|
||||
}
|
Binary file not shown.
Binary file not shown.
595
tests/rx2c.py
595
tests/rx2c.py
|
@ -1,595 +0,0 @@
|
|||
import random
|
||||
import sys
|
||||
import os
|
||||
|
||||
# NOTE(review): presumably the number of instructions per generated program --
# its use is not visible in this part of the file; confirm.
PROGRAM_SIZE = 512

# Initial value of the generated program's `ic` counter, which is decremented
# once per executed instruction until it reaches zero.
INSTRUCTION_COUNT = 1 << 20

# (mnemonic, weight) pairs; the weights add up to 256.
# NOTE(review): presumably each weight is the number of opcode values mapped
# to the instruction -- confirm against the code that consumes this table.
INSTRUCTION_WEIGHTS = [
    # integer arithmetic
    ("ADD_64", 16), ("ADD_32", 8),
    ("SUB_64", 16), ("SUB_32", 8),
    ("MUL_64", 7), ("MULH_64", 7), ("MUL_32", 7),
    ("IMUL_32", 7), ("IMULH_64", 7),
    ("DIV_64", 1), ("IDIV_64", 1),
    # bitwise logic
    ("AND_64", 4), ("AND_32", 3),
    ("OR_64", 4), ("OR_32", 3),
    ("XOR_64", 4), ("XOR_32", 3),
    # shifts and rotations
    ("SHL_64", 6), ("SHR_64", 6), ("SAR_64", 6),
    ("ROL_64", 9), ("ROR_64", 9),
    # floating point
    ("FADD", 22), ("FSUB", 22), ("FMUL", 22),
    ("FDIV", 8), ("FSQRT", 6), ("FROUND", 2),
    # control flow
    ("CALL", 17), ("RET", 15),
]
|
||||
|
||||
def genBytes(count):
    """Return `count` random byte values (0-255) as decimal text joined by ', '."""
    values = []
    for _ in range(count):
        values.append(str(random.getrandbits(8)))
    return ', '.join(values)
|
||||
|
||||
class OperandType:
    """Integer tags for the operand kinds used as keys by the converter tables."""
    INT32, UINT32, INT64, UINT64, FLOAT, SHIFT = range(6)
|
||||
|
||||
def declareType(type):
    """Return the C type name used to declare an operand of the given type tag.

    Returns None for a tag that is not in the table.
    """
    c_types = ("int32_t", "uint32_t", "int64_t", "uint64_t", "double", "int32_t")
    return dict(enumerate(c_types)).get(type)
|
||||
|
||||
def toSigned32(x):
    """Reinterpret x as signed 32-bit: subtract 2**32 when bit 31 is set."""
    if x & 0x80000000:
        return x - 0x100000000
    return x
|
||||
|
||||
def toSigned64(x):
    """Reinterpret x as signed 64-bit: subtract 2**64 when bit 63 is set."""
    if x & 0x8000000000000000:
        return x - 0x10000000000000000
    return x
|
||||
|
||||
def immediateTo(symbol, type):
    """Return the repr() of the immediate operand of `symbol` for a type tag.

    Signed tags (0, 2) use imm1 sign-extended from 32 bits, unsigned tags
    (1, 3) use imm1 as is, the float tag (4) uses the sign-extended imm1
    shifted left by 32 bits as a float, and the shift tag (5) uses the low
    6 bits of imm0. An unknown tag yields 'None'.
    """
    signed_imm = toSigned32(symbol.imm1)
    unsigned_imm = symbol.imm1
    by_type = {
        0: signed_imm,
        1: unsigned_imm,
        2: signed_imm,
        3: unsigned_imm,
        4: float(signed_imm << 32),
        5: symbol.imm0 & 63,
    }
    return repr(by_type.get(type))
|
||||
|
||||
def registerTo(expr, type):
    """Wrap the register expression `expr` so it can be read as the given type tag.

    Signed tags (0, 2) get an (int64_t) cast, the shift tag (5) is masked
    with 63, and the remaining tags return `expr` unchanged.
    """
    templates = ("(int64_t){0}", "{0}", "(int64_t){0}", "{0}", "{0}", "({0} & 63)")
    template = dict(enumerate(templates)).get(type)
    return template.format(expr)
|
||||
|
||||
def registerFrom(num, type):
    """Return the C expression that yields register `num` for the given type tag.

    The float tag (4) reads f<num> through a convertible_t cast and takes its
    u64 view; every other tag reads r<num> directly.
    """
    templates = ("r{0}", "r{0}", "r{0}", "r{0}", "((convertible_t)f{0}).u64", "r{0}")
    template = dict(enumerate(templates)).get(type)
    return template.format(num)
|
||||
|
||||
def convertibleTo(expr, type):
    """Return the C expression that reads the convertible_t `expr` as the given type tag.

    The float tag (4) casts the i64 view to double; the shift tag (5) masks
    the u64 view with 63; the integer tags select the matching member.
    """
    templates = ("{0}.i32", "{0}.u32", "{0}.i64", "{0}.u64",
                 "(double){0}.i64", "({0}.u64 & 63)")
    template = dict(enumerate(templates)).get(type)
    return template.format(expr)
|
||||
|
||||
def convertibleFrom(expr, type):
    """Return the member-access expression on the convertible_t `expr` for a type tag.

    Same table as convertibleTo except that the float tag (4) selects the
    f64 member instead of converting the i64 view.
    """
    templates = ("{0}.i32", "{0}.u32", "{0}.i64", "{0}.u64",
                 "{0}.f64", "({0}.u64 & 63)")
    template = dict(enumerate(templates)).get(type)
    return template.format(expr)
|
||||
|
||||
def getRegister(num, type):
    """Return the register name for index `num`: f<num> for the float tag (4), r<num> otherwise."""
    prefixes = ("r", "r", "r", "r", "f", "r")
    template = dict((tag, prefix + "{0}") for tag, prefix in enumerate(prefixes)).get(type)
    return template.format(num)
|
||||
|
||||
def writeInitialValues(file):
    """Write the C statements that set up the generated program's state to `file`.

    In order: the optional DRAM buffer allocation (under `#ifdef RAM`), the
    start of the clock measurement, the integer registers r0..r7 and float
    registers f0..f7 loaded from aesSeed, the scratchpad fill, the mmu fields,
    sp, the instruction counter `ic` and the MXCSR setup.
    """
    emit = file.write

    for line in (
        "#ifdef RAM\n",
        "\tmmu.buffer = (char*)_mm_malloc(DRAM_SIZE, 16);\n",
        "\tif(!mmu.buffer) {\n",
        '\t\tprintf("DRAM buffer allocation failed\\n");\n',
        "\t\treturn 1;\n",
        "\t}\n",
        '\tprintf("Initializing DRAM buffer...\\n");\n',
        "\taesInitialize((__m128i*)aesKey, (__m128i*)aesSeed, (__m128i*)mmu.buffer, DRAM_SIZE);\n",
        "#endif\n",
        "\tclock_t clockStart = clock(), clockEnd;\n",
    ):
        emit(line)

    # Integer registers come from the first 64 seed bytes, float registers
    # from the next 64.
    for reg in range(8):
        emit("\tr{0} = *(uint64_t*)(aesSeed + {1});\n".format(reg, reg * 8))
    for reg in range(8):
        emit("\tf{0} = *(int64_t*)(aesSeed + {1});\n".format(reg, 64 + reg * 8))

    for line in (
        "\taesInitialize((__m128i*)aesKey, (__m128i*)aesSeed, (__m128i*)scratchpad, SCRATCHPAD_SIZE);\n",
        "\tmmu.ma = *(addr_t*)(aesKey + 8) & ~7U;\n",
        "#ifdef PRNTADDR\n",
        '\tprintf("DRAM address = %#010x\\n", mmu.ma);\n',
        "#endif\n",
        "\tmmu.mx = 0;\n",
        "\tsp = 0;\n",
    ):
        emit(line)

    emit("\tic = {0};\n".format(INSTRUCTION_COUNT))
    emit("\tmxcsr = (_mm_getcsr() | _MM_FLUSH_ZERO_ON) & ~_MM_ROUND_MASK; //flush denormals to zero, round to nearest\n")
    emit("\t_mm_setcsr(mxcsr);\n")
|
||||
|
||||
def writeEpilog(file):
    """Write the `end:` label and the closing part of the generated C program to `file`.

    The emitted code stops the clock, prints r0..r7 and f0..f7, prints the
    scratchpad sum and the runtime, frees the DRAM buffer under `#ifdef RAM`,
    returns 0 and closes the function body.
    """
    emit = file.write

    emit("\tend:\n")
    emit("\t\tclockEnd = clock();\n")

    register_line = '\t\tprintf("r{0} = %-36" PRIu64 " f{0} = %g\\n", r{0}, f{0});\n'
    for reg in range(8):
        emit(register_line.format(reg))

    summary = [
        "\t\tuint64_t spadsum = 0;\n",
        "\t\tfor(int i = 0; i < SCRATCHPAD_LENGTH; ++i) {\n",
        "\t\t spadsum += scratchpad[i].u64;\n",
        "\t\t}\n",
        '\t\tprintf("scratchpad sum = %" PRIu64 "\\n", spadsum);\n',
        '\t\tprintf("runtime: %f\\n", (clockEnd - clockStart) / (double)CLOCKS_PER_SEC);\n',
        "#ifdef RAM\n",
        "\t\t_mm_free((void*)mmu.buffer);\n",
        "#endif\n",
    ]
    emit("".join(summary))

    emit("\t\treturn 0;")
    emit("}")
|
||||
|
||||
def writeCommon(file, i, symbol, type, name):
    """Write the opening lines shared by every generated instruction block.

    Emits the label `i_<i>` with `name` as a trailing comment, the instruction
    counter check that jumps to `end`, the XOR of register `symbol.rega` with
    `symbol.addr0`, and the declaration of `addr` from that register. The
    block is left open; `type` is not used here.
    """
    reg = symbol.rega
    header = (
        "\ti_{0}: {{ //{1}\n".format(i, name),
        "\t\tif(0 == ic--) goto end;\n",
        "\t\tr{0} ^= {1};\n".format(reg, symbol.addr0),
        "\t\taddr_t addr = r{0};\n".format(reg),
    )
    for line in header:
        file.write(line)
|
||||
|
||||
def readA(symbol, type):
    """Return the C expression that reads operand A of `symbol` as the given type tag.

    `symbol.loca` selects the source: 0-3 read DRAM, 4 reads the 256K
    scratchpad and 5-7 read the 16K scratchpad, all addressed by `addr`.
    """
    sources = {}
    for loc in range(4):
        sources[loc] = "readDram(&mmu, addr)"
    sources[4] = "SCRATCHPAD_256K(addr)"
    for loc in (5, 6, 7):
        sources[loc] = "SCRATCHPAD_16K(addr)"
    return convertibleTo(sources.get(symbol.loca), type)
|
||||
|
||||
def writeC(symbol, type):
    """Return the C lvalue that receives result C of `symbol` for the given type tag.

    `symbol.locc` selects the destination: 0 is the 256K scratchpad and 1-3
    the 16K scratchpad, both addressed by register `regc` XOR `addr1`; 4-7
    store directly into register `regc`.
    """
    templates = {
        0: "SCRATCHPAD_256K(r{0} ^ {1})",
        1: "SCRATCHPAD_16K(r{0} ^ {1})",
        2: "SCRATCHPAD_16K(r{0} ^ {1})",
        3: "SCRATCHPAD_16K(r{0} ^ {1})",
        4: "",
        5: "",
        6: "",
        7: "",
    }
    template = templates.get(symbol.locc)
    if template == "":
        # Empty template: the destination is a register.
        return getRegister(symbol.regc, type)
    target = template.format(symbol.regc, symbol.addr1)
    return convertibleFrom(target, type)
|
||||
|
||||
def readB(symbol, type):
    """Return the C expression for operand B of `symbol` as the given type tag.

    `symbol.locb` values below 6 read register `regb`; 6 and above use the
    immediate encoded in the symbol.
    """
    if symbol.locb >= 6:
        return immediateTo(symbol, type)
    register = getRegister(symbol.regb, type)
    return registerTo(register, type)
|
||||
|
||||
class CodeSymbol:
    """Fields of one instruction, unpacked from the integer `qi`.

    Layout, from the least significant bit: opcode (bits 0-7), then the low
    three bits of each following byte for loca, rega, locb, regb, locc and
    regc, imm0 (bits 56-63), addr0 (bits 64-95), and everything from bit 96
    upwards as both addr1 and imm1.
    """

    def __init__(self, qi):
        def field(shift, mask):
            return (qi >> shift) & mask

        self.opcode = field(0, 255)
        self.loca = field(8, 7)
        self.rega = field(16, 7)
        self.locb = field(24, 7)
        self.regb = field(32, 7)
        self.locc = field(40, 7)
        self.regc = field(48, 7)
        self.imm0 = field(56, 255)
        self.addr0 = field(64, 0xFFFFFFFF)
        upper = qi >> 96
        self.addr1 = upper
        self.imm1 = upper
|
||||
|
||||
def writeOperation(file, i, symbol, type, name, op):
    """Write the complete C block for a binary instruction `C = A <op> B`.

    After the common header, A and B are declared with the C type of `type`,
    the result is stored in the destination chosen by writeC and the block is
    closed.
    """
    writeCommon(file, i, symbol, type, name)
    ctype = declareType(type)
    file.write("\t\t{0} A = {1};\n".format(ctype, readA(symbol, type)))
    file.write("\t\t{0} B = {1};\n".format(ctype, readB(symbol, type)))
    destination = writeC(symbol, type)
    file.write("\t\t{0} = A {1} B; }}\n".format(destination, op))
|
||||
|
||||
def write_ADD_64(file, i, symbol):
    """Emit the ADD_64 block: `A + B` on UINT64 operands."""
    writeOperation(file, i, symbol, OperandType.UINT64, 'ADD_64', '+')
|
||||
|
||||
def write_ADD_32(file, i, symbol):
    """Emit the ADD_32 block: `A + B` on UINT32 operands."""
    writeOperation(file, i, symbol, OperandType.UINT32, 'ADD_32', '+')
|
||||
|
||||
def write_SUB_64(file, i, symbol):
    """Emit the SUB_64 block: `A - B` on UINT64 operands."""
    writeOperation(file, i, symbol, OperandType.UINT64, 'SUB_64', '-')
|
||||
|
||||
def write_SUB_32(file, i, symbol):
    """Emit the SUB_32 block: `A - B` on UINT32 operands."""
    writeOperation(file, i, symbol, OperandType.UINT32, 'SUB_32', '-')
|
||||
|
||||
def write_MUL_64(file, i, symbol):
    """Emit the MUL_64 block: `A * B` on UINT64 operands."""
    writeOperation(file, i, symbol, OperandType.UINT64, 'MUL_64', '*')
|
||||
|
||||
def write_MULH_64(file, i, symbol):
    """Emit the MULH_64 block: upper 64 bits of the 128-bit product of UINT64 operands."""
    operand = OperandType.UINT64
    writeCommon(file, i, symbol, operand, 'MULH_64')
    ctype = declareType(operand)
    file.write("\t\t{0} A = {1};\n".format(ctype, readA(symbol, operand)))
    file.write("\t\t{0} B = {1};\n".format(ctype, readB(symbol, operand)))
    destination = writeC(symbol, operand)
    file.write("\t\t{0} = ((uint128_t)A * B) >> 64; }}\n".format(destination))
|
||||
|
||||
def write_MUL_32(file, i, symbol):
    """Emit the MUL_32 block: UINT32 operands, 64-bit product stored as UINT64."""
    operand = OperandType.UINT32
    writeCommon(file, i, symbol, operand, 'MUL_32')
    ctype = declareType(operand)
    file.write("\t\t{0} A = {1};\n".format(ctype, readA(symbol, operand)))
    file.write("\t\t{0} B = {1};\n".format(ctype, readB(symbol, operand)))
    # The destination is written through its 64-bit unsigned view.
    destination = writeC(symbol, OperandType.UINT64)
    file.write("\t\t{0} = (uint64_t)A * B; }}\n".format(destination))
|
||||
|
||||
def write_IMUL_32(file, i, symbol):
    """Emit the IMUL_32 block: INT32 operands, 64-bit product stored as INT64."""
    operand = OperandType.INT32
    writeCommon(file, i, symbol, operand, 'IMUL_32')
    ctype = declareType(operand)
    file.write("\t\t{0} A = {1};\n".format(ctype, readA(symbol, operand)))
    file.write("\t\t{0} B = {1};\n".format(ctype, readB(symbol, operand)))
    # The destination is written through its 64-bit signed view.
    destination = writeC(symbol, OperandType.INT64)
    file.write("\t\t{0} = (int64_t)A * B; }}\n".format(destination))
|
||||
|
||||
def write_IMULH_64(file, i, symbol):
    """Emit IMULH_64: the generated C multiplies A by B as int128_t and stores the product shifted right by 64.

    A, B and the destination all use OperandType.INT64.
    """
    operand = OperandType.INT64
    writeCommon(file, i, symbol, operand, 'IMULH_64')
    load_a = "\t\t{0} A = {1};\n".format(declareType(operand), readA(symbol, operand))
    file.write(load_a)
    load_b = "\t\t{0} B = {1};\n".format(declareType(operand), readB(symbol, operand))
    file.write(load_b)
    # "}}" is an escaped "}" that closes the per-instruction C block.
    store = "\t\t{0} = ((int128_t)A * B) >> 64; }}\n".format(writeC(symbol, operand))
    file.write(store)
|
||||
|
||||
def write_DIV_64(file, i, symbol):
    """Emit DIV_64: unsigned 64-bit A divided by a 32-bit unsigned B.

    The generated C replaces a zero divisor with 1 before dividing, so the
    division itself can never trap. A and the destination use
    OperandType.UINT64; B is read with OperandType.UINT32.
    """
    operand = OperandType.UINT64
    divisor = OperandType.UINT32
    writeCommon(file, i, symbol, operand, 'DIV_64')
    file.write("\t\t{0} A = {1};\n".format(declareType(operand), readA(symbol, operand)))
    file.write("\t\t{0} B = {1};\n".format(declareType(divisor), readB(symbol, divisor)))
    # Plain literal: this line has no placeholders, so it needs no .format()
    # call (the original evaluated declareType/readB only to discard them).
    file.write("\t\tif(B == 0) B = 1;\n")
    # "}}" is an escaped "}" that closes the per-instruction C block.
    file.write("\t\t{0} = A / B; }}\n".format(writeC(symbol, operand)))
|
||||
|
||||
def write_IDIV_64(file, i, symbol):
    """Emit IDIV_64: signed 64-bit A divided by a 32-bit signed B.

    The generated C guards both undefined cases of signed division:
    a zero divisor is replaced with 1, and INT64_MIN / -1 (whose quotient
    does not fit in int64_t and traps on x86) yields INT64_MIN, matching the
    C++ reference implementation of IDIV_64 in this file.
    A and the destination use OperandType.INT64; B is read with OperandType.INT32.
    """
    operand = OperandType.INT64
    divisor = OperandType.INT32
    writeCommon(file, i, symbol, operand, 'IDIV_64')
    file.write("\t\t{0} A = {1};\n".format(declareType(operand), readA(symbol, operand)))
    file.write("\t\t{0} B = {1};\n".format(declareType(divisor), readB(symbol, divisor)))
    # Plain literal: no placeholders, so no .format() call is needed.
    file.write("\t\tif(B == 0) B = 1;\n")
    # INT64_MIN comes from <stdint.h>, which the generated prolog includes.
    # "}}" is an escaped "}" that closes the per-instruction C block.
    file.write("\t\t{0} = (A == INT64_MIN && B == -1) ? INT64_MIN : A / B; }}\n".format(writeC(symbol, operand)))
|
||||
|
||||
def write_AND_64(file, i, symbol):
    """Emit instruction `i` as AND_64 by delegating to writeOperation with UINT64 operands and the C '&' operator."""
    operand, mnemonic, c_operator = OperandType.UINT64, 'AND_64', '&'
    writeOperation(file, i, symbol, operand, mnemonic, c_operator)
|
||||
|
||||
def write_AND_32(file, i, symbol):
    """Emit instruction `i` as AND_32 by delegating to writeOperation with UINT32 operands and the C '&' operator."""
    operand, mnemonic, c_operator = OperandType.UINT32, 'AND_32', '&'
    writeOperation(file, i, symbol, operand, mnemonic, c_operator)
|
||||
|
||||
def write_OR_64(file, i, symbol):
    """Emit instruction `i` as OR_64 by delegating to writeOperation with UINT64 operands and the C '|' operator."""
    operand, mnemonic, c_operator = OperandType.UINT64, 'OR_64', '|'
    writeOperation(file, i, symbol, operand, mnemonic, c_operator)
|
||||
|
||||
def write_OR_32(file, i, symbol):
    """Emit instruction `i` as OR_32 by delegating to writeOperation with UINT32 operands and the C '|' operator."""
    operand, mnemonic, c_operator = OperandType.UINT32, 'OR_32', '|'
    writeOperation(file, i, symbol, operand, mnemonic, c_operator)
|
||||
|
||||
def write_XOR_64(file, i, symbol):
    """Emit instruction `i` as XOR_64 by delegating to writeOperation with UINT64 operands and the C '^' operator."""
    operand, mnemonic, c_operator = OperandType.UINT64, 'XOR_64', '^'
    writeOperation(file, i, symbol, operand, mnemonic, c_operator)
|
||||
|
||||
def write_XOR_32(file, i, symbol):
    """Emit instruction `i` as XOR_32 by delegating to writeOperation with UINT32 operands and the C '^' operator."""
    operand, mnemonic, c_operator = OperandType.UINT32, 'XOR_32', '^'
    writeOperation(file, i, symbol, operand, mnemonic, c_operator)
|
||||
|
||||
def write_SHL_64(file, i, symbol):
    """Emit SHL_64: the generated C stores `A << B`.

    A and the destination use OperandType.UINT64; B is read with
    OperandType.SHIFT (its C type and range are decided by declareType/readB,
    which are defined elsewhere).
    """
    operand = OperandType.UINT64
    count = OperandType.SHIFT
    writeCommon(file, i, symbol, operand, 'SHL_64')
    load_a = "\t\t{0} A = {1};\n".format(declareType(operand), readA(symbol, operand))
    file.write(load_a)
    load_b = "\t\t{0} B = {1};\n".format(declareType(count), readB(symbol, count))
    file.write(load_b)
    # "}}" is an escaped "}" that closes the per-instruction C block.
    store = "\t\t{0} = A << B; }}\n".format(writeC(symbol, operand))
    file.write(store)
|
||||
|
||||
def write_SHR_64(file, i, symbol):
    """Emit SHR_64: the generated C stores `A >> B` with A declared from OperandType.UINT64.

    B is read with OperandType.SHIFT (its C type and range are decided by
    declareType/readB, which are defined elsewhere).
    """
    operand = OperandType.UINT64
    count = OperandType.SHIFT
    writeCommon(file, i, symbol, operand, 'SHR_64')
    load_a = "\t\t{0} A = {1};\n".format(declareType(operand), readA(symbol, operand))
    file.write(load_a)
    load_b = "\t\t{0} B = {1};\n".format(declareType(count), readB(symbol, count))
    file.write(load_b)
    # "}}" is an escaped "}" that closes the per-instruction C block.
    store = "\t\t{0} = A >> B; }}\n".format(writeC(symbol, operand))
    file.write(store)
|
||||
|
||||
def write_SAR_64(file, i, symbol):
    """Emit SAR_64: the generated C stores `A >> B` with A declared from OperandType.INT64.

    B is read with OperandType.SHIFT (its C type and range are decided by
    declareType/readB, which are defined elsewhere).
    """
    operand = OperandType.INT64
    count = OperandType.SHIFT
    writeCommon(file, i, symbol, operand, 'SAR_64')
    load_a = "\t\t{0} A = {1};\n".format(declareType(operand), readA(symbol, operand))
    file.write(load_a)
    load_b = "\t\t{0} B = {1};\n".format(declareType(count), readB(symbol, count))
    file.write(load_b)
    # "}}" is an escaped "}" that closes the per-instruction C block.
    store = "\t\t{0} = A >> B; }}\n".format(writeC(symbol, operand))
    file.write(store)
|
||||
|
||||
def write_ROL_64(file, i, symbol):
    """Emit ROL_64: the generated C stores `__rolq(A, B)`.

    A and the destination use OperandType.UINT64; B is read with OperandType.SHIFT.
    """
    operand = OperandType.UINT64
    count = OperandType.SHIFT
    writeCommon(file, i, symbol, operand, 'ROL_64')
    load_a = "\t\t{0} A = {1};\n".format(declareType(operand), readA(symbol, operand))
    file.write(load_a)
    load_b = "\t\t{0} B = {1};\n".format(declareType(count), readB(symbol, count))
    file.write(load_b)
    # "}}" is an escaped "}" that closes the per-instruction C block.
    store = "\t\t{0} = __rolq(A, B); }}\n".format(writeC(symbol, operand))
    file.write(store)
|
||||
|
||||
def write_ROR_64(file, i, symbol):
    """Emit ROR_64: the generated C stores `__rorq(A, B)`.

    A and the destination use OperandType.UINT64; B is read with OperandType.SHIFT.
    """
    operand = OperandType.UINT64
    count = OperandType.SHIFT
    writeCommon(file, i, symbol, operand, 'ROR_64')
    load_a = "\t\t{0} A = {1};\n".format(declareType(operand), readA(symbol, operand))
    file.write(load_a)
    load_b = "\t\t{0} B = {1};\n".format(declareType(count), readB(symbol, count))
    file.write(load_b)
    # "}}" is an escaped "}" that closes the per-instruction C block.
    store = "\t\t{0} = __rorq(A, B); }}\n".format(writeC(symbol, operand))
    file.write(store)
|
||||
|
||||
def write_FADD(file, i, symbol):
    """Emit instruction `i` as FADD by delegating to writeOperation with FLOAT operands and the C '+' operator."""
    operand, mnemonic, c_operator = OperandType.FLOAT, 'FADD', '+'
    writeOperation(file, i, symbol, operand, mnemonic, c_operator)
|
||||
|
||||
def write_FSUB(file, i, symbol):
    """Emit instruction `i` as FSUB by delegating to writeOperation with FLOAT operands and the C '-' operator."""
    operand, mnemonic, c_operator = OperandType.FLOAT, 'FSUB', '-'
    writeOperation(file, i, symbol, operand, mnemonic, c_operator)
|
||||
|
||||
def write_FMUL(file, i, symbol):
    """Emit instruction `i` as FMUL by delegating to writeOperation with FLOAT operands and the C '*' operator."""
    operand, mnemonic, c_operator = OperandType.FLOAT, 'FMUL', '*'
    writeOperation(file, i, symbol, operand, mnemonic, c_operator)
|
||||
|
||||
def write_FDIV(file, i, symbol):
    """Emit instruction `i` as FDIV by delegating to writeOperation with FLOAT operands and the C '/' operator."""
    operand, mnemonic, c_operator = OperandType.FLOAT, 'FDIV', '/'
    writeOperation(file, i, symbol, operand, mnemonic, c_operator)
|
||||
|
||||
def write_FSQRT(file, i, symbol):
    """Emit FSQRT: the generated C takes fabs() of operand A and stores its SSE2 scalar square root.

    A and the destination use OperandType.FLOAT.
    """
    operand = OperandType.FLOAT
    writeCommon(file, i, symbol, operand, 'FSQRT')
    file.write("\t\t{0} A = fabs({1});\n".format(declareType(operand), readA(symbol, operand)))
    # A is a single local, so it must be loaded with the scalar _mm_load_sd.
    # The packed _mm_load_pd used before reads 16 bytes and requires 16-byte
    # alignment, i.e. it read past A and could fault. _mm_sqrt_sd only uses the
    # low lane of its second argument, so the stored result is unchanged.
    # "}}" is an escaped "}" that closes the per-instruction C block.
    file.write("\t\t{0} = _mm_cvtsd_f64(_mm_sqrt_sd(_mm_setzero_pd(), _mm_load_sd(&A))); }}\n".format(writeC(symbol, operand)))
|
||||
|
||||
def write_FROUND(file, i, symbol):
    """Emit FROUND: store A (read as INT64) into the FLOAT destination, then reprogram MXCSR.

    The generated C ORs `mxcsr` with A's two lowest bits moved to bit
    positions 13-14 and masked with _MM_ROUND_MASK, and passes the result to
    _mm_setcsr().
    """
    operand = OperandType.FLOAT
    source = OperandType.INT64
    writeCommon(file, i, symbol, operand, 'FROUND')
    file.write("\t\t{0} A = {1};\n".format(declareType(source), readA(symbol, source)))
    file.write("\t\t{0} = A;\n".format(writeC(symbol, operand)))
    # Convert A to uint32_t *before* shifting: left-shifting a negative signed
    # value is undefined behaviour in C. The masked bits are the same as before.
    # This literal is not passed through .format(), so "}" is written as-is.
    file.write("\t\t_mm_setcsr(mxcsr | (((uint32_t)A << 13) & _MM_ROUND_MASK)); }\n")
|
||||
|
||||
def write_CALL(file, i, symbol):
    """Emit CALL: push A and a return label, then jump forward.

    When symbol.locb < 6 the push/jump is wrapped in
    `if((uint32_t)r<regb> <= <imm1>)` and the not-taken path stores A to the
    destination; otherwise the push/jump is emitted unconditionally.
    The return label is instruction i + 1 and the jump target is
    i + 1 + (imm0 masked to a quarter of PROGRAM_SIZE), both wrapped with
    PROGRAM_SIZE - 1.
    """
    operand = OperandType.UINT64
    writeCommon(file, i, symbol, operand, 'CALL')
    file.write("\t\t{0} A = {1};\n".format(declareType(operand), readA(symbol, operand)))

    conditional = symbol.locb < 6
    if conditional:
        file.write("\t\tif((uint32_t)r{0} <= {1}) {{\n".format(symbol.regb, symbol.imm1))

    wrap = PROGRAM_SIZE - 1
    return_index = (i + 1) & wrap
    jump_offset = symbol.imm0 & ((PROGRAM_SIZE >> 2) - 1)
    target_index = (i + 1 + jump_offset) & wrap
    file.write("\t\t\tPUSH_VALUE(A);\n")
    file.write("\t\t\tPUSH_ADDRESS(&&i_{0});\n".format(return_index))
    file.write("\t\t\tgoto i_{0};\n".format(target_index))

    if conditional:
        # Close the C `if` and emit the fall-through store (no trailing newline).
        file.write("\t\t}}\n\t\t{0} = A;".format(writeC(symbol, operand)))
    # Close the per-instruction C block.
    file.write("\t\t}\n")
|
||||
|
||||
def write_RET(file, i, symbol):
    """Emit RET: if the stack is not empty, pop a return label and a value and jump back.

    When symbol.locb < 6 the condition additionally requires
    `(uint32_t)r<regb> <= <imm1>`. On the taken path the destination receives
    A ^ (popped value) before `goto *target`; otherwise it receives A.
    """
    operand = OperandType.UINT64
    writeCommon(file, i, symbol, operand, 'RET')
    file.write("\t\t{0} A = {1};\n".format(declareType(operand), readA(symbol, operand)))

    # The C condition is assembled from up to three separate writes.
    file.write("\t\tif(!STACK_IS_EMPTY()")
    if symbol.locb < 6:
        file.write(" && (uint32_t)r{0} <= {1}".format(symbol.regb, symbol.imm1))
    file.write(") {\n")

    # Pop order mirrors CALL, which pushes the value first and the address second.
    file.write("\t\t\tvoid* target = POP_ADDRESS();\n")
    file.write("\t\t\tuint64_t C = POP_VALUE();\n")
    taken_store = "\t\t\t{0} = A ^ C;\n".format(writeC(symbol, operand))
    file.write(taken_store)
    file.write("\t\t\tgoto *target;\n")
    # Close the `if`, emit the fall-through store and close the instruction block.
    fallthrough = "\t\t}}\n\t\t{0} = A; }}\n".format(writeC(symbol, operand))
    file.write(fallthrough)
|
||||
|
||||
# Maps each opcode value (0..255) to the write_* function that emits it.
opcodeMap = {}


def buildOpcodeMap():
    """Fill opcodeMap from INSTRUCTION_WEIGHTS.

    Each (instruction, weight) pair claims `weight` consecutive opcode slots,
    all pointing at the module-level function named 'write_' + instruction.
    The weights must add up to exactly 256.
    """
    writers = globals()
    next_slot = 0
    for instruction, weight in INSTRUCTION_WEIGHTS:
        handler = writers['write_' + instruction]
        for _ in range(weight):
            opcodeMap[next_slot] = handler
            next_slot += 1
    assert next_slot == 256
|
||||
|
||||
def writeCode(file, i, symbol):
    """Emit instruction `i` using the writer registered in opcodeMap for symbol.opcode."""
    emit = opcodeMap.get(symbol.opcode)
    emit(file, i, symbol)
|
||||
|
||||
def writeMain(file):
    """Write the opening of the generated C main(): its signature and local variable declarations.

    The function body is left open; later writers append to it.
    """
    opening = (
        '__attribute__((optimize("Os"))) int main() {\n',
        " register uint64_t r0, r1, r2, r3, r4, r5, r6, r7;\n",
        " register double f0, f1, f2, f3, f4, f5, f6, f7;\n",
        " register uint64_t ic, sp;\n",
        " stack_t stack[STACK_LENGTH];\n",
        " convertible_t scratchpad[SCRATCHPAD_LENGTH] __attribute__ ((aligned (16)));\n",
        " mmu_t mmu;\n",
        " uint32_t mxcsr;\n",
    )
    # Single write, exactly as one concatenated string.
    file.write("".join(opening))
|
||||
|
||||
def writeProlog(file):
    """Write the fixed preamble of the generated C program in a single write.

    The preamble contains the #include lines, the typedefs (convertible_t,
    stack_t, mmu_t, ...), the scratchpad/stack/DRAM macros, readDram() and the
    AES helpers (sl_xor, AES_GENKEY_SUB, aes_genkey, aes_round, aesInitialize).
    """
    prolog = (
        # --- headers and typedefs ---
        "#include <stdint.h>\n",
        "#include <time.h>\n",
        "#include <stdio.h>\n",
        "#include <x86intrin.h>\n",
        "#include <emmintrin.h>\n",
        "#include <wmmintrin.h>\n",
        "#include <math.h>\n",
        "#include <inttypes.h>\n",
        "typedef uint32_t addr_t;\n",
        "typedef unsigned __int128 uint128_t;\n",
        "typedef __int128 int128_t;\n",
        "typedef unsigned char byte;\n",
        "typedef union {\n",
        " double f64;\n",
        " int64_t i64;\n",
        " uint64_t u64;\n",
        " int32_t i32;\n",
        " uint32_t u32;\n",
        "} convertible_t;\n",
        "typedef union {\n",
        " uint64_t value;\n",
        " void* address;\n",
        "} stack_t;\n",
        "typedef struct {\n",
        " addr_t ma;\n",
        " addr_t mx;\n",
        "#ifdef RAM\n",
        " const char* buffer;\n",
        "#endif\n",
        "} mmu_t;\n",
        # --- memory and stack macros ---
        "#define DRAM_SIZE (1ULL << 32)\n",
        "#define SCRATCHPAD_SIZE (256 * 1024)\n",
        "#define SCRATCHPAD_LENGTH (SCRATCHPAD_SIZE / sizeof(convertible_t))\n",
        "#define SCRATCHPAD_MASK14 (16 * 1024 / sizeof(convertible_t) - 1)\n",
        "#define SCRATCHPAD_MASK18 (SCRATCHPAD_LENGTH - 1)\n",
        "#define SCRATCHPAD_16K(x) scratchpad[(x) & SCRATCHPAD_MASK14]\n",
        "#define SCRATCHPAD_256K(x) scratchpad[(x) & SCRATCHPAD_MASK18]\n",
        "#define STACK_LENGTH (128 * 1024)\n",
        "#ifdef RAM\n",
        "#define DRAM_READ(mmu) (convertible_t)*(uint64_t*)((mmu)->buffer + (mmu)->ma)\n",
        "#define PREFETCH(mmu) _mm_prefetch(((mmu)->buffer + (mmu)->ma), _MM_HINT_T0)\n",
        "#else\n",
        "#define DRAM_READ(mmu) (convertible_t)(uint64_t)__rolq(6364136223846793005ULL*((mmu)->ma)+1442695040888963407ULL,32)\n",
        "#define PREFETCH(mmu)\n",
        "#endif\n",
        "#define PUSH_VALUE(x) stack[sp++].value = x\n",
        "#define PUSH_ADDRESS(x) stack[sp++].address = x\n",
        "#define STACK_IS_EMPTY() (sp == 0)\n",
        "#define POP_VALUE() stack[--sp].value\n",
        "#define POP_ADDRESS() stack[--sp].address\n",
        # --- readDram ---
        "static convertible_t readDram(mmu_t* mmu, addr_t addr) {\n",
        " convertible_t data;\n",
        " data = DRAM_READ(mmu);\n",
        " mmu->ma += 8;\n",
        " mmu->mx ^= addr;\n",
        " if((mmu->mx & 0x1FFF) == 0) {\n",
        "#ifdef PRNTADDR\n",
        ' printf("DRAM jump %#010x -> %#010x\\n", mmu->ma, mmu->mx);\n',
        "#endif\n",
        " mmu->ma = mmu->mx;\n",
        "#ifdef PREF\n",
        " PREFETCH(mmu);\n",
        "#endif\n",
        " }\n",
        " return data;\n",
        "}\n",
        # --- AES key expansion helpers ---
        "static inline __m128i sl_xor(__m128i tmp1) {\n",
        " __m128i tmp4;\n",
        " tmp4 = _mm_slli_si128(tmp1, 0x04);\n",
        " tmp1 = _mm_xor_si128(tmp1, tmp4);\n",
        " tmp4 = _mm_slli_si128(tmp4, 0x04);\n",
        " tmp1 = _mm_xor_si128(tmp1, tmp4);\n",
        " tmp4 = _mm_slli_si128(tmp4, 0x04);\n",
        " tmp1 = _mm_xor_si128(tmp1, tmp4);\n",
        " return tmp1;\n",
        "}\n",
        "#define AES_GENKEY_SUB(rcon) do { \\\n",
        " __m128i xout1 = _mm_aeskeygenassist_si128(xout2, rcon); \\\n",
        " xout1 = _mm_shuffle_epi32(xout1, 0xFF); \\\n",
        " xout0 = sl_xor(xout0); \\\n",
        " xout0 = _mm_xor_si128(xout0, xout1); \\\n",
        " xout1 = _mm_aeskeygenassist_si128(xout0, 0x00); \\\n",
        " xout1 = _mm_shuffle_epi32(xout1, 0xAA); \\\n",
        " xout2 = sl_xor(xout2); \\\n",
        " xout2 = _mm_xor_si128(xout2, xout1); } while(0)\n",
        "static inline void aes_genkey(const __m128i* memory, __m128i* k0, __m128i* k1, __m128i* k2, __m128i* k3, __m128i* k4, __m128i* k5, __m128i* k6, __m128i* k7, __m128i* k8, __m128i* k9) {\n",
        " __m128i xout0, xout2;\n",
        " xout0 = _mm_load_si128(memory);\n",
        " xout2 = _mm_load_si128(memory+1);\n",
        " *k0 = xout0;\n",
        " *k1 = xout2;\n",
        " AES_GENKEY_SUB(0x01);\n",
        " *k2 = xout0;\n",
        " *k3 = xout2;\n",
        " AES_GENKEY_SUB(0x02);\n",
        " *k4 = xout0;\n",
        " *k5 = xout2;\n",
        " AES_GENKEY_SUB(0x04);\n",
        " *k6 = xout0;\n",
        " *k7 = xout2;\n",
        " AES_GENKEY_SUB(0x08);\n",
        " *k8 = xout0;\n",
        " *k9 = xout2;\n",
        "}\n",
        # --- AES round over eight lanes ---
        "static inline void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7) {\n",
        " *x0 = _mm_aesenc_si128(*x0, key);\n",
        " *x1 = _mm_aesenc_si128(*x1, key);\n",
        " *x2 = _mm_aesenc_si128(*x2, key);\n",
        " *x3 = _mm_aesenc_si128(*x3, key);\n",
        " *x4 = _mm_aesenc_si128(*x4, key);\n",
        " *x5 = _mm_aesenc_si128(*x5, key);\n",
        " *x6 = _mm_aesenc_si128(*x6, key);\n",
        " *x7 = _mm_aesenc_si128(*x7, key);\n",
        "}\n",
        # --- aesInitialize ---
        "static void aesInitialize(__m128i* key, __m128i* seed, __m128i* output, size_t count) {\n",
        " \n",
        " __m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7;\n",
        " __m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;\n",
        " \n",
        " aes_genkey(key, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);\n",
        " \n",
        " xin0 = _mm_load_si128(seed + 0);\n",
        " xin1 = _mm_load_si128(seed + 1);\n",
        " xin2 = _mm_load_si128(seed + 2);\n",
        " xin3 = _mm_load_si128(seed + 3);\n",
        " xin4 = _mm_load_si128(seed + 4);\n",
        " xin5 = _mm_load_si128(seed + 5);\n",
        " xin6 = _mm_load_si128(seed + 6);\n",
        " xin7 = _mm_load_si128(seed + 7);\n",
        " \n",
        " for (size_t i = 0; i < count / sizeof(__m128i); i += 8)\n",
        " {\n",
        " aes_round(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);\n",
        " aes_round(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);\n",
        " aes_round(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);\n",
        " aes_round(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);\n",
        " aes_round(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);\n",
        " aes_round(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);\n",
        " aes_round(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);\n",
        " aes_round(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);\n",
        " aes_round(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);\n",
        " aes_round(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);\n",
        " \n",
        " _mm_store_si128(output + i + 0, xin0);\n",
        " _mm_store_si128(output + i + 1, xin1);\n",
        " _mm_store_si128(output + i + 2, xin2);\n",
        " _mm_store_si128(output + i + 3, xin3);\n",
        " _mm_store_si128(output + i + 4, xin4);\n",
        " _mm_store_si128(output + i + 5, xin5);\n",
        " _mm_store_si128(output + i + 6, xin6);\n",
        " _mm_store_si128(output + i + 7, xin7);\n",
        " }\n",
        "}\n",
    )
    # Single write of the whole preamble.
    file.write("".join(prolog))
|
||||
|
||||
# Script body: writes one complete generated C program to standard output.
with sys.stdout as file:
    buildOpcodeMap()
    writeProlog(file)
    # AES key (32 bytes) first, then seed (128 bytes); genBytes is defined elsewhere.
    for _array_name, _array_size in (("aesKey", 32), ("aesSeed", 128)):
        file.write("const byte {0}[{1}] = {{ {2} }};\n".format(_array_name, _array_size, genBytes(_array_size)))
    writeMain(file)
    writeInitialValues(file)
    # One instruction per program slot, each decoded from a fresh 128-bit random word.
    for slot in range(PROGRAM_SIZE):
        instruction_word = random.getrandbits(128)
        writeCode(file, slot, CodeSymbol(instruction_word))
    if PROGRAM_SIZE > 0:
        # After the last instruction, jump back to the first one.
        file.write("\t\tgoto i_0;\n")
    writeEpilog(file)
|
|
@ -1,69 +0,0 @@
|
|||
//RandomX ALU + FPU test
|
||||
//https://github.com/tevador/RandomX
|
||||
//License: GPL v3
|
||||
|
||||
// Include guard: this header defines a typedef, constants and inline
// functions, so including it twice in one translation unit must be harmless.
#ifndef RANDOMX_INSTRUCTIONS_H
#define RANDOMX_INSTRUCTIONS_H

#include <cstdint>

namespace RandomX {

	// Rounding-mode selectors; FROUND picks one from the two lowest bits of its operand.
	constexpr int RoundToNearest = 0;
	constexpr int RoundDown = 1;
	constexpr int RoundUp = 2;
	constexpr int RoundToZero = 3;

	// One 64-bit VM value that can be viewed as a double, a 64-bit integer or a 32-bit integer.
	typedef union {
		double f64;
		int64_t i64;
		uint64_t u64;
		int32_t i32;
		uint32_t u32;
	} convertible_t;

	extern "C" {
		// Integer instructions: read operands a and b, write the result to c.
		void ADD_64(convertible_t& a, convertible_t& b, convertible_t& c);
		void ADD_32(convertible_t& a, convertible_t& b, convertible_t& c);
		void SUB_64(convertible_t& a, convertible_t& b, convertible_t& c);
		void SUB_32(convertible_t& a, convertible_t& b, convertible_t& c);
		void MUL_64(convertible_t& a, convertible_t& b, convertible_t& c);
		void MULH_64(convertible_t& a, convertible_t& b, convertible_t& c);
		void MUL_32(convertible_t& a, convertible_t& b, convertible_t& c);
		void IMUL_32(convertible_t& a, convertible_t& b, convertible_t& c);
		void IMULH_64(convertible_t& a, convertible_t& b, convertible_t& c);
		void DIV_64(convertible_t& a, convertible_t& b, convertible_t& c);
		void IDIV_64(convertible_t& a, convertible_t& b, convertible_t& c);
		void AND_64(convertible_t& a, convertible_t& b, convertible_t& c);
		void AND_32(convertible_t& a, convertible_t& b, convertible_t& c);
		void OR_64(convertible_t& a, convertible_t& b, convertible_t& c);
		void OR_32(convertible_t& a, convertible_t& b, convertible_t& c);
		void XOR_64(convertible_t& a, convertible_t& b, convertible_t& c);
		void XOR_32(convertible_t& a, convertible_t& b, convertible_t& c);
		void SHL_64(convertible_t& a, convertible_t& b, convertible_t& c);
		void SHR_64(convertible_t& a, convertible_t& b, convertible_t& c);
		void SAR_64(convertible_t& a, convertible_t& b, convertible_t& c);
		void ROL_64(convertible_t& a, convertible_t& b, convertible_t& c);
		void ROR_64(convertible_t& a, convertible_t& b, convertible_t& c);

		// Floating-point instructions. The *_64 forms take operand b already converted to double.
		void FPINIT();
		void FADD_64(convertible_t& a, double b, convertible_t& c);
		void FSUB_64(convertible_t& a, double b, convertible_t& c);
		void FMUL_64(convertible_t& a, double b, convertible_t& c);
		void FDIV_64(convertible_t& a, double b, convertible_t& c);
		void FABSQRT(convertible_t& a, convertible_t& b, convertible_t& c);
		void FROUND(convertible_t& a, convertible_t& b, convertible_t& c);

		// Uniform three-operand wrappers: convert b.i64 to double and forward to the *_64 form.
		inline void FADD(convertible_t& a, convertible_t& b, convertible_t& c) {
			FADD_64(a, (double)b.i64, c);
		}

		inline void FSUB(convertible_t& a, convertible_t& b, convertible_t& c) {
			FSUB_64(a, (double)b.i64, c);
		}

		inline void FMUL(convertible_t& a, convertible_t& b, convertible_t& c) {
			FMUL_64(a, (double)b.i64, c);
		}

		inline void FDIV(convertible_t& a, convertible_t& b, convertible_t& c) {
			FDIV_64(a, (double)b.i64, c);
		}
	}
}

#endif // RANDOMX_INSTRUCTIONS_H
|
|
@ -1,247 +0,0 @@
|
|||
//RandomX ALU + FPU test
|
||||
//https://github.com/tevador/RandomX
|
||||
//License: GPL v3
|
||||
|
||||
#include "Instructions.h"
|
||||
#include <cfenv>
|
||||
#include <cmath>
|
||||
|
||||
// Fast path for compilers that provide a native 128-bit integer type.
#if defined(__SIZEOF_INT128__)
	typedef unsigned __int128 uint128_t;
	typedef __int128 int128_t;

	// Upper 64 bits of the unsigned 128-bit product a * b.
	static inline uint64_t __umulhi64(uint64_t a, uint64_t b) {
		return ((uint128_t)a * b) >> 64;
	}

	// Upper 64 bits of the signed 128-bit product a * b.
	// Returns int64_t (not uint64_t) so the sign is kept and the signature
	// matches the portable fallback further down in this file.
	static inline int64_t __imulhi64(int64_t a, int64_t b) {
		return ((int128_t)a * b) >> 64;
	}
	#define umulhi64 __umulhi64
	#define imulhi64 __imulhi64
#endif
|
||||
|
||||
// MSVC: map the rotate / multiply-high helpers onto compiler intrinsics.
#if defined(_MSC_VER)
	// EVAL_DEFINE(X) pastes a trailing 0 onto the expansion of X so the result
	// can be tested with #if.
	// NOTE(review): relies on how <intrin.h> defines the __MACHINE* macros — confirm.
	#define HAS_VALUE(X) X ## 0
	#define EVAL_DEFINE(X) HAS_VALUE(X)
	#include <intrin.h>
	#include <stdlib.h>
	#define ror64 _rotr64
	#define rol64 _rotl64
	#if EVAL_DEFINE(__MACHINEARM64_X64(1))
		#define umulhi64 __umulh
	#endif
	#if EVAL_DEFINE(__MACHINEX64(1))
		// Upper 64 bits of the signed product a * b via _mul128.
		// Returns int64_t (not uint64_t) to match the portable fallback.
		static inline int64_t __imulhi64(int64_t a, int64_t b) {
			int64_t hi;
			_mul128(a, b, &hi);
			return hi;
		}
		#define imulhi64 __imulhi64
	#endif
#endif
|
||||
|
||||
#ifndef ror64
	// Portable 64-bit rotate right, used when no intrinsic was mapped to ror64.
	// The count is reduced modulo 64 and the complementary shift is masked too,
	// so a count of 0 no longer produces a shift by 64 (undefined behaviour).
	static inline uint64_t __ror64(uint64_t a, int b) {
		const unsigned count = (unsigned)b & 63U;
		return (a >> count) | (a << ((64U - count) & 63U));
	}
	#define ror64 __ror64
#endif
|
||||
|
||||
#ifndef rol64
	// Portable 64-bit rotate left, used when no intrinsic was mapped to rol64.
	// The count is reduced modulo 64 and the complementary shift is masked too,
	// so a count of 0 no longer produces a shift by 64 (undefined behaviour).
	static inline uint64_t __rol64(uint64_t a, int b) {
		const unsigned count = (unsigned)b & 63U;
		return (a << count) | (a >> ((64U - count) & 63U));
	}
	#define rol64 __rol64
#endif
|
||||
|
||||
#ifndef sar64
	#include <type_traits>

	// The compiler's own >> on a signed 64-bit value; constexpr so it can be probed at compile time.
	constexpr int64_t builtintShr64(int64_t value, int shift) noexcept {
		return value >> shift;
	}

	// True when the built-in >> keeps the sign bit (-1 >> 1 == -1).
	struct UsesArithmeticShift : std::integral_constant<bool, builtintShr64(-1LL, 1) == -1LL> {
	};

	// Arithmetic shift right of a by b bits.
	static inline int64_t __sar64(int64_t a, int b) {
		// The built-in shift is already correct for non-negative values, and for
		// all values when the compiler shifts arithmetically.
		if (UsesArithmeticShift::value || a >= 0) {
			return builtintShr64(a, b);
		}
		// Negative value on a logically-shifting compiler: shift the
		// complement and complement the result.
		return ~(~a >> b);
	}
	#define sar64 __sar64
#endif
|
||||
|
||||
#ifndef umulhi64
	#define LO(x) ((x)&0xffffffff)
	#define HI(x) ((x)>>32)
	// Portable upper 64 bits of the unsigned product a * b, built from four
	// 32x32 -> 64 partial products with the carries propagated by hand.
	static inline uint64_t __umulhi64(uint64_t a, uint64_t b) {
		const uint64_t aHigh = HI(a), aLow = LO(a);
		const uint64_t bHigh = HI(b), bLow = LO(b);
		const uint64_t lowLow = aLow * bLow;
		const uint64_t lowHigh = aLow * bHigh;
		const uint64_t highLow = aHigh * bLow;
		const uint64_t highHigh = aHigh * bHigh;
		// Columns of the schoolbook multiplication; each carries into the next.
		const uint64_t column1 = LO(highLow) + LO(lowHigh) + HI(lowLow);
		const uint64_t column2 = HI(highLow) + HI(lowHigh) + LO(highHigh) + HI(column1);
		const uint64_t column3 = HI(highHigh) + HI(column2);

		return (column3 << 32) + LO(column2);
	}
	#define umulhi64 __umulhi64
#endif

#ifndef imulhi64
	// Portable upper 64 bits of the signed product a * b: take the unsigned
	// high half and subtract the other operand once for each negative input.
	// The correction is done in uint64_t, which wraps modulo 2^64; the previous
	// signed subtraction could overflow (e.g. a = -1, b = INT64_MIN).
	static inline int64_t __imulhi64(int64_t a, int64_t b) {
		uint64_t hi = umulhi64((uint64_t)a, (uint64_t)b);
		if (a < 0LL) hi -= (uint64_t)b;
		if (b < 0LL) hi -= (uint64_t)a;
		return (int64_t)hi;
	}
	#define imulhi64 __imulhi64
#endif
|
||||
|
||||
// Returns 0 for subnormal inputs and x itself for every other value.
static double FlushDenormal(double x) {
	return std::fpclassify(x) == FP_SUBNORMAL ? 0 : x;
}

// Flush-to-zero applied to the result of each software floating-point instruction.
#define FTZ(x) FlushDenormal(x)
|
||||
|
||||
namespace RandomX {
|
||||
|
||||
extern "C" {
|
||||
|
||||
void ADD_64(convertible_t& a, convertible_t& b, convertible_t& c) {
|
||||
c.u64 = a.u64 + b.u64;
|
||||
}
|
||||
|
||||
void ADD_32(convertible_t& a, convertible_t& b, convertible_t& c) {
|
||||
c.u64 = a.u32 + b.u32;
|
||||
}
|
||||
|
||||
void SUB_64(convertible_t& a, convertible_t& b, convertible_t& c) {
|
||||
c.u64 = a.u64 - b.u64;
|
||||
}
|
||||
|
||||
void SUB_32(convertible_t& a, convertible_t& b, convertible_t& c) {
|
||||
c.u64 = a.u32 - b.u32;
|
||||
}
|
||||
|
||||
void MUL_64(convertible_t& a, convertible_t& b, convertible_t& c) {
|
||||
c.u64 = a.u64 * b.u64;
|
||||
}
|
||||
|
||||
void MULH_64(convertible_t& a, convertible_t& b, convertible_t& c) {
|
||||
c.u64 = umulhi64(a.u64, b.u64);
|
||||
}
|
||||
|
||||
void MUL_32(convertible_t& a, convertible_t& b, convertible_t& c) {
|
||||
c.u64 = (uint64_t)a.u32 * b.u32;
|
||||
}
|
||||
|
||||
void IMUL_32(convertible_t& a, convertible_t& b, convertible_t& c) {
|
||||
c.i64 = (int64_t)a.i32 * b.i32;
|
||||
}
|
||||
|
||||
void IMULH_64(convertible_t& a, convertible_t& b, convertible_t& c) {
|
||||
c.i64 = imulhi64(a.i64, b.i64);
|
||||
}
|
||||
|
||||
void DIV_64(convertible_t& a, convertible_t& b, convertible_t& c) {
|
||||
c.u64 = a.u64 / (b.u32 != 0 ? b.u32 : 1U);
|
||||
}
|
||||
|
||||
void IDIV_64(convertible_t& a, convertible_t& b, convertible_t& c) {
|
||||
if (a.i64 == INT64_MIN && b.i64 == -1)
|
||||
c.i64 = INT64_MIN;
|
||||
else
|
||||
c.i64 = a.i64 / (b.i32 != 0 ? b.i32 : 1);
|
||||
}
|
||||
|
||||
void AND_64(convertible_t& a, convertible_t& b, convertible_t& c) {
|
||||
c.u64 = a.u64 & b.u64;
|
||||
}
|
||||
|
||||
void AND_32(convertible_t& a, convertible_t& b, convertible_t& c) {
|
||||
c.u64 = a.u32 & b.u32;
|
||||
}
|
||||
|
||||
void OR_64(convertible_t& a, convertible_t& b, convertible_t& c) {
|
||||
c.u64 = a.u64 | b.u64;
|
||||
}
|
||||
|
||||
void OR_32(convertible_t& a, convertible_t& b, convertible_t& c) {
|
||||
c.u64 = a.u32 | b.u32;
|
||||
}
|
||||
|
||||
void XOR_64(convertible_t& a, convertible_t& b, convertible_t& c) {
|
||||
c.u64 = a.u64 ^ b.u64;
|
||||
}
|
||||
|
||||
void XOR_32(convertible_t& a, convertible_t& b, convertible_t& c) {
|
||||
c.u64 = a.u32 ^ b.u32;
|
||||
}
|
||||
|
||||
void SHL_64(convertible_t& a, convertible_t& b, convertible_t& c) {
|
||||
c.u64 = a.u64 << (b.u64 & 63);
|
||||
}
|
||||
|
||||
void SHR_64(convertible_t& a, convertible_t& b, convertible_t& c) {
|
||||
c.u64 = a.u64 >> (b.u64 & 63);
|
||||
}
|
||||
|
||||
void SAR_64(convertible_t& a, convertible_t& b, convertible_t& c) {
|
||||
c.u64 = sar64(a.i64, b.u64 & 63);
|
||||
}
|
||||
|
||||
void ROL_64(convertible_t& a, convertible_t& b, convertible_t& c) {
|
||||
c.u64 = rol64(a.u64, (b.u64 & 63));
|
||||
}
|
||||
|
||||
void ROR_64(convertible_t& a, convertible_t& b, convertible_t& c) {
|
||||
c.u64 = ror64(a.u64, (b.u64 & 63));
|
||||
}
|
||||
|
||||
void FPINIT() {
|
||||
fesetround(FE_TONEAREST);
|
||||
}
|
||||
|
||||
void FADD_64(convertible_t& a, double b, convertible_t& c) {
|
||||
c.f64 = FTZ((double)a.i64 + b);
|
||||
}
|
||||
|
||||
void FSUB_64(convertible_t& a, double b, convertible_t& c) {
|
||||
c.f64 = FTZ((double)a.i64 - b);
|
||||
}
|
||||
|
||||
void FMUL_64(convertible_t& a, double b, convertible_t& c) {
|
||||
c.f64 = FTZ((double)a.i64 * b);
|
||||
}
|
||||
|
||||
void FDIV_64(convertible_t& a, double b, convertible_t& c) {
|
||||
c.f64 = FTZ((double)a.i64 / b);
|
||||
}
|
||||
|
||||
void FABSQRT(convertible_t& a, convertible_t& b, convertible_t& c) {
|
||||
double d = fabs((double)a.i64);
|
||||
c.f64 = FTZ(sqrt(d));
|
||||
}
|
||||
|
||||
void FROUND(convertible_t& a, convertible_t& b, convertible_t& c) {
|
||||
c.f64 = (double)a.i64;
|
||||
switch (a.u64 & 3) {
|
||||
case RoundDown:
|
||||
fesetround(FE_DOWNWARD);
|
||||
break;
|
||||
case RoundUp:
|
||||
fesetround(FE_UPWARD);
|
||||
break;
|
||||
case RoundToZero:
|
||||
fesetround(FE_TOWARDZERO);
|
||||
break;
|
||||
default:
|
||||
fesetround(FE_TONEAREST);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,276 +0,0 @@
|
|||
;RandomX ALU + FPU test
|
||||
;https://github.com/tevador/RandomX
|
||||
;License: GPL v3
|
||||
|
||||
PUBLIC ADD_64
|
||||
PUBLIC ADD_32
|
||||
PUBLIC SUB_64
|
||||
PUBLIC SUB_32
|
||||
PUBLIC MUL_64
|
||||
PUBLIC MULH_64
|
||||
PUBLIC MUL_32
|
||||
PUBLIC IMUL_32
|
||||
PUBLIC IMULH_64
|
||||
PUBLIC DIV_64
|
||||
PUBLIC IDIV_64
|
||||
PUBLIC AND_64
|
||||
PUBLIC AND_32
|
||||
PUBLIC OR_64
|
||||
PUBLIC OR_32
|
||||
PUBLIC XOR_64
|
||||
PUBLIC XOR_32
|
||||
PUBLIC SHL_64
|
||||
PUBLIC SHR_64
|
||||
PUBLIC SAR_64
|
||||
PUBLIC ROL_64
|
||||
PUBLIC ROR_64
|
||||
PUBLIC FPINIT
|
||||
PUBLIC FADD_64
|
||||
PUBLIC FSUB_64
|
||||
PUBLIC FMUL_64
|
||||
PUBLIC FDIV_64
|
||||
PUBLIC FABSQRT
|
||||
PUBLIC FROUND
|
||||
|
||||
CONST SEGMENT
|
||||
__XMMABS DB 0ffH, 0ffH, 0ffH, 0ffH, 0ffH, 0ffH, 0ffH, 07fH, 0ffH, 0ffH, 0ffH, 0ffH, 0ffH, 0ffH, 0ffH, 07fH
|
||||
CONST ENDS
|
||||
|
||||
.code
|
||||
|
||||
; [r8] = [rcx] + [rdx], 64-bit
ADD_64 PROC
	mov rax, QWORD PTR [rcx]	; first operand
	add rax, QWORD PTR [rdx]	; plus second operand
	mov QWORD PTR [r8], rax		; store result
	ret 0
ADD_64 ENDP
|
||||
|
||||
; [r8] = zero-extended 32-bit sum of the low dwords of [rcx] and [rdx]
ADD_32 PROC
	mov eax, DWORD PTR [rcx]
	add eax, DWORD PTR [rdx]	; writing eax clears the upper half of rax
	mov QWORD PTR [r8], rax
	ret 0
ADD_32 ENDP
|
||||
|
||||
; [r8] = [rcx] - [rdx], 64-bit
SUB_64 PROC
	mov rax, QWORD PTR [rcx]	; minuend
	sub rax, QWORD PTR [rdx]	; minus subtrahend
	mov QWORD PTR [r8], rax
	ret 0
SUB_64 ENDP
|
||||
|
||||
; [r8] = zero-extended 32-bit difference of the low dwords of [rcx] and [rdx]
SUB_32 PROC
	mov eax, DWORD PTR [rcx]
	sub eax, DWORD PTR [rdx]	; writing eax clears the upper half of rax
	mov QWORD PTR [r8], rax
	ret 0
SUB_32 ENDP
|
||||
|
||||
; [r8] = low 64 bits of [rcx] * [rdx]
MUL_64 PROC
	mov rax, QWORD PTR [rcx]
	imul rax, QWORD PTR [rdx]	; two-operand imul keeps only the low 64 bits
	mov QWORD PTR [r8], rax
	ret 0
MUL_64 ENDP
|
||||
|
||||
; [r8] = high 64 bits of the unsigned product [rcx] * [rdx]
MULH_64 PROC
	mov rax, QWORD PTR [rdx]	; load b before rdx is overwritten by mul
	mul QWORD PTR [rcx]		; rdx:rax = rax * a (unsigned)
	mov QWORD PTR [r8], rdx		; keep the high half
	ret 0
MULH_64 ENDP
|
||||
|
||||
; [r8] = 64-bit product of the unsigned low dwords of [rcx] and [rdx]
MUL_32 PROC
	mov r9d, DWORD PTR [rcx]	; zero-extended a
	mov eax, DWORD PTR [rdx]	; zero-extended b
	imul r9, rax			; product of two 32-bit values fits in 64 bits
	mov QWORD PTR [r8], r9
	ret 0
MUL_32 ENDP
|
||||
|
||||
; [r8] = 64-bit product of the signed low dwords of [rcx] and [rdx]
IMUL_32 PROC
	movsxd r9, DWORD PTR [rcx]	; sign-extended a
	movsxd rax, DWORD PTR [rdx]	; sign-extended b
	imul r9, rax
	mov QWORD PTR [r8], r9
	ret 0
IMUL_32 ENDP
|
||||
|
||||
; [r8] = high 64 bits of the signed product [rcx] * [rdx]
IMULH_64 PROC
	mov rax, QWORD PTR [rdx]	; load b before rdx is overwritten by imul
	imul QWORD PTR [rcx]		; rdx:rax = rax * a (signed)
	mov QWORD PTR [r8], rdx		; keep the high half
	ret 0
IMULH_64 ENDP
|
||||
|
||||
; [r8] = [rcx] / (unsigned low dword of [rdx]); a zero divisor is replaced by 1
DIV_64 PROC
	mov r9d, DWORD PTR [rdx]	; 32-bit divisor
	mov eax, 1			; replacement divisor
	test r9d, r9d
	cmovne eax, r9d			; eax = divisor if non-zero, else 1
	xor edx, edx			; clear the high half of the dividend rdx:rax
	mov r9d, eax			; r9 = zero-extended final divisor
	mov rax, QWORD PTR [rcx]	; 64-bit dividend
	div r9
	mov QWORD PTR [r8], rax		; quotient
	ret 0
DIV_64 ENDP
|
||||
|
||||
; [r8] = [rcx] / (signed low dword of [rdx]); a zero divisor is replaced by 1
; and INT64_MIN / -1 yields INT64_MIN instead of raising a divide error.
IDIV_64 PROC
	mov rax, QWORD PTR [rcx]	; dividend
	mov rcx, -9223372036854775808	; INT64_MIN
	cmp rax, rcx
	jne SHORT SAFE_IDIV_64
	; The divisor actually used below is the low dword, so the overflow guard
	; must test the dword too. The old QWORD compare let operands through
	; whose low 32 bits are -1 while the upper bits are not, and idiv then
	; faulted on INT64_MIN / -1.
	cmp DWORD PTR [rdx], -1
	jne SHORT SAFE_IDIV_64
	mov QWORD PTR [r8], rcx		; result = INT64_MIN
	ret 0
SAFE_IDIV_64:
	mov ecx, DWORD PTR [rdx]	; 32-bit divisor
	test ecx, ecx
	mov edx, 1			; replacement divisor (mov leaves the flags intact)
	cmovne edx, ecx			; edx = divisor if non-zero, else 1
	movsxd rcx, edx			; sign-extend the divisor to 64 bits
	cqo				; sign-extend rax into rdx:rax
	idiv rcx
	mov QWORD PTR [r8], rax		; quotient
	ret 0
IDIV_64 ENDP
|
||||
|
||||
; [r8] = [rcx] AND [rdx], 64-bit
AND_64 PROC
	mov rax, QWORD PTR [rcx]
	and rax, QWORD PTR [rdx]
	mov QWORD PTR [r8], rax
	ret 0
AND_64 ENDP
|
||||
|
||||
; [r8] = zero-extended AND of the low dwords of [rcx] and [rdx]
AND_32 PROC
	mov eax, DWORD PTR [rcx]
	and eax, DWORD PTR [rdx]	; writing eax clears the upper half of rax
	mov QWORD PTR [r8], rax
	ret 0
AND_32 ENDP
|
||||
|
||||
; [r8] = [rcx] OR [rdx], 64-bit
OR_64 PROC
	mov rax, QWORD PTR [rcx]
	or rax, QWORD PTR [rdx]
	mov QWORD PTR [r8], rax
	ret 0
OR_64 ENDP
|
||||
|
||||
; [r8] = zero-extended OR of the low dwords of [rcx] and [rdx]
OR_32 PROC
	mov eax, DWORD PTR [rcx]
	or eax, DWORD PTR [rdx]		; writing eax clears the upper half of rax
	mov QWORD PTR [r8], rax
	ret 0
OR_32 ENDP
|
||||
|
||||
; [r8] = [rcx] XOR [rdx], 64-bit
XOR_64 PROC
	mov rax, QWORD PTR [rcx]
	xor rax, QWORD PTR [rdx]
	mov QWORD PTR [r8], rax
	ret 0
XOR_64 ENDP
|
||||
|
||||
; [r8] = zero-extended XOR of the low dwords of [rcx] and [rdx]
XOR_32 PROC
	mov eax, DWORD PTR [rcx]
	xor eax, DWORD PTR [rdx]	; writing eax clears the upper half of rax
	mov QWORD PTR [r8], rax
	ret 0
XOR_32 ENDP
|
||||
|
||||
; [r8] = [rcx] shifted left by the count in [rdx]
SHL_64 PROC
	mov rax, QWORD PTR [rcx]	; value
	mov rcx, QWORD PTR [rdx]	; count; only cl is used, masked to 6 bits by the CPU
	shl rax, cl
	mov QWORD PTR [r8], rax
	ret 0
SHL_64 ENDP
|
||||
|
||||
; [r8] = [rcx] shifted right (logical) by the count in [rdx]
SHR_64 PROC
	mov rax, QWORD PTR [rcx]	; value
	mov rcx, QWORD PTR [rdx]	; count; only cl is used, masked to 6 bits by the CPU
	shr rax, cl
	mov QWORD PTR [r8], rax
	ret 0
SHR_64 ENDP
|
||||
|
||||
; [r8] = [rcx] shifted right (arithmetic) by the count in [rdx]
SAR_64 PROC
	mov rax, QWORD PTR [rcx]	; value
	mov rcx, QWORD PTR [rdx]	; count; only cl is used, masked to 6 bits by the CPU
	sar rax, cl
	mov QWORD PTR [r8], rax
	ret 0
SAR_64 ENDP
|
||||
|
||||
; [r8] = [rcx] rotated left by the count in [rdx]
ROL_64 PROC
	mov rax, QWORD PTR [rcx]	; value
	mov rcx, QWORD PTR [rdx]	; count; only cl is used, masked to 6 bits by the CPU
	rol rax, cl
	mov QWORD PTR [r8], rax
	ret 0
ROL_64 ENDP
|
||||
|
||||
; [r8] = [rcx] rotated right by the count in [rdx]
ROR_64 PROC
	mov rax, QWORD PTR [rcx]	; value
	mov rcx, QWORD PTR [rdx]	; count; only cl is used, masked to 6 bits by the CPU
	ror rax, cl
	mov QWORD PTR [r8], rax
	ret 0
ROR_64 ENDP
|
||||
|
||||
; Load MXCSR with 9FC0h: flush-to-zero and denormals-are-zero set, all
; exceptions masked, rounding control 00 (round to nearest).
FPINIT PROC
	; [rsp+8] is used as scratch storage.
	; NOTE(review): assumes the caller's home space of the Microsoft x64 convention - confirm.
	mov DWORD PTR [rsp+8], 9FC0h
	ldmxcsr DWORD PTR [rsp+8]
	ret 0
FPINIT ENDP
|
||||
|
||||
; [r8] = (double)(signed qword at [rcx]) + xmm1
FADD_64 PROC
	cvtsi2sd xmm0, QWORD PTR [rcx]	; integer operand converted to double
	addsd xmm0, xmm1		; xmm1 holds the double operand b
	movsd QWORD PTR [r8], xmm0
	ret 0
FADD_64 ENDP
|
||||
|
||||
; [r8] = (double)(signed qword at [rcx]) - xmm1
FSUB_64 PROC
	cvtsi2sd xmm0, QWORD PTR [rcx]	; integer operand converted to double
	subsd xmm0, xmm1		; xmm1 holds the double operand b
	movsd QWORD PTR [r8], xmm0
	ret 0
FSUB_64 ENDP
|
||||
|
||||
; [r8] = (double)(signed qword at [rcx]) * xmm1
FMUL_64 PROC
	cvtsi2sd xmm0, QWORD PTR [rcx]	; integer operand converted to double
	mulsd xmm0, xmm1		; xmm1 holds the double operand b
	movsd QWORD PTR [r8], xmm0
	ret 0
FMUL_64 ENDP
|
||||
|
||||
; [r8] = (double)(signed qword at [rcx]) / xmm1
FDIV_64 PROC
	cvtsi2sd xmm0, QWORD PTR [rcx]	; integer operand converted to double
	divsd xmm0, xmm1		; xmm1 holds the double operand b
	movsd QWORD PTR [r8], xmm0
	ret 0
FDIV_64 ENDP
|
||||
|
||||
; [r8] = sqrt(|(double)(signed qword at [rcx])|); rdx is not read
FABSQRT PROC
	cvtsi2sd xmm0, QWORD PTR [rcx]		; integer operand converted to double
	andps xmm0, XMMWORD PTR __XMMABS	; clear the sign bit (mask has 7Fh in the top byte of each qword)
	sqrtsd xmm1, xmm0
	movsd QWORD PTR [r8], xmm1
	ret 0
FABSQRT ENDP
|
||||
|
||||
; [r8] = (double)(signed qword at [rcx]), then set the MXCSR rounding mode
; from the two lowest bits of that qword; rdx is not read
FROUND PROC
	cvtsi2sd xmm0, QWORD PTR [rcx]
	movsd QWORD PTR [r8], xmm0	; store the converted value first
	mov rax, QWORD PTR [rcx]
	shl rax, 13			; move bits 0-1 to the rounding-control position (bits 13-14)
	and eax, 6000h			; keep only the rounding-control bits
	or eax, 9FC0h			; same base MXCSR value that FPINIT loads
	; [rsp+8] is used as scratch storage.
	; NOTE(review): assumes the caller's home space of the Microsoft x64 convention - confirm.
	mov DWORD PTR [rsp+8], eax
	ldmxcsr DWORD PTR [rsp+8]
	ret 0
FROUND ENDP
|
||||
|
||||
END
|
|
@ -1,283 +0,0 @@
|
|||
//RandomX ALU + FPU test
|
||||
//https://github.com/tevador/RandomX
|
||||
//License: GPL v3
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <limits>
|
||||
#include "Instructions.h"
|
||||
|
||||
using namespace RandomX;
|
||||
|
||||
typedef void(*VmOperation)(convertible_t&, convertible_t&, convertible_t&);
|
||||
|
||||
// Runs a single VM operation under a chosen rounding mode.
//   mode - value handed to FROUND through the first operand
//   x, y - signed inputs placed in the first and second operand
//   op   - the operation to execute
// Returns the f64 view of the operation's result.
double rxRound(uint32_t mode, int64_t x, int64_t y, VmOperation op) {
	convertible_t lhs, rhs, result;

	// First pass: hand the requested mode to FROUND.
	lhs.u64 = mode;
	FROUND(lhs, rhs, result);

	// Second pass: execute the operation under test on (x, y).
	lhs.i64 = x;
	rhs.i64 = y;
	op(lhs, rhs, result);

	return result.f64;
}
|
||||
|
||||
#define CATCH_CONFIG_MAIN
|
||||
#include "catch.hpp"
|
||||
|
||||
// Stores va and vb in the u64 views of the locals `a` and `b`, then invokes
// INST(a, b, c). The enclosing scope must declare convertible_t a, b and c.
#define RX_EXECUTE_U64(va, vb, INST) \
	do { \
		a.u64 = (va); \
		b.u64 = (vb); \
		INST(a, b, c); \
	} while (false)
|
||||
|
||||
// Stores va and vb in the i64 views of the locals `a` and `b`, then invokes
// INST(a, b, c). The enclosing scope must declare convertible_t a, b and c.
#define RX_EXECUTE_I64(va, vb, INST) \
	do { \
		a.i64 = (va); \
		b.i64 = (vb); \
		INST(a, b, c); \
	} while (false)
|
||||
|
||||
// ADD_64: 64-bit addition, result taken modulo 2^64.
TEST_CASE("Integer addition (64-bit)", "[ADD_64]") {
	convertible_t a, b, c;

	// A carry out of the low 32 bits must reach bit 32.
	a.u64 = 0xFFFFFFFF;
	b.u64 = 0x1;
	ADD_64(a, b, c);
	REQUIRE(c.u64 == 0x100000000);

	// 2^63 + 2^63 overflows 64 bits and wraps around to zero.
	a.u64 = 0x8000000000000000;
	b.u64 = 0x8000000000000000;
	ADD_64(a, b, c);
	REQUIRE(c.u64 == 0x0);
}
|
||||
|
||||
// ADD_32: the expectations pin a 32-bit sum with the upper half of the
// result cleared.
TEST_CASE("Integer addition (32-bit)", "[ADD_32]") {
	convertible_t a, b, c;

	// 0xFFFFFFFF + 1 wraps to zero within 32 bits; no carry into bit 32.
	a.u64 = 0xFFFFFFFF;
	b.u64 = 0x1;
	ADD_32(a, b, c);
	REQUIRE(c.u64 == 0);

	// The upper 32 bits of both operands must not influence the result.
	a.u64 = 0xFF00000000000001;
	b.u64 = 0x0000000100000001;
	ADD_32(a, b, c);
	REQUIRE(c.u64 == 2);
}
|
||||
|
||||
// SUB_64: 64-bit subtraction, result taken modulo 2^64.
TEST_CASE("Integer subtraction (64-bit)", "[SUB_64]") {
	convertible_t a, b, c;

	// 1 - (2^32 - 1) is negative and wraps to 2^64 - 2^32 + 2.
	a.u64 = 1;
	b.u64 = 0xFFFFFFFF;
	SUB_64(a, b, c);
	REQUIRE(c.u64 == 0xFFFFFFFF00000002);
}
|
||||
|
||||
// SUB_32: the expectation pins a 32-bit difference with the upper half of
// the result cleared.
TEST_CASE("Integer subtraction (32-bit)", "[SUB_32]") {
	convertible_t a, b, c;

	// 1 - 0xFFFFFFFF wraps to 2 within 32 bits.
	a.u64 = 1;
	b.u64 = 0xFFFFFFFF;
	SUB_32(a, b, c);
	REQUIRE(c.u64 == 2);
}
|
||||
|
||||
// MUL_64: checks the low 64 bits of an unsigned 64 x 64-bit product against
// a fixed reference value.
TEST_CASE("Unsigned multiplication (64-bit, low half)", "[MUL_64]") {
	convertible_t a, b, c;

	a.u64 = 0xBC550E96BA88A72B;
	b.u64 = 0xF5391FA9F18D6273;
	MUL_64(a, b, c);
	REQUIRE(c.u64 == 0x28723424A9108E51);
}
|
||||
|
||||
// MULH_64: checks the high 64 bits of an unsigned 64 x 64-bit product
// against a fixed reference value.
TEST_CASE("Unsigned multiplication (64-bit, high half)", "[MULH_64]") {
	convertible_t a, b, c;

	a.u64 = 0xBC550E96BA88A72B;
	b.u64 = 0xF5391FA9F18D6273;
	MULH_64(a, b, c);
	REQUIRE(c.u64 == 0xB4676D31D2B34883);
}
|
||||
|
||||
// MUL_32: checks an unsigned 32 x 32-bit multiplication with a full 64-bit
// result against a fixed reference value.
TEST_CASE("Unsigned multiplication (32-bit x 32-bit -> 64-bit)", "[MUL_32]") {
	convertible_t a, b, c;

	a.u64 = 0xBC550E96BA88A72B;
	b.u64 = 0xF5391FA9F18D6273;
	MUL_32(a, b, c);
	REQUIRE(c.u64 == 0xB001AA5FA9108E51);
}
|
||||
|
||||
// IMUL_32: checks a signed 32 x 32-bit multiplication with a full 64-bit
// result against a fixed reference value.
TEST_CASE("Signed multiplication (32-bit x 32-bit -> 64-bit)", "[IMUL_32]") {
	convertible_t a, b, c;

	a.u64 = 0xBC550E96BA88A72B;
	b.u64 = 0xF5391FA9F18D6273;
	IMUL_32(a, b, c);
	REQUIRE(c.u64 == 0x03EBA0C1A9108E51);
}
|
||||
|
||||
// IMULH_64: checks the high 64 bits of a signed 64 x 64-bit product against
// a fixed reference value.
TEST_CASE("Signed multiplication (64-bit, high half)", "[IMULH_64]") {
	convertible_t a, b, c;

	a.u64 = 0xBC550E96BA88A72B;
	b.u64 = 0xF5391FA9F18D6273;
	IMULH_64(a, b, c);
	REQUIRE(c.u64 == 0x02D93EF1269D3EE5);
}
|
||||
|
||||
// DIV_64: unsigned division of a 64-bit dividend.
TEST_CASE("Unsigned division (64-bit / 32-bit -> 32-bit)", "[DIV_64]") {
	convertible_t a, b, c;

	// Ordinary case: large dividend, divisor that fits in 32 bits.
	a.u64 = 8774217225983458895;
	b.u64 = 3014068202;
	DIV_64(a, b, c);
	REQUIRE(c.u64 == 2911087818);

	// Division by zero must hand back the dividend unchanged.
	a.u64 = 8774217225983458895;
	b.u64 = 0;
	DIV_64(a, b, c);
	REQUIRE(c.u64 == 8774217225983458895);

	// Divisor wider than 32 bits; presumably only its low 32 bits are used,
	// as the test title suggests.
	a.u64 = 3014068202;
	b.u64 = 8774217225983458895;
	DIV_64(a, b, c);
	REQUIRE(c.u64 == 2);
}
|
||||
|
||||
// IDIV_64: signed division of a 64-bit dividend.
TEST_CASE("Signed division (64-bit / 32-bit -> 32-bit)", "[IDIV_64]") {
	convertible_t a, b, c;

	// The expected quotient is negative, i.e. 3014068202 is treated as a
	// negative signed 32-bit divisor.
	a.u64 = 8774217225983458895;
	b.u64 = 3014068202;
	IDIV_64(a, b, c);
	REQUIRE(c.u64 == 0xFFFFFFFE67B4994E);

	// Division by zero must hand back the dividend unchanged.
	a.u64 = 8774217225983458895;
	b.u64 = 0;
	IDIV_64(a, b, c);
	REQUIRE(c.u64 == 8774217225983458895);

	// INT64_MIN / -1 is not representable; the dividend is expected back.
	a.u64 = 0x8000000000000000;
	b.u64 = 0xFFFFFFFFFFFFFFFF;
	IDIV_64(a, b, c);
	REQUIRE(c.u64 == 0x8000000000000000);

	// Negative dividend with a divisor wider than 32 bits; quotient is -1.
	a.u64 = 0xFFFFFFFFB3A707EA;
	b.u64 = 8774217225983458895;
	IDIV_64(a, b, c);
	REQUIRE(c.u64 == 0xFFFFFFFFFFFFFFFF);
}
|
||||
|
||||
// AND_64: bitwise AND across all 64 bits (0xC & 0xA == 0x8 in every nibble).
TEST_CASE("Bitwise AND (64-bit)", "[AND_64]") {
	convertible_t a, b, c;

	a.u64 = 0xCCCCCCCCCCCCCCCC;
	b.u64 = 0xAAAAAAAAAAAAAAAA;
	AND_64(a, b, c);
	REQUIRE(c.u64 == 0x8888888888888888);
}
|
||||
|
||||
// AND_32: bitwise AND of the low 32 bits; the expectation pins the upper
// half of the result to zero.
TEST_CASE("Bitwise AND (32-bit)", "[AND_32]") {
	convertible_t a, b, c;

	a.u64 = 0xCCCCCCCCCCCCCCCC;
	b.u64 = 0xAAAAAAAAAAAAAAAA;
	AND_32(a, b, c);
	REQUIRE(c.u64 == 0x88888888);
}
|
||||
|
||||
// OR_64: bitwise OR across all 64 bits (0x4 | 0xA == 0xE in every nibble).
TEST_CASE("Bitwise OR (64-bit)", "[OR_64]") {
	convertible_t a, b, c;

	a.u64 = 0x4444444444444444;
	b.u64 = 0xAAAAAAAAAAAAAAAA;
	OR_64(a, b, c);
	REQUIRE(c.u64 == 0xEEEEEEEEEEEEEEEE);
}
|
||||
|
||||
// OR_32: bitwise OR of the low 32 bits; the expectation pins the upper half
// of the result to zero.
TEST_CASE("Bitwise OR (32-bit)", "[OR_32]") {
	convertible_t a, b, c;

	a.u64 = 0x4444444444444444;
	b.u64 = 0xAAAAAAAAAAAAAAAA;
	OR_32(a, b, c);
	REQUIRE(c.u64 == 0xEEEEEEEE);
}
|
||||
|
||||
// XOR_64: bitwise XOR across all 64 bits (0x8 ^ 0xA == 0x2 in every nibble).
TEST_CASE("Bitwise XOR (64-bit)", "[XOR_64]") {
	convertible_t a, b, c;

	a.u64 = 0x8888888888888888;
	b.u64 = 0xAAAAAAAAAAAAAAAA;
	XOR_64(a, b, c);
	REQUIRE(c.u64 == 0x2222222222222222);
}
|
||||
|
||||
// XOR_32: bitwise XOR of the low 32 bits; the expectation pins the upper
// half of the result to zero.
TEST_CASE("Bitwise XOR (32-bit)", "[XOR_32]") {
	convertible_t a, b, c;

	a.u64 = 0x8888888888888888;
	b.u64 = 0xAAAAAAAAAAAAAAAA;
	XOR_32(a, b, c);
	REQUIRE(c.u64 == 0x22222222);
}
|
||||
|
||||
// SHL_64: logical left shift of a 64-bit value.
TEST_CASE("Logical left shift (64-bit)", "[SHL_64]") {
	convertible_t a, b, c;

	// In-range count: 3 << 52.
	a.u64 = 0x3;
	b.u64 = 52;
	SHL_64(a, b, c);
	REQUIRE(c.u64 == 0x30000000000000);

	// Oversized count: 4569451684712230561 is 33 modulo 64.
	a.u64 = 953360005391419562;
	b.u64 = 4569451684712230561;
	SHL_64(a, b, c);
	REQUIRE(c.u64 == 6978065200108797952);

	// The top bit is shifted out and discarded.
	a.u64 = 0x8000000000000000;
	b.u64 = 1;
	SHL_64(a, b, c);
	REQUIRE(c.u64 == 0);
}
|
||||
|
||||
// SHR_64: logical (zero-filling) right shift of a 64-bit value.
TEST_CASE("Logical right shift (64-bit)", "[SHR_64]") {
	convertible_t a, b, c;

	// Every set bit is shifted out.
	a.u64 = 0x3;
	b.u64 = 52;
	SHR_64(a, b, c);
	REQUIRE(c.u64 == 0);

	// Oversized count: 4569451684712230561 is 33 modulo 64.
	a.u64 = 953360005391419562;
	b.u64 = 4569451684712230561;
	SHR_64(a, b, c);
	REQUIRE(c.u64 == 110985711);

	// The vacated top bit is filled with zero, not with the old sign bit.
	a.u64 = 0x8000000000000000;
	b.u64 = 1;
	SHR_64(a, b, c);
	REQUIRE(c.u64 == 0x4000000000000000);
}
|
||||
|
||||
// SAR_64: arithmetic (sign-preserving) right shift of a 64-bit value.
TEST_CASE("Arithmetic right shift (64-bit)", "[SAR_64]") {
	convertible_t a, b, c;

	// Negative values round toward negative infinity: -9 >> 2 == -3.
	a.i64 = -9;
	b.i64 = 2;
	SAR_64(a, b, c);
	REQUIRE(c.i64 == -3);

	// Shifting the minimum value by 63 replicates the sign bit everywhere.
	a.i64 = INT64_MIN;
	b.i64 = 63;
	SAR_64(a, b, c);
	REQUIRE(c.i64 == -1);

	// Oversized count: 163768499474606398 is 62 modulo 64.
	a.i64 = INT64_MAX;
	b.i64 = 163768499474606398;
	SAR_64(a, b, c);
	REQUIRE(c.i64 == 1);
}
|
||||
|
||||
// ROL_64: left rotation of a 64-bit value.
TEST_CASE("Circular left shift (64-bit)", "[ROL_64]") {
	convertible_t a, b, c;

	// No bits wrap around yet: same result as a plain shift by 52.
	a.u64 = 0x3;
	b.u64 = 52;
	ROL_64(a, b, c);
	REQUIRE(c.u64 == 0x30000000000000);

	// Oversized count: 4569451684712230561 is 33 modulo 64.
	a.u64 = 953360005391419562;
	b.u64 = 4569451684712230561;
	ROL_64(a, b, c);
	REQUIRE(c.u64 == 6978065200552740799);

	// The top bit wraps around into bit 0.
	a.u64 = 0x8000000000000000;
	b.u64 = 1;
	ROL_64(a, b, c);
	REQUIRE(c.u64 == 1);
}
|
||||
|
||||
// ROR_64: right rotation of a 64-bit value.
TEST_CASE("Circular right shift (64-bit)", "[ROR_64]") {
	convertible_t a, b, c;

	// Rotating right by 52 equals rotating left by 12: 3 << 12 == 12288.
	a.u64 = 0x3;
	b.u64 = 52;
	ROR_64(a, b, c);
	REQUIRE(c.u64 == 12288);

	// Oversized count: 4569451684712230561 is 33 modulo 64.
	a.u64 = 953360005391419562;
	b.u64 = 4569451684712230561;
	ROR_64(a, b, c);
	REQUIRE(c.u64 == 0xD835C455069D81EF);

	// The top bit simply moves down by one position.
	a.u64 = 0x8000000000000000;
	b.u64 = 1;
	ROR_64(a, b, c);
	REQUIRE(c.u64 == 0x4000000000000000);
}
|
||||
|
||||
// FTZ: after FPINIT, a result too small for a normal double must come out as
// exactly zero instead of a denormal.
TEST_CASE("Denormal numbers are flushed to zero", "[FTZ]") {
	convertible_t one, c;
	FPINIT();

	// 1 / DBL_MAX lies below the smallest normal double.
	one.i64 = 1;
	FDIV_64(one, std::numeric_limits<double>::max(), c);
	REQUIRE(c.f64 == 0.0);
}
|
||||
|
||||
// Checks floating-point results for division by zero and for each of the four
// rounding modes. Statement order matters: FPINIT and rxRound change the
// floating-point control state that the following operations run under.
TEST_CASE("IEEE-754 compliance", "[FPU]") {
|
||||
FPINIT();
|
||||
convertible_t a, c;
|
||||
|
||||
// Division by zero yields an infinity carrying the sign of the dividend.
a.i64 = 1;
|
||||
FDIV_64(a, 0, c);
|
||||
REQUIRE(c.f64 == std::numeric_limits<double>::infinity());
|
||||
|
||||
a.i64 = -1;
|
||||
FDIV_64(a, 0, c);
|
||||
REQUIRE(c.f64 == -std::numeric_limits<double>::infinity());
|
||||
|
||||
// Addition: both operands are odd and larger than 2^53, so neither is exactly
// representable as a double and the result depends on the rounding mode.
REQUIRE(rxRound(RoundToNearest, 33073499373184121, -37713516328519941, &FADD) == -4640016955335824.0);
|
||||
REQUIRE(rxRound(RoundDown, 33073499373184121, -37713516328519941, &FADD) == -4640016955335824.0);
|
||||
REQUIRE(rxRound(RoundUp, 33073499373184121, -37713516328519941, &FADD) == -4640016955335812.0);
|
||||
REQUIRE(rxRound(RoundToZero, 33073499373184121, -37713516328519941, &FADD) == -4640016955335816.0);
|
||||
|
||||
// Subtraction: again both operands are odd and larger than 2^53.
REQUIRE(rxRound(RoundToNearest, -8570200862721897289, -1111111111111111119, &FSUB) == -7.4590897516107858e+18);
|
||||
REQUIRE(rxRound(RoundDown, -8570200862721897289, -1111111111111111119, &FSUB) == -7.4590897516107868e+18);
|
||||
REQUIRE(rxRound(RoundUp, -8570200862721897289, -1111111111111111119, &FSUB) == -7.4590897516107848e+18);
|
||||
REQUIRE(rxRound(RoundToZero, -8570200862721897289, -1111111111111111119, &FSUB) == -7.4590897516107848e+18);
|
||||
|
||||
// Division: -0.1 has no exact binary representation; the result is negative,
// so rounding up and rounding toward zero agree.
REQUIRE(rxRound(RoundToNearest, 1, -10, &FDIV) == -0.10000000000000001);
|
||||
REQUIRE(rxRound(RoundDown, 1, -10, &FDIV) == -0.10000000000000001);
|
||||
REQUIRE(rxRound(RoundUp, 1, -10, &FDIV) == -0.099999999999999992);
|
||||
REQUIRE(rxRound(RoundToZero, 1, -10, &FDIV) == -0.099999999999999992);
|
||||
|
||||
// Square root of |-2|: the result is positive, so rounding down and rounding
// toward zero agree. The second operand (0) is passed but the expected values
// do not depend on it.
REQUIRE(rxRound(RoundToNearest, -2, 0, &FABSQRT) == 1.4142135623730951);
|
||||
REQUIRE(rxRound(RoundDown, -2, 0, &FABSQRT) == 1.4142135623730949);
|
||||
REQUIRE(rxRound(RoundUp, -2, 0, &FABSQRT) == 1.4142135623730951);
|
||||
REQUIRE(rxRound(RoundToZero, -2, 0, &FABSQRT) == 1.4142135623730949);
|
||||
}
|
File diff suppressed because it is too large
Load diff
|
@ -1,10 +0,0 @@
|
|||
# Compiler flags used by make's built-in C++ compile rule.
CXXFLAGS=-Wall -std=c++17 -O0

# Default goal: link the test runner from its two object files.
TestAluFpu: TestAluFpu.o InstructionsPortable.o
	$(CXX) TestAluFpu.o InstructionsPortable.o -o $@

# No recipes here: the objects are built by make's implicit .cpp -> .o rule.
TestAluFpu.o: TestAluFpu.cpp
InstructionsPortable.o: InstructionsPortable.cpp

# clean names an action, not a file. Declaring it phony keeps it working
# even if a file called "clean" exists in this directory.
.PHONY: clean
clean:
	rm -f TestAluFpu TestAluFpu.o InstructionsPortable.o
|
Loading…
Reference in a new issue