mirror of
https://git.wownero.com/wownero/RandomWOW.git
synced 2024-08-15 00:23:14 +00:00
Branch prediction test programs
This commit is contained in:
parent
3639a5e08d
commit
8a81aa6453
6 changed files with 11833 additions and 0 deletions
2839
tests/branch_prediction/branch_always.c
Normal file
2839
tests/branch_prediction/branch_always.c
Normal file
File diff suppressed because it is too large
Load diff
2839
tests/branch_prediction/branch_mixed.c
Normal file
2839
tests/branch_prediction/branch_mixed.c
Normal file
File diff suppressed because it is too large
Load diff
2967
tests/branch_prediction/branch_predictably.c
Normal file
2967
tests/branch_prediction/branch_predictably.c
Normal file
File diff suppressed because it is too large
Load diff
2839
tests/branch_prediction/branch_randomly.c
Normal file
2839
tests/branch_prediction/branch_randomly.c
Normal file
File diff suppressed because it is too large
Load diff
16
tests/branch_prediction/makefile
Normal file
16
tests/branch_prediction/makefile
Normal file
|
@ -0,0 +1,16 @@
|
|||
# Build the branch-prediction test programs.
#
# Each program is a single C file compiled with gcc at -O0 (presumably so the
# compiler leaves the branches under test untouched -- confirm).
#
# Every binary lists its source as a prerequisite so that `make` rebuilds it
# when the source changes; `all` and `clean` are phony so that files with those
# names cannot shadow them.

PROGRAMS = branch_always branch_predictably branch_randomly branch_mixed

.PHONY: all clean

all: $(PROGRAMS)

branch_always: branch_always.c
	gcc -O0 branch_always.c -o branch_always

branch_predictably: branch_predictably.c
	gcc -O0 branch_predictably.c -o branch_predictably

branch_randomly: branch_randomly.c
	gcc -O0 branch_randomly.c -o branch_randomly

branch_mixed: branch_mixed.c
	gcc -O0 branch_mixed.c -o branch_mixed

# -f: do not fail when some of the binaries have not been built yet.
clean:
	rm -f $(PROGRAMS)
|
333
tests/branch_prediction/prof.h
Normal file
333
tests/branch_prediction/prof.h
Normal file
|
@ -0,0 +1,333 @@
|
|||
/*
|
||||
* Prof
|
||||
* ====
|
||||
*
|
||||
* Self-contained C/C++ profiler library for Linux.
|
||||
*
|
||||
* Prof offers a quick way to measure performance events (CPU clock cycles,
|
||||
* cache misses, branch mispredictions, etc.) of C/C++ code snippets. Prof is
|
||||
* just a wrapper around the `perf_event_open` system call; its main goal is to
|
||||
* be easy to setup and painless to use for targeted optimizations, namely, when
|
||||
* the hot spot has already been identified. Prof is in no way a replacement for
|
||||
* a fully-fledged profiler like perf, gprof, callgrind, etc.
|
||||
*
|
||||
* Please be aware that Prof uses `__attribute__((constructor))` to be as
|
||||
* straightforward to setup as possible, so it cannot be included more than
|
||||
* once.
|
||||
*
|
||||
* Examples
|
||||
* --------
|
||||
*
|
||||
* ### Minimal
|
||||
*
|
||||
* The following snippet prints the rough number of CPU clock cycles spent in
|
||||
* executing the code between the two Prof calls:
|
||||
*
|
||||
* ```c
|
||||
* #include "prof.h"
|
||||
*
|
||||
* int main()
|
||||
* {
|
||||
* PROF_START();
|
||||
* // slow code goes here...
|
||||
* PROF_STDOUT();
|
||||
* }
|
||||
* ```
|
||||
*
|
||||
* ### Custom options
|
||||
*
|
||||
* The following snippet instead counts both read and write faults of the level
|
||||
* 1 data cache that occur in the userland code between the two Prof calls:
|
||||
*
|
||||
* ```c
|
||||
* #include <stdio.h>
|
||||
*
|
||||
* #define PROF_USER_EVENTS_ONLY
|
||||
* #define PROF_EVENT_LIST \
|
||||
* PROF_EVENT_CACHE(L1D, READ, MISS) \
|
||||
* PROF_EVENT_CACHE(L1D, WRITE, MISS)
|
||||
* #include "prof.h"
|
||||
*
|
||||
* int main()
|
||||
* {
|
||||
* uint64_t faults[2] = { 0 };
|
||||
*
|
||||
* PROF_START();
|
||||
* // slow code goes here...
|
||||
* PROF_DO(faults[index] += counter);
|
||||
*
|
||||
* // fast or uninteresting code goes here...
|
||||
*
|
||||
* PROF_START();
|
||||
* // slow code goes here...
|
||||
* PROF_DO(faults[index] += counter);
|
||||
*
|
||||
* printf("Total L1 faults: R = %lu; W = %lu\n", faults[0], faults[1]);
|
||||
* }
|
||||
* ```
|
||||
*
|
||||
* Installation
|
||||
* ------------
|
||||
*
|
||||
* Just include `prof.h`. Here is a quick way to fetch the latest version:
|
||||
*
|
||||
* wget -q https://raw.githubusercontent.com/cyrus-and/prof/master/prof.h
|
||||
*/
|
||||
#ifndef PROF_H
|
||||
#define PROF_H
|
||||
|
||||
#include <errno.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <unistd.h>
|
||||
|
||||
/*
|
||||
* API
|
||||
* ---
|
||||
*/
|
||||
|
||||
/*
 * Reset the counters and (re)start counting the events.
 *
 * The events to be monitored are specified by setting the `PROF_EVENT_LIST`
 * macro before including this file to a list of `PROF_EVENT_*` invocations;
 * defaults to counting the number of CPU clock cycles.
 *
 * If the `PROF_USER_EVENTS_ONLY` macro is defined before including this file
 * then kernel and hypervisor events are excluded from the count.
 *
 * The event group is enabled first and reset second; either ioctl failing
 * aborts the program through `PROF_IOCTL_`.
 */
#define PROF_START() \
    do { \
        PROF_IOCTL_(ENABLE); \
        PROF_IOCTL_(RESET); \
    } while (0)
|
||||
|
||||
/*
 * Specify an event to be monitored; `type` and `config` are defined in the
 * documentation of the `perf_event_open` system call.
 *
 * The expansion is a `(uint32_t)type, (uint64_t)config,` pair WITH a trailing
 * comma, so that several invocations can simply be juxtaposed to build the
 * variadic argument list consumed by `prof_init_`.
 */
#define PROF_EVENT(type, config) \
    (uint32_t)(type), (uint64_t)(config),

/*
 * Same as `PROF_EVENT` but for hardware events; prefix `PERF_COUNT_HW_` must be
 * omitted from `config`.
 */
#define PROF_EVENT_HW(config) \
    PROF_EVENT(PERF_TYPE_HARDWARE, PERF_COUNT_HW_ ## config)

/*
 * Same as `PROF_EVENT` but for software events; prefix `PERF_COUNT_SW_` must be
 * omitted from `config`.
 */
#define PROF_EVENT_SW(config) \
    PROF_EVENT(PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ ## config)

/*
 * Same as `PROF_EVENT` but for cache events; prefixes `PERF_COUNT_HW_CACHE_`,
 * `PERF_COUNT_HW_CACHE_OP_` and `PERF_COUNT_HW_CACHE_RESULT_` must be omitted
 * from `cache`, `op` and `result`, respectively. Again `cache`, `op` and
 * `result` are defined in the documentation of the `perf_event_open` system
 * call.
 *
 * The three identifiers are packed into one config value: cache id in the low
 * bits, operation shifted left by 8 and result shifted left by 16.
 */
#define PROF_EVENT_CACHE(cache, op, result) \
    PROF_EVENT(PERF_TYPE_HW_CACHE, \
               (PERF_COUNT_HW_CACHE_ ## cache) | \
               (PERF_COUNT_HW_CACHE_OP_ ## op << 8) | \
               (PERF_COUNT_HW_CACHE_RESULT_ ## result << 16))
|
||||
|
||||
/*
 * Stop counting the events. The counter array can then be accessed with
 * `PROF_COUNTERS`.
 *
 * The event group is disabled first, then the counters are read into
 * `prof_event_buf_`.
 */
#define PROF_STOP() \
    do { \
        PROF_IOCTL_(DISABLE); \
        PROF_READ_COUNTERS_(prof_event_buf_); \
    } while (0)
|
||||
|
||||
/*
 * Access the counter array. The counters appear in the same order as the
 * events defined in `PROF_EVENT_LIST`. Elements of this array are 64 bit
 * unsigned integers.
 *
 * The first 64 bit word of the read buffer is skipped: with the group read
 * format, `perf_event_open(2)` documents that word as the number of events,
 * the counter values follow it.
 */
#define PROF_COUNTERS \
    (prof_event_buf_ + 1)
|
||||
|
||||
/*
 * Stop counting the events and execute the code provided by `block` once for
 * each event. Within `block`: `index` refers to the event position in the
 * counter array defined by `PROF_COUNTERS`; `counter` is the actual value of
 * the counter. Both are 64 bit unsigned integers.
 *
 * `block` is pasted as a single macro argument, so it must not contain commas
 * outside of parentheses.
 */
#define PROF_DO(block) \
    do { \
        uint64_t i_; \
        PROF_STOP(); \
        for (i_ = 0; i_ < prof_event_cnt_; i_++) { \
            uint64_t index = i_; \
            /* + 1: skip the leading word of the read buffer */ \
            uint64_t counter = prof_event_buf_[i_ + 1]; \
            /* silence unused-variable warnings when block ignores them */ \
            (void)index; \
            (void)counter; \
            block; \
        } \
    } while (0)
|
||||
|
||||
/*
 * Same as `PROF_DO` except that `callback` is the name of a *callable* object
 * (e.g. a function) which, for each event, is called with the two parameters
 * `index` and `counter`.
 */
#define PROF_CALL(callback) \
    PROF_DO(callback(index, counter))
|
||||
|
||||
/*
 * Stop counting the events and write to `file` (a stdio.h `FILE *`) as many
 * lines as there are events in `PROF_EVENT_LIST`. Each line contains `index`
 * and `counter` (as defined by `PROF_DO`) separated by a tab character. If
 * there is only one event then `index` is omitted.
 *
 * Both values are 64 bit unsigned integers; they are cast to
 * `unsigned long long` and printed with `%llu` so that the conversion
 * specifier matches the argument type even where `unsigned long` is narrower
 * than 64 bits.
 */
#define PROF_FILE(file) \
    PROF_DO(if (prof_event_cnt_ > 1) { \
                fprintf((file), "%llu\t%llu\n", \
                        (unsigned long long)index, \
                        (unsigned long long)counter); \
            } else { \
                fprintf((file), "%llu\n", (unsigned long long)counter); \
            } \
    )
|
||||
|
||||
/*
 * Same as `PROF_FILE` except that `file` is `stdout`.
 */
#define PROF_STDOUT() \
    PROF_FILE(stdout)
|
||||
|
||||
/*
 * Same as `PROF_FILE` except that `file` is `stderr`.
 */
#define PROF_STDERR() \
    PROF_FILE(stderr)
|
||||
|
||||
/* DEFAULTS ----------------------------------------------------------------- */
|
||||
|
||||
/*
 * Default event list, used when the includer did not define
 * `PROF_EVENT_LIST`: a single hardware cycle counter.
 *
 * NOTE(review): in <linux/perf_event.h> `PERF_COUNT_HW_REF_CPU_CYCLES` looks
 * like an enumerator rather than a preprocessor macro; if so this `#ifdef` is
 * never taken and the effective default is always `CPU_CYCLES` -- confirm
 * before relying on the `REF_CPU_CYCLES` branch.
 */
#ifndef PROF_EVENT_LIST
#ifdef PERF_COUNT_HW_REF_CPU_CYCLES /* since Linux 3.3 */
#define PROF_EVENT_LIST PROF_EVENT_HW(REF_CPU_CYCLES)
#else
#define PROF_EVENT_LIST PROF_EVENT_HW(CPU_CYCLES)
#endif
#endif
|
||||
|
||||
/* UTILITY ------------------------------------------------------------------ */
|
||||
|
||||
/*
 * Abort the program with a diagnostic on stderr when `x` is false.
 *
 * The message has the form `# <file>:<line>: PROF error`, followed by
 * ` (<strerror(errno)>)` when `errno` is non-zero, and a newline. `errno` is
 * saved before anything is printed so that the first `fprintf` cannot clobber
 * it, and the whole message (newline included) goes to stderr.
 */
#define PROF_ASSERT_(x) \
    do { \
        if (!(x)) { \
            const int prof_errno_ = errno; \
            fprintf(stderr, "# %s:%d: PROF error", __FILE__, __LINE__); \
            if (prof_errno_) { \
                fprintf(stderr, " (%s)", strerror(prof_errno_)); \
            } \
            fprintf(stderr, "\n"); \
            abort(); \
        } \
    } while (0)
|
||||
|
||||
/*
 * Issue the `PERF_EVENT_IOC_<mode>` ioctl on the descriptor stored in
 * `prof_fd_`, passing `PERF_IOC_FLAG_GROUP` as its argument. Aborts through
 * `PROF_ASSERT_` when the ioctl returns -1.
 */
#define PROF_IOCTL_(mode) \
    do { \
        PROF_ASSERT_(ioctl(prof_fd_, \
                           PERF_EVENT_IOC_ ## mode, \
                           PERF_IOC_FLAG_GROUP) != -1); \
    } while (0)
|
||||
|
||||
/*
 * Read the group counters from `prof_fd_` into `buffer`.
 *
 * Exactly `prof_event_cnt_ + 1` 64 bit words are expected (one leading word
 * plus one value per event); a short read or an error aborts through
 * `PROF_ASSERT_`.
 */
#define PROF_READ_COUNTERS_(buffer) \
    do { \
        const ssize_t prof_to_read_ = \
            (ssize_t)(sizeof(uint64_t) * (prof_event_cnt_ + 1)); \
        PROF_ASSERT_(read(prof_fd_, (buffer), (size_t)prof_to_read_) == \
                     prof_to_read_); \
    } while (0)
|
||||
|
||||
/* SETUP -------------------------------------------------------------------- */
|
||||
|
||||
/* Descriptor of the first event opened by `prof_init_` (the group leader). */
static int prof_fd_;
/* Number of events opened by `prof_init_`. */
static uint64_t prof_event_cnt_;
/* Read buffer of `prof_event_cnt_ + 1` words, allocated by `prof_init_`. */
static uint64_t *prof_event_buf_;
|
||||
|
||||
static void prof_init_(uint64_t dummy, ...) {
|
||||
uint32_t type;
|
||||
va_list ap;
|
||||
|
||||
prof_fd_ = -1;
|
||||
prof_event_cnt_ = 0;
|
||||
va_start(ap, dummy);
|
||||
while (type = va_arg(ap, uint32_t), type != (uint32_t)-1) {
|
||||
struct perf_event_attr pe;
|
||||
uint64_t config;
|
||||
int fd;
|
||||
|
||||
config = va_arg(ap, uint64_t);
|
||||
|
||||
memset(&pe, 0, sizeof(struct perf_event_attr));
|
||||
pe.size = sizeof(struct perf_event_attr);
|
||||
pe.read_format = PERF_FORMAT_GROUP;
|
||||
pe.type = type;
|
||||
pe.config = config;
|
||||
#ifdef PROF_USER_EVENTS_ONLY
|
||||
pe.exclude_kernel = 1;
|
||||
pe.exclude_hv = 1;
|
||||
#endif
|
||||
|
||||
fd = syscall(__NR_perf_event_open, &pe, 0, -1, prof_fd_, 0);
|
||||
PROF_ASSERT_(fd != -1);
|
||||
if (prof_fd_ == -1) {
|
||||
prof_fd_ = fd;
|
||||
}
|
||||
|
||||
prof_event_cnt_++;
|
||||
}
|
||||
va_end(ap);
|
||||
|
||||
prof_event_buf_ = (uint64_t *)malloc((prof_event_cnt_ + 1) *
|
||||
sizeof(uint64_t));
|
||||
}
|
||||
|
||||
void __attribute__((constructor)) prof_init()
|
||||
{
|
||||
prof_init_(0, PROF_EVENT_LIST /*,*/ (uint32_t)-1);
|
||||
}
|
||||
|
||||
void __attribute__((destructor)) prof_fini()
|
||||
{
|
||||
PROF_ASSERT_(close(prof_fd_) != -1);
|
||||
free(prof_event_buf_);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* License
|
||||
* -------
|
||||
*
|
||||
* Copyright (c) 2017 Andrea Cardaci <cyrus.and@gmail.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
Loading…
Reference in a new issue