aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMartin Storsjö <martin@martin.st>2024-03-28 11:30:41 +0200
committerMartin Storsjö <martin@martin.st>2024-04-02 10:35:29 +0000
commit5e31720b8902ec9bcf1f3aaa9a135ee34b58af30 (patch)
tree789461c03738fdc5c1d319c54556603415f57388
parentabc8a1689fbefec880bb3c0064c66afcb1e9d4b9 (diff)
downloadlibdav1d-5e31720b8902ec9bcf1f3aaa9a135ee34b58af30.tar.gz
checkasm: Add support for the private macOS kperf API for benchmarking
On AArch64, the performance counter registers usually are restricted and not accessible from user space. On macOS, we currently use mach_absolute_time() as timer on aarch64. This measures wallclock time but with a very coarse resolution. There is a private API, kperf, that one can use for getting high precision timers though. Unfortunately, it requires running the checkasm binary as root (e.g. with sudo). Also, as it is a private, undocumented API, it can potentially change at any time. This is handled by adding a new meson build option, for switching to this timer. If the timer source in checkasm could be changed at runtime with an option, this wouldn't need to be a build time option. This allows getting benchmarks like this: mc_8tap_regular_w16_hv_8bpc_c: 1522.1 ( 1.00x) mc_8tap_regular_w16_hv_8bpc_neon: 331.8 ( 4.59x) Instead of this: mc_8tap_regular_w16_hv_8bpc_c: 9.0 ( 1.00x) mc_8tap_regular_w16_hv_8bpc_neon: 1.9 ( 4.76x) Co-authored-by: J. Dekker <jdek@itanimul.li>
-rw-r--r--meson.build2
-rw-r--r--meson_options.txt5
-rw-r--r--tests/checkasm/checkasm.c83
-rw-r--r--tests/checkasm/checkasm.h3
4 files changed, 93 insertions, 0 deletions
diff --git a/meson.build b/meson.build
index e371415..a2637ed 100644
--- a/meson.build
+++ b/meson.build
@@ -81,6 +81,8 @@ cdata.set10('TRIM_DSP_FUNCTIONS', get_option('trim_dsp') == 'true' or
# Logging option
cdata.set10('CONFIG_LOG', get_option('logging'))
+# macOS kperf benchmarking option (checked as CONFIG_MACOS_KPERF in tests/checkasm)
+cdata.set10('CONFIG_MACOS_KPERF', get_option('macos_kperf'))
+
#
# OS/Compiler checks and defines
#
diff --git a/meson_options.txt b/meson_options.txt
index c04deff..b0b45b4 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -68,3 +68,8 @@ option('trim_dsp',
choices: ['true', 'false', 'if-release'],
value: 'if-release',
description: 'Eliminate redundant DSP functions where possible')
+
+# Off by default. When enabled, checkasm benchmarks time themselves through
+# the private kperf API, which checkasm reports as needing to run as root.
+option('macos_kperf',
+ type: 'boolean',
+ value: false,
+ description: 'Use the private macOS kperf API for benchmarking')
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index 9a01da7..fd11c0d 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -54,6 +54,9 @@
#include <mach/mach_time.h>
#endif
#endif
+#if CONFIG_MACOS_KPERF
+#include <dlfcn.h>
+#endif
#define COLOR_RED 31
#define COLOR_GREEN 32
@@ -206,6 +209,82 @@ int xor128_rand(void) {
return w >> 1;
}
+#if CONFIG_MACOS_KPERF
+
+// Resolved from the kperf framework by kperf_init(); called by
+// checkasm_kperf_cycles() to read the counters.
+static int (*kpc_get_thread_counters)(int, unsigned int, void *);
+
+// OR'ed into the counter configuration word in kperf_init().
+// NOTE(review): presumably enables counting of 64-bit user-space (EL0) code - confirm.
+#define CFGWORD_EL0A64EN_MASK (0x20000)
+
+// NOTE(review): presumably the event number for core cycles - confirm.
+#define CPMU_CORE_CYCLE 0x02
+
+// Counter class bits; both are combined into KPC_MASK below.
+#define KPC_CLASS_FIXED_MASK (1 << 0)
+#define KPC_CLASS_CONFIGURABLE_MASK (1 << 1)
+
+// Counter and config counts that kperf_init() requires the API to report for
+// KPC_MASK; initialization fails if either differs. COUNTERS_COUNT also sizes
+// the buffers handed to the kpc_* calls.
+#define COUNTERS_COUNT 10
+#define CONFIG_COUNT 8
+// Class mask passed to the kpc_* count/config/counting calls in kperf_init().
+#define KPC_MASK (KPC_CLASS_CONFIGURABLE_MASK | KPC_CLASS_FIXED_MASK)
+
+// Loads the private kperf framework, resolves the kpc_* entry points and
+// enables the performance counters read by checkasm_kperf_cycles().
+//
+// Returns 0 on success. On any failure a diagnostic is printed to stderr
+// and 1 is returned.
+static int kperf_init(void) {
+    uint64_t config[COUNTERS_COUNT] = { 0 };
+
+    // The handle is never dlclose()d: kpc_get_thread_counters keeps pointing
+    // into the library after this function returns.
+    void *kperf = dlopen("/System/Library/PrivateFrameworks/kperf.framework/kperf", RTLD_LAZY);
+    if (!kperf) {
+        fprintf(stderr, "checkasm: Unable to load kperf: %s\n", dlerror());
+        return 1;
+    }
+
+    // Any of these may be NULL if the private API has changed; each pointer
+    // is checked right before its first use below.
+    int (*kpc_force_all_ctrs_set)(int) = dlsym(kperf, "kpc_force_all_ctrs_set");
+    int (*kpc_set_counting)(uint32_t) = dlsym(kperf, "kpc_set_counting");
+    int (*kpc_set_thread_counting)(uint32_t) = dlsym(kperf, "kpc_set_thread_counting");
+    int (*kpc_set_config)(uint32_t, void *) = dlsym(kperf, "kpc_set_config");
+    uint32_t (*kpc_get_counter_count)(uint32_t) = dlsym(kperf, "kpc_get_counter_count");
+    uint32_t (*kpc_get_config_count)(uint32_t) = dlsym(kperf, "kpc_get_config_count");
+    kpc_get_thread_counters = dlsym(kperf, "kpc_get_thread_counters");
+
+    if (!kpc_get_thread_counters) {
+        fprintf(stderr, "checkasm: Unable to load kpc_get_thread_counters\n");
+        return 1;
+    }
+
+    // The buffer sizes used here and in checkasm_kperf_cycles() are fixed at
+    // compile time, so refuse to run if the API reports different counts.
+    if (!kpc_get_counter_count || kpc_get_counter_count(KPC_MASK) != COUNTERS_COUNT) {
+        fprintf(stderr, "checkasm: Unexpected kpc_get_counter_count\n");
+        return 1;
+    }
+    if (!kpc_get_config_count || kpc_get_config_count(KPC_MASK) != CONFIG_COUNT) {
+        fprintf(stderr, "checkasm: Unexpected kpc_get_config_count\n");
+        return 1;
+    }
+
+    // NOTE(review): presumably programs the first slot to count core cycles
+    // in user mode - confirm against the kperf/CPMU definitions.
+    config[0] = CPMU_CORE_CYCLE | CFGWORD_EL0A64EN_MASK;
+
+    if (!kpc_set_config || kpc_set_config(KPC_MASK, config)) {
+        fprintf(stderr, "checkasm: The kperf API needs to be run as root\n");
+        return 1;
+    }
+    if (!kpc_force_all_ctrs_set || kpc_force_all_ctrs_set(1)) {
+        fprintf(stderr, "checkasm: kpc_force_all_ctrs_set failed\n");
+        return 1;
+    }
+    if (!kpc_set_counting || kpc_set_counting(KPC_MASK)) {
+        fprintf(stderr, "checkasm: kpc_set_counting failed\n");
+        return 1;
+    }
+    // Guard the pointer that is actually called here (the original tested
+    // kpc_set_counting, leaving a missing symbol to be called through NULL).
+    if (!kpc_set_thread_counting || kpc_set_thread_counting(KPC_MASK)) {
+        fprintf(stderr, "checkasm: kpc_set_thread_counting failed\n");
+        return 1;
+    }
+    return 0;
+}
+
+// Timer source for readtime() when CONFIG_MACOS_KPERF is enabled.
+//
+// Reads the calling thread's counters through kpc_get_thread_counters
+// (resolved by kperf_init()) and returns the first one. If the read reports
+// an error, returns (uint64_t)-1.
+uint64_t checkasm_kperf_cycles(void) {
+    uint64_t values[COUNTERS_COUNT];
+    const int err = kpc_get_thread_counters(0, COUNTERS_COUNT, values);
+
+    return err ? (uint64_t)-1 : values[0];
+}
+#endif
+
static int is_negative(const intfloat u) {
return u.i >> 31;
}
@@ -714,6 +793,10 @@ int main(int argc, char *argv[]) {
#ifdef readtime
if (state.run_mode == RUN_BENCHMARK) {
+#if CONFIG_MACOS_KPERF
+ if (kperf_init())
+ return 1;
+#endif
if (!checkasm_save_context()) {
checkasm_set_signal_handler_state(1);
readtime();
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index eeda5df..8baeec6 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -146,6 +146,9 @@ static inline uint64_t readtime(void) {
}
#define readtime readtime
#endif
+#elif CONFIG_MACOS_KPERF
+uint64_t checkasm_kperf_cycles(void);
+#define readtime() checkasm_kperf_cycles()
#elif (ARCH_AARCH64 || ARCH_ARM) && defined(__APPLE__)
#include <mach/mach_time.h>
#define readtime() mach_absolute_time()