From 29f0e2ced866e321d8293d4d1bdae71a5fdc90ec Mon Sep 17 00:00:00 2001 From: Marat Dukhan Date: Mon, 30 Sep 2019 15:22:14 -0700 Subject: New pthreadpool_parallelize_* API --- bench/latency.cc | 65 +++++++++++++++++++++++++++++++++++------------------ bench/throughput.cc | 51 ++++++++++++++++++++++++++++------------- 2 files changed, 78 insertions(+), 38 deletions(-) (limited to 'bench') diff --git a/bench/latency.cc b/bench/latency.cc index f20a794..f500cdf 100644 --- a/bench/latency.cc +++ b/bench/latency.cc @@ -6,67 +6,88 @@ static void SetNumberOfThreads(benchmark::internal::Benchmark* benchmark) { - const int maxThreads = sysconf(_SC_NPROCESSORS_ONLN); - for (int t = 0; t <= maxThreads; t++) { + const int max_threads = sysconf(_SC_NPROCESSORS_ONLN); + for (int t = 1; t <= max_threads; t++) { benchmark->Arg(t); } } -static void compute_1d(void* context, size_t x) { +static void compute_1d(void*, size_t x) { } -static void pthreadpool_compute_1d(benchmark::State& state) { +static void pthreadpool_parallelize_1d(benchmark::State& state) { const uint32_t threads = static_cast<uint32_t>(state.range(0)); - pthreadpool_t threadpool = threads == 0 ? 
NULL : pthreadpool_create(threads); + pthreadpool_t threadpool = pthreadpool_create(threads); while (state.KeepRunning()) { - pthreadpool_compute_1d(threadpool, compute_1d, NULL, threads); + pthreadpool_parallelize_1d( + threadpool, + compute_1d, + nullptr /* context */, + threads, + 0 /* flags */); } pthreadpool_destroy(threadpool); } -BENCHMARK(pthreadpool_compute_1d)->UseRealTime()->Apply(SetNumberOfThreads); +BENCHMARK(pthreadpool_parallelize_1d)->UseRealTime()->Apply(SetNumberOfThreads); -static void compute_1d_tiled(void* context, size_t x0, size_t xn) { +static void compute_1d_tile_1d(void*, size_t, size_t) { } -static void pthreadpool_compute_1d_tiled(benchmark::State& state) { +static void pthreadpool_parallelize_1d_tile_1d(benchmark::State& state) { const uint32_t threads = static_cast<uint32_t>(state.range(0)); - pthreadpool_t threadpool = threads == 0 ? NULL : pthreadpool_create(threads); + pthreadpool_t threadpool = pthreadpool_create(threads); while (state.KeepRunning()) { - pthreadpool_compute_1d_tiled(threadpool, compute_1d_tiled, NULL, threads, 1); + pthreadpool_parallelize_1d_tile_1d( + threadpool, + compute_1d_tile_1d, + nullptr /* context */, + threads, 1, + 0 /* flags */); } pthreadpool_destroy(threadpool); } -BENCHMARK(pthreadpool_compute_1d_tiled)->UseRealTime()->Apply(SetNumberOfThreads); +BENCHMARK(pthreadpool_parallelize_1d_tile_1d)->UseRealTime()->Apply(SetNumberOfThreads); -static void compute_2d(void* context, size_t x, size_t y) { +static void compute_2d(void*, size_t, size_t) { } -static void pthreadpool_compute_2d(benchmark::State& state) { +static void pthreadpool_parallelize_2d(benchmark::State& state) { const uint32_t threads = static_cast<uint32_t>(state.range(0)); - pthreadpool_t threadpool = threads == 0 ? 
NULL : pthreadpool_create(threads); + pthreadpool_t threadpool = pthreadpool_create(threads); while (state.KeepRunning()) { - pthreadpool_compute_2d(threadpool, compute_2d, NULL, 1, threads); + pthreadpool_parallelize_2d( + threadpool, + compute_2d, + nullptr /* context */, + 1, threads, + 0 /* flags */); } pthreadpool_destroy(threadpool); } -BENCHMARK(pthreadpool_compute_2d)->UseRealTime()->Apply(SetNumberOfThreads); +BENCHMARK(pthreadpool_parallelize_2d)->UseRealTime()->Apply(SetNumberOfThreads); -static void compute_2d_tiled(void* context, size_t x0, size_t y0, size_t xn, size_t yn) { +static void compute_2d_tile_2d(void*, size_t, size_t, size_t, size_t) { } -static void pthreadpool_compute_2d_tiled(benchmark::State& state) { +static void pthreadpool_parallelize_2d_tile_2d(benchmark::State& state) { const uint32_t threads = static_cast<uint32_t>(state.range(0)); - pthreadpool_t threadpool = threads == 0 ? NULL : pthreadpool_create(threads); + pthreadpool_t threadpool = pthreadpool_create(threads); while (state.KeepRunning()) { - pthreadpool_compute_2d_tiled(threadpool, compute_2d_tiled, NULL, 1, threads, 1, 1); + pthreadpool_parallelize_2d_tile_2d( + threadpool, + compute_2d_tile_2d, + nullptr /* context */, + 1, threads, + 1, 1, + 0 /* flags */); } pthreadpool_destroy(threadpool); } -BENCHMARK(pthreadpool_compute_2d_tiled)->UseRealTime()->Apply(SetNumberOfThreads); +BENCHMARK(pthreadpool_parallelize_2d_tile_2d)->UseRealTime()->Apply(SetNumberOfThreads); BENCHMARK_MAIN(); diff --git a/bench/throughput.cc b/bench/throughput.cc index cef3442..2242ccb 100644 --- a/bench/throughput.cc +++ b/bench/throughput.cc @@ -1,80 +1,99 @@ #include <benchmark/benchmark.h> -#include <unistd.h> - #include <pthreadpool.h> -static void compute_1d(void* context, size_t x) { +static void compute_1d(void*, size_t) { } -static void pthreadpool_compute_1d(benchmark::State& state) { +static void pthreadpool_parallelize_1d(benchmark::State& state) { pthreadpool_t threadpool = pthreadpool_create(0); const size_t threads = 
pthreadpool_get_threads_count(threadpool); const size_t items = static_cast<size_t>(state.range(0)); while (state.KeepRunning()) { - pthreadpool_compute_1d(threadpool, compute_1d, NULL, items * threads); + pthreadpool_parallelize_1d( + threadpool, + compute_1d, + nullptr /* context */, + items * threads, + 0 /* flags */); } pthreadpool_destroy(threadpool); /* Do not normalize by thread */ state.SetItemsProcessed(int64_t(state.iterations()) * items); } -BENCHMARK(pthreadpool_compute_1d)->UseRealTime()->RangeMultiplier(10)->Range(10, 1000000); +BENCHMARK(pthreadpool_parallelize_1d)->UseRealTime()->RangeMultiplier(10)->Range(10, 1000000); -static void compute_1d_tiled(void* context, size_t x0, size_t xn) { +static void compute_1d_tile_1d(void*, size_t, size_t) { } -static void pthreadpool_compute_1d_tiled(benchmark::State& state) { +static void pthreadpool_parallelize_1d_tile_1d(benchmark::State& state) { pthreadpool_t threadpool = pthreadpool_create(0); const size_t threads = pthreadpool_get_threads_count(threadpool); const size_t items = static_cast<size_t>(state.range(0)); while (state.KeepRunning()) { - pthreadpool_compute_1d_tiled(threadpool, compute_1d_tiled, NULL, items * threads, 1); + pthreadpool_parallelize_1d_tile_1d( + threadpool, + compute_1d_tile_1d, + nullptr /* context */, + items * threads, 1, + 0 /* flags */); } pthreadpool_destroy(threadpool); /* Do not normalize by thread */ state.SetItemsProcessed(int64_t(state.iterations()) * items); } -BENCHMARK(pthreadpool_compute_1d_tiled)->UseRealTime()->RangeMultiplier(10)->Range(10, 1000000); +BENCHMARK(pthreadpool_parallelize_1d_tile_1d)->UseRealTime()->RangeMultiplier(10)->Range(10, 1000000); static void compute_2d(void* context, size_t x, size_t y) { } -static void pthreadpool_compute_2d(benchmark::State& state) { +static void pthreadpool_parallelize_2d(benchmark::State& state) { pthreadpool_t threadpool = pthreadpool_create(0); const size_t threads = pthreadpool_get_threads_count(threadpool); const size_t items = 
static_cast<size_t>(state.range(0)); while (state.KeepRunning()) { - pthreadpool_compute_2d(threadpool, compute_2d, NULL, threads, items); + pthreadpool_parallelize_2d( + threadpool, + compute_2d, + nullptr /* context */, + threads, items, + 0 /* flags */); } pthreadpool_destroy(threadpool); /* Do not normalize by thread */ state.SetItemsProcessed(int64_t(state.iterations()) * items); } -BENCHMARK(pthreadpool_compute_2d)->UseRealTime()->RangeMultiplier(10)->Range(10, 1000000); +BENCHMARK(pthreadpool_parallelize_2d)->UseRealTime()->RangeMultiplier(10)->Range(10, 1000000); static void compute_2d_tiled(void* context, size_t x0, size_t y0, size_t xn, size_t yn) { } -static void pthreadpool_compute_2d_tiled(benchmark::State& state) { +static void pthreadpool_parallelize_2d_tile_2d(benchmark::State& state) { pthreadpool_t threadpool = pthreadpool_create(0); const size_t threads = pthreadpool_get_threads_count(threadpool); const size_t items = static_cast<size_t>(state.range(0)); while (state.KeepRunning()) { - pthreadpool_compute_2d_tiled(threadpool, compute_2d_tiled, NULL, threads, items, 1, 1); + pthreadpool_parallelize_2d_tile_2d( + threadpool, + compute_2d_tiled, + nullptr /* context */, + threads, items, + 1, 1, + 0 /* flags */); } pthreadpool_destroy(threadpool); /* Do not normalize by thread */ state.SetItemsProcessed(int64_t(state.iterations()) * items); } -BENCHMARK(pthreadpool_compute_2d_tiled)->UseRealTime()->RangeMultiplier(10)->Range(10, 1000000); +BENCHMARK(pthreadpool_parallelize_2d_tile_2d)->UseRealTime()->RangeMultiplier(10)->Range(10, 1000000); BENCHMARK_MAIN(); -- cgit v1.2.3