From 344531b40881b1ee41508a9c70c8fbbef3bd6cad Mon Sep 17 00:00:00 2001 From: Marat Dukhan Date: Sun, 6 Dec 2020 22:09:17 -0800 Subject: Throughput benchmarks for 3D/4D/5D/6D parallelization with 1D and no tiling --- bench/throughput.cc | 322 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 315 insertions(+), 7 deletions(-) (limited to 'bench') diff --git a/bench/throughput.cc b/bench/throughput.cc index 2242ccb..47c8da7 100644 --- a/bench/throughput.cc +++ b/bench/throughput.cc @@ -7,7 +7,7 @@ static void compute_1d(void*, size_t) { } static void pthreadpool_parallelize_1d(benchmark::State& state) { - pthreadpool_t threadpool = pthreadpool_create(0); + pthreadpool_t threadpool = pthreadpool_create(2); const size_t threads = pthreadpool_get_threads_count(threadpool); const size_t items = static_cast<size_t>(state.range(0)); while (state.KeepRunning()) { @@ -30,7 +30,7 @@ static void compute_1d_tile_1d(void*, size_t, size_t) { } static void pthreadpool_parallelize_1d_tile_1d(benchmark::State& state) { - pthreadpool_t threadpool = pthreadpool_create(0); + pthreadpool_t threadpool = pthreadpool_create(2); const size_t threads = pthreadpool_get_threads_count(threadpool); const size_t items = static_cast<size_t>(state.range(0)); while (state.KeepRunning()) { @@ -49,11 +49,11 @@ static void pthreadpool_parallelize_1d_tile_1d(benchmark::State& state) { BENCHMARK(pthreadpool_parallelize_1d_tile_1d)->UseRealTime()->RangeMultiplier(10)->Range(10, 1000000); -static void compute_2d(void* context, size_t x, size_t y) { +static void compute_2d(void*, size_t, size_t) { } static void pthreadpool_parallelize_2d(benchmark::State& state) { - pthreadpool_t threadpool = pthreadpool_create(0); + pthreadpool_t threadpool = pthreadpool_create(2); const size_t threads = pthreadpool_get_threads_count(threadpool); const size_t items = static_cast<size_t>(state.range(0)); while (state.KeepRunning()) { @@ -72,17 +72,41 @@ static void pthreadpool_parallelize_2d(benchmark::State& state) { 
BENCHMARK(pthreadpool_parallelize_2d)->UseRealTime()->RangeMultiplier(10)->Range(10, 1000000); -static void compute_2d_tiled(void* context, size_t x0, size_t y0, size_t xn, size_t yn) { +static void compute_2d_tile_1d(void*, size_t, size_t, size_t) { +} + +static void pthreadpool_parallelize_2d_tile_1d(benchmark::State& state) { + pthreadpool_t threadpool = pthreadpool_create(2); + const size_t threads = pthreadpool_get_threads_count(threadpool); + const size_t items = static_cast<size_t>(state.range(0)); + while (state.KeepRunning()) { + pthreadpool_parallelize_2d_tile_1d( + threadpool, + compute_2d_tile_1d, + nullptr /* context */, + threads, items, + 1, + 0 /* flags */); + } + pthreadpool_destroy(threadpool); + + /* Do not normalize by thread */ + state.SetItemsProcessed(int64_t(state.iterations()) * items); +} +BENCHMARK(pthreadpool_parallelize_2d_tile_1d)->UseRealTime()->RangeMultiplier(10)->Range(10, 1000000); + + +static void compute_2d_tile_2d(void*, size_t, size_t, size_t, size_t) { } static void pthreadpool_parallelize_2d_tile_2d(benchmark::State& state) { - pthreadpool_t threadpool = pthreadpool_create(0); + pthreadpool_t threadpool = pthreadpool_create(2); const size_t threads = pthreadpool_get_threads_count(threadpool); const size_t items = static_cast<size_t>(state.range(0)); while (state.KeepRunning()) { pthreadpool_parallelize_2d_tile_2d( threadpool, - compute_2d_tiled, + compute_2d_tile_2d, nullptr /* context */, threads, items, 1, 1, @@ -96,4 +120,288 @@ static void pthreadpool_parallelize_2d_tile_2d(benchmark::State& state) { BENCHMARK(pthreadpool_parallelize_2d_tile_2d)->UseRealTime()->RangeMultiplier(10)->Range(10, 1000000); +static void compute_3d(void*, size_t, size_t, size_t) { +} + +static void pthreadpool_parallelize_3d(benchmark::State& state) { + pthreadpool_t threadpool = pthreadpool_create(2); + const size_t threads = pthreadpool_get_threads_count(threadpool); + const size_t items = static_cast<size_t>(state.range(0)); + while (state.KeepRunning()) { + 
pthreadpool_parallelize_3d( + threadpool, + compute_3d, + nullptr /* context */, + 1, threads, items, + 0 /* flags */); + } + pthreadpool_destroy(threadpool); + + /* Do not normalize by thread */ + state.SetItemsProcessed(int64_t(state.iterations()) * items); +} +BENCHMARK(pthreadpool_parallelize_3d)->UseRealTime()->RangeMultiplier(10)->Range(10, 1000000); + + +static void compute_3d_tile_1d(void*, size_t, size_t, size_t, size_t) { +} + +static void pthreadpool_parallelize_3d_tile_1d(benchmark::State& state) { + pthreadpool_t threadpool = pthreadpool_create(2); + const size_t threads = pthreadpool_get_threads_count(threadpool); + const size_t items = static_cast<size_t>(state.range(0)); + while (state.KeepRunning()) { + pthreadpool_parallelize_3d_tile_1d( + threadpool, + compute_3d_tile_1d, + nullptr /* context */, + 1, threads, items, + 1, + 0 /* flags */); + } + pthreadpool_destroy(threadpool); + + /* Do not normalize by thread */ + state.SetItemsProcessed(int64_t(state.iterations()) * items); +} +BENCHMARK(pthreadpool_parallelize_3d_tile_1d)->UseRealTime()->RangeMultiplier(10)->Range(10, 1000000); + + +static void compute_3d_tile_2d(void*, size_t, size_t, size_t, size_t, size_t) { +} + +static void pthreadpool_parallelize_3d_tile_2d(benchmark::State& state) { + pthreadpool_t threadpool = pthreadpool_create(2); + const size_t threads = pthreadpool_get_threads_count(threadpool); + const size_t items = static_cast<size_t>(state.range(0)); + while (state.KeepRunning()) { + pthreadpool_parallelize_3d_tile_2d( + threadpool, + compute_3d_tile_2d, + nullptr /* context */, + 1, threads, items, + 1, 1, + 0 /* flags */); + } + pthreadpool_destroy(threadpool); + + /* Do not normalize by thread */ + state.SetItemsProcessed(int64_t(state.iterations()) * items); +} +BENCHMARK(pthreadpool_parallelize_3d_tile_2d)->UseRealTime()->RangeMultiplier(10)->Range(10, 1000000); + + +static void compute_4d(void*, size_t, size_t, size_t, size_t) { +} + +static void 
pthreadpool_parallelize_4d(benchmark::State& state) { + pthreadpool_t threadpool = pthreadpool_create(2); + const size_t threads = pthreadpool_get_threads_count(threadpool); + const size_t items = static_cast<size_t>(state.range(0)); + while (state.KeepRunning()) { + pthreadpool_parallelize_4d( + threadpool, + compute_4d, + nullptr /* context */, + 1, 1, threads, items, + 0 /* flags */); + } + pthreadpool_destroy(threadpool); + + /* Do not normalize by thread */ + state.SetItemsProcessed(int64_t(state.iterations()) * items); +} +BENCHMARK(pthreadpool_parallelize_4d)->UseRealTime()->RangeMultiplier(10)->Range(10, 1000000); + + +static void compute_4d_tile_1d(void*, size_t, size_t, size_t, size_t, size_t) { +} + +static void pthreadpool_parallelize_4d_tile_1d(benchmark::State& state) { + pthreadpool_t threadpool = pthreadpool_create(2); + const size_t threads = pthreadpool_get_threads_count(threadpool); + const size_t items = static_cast<size_t>(state.range(0)); + while (state.KeepRunning()) { + pthreadpool_parallelize_4d_tile_1d( + threadpool, + compute_4d_tile_1d, + nullptr /* context */, + 1, 1, threads, items, + 1, + 0 /* flags */); + } + pthreadpool_destroy(threadpool); + + /* Do not normalize by thread */ + state.SetItemsProcessed(int64_t(state.iterations()) * items); +} +BENCHMARK(pthreadpool_parallelize_4d_tile_1d)->UseRealTime()->RangeMultiplier(10)->Range(10, 1000000); + + +static void compute_4d_tile_2d(void*, size_t, size_t, size_t, size_t, size_t, size_t) { +} + +static void pthreadpool_parallelize_4d_tile_2d(benchmark::State& state) { + pthreadpool_t threadpool = pthreadpool_create(2); + const size_t threads = pthreadpool_get_threads_count(threadpool); + const size_t items = static_cast<size_t>(state.range(0)); + while (state.KeepRunning()) { + pthreadpool_parallelize_4d_tile_2d( + threadpool, + compute_4d_tile_2d, + nullptr /* context */, + 1, 1, threads, items, + 1, 1, + 0 /* flags */); + } + pthreadpool_destroy(threadpool); + + /* Do not normalize by thread */ + 
state.SetItemsProcessed(int64_t(state.iterations()) * items); +} +BENCHMARK(pthreadpool_parallelize_4d_tile_2d)->UseRealTime()->RangeMultiplier(10)->Range(10, 1000000); + + +static void compute_5d(void*, size_t, size_t, size_t, size_t, size_t) { +} + +static void pthreadpool_parallelize_5d(benchmark::State& state) { + pthreadpool_t threadpool = pthreadpool_create(2); + const size_t threads = pthreadpool_get_threads_count(threadpool); + const size_t items = static_cast<size_t>(state.range(0)); + while (state.KeepRunning()) { + pthreadpool_parallelize_5d( + threadpool, + compute_5d, + nullptr /* context */, + 1, 1, 1, threads, items, + 0 /* flags */); + } + pthreadpool_destroy(threadpool); + + /* Do not normalize by thread */ + state.SetItemsProcessed(int64_t(state.iterations()) * items); +} +BENCHMARK(pthreadpool_parallelize_5d)->UseRealTime()->RangeMultiplier(10)->Range(10, 1000000); + + +static void compute_5d_tile_1d(void*, size_t, size_t, size_t, size_t, size_t, size_t) { +} + +static void pthreadpool_parallelize_5d_tile_1d(benchmark::State& state) { + pthreadpool_t threadpool = pthreadpool_create(2); + const size_t threads = pthreadpool_get_threads_count(threadpool); + const size_t items = static_cast<size_t>(state.range(0)); + while (state.KeepRunning()) { + pthreadpool_parallelize_5d_tile_1d( + threadpool, + compute_5d_tile_1d, + nullptr /* context */, + 1, 1, 1, threads, items, + 1, + 0 /* flags */); + } + pthreadpool_destroy(threadpool); + + /* Do not normalize by thread */ + state.SetItemsProcessed(int64_t(state.iterations()) * items); +} +BENCHMARK(pthreadpool_parallelize_5d_tile_1d)->UseRealTime()->RangeMultiplier(10)->Range(10, 1000000); + + +static void compute_5d_tile_2d(void*, size_t, size_t, size_t, size_t, size_t, size_t, size_t) { +} + +static void pthreadpool_parallelize_5d_tile_2d(benchmark::State& state) { + pthreadpool_t threadpool = pthreadpool_create(2); + const size_t threads = pthreadpool_get_threads_count(threadpool); + const size_t items = 
static_cast<size_t>(state.range(0)); + while (state.KeepRunning()) { + pthreadpool_parallelize_5d_tile_2d( + threadpool, + compute_5d_tile_2d, + nullptr /* context */, + 1, 1, 1, threads, items, + 1, 1, + 0 /* flags */); + } + pthreadpool_destroy(threadpool); + + /* Do not normalize by thread */ + state.SetItemsProcessed(int64_t(state.iterations()) * items); +} +BENCHMARK(pthreadpool_parallelize_5d_tile_2d)->UseRealTime()->RangeMultiplier(10)->Range(10, 1000000); + + +static void compute_6d(void*, size_t, size_t, size_t, size_t, size_t, size_t) { +} + +static void pthreadpool_parallelize_6d(benchmark::State& state) { + pthreadpool_t threadpool = pthreadpool_create(2); + const size_t threads = pthreadpool_get_threads_count(threadpool); + const size_t items = static_cast<size_t>(state.range(0)); + while (state.KeepRunning()) { + pthreadpool_parallelize_6d( + threadpool, + compute_6d, + nullptr /* context */, + 1, 1, 1, 1, threads, items, + 0 /* flags */); + } + pthreadpool_destroy(threadpool); + + /* Do not normalize by thread */ + state.SetItemsProcessed(int64_t(state.iterations()) * items); +} +BENCHMARK(pthreadpool_parallelize_6d)->UseRealTime()->RangeMultiplier(10)->Range(10, 1000000); + + +static void compute_6d_tile_1d(void*, size_t, size_t, size_t, size_t, size_t, size_t, size_t) { +} + +static void pthreadpool_parallelize_6d_tile_1d(benchmark::State& state) { + pthreadpool_t threadpool = pthreadpool_create(2); + const size_t threads = pthreadpool_get_threads_count(threadpool); + const size_t items = static_cast<size_t>(state.range(0)); + while (state.KeepRunning()) { + pthreadpool_parallelize_6d_tile_1d( + threadpool, + compute_6d_tile_1d, + nullptr /* context */, + 1, 1, 1, 1, threads, items, + 1, + 0 /* flags */); + } + pthreadpool_destroy(threadpool); + + /* Do not normalize by thread */ + state.SetItemsProcessed(int64_t(state.iterations()) * items); +} +BENCHMARK(pthreadpool_parallelize_6d_tile_1d)->UseRealTime()->RangeMultiplier(10)->Range(10, 1000000); + + +static void 
compute_6d_tile_2d(void*, size_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t) { +} + +static void pthreadpool_parallelize_6d_tile_2d(benchmark::State& state) { + pthreadpool_t threadpool = pthreadpool_create(2); + const size_t threads = pthreadpool_get_threads_count(threadpool); + const size_t items = static_cast<size_t>(state.range(0)); + while (state.KeepRunning()) { + pthreadpool_parallelize_6d_tile_2d( + threadpool, + compute_6d_tile_2d, + nullptr /* context */, + 1, 1, 1, 1, threads, items, + 1, 1, + 0 /* flags */); + } + pthreadpool_destroy(threadpool); + + /* Do not normalize by thread */ + state.SetItemsProcessed(int64_t(state.iterations()) * items); +} +BENCHMARK(pthreadpool_parallelize_6d_tile_2d)->UseRealTime()->RangeMultiplier(10)->Range(10, 1000000); + + BENCHMARK_MAIN(); -- cgit v1.2.3