Throughput benchmarks for 3D/4D/5D/6D parallelization with 1D and no tiling

author: Marat Dukhan <maratek@google.com> 2020-12-06 22:09:17 -0800
committer: Marat Dukhan <maratek@google.com> 2020-12-06 22:09:17 -0800
commit: 344531b40881b1ee41508a9c70c8fbbef3bd6cad (patch)
tree: ad680cf9e13508397f805dd6b3e1954db23769ce
parent: 545ebe9f225aec6dca49109516fac02e973a3de2 (diff)
download: pthreadpool-344531b40881b1ee41508a9c70c8fbbef3bd6cad.tar.gz
1 file changed, 315 insertions, 7 deletions
diff --git a/bench/throughput.cc b/bench/throughput.cc
index 2242ccb..47c8da7 100644
--- a/bench/throughput.cc
+++ b/bench/throughput.cc
@@ -7,7 +7,7 @@ static void compute_1d(void*, size_t) {
 }
 
 static void pthreadpool_parallelize_1d(benchmark::State& state) {
-	pthreadpool_t threadpool = pthreadpool_create(0);
+	pthreadpool_t threadpool = pthreadpool_create(2);
 	const size_t threads = pthreadpool_get_threads_count(threadpool);
 	const size_t items = static_cast<size_t>(state.range(0));
 	while (state.KeepRunning()) {
@@ -30,7 +30,7 @@ static void compute_1d_tile_1d(void*, size_t, size_t) {
 }
 
 static void pthreadpool_parallelize_1d_tile_1d(benchmark::State& state) {
-	pthreadpool_t threadpool = pthreadpool_create(0);
+	pthreadpool_t threadpool = pthreadpool_create(2);
 	const size_t threads = pthreadpool_get_threads_count(threadpool);
 	const size_t items = static_cast<size_t>(state.range(0));
 	while (state.KeepRunning()) {
@@ -49,11 +49,11 @@ static void pthreadpool_parallelize_1d_tile_1d(benchmark::State& state) {
 BENCHMARK(pthreadpool_parallelize_1d_tile_1d)->UseRealTime()->RangeMultiplier(10)->Range(10, 1000000);
 
 
-static void compute_2d(void* context, size_t x, size_t y) {
+static void compute_2d(void*, size_t, size_t) {
 }
 
 static void pthreadpool_parallelize_2d(benchmark::State& state) {
-	pthreadpool_t threadpool = pthreadpool_create(0);
+	pthreadpool_t threadpool = pthreadpool_create(2);
 	const size_t threads = pthreadpool_get_threads_count(threadpool);
 	const size_t items = static_cast<size_t>(state.range(0));
 	while (state.KeepRunning()) {
@@ -72,17 +72,41 @@ static void pthreadpool_parallelize_2d(benchmark::State& state) {
 BENCHMARK(pthreadpool_parallelize_2d)->UseRealTime()->RangeMultiplier(10)->Range(10, 1000000);
 
 
-static void compute_2d_tiled(void* context, size_t x0, size_t y0, size_t xn, size_t yn) {
+static void compute_2d_tile_1d(void*, size_t, size_t, size_t) {
+}
+
+static void pthreadpool_parallelize_2d_tile_1d(benchmark::State& state) {
+	pthreadpool_t threadpool = pthreadpool_create(2);
+	const size_t threads = pthreadpool_get_threads_count(threadpool);
+	const size_t items = static_cast<size_t>(state.range(0));
+	while (state.KeepRunning()) {
+		pthreadpool_parallelize_2d_tile_1d(
+			threadpool,
+			compute_2d_tile_1d,
+			nullptr /* context */,
+			threads, items,
+			1,
+			0 /* flags */);
+	}
+	pthreadpool_destroy(threadpool);
+
+	/* Do not normalize by thread */
+	state.SetItemsProcessed(int64_t(state.iterations()) * items);
+}
+BENCHMARK(pthreadpool_parallelize_2d_tile_1d)->UseRealTime()->RangeMultiplier(10)->Range(10, 1000000);
+
+
+static void compute_2d_tile_2d(void*, size_t, size_t, size_t, size_t) {
 }
 
 static void pthreadpool_parallelize_2d_tile_2d(benchmark::State& state) {
-	pthreadpool_t threadpool = pthreadpool_create(0);
+	pthreadpool_t threadpool = pthreadpool_create(2);
 	const size_t threads = pthreadpool_get_threads_count(threadpool);
 	const size_t items = static_cast<size_t>(state.range(0));
 	while (state.KeepRunning()) {
 		pthreadpool_parallelize_2d_tile_2d(
 			threadpool,
-			compute_2d_tiled,
+			compute_2d_tile_2d,
 			nullptr /* context */,
 			threads, items,
 			1, 1,
@@ -96,4 +120,288 @@ static void pthreadpool_parallelize_2d_tile_2d(benchmark::State& state) {
 BENCHMARK(pthreadpool_parallelize_2d_tile_2d)->UseRealTime()->RangeMultiplier(10)->Range(10, 1000000);
 
 
+static void compute_3d(void*, size_t, size_t, size_t) {
+}
+
+static void pthreadpool_parallelize_3d(benchmark::State& state) {
+	pthreadpool_t threadpool = pthreadpool_create(2);
+	const size_t threads = pthreadpool_get_threads_count(threadpool);
+	const size_t items = static_cast<size_t>(state.range(0));
+	while (state.KeepRunning()) {
+		pthreadpool_parallelize_3d(
+			threadpool,
+			compute_3d,
+			nullptr /* context */,
+			1, threads, items,
+			0 /* flags */);
+	}
+	pthreadpool_destroy(threadpool);
+
+	/* Do not normalize by thread */
+	state.SetItemsProcessed(int64_t(state.iterations()) * items);
+}
+BENCHMARK(pthreadpool_parallelize_3d)->UseRealTime()->RangeMultiplier(10)->Range(10, 1000000);
+
+
+static void compute_3d_tile_1d(void*, size_t, size_t, size_t, size_t) {
+}
+
+static void pthreadpool_parallelize_3d_tile_1d(benchmark::State& state) {
+	pthreadpool_t threadpool = pthreadpool_create(2);
+	const size_t threads = pthreadpool_get_threads_count(threadpool);
+	const size_t items = static_cast<size_t>(state.range(0));
+	while (state.KeepRunning()) {
+		pthreadpool_parallelize_3d_tile_1d(
+			threadpool,
+			compute_3d_tile_1d,
+			nullptr /* context */,
+			1, threads, items,
+			1,
+			0 /* flags */);
+	}
+	pthreadpool_destroy(threadpool);
+
+	/* Do not normalize by thread */
+	state.SetItemsProcessed(int64_t(state.iterations()) * items);
+}
+BENCHMARK(pthreadpool_parallelize_3d_tile_1d)->UseRealTime()->RangeMultiplier(10)->Range(10, 1000000);
+
+
+static void compute_3d_tile_2d(void*, size_t, size_t, size_t, size_t, size_t) {
+}
+
+static void pthreadpool_parallelize_3d_tile_2d(benchmark::State& state) {
+	pthreadpool_t threadpool = pthreadpool_create(2);
+	const size_t threads = pthreadpool_get_threads_count(threadpool);
+	const size_t items = static_cast<size_t>(state.range(0));
+	while (state.KeepRunning()) {
+		pthreadpool_parallelize_3d_tile_2d(
+			threadpool,
+			compute_3d_tile_2d,
+			nullptr /* context */,
+			1, threads, items,
+			1, 1,
+			0 /* flags */);
+	}
+	pthreadpool_destroy(threadpool);
+
+	/* Do not normalize by thread */
+	state.SetItemsProcessed(int64_t(state.iterations()) * items);
+}
+BENCHMARK(pthreadpool_parallelize_3d_tile_2d)->UseRealTime()->RangeMultiplier(10)->Range(10, 1000000);
+
+
+static void compute_4d(void*, size_t, size_t, size_t, size_t) {
+}
+
+static void pthreadpool_parallelize_4d(benchmark::State& state) {
+	pthreadpool_t threadpool = pthreadpool_create(2);
+	const size_t threads = pthreadpool_get_threads_count(threadpool);
+	const size_t items = static_cast<size_t>(state.range(0));
+	while (state.KeepRunning()) {
+		pthreadpool_parallelize_4d(
+			threadpool,
+			compute_4d,
+			nullptr /* context */,
+			1, 1, threads, items,
+			0 /* flags */);
+	}
+	pthreadpool_destroy(threadpool);
+
+	/* Do not normalize by thread */
+	state.SetItemsProcessed(int64_t(state.iterations()) * items);
+}
+BENCHMARK(pthreadpool_parallelize_4d)->UseRealTime()->RangeMultiplier(10)->Range(10, 1000000);
+
+
+static void compute_4d_tile_1d(void*, size_t, size_t, size_t, size_t, size_t) {
+}
+
+static void pthreadpool_parallelize_4d_tile_1d(benchmark::State& state) {
+	pthreadpool_t threadpool = pthreadpool_create(2);
+	const size_t threads = pthreadpool_get_threads_count(threadpool);
+	const size_t items = static_cast<size_t>(state.range(0));
+	while (state.KeepRunning()) {
+		pthreadpool_parallelize_4d_tile_1d(
+			threadpool,
+			compute_4d_tile_1d,
+			nullptr /* context */,
+			1, 1, threads, items,
+			1,
+			0 /* flags */);
+	}
+	pthreadpool_destroy(threadpool);
+
+	/* Do not normalize by thread */
+	state.SetItemsProcessed(int64_t(state.iterations()) * items);
+}
+BENCHMARK(pthreadpool_parallelize_4d_tile_1d)->UseRealTime()->RangeMultiplier(10)->Range(10, 1000000);
+
+
+static void compute_4d_tile_2d(void*, size_t, size_t, size_t, size_t, size_t, size_t) {
+}
+
+static void pthreadpool_parallelize_4d_tile_2d(benchmark::State& state) {
+	pthreadpool_t threadpool = pthreadpool_create(2);
+	const size_t threads = pthreadpool_get_threads_count(threadpool);
+	const size_t items = static_cast<size_t>(state.range(0));
+	while (state.KeepRunning()) {
+		pthreadpool_parallelize_4d_tile_2d(
+			threadpool,
+			compute_4d_tile_2d,
+			nullptr /* context */,
+			1, 1, threads, items,
+			1, 1,
+			0 /* flags */);
+	}
+	pthreadpool_destroy(threadpool);
+
+	/* Do not normalize by thread */
+	state.SetItemsProcessed(int64_t(state.iterations()) * items);
+}
+BENCHMARK(pthreadpool_parallelize_4d_tile_2d)->UseRealTime()->RangeMultiplier(10)->Range(10, 1000000);
+
+
+static void compute_5d(void*, size_t, size_t, size_t, size_t, size_t) {
+}
+
+static void pthreadpool_parallelize_5d(benchmark::State& state) {
+	pthreadpool_t threadpool = pthreadpool_create(2);
+	const size_t threads = pthreadpool_get_threads_count(threadpool);
+	const size_t items = static_cast<size_t>(state.range(0));
+	while (state.KeepRunning()) {
+		pthreadpool_parallelize_5d(
+			threadpool,
+			compute_5d,
+			nullptr /* context */,
+			1, 1, 1, threads, items,
+			0 /* flags */);
+	}
+	pthreadpool_destroy(threadpool);
+
+	/* Do not normalize by thread */
+	state.SetItemsProcessed(int64_t(state.iterations()) * items);
+}
+BENCHMARK(pthreadpool_parallelize_5d)->UseRealTime()->RangeMultiplier(10)->Range(10, 1000000);
+
+
+static void compute_5d_tile_1d(void*, size_t, size_t, size_t, size_t, size_t, size_t) {
+}
+
+static void pthreadpool_parallelize_5d_tile_1d(benchmark::State& state) {
+	pthreadpool_t threadpool = pthreadpool_create(2);
+	const size_t threads = pthreadpool_get_threads_count(threadpool);
+	const size_t items = static_cast<size_t>(state.range(0));
+	while (state.KeepRunning()) {
+		pthreadpool_parallelize_5d_tile_1d(
+			threadpool,
+			compute_5d_tile_1d,
+			nullptr /* context */,
+			1, 1, 1, threads, items,
+			1,
+			0 /* flags */);
+	}
+	pthreadpool_destroy(threadpool);
+
+	/* Do not normalize by thread */
+	state.SetItemsProcessed(int64_t(state.iterations()) * items);
+}
+BENCHMARK(pthreadpool_parallelize_5d_tile_1d)->UseRealTime()->RangeMultiplier(10)->Range(10, 1000000);
+
+
+static void compute_5d_tile_2d(void*, size_t, size_t, size_t, size_t, size_t, size_t, size_t) {
+}
+
+static void pthreadpool_parallelize_5d_tile_2d(benchmark::State& state) {
+	pthreadpool_t threadpool = pthreadpool_create(2);
+	const size_t threads = pthreadpool_get_threads_count(threadpool);
+	const size_t items = static_cast<size_t>(state.range(0));
+	while (state.KeepRunning()) {
+		pthreadpool_parallelize_5d_tile_2d(
+			threadpool,
+			compute_5d_tile_2d,
+			nullptr /* context */,
+			1, 1, 1, threads, items,
+			1, 1,
+			0 /* flags */);
+	}
+	pthreadpool_destroy(threadpool);
+
+	/* Do not normalize by thread */
+	state.SetItemsProcessed(int64_t(state.iterations()) * items);
+}
+BENCHMARK(pthreadpool_parallelize_5d_tile_2d)->UseRealTime()->RangeMultiplier(10)->Range(10, 1000000);
+
+
+static void compute_6d(void*, size_t, size_t, size_t, size_t, size_t, size_t) {
+}
+
+static void pthreadpool_parallelize_6d(benchmark::State& state) {
+	pthreadpool_t threadpool = pthreadpool_create(2);
+	const size_t threads = pthreadpool_get_threads_count(threadpool);
+	const size_t items = static_cast<size_t>(state.range(0));
+	while (state.KeepRunning()) {
+		pthreadpool_parallelize_6d(
+			threadpool,
+			compute_6d,
+			nullptr /* context */,
+			1, 1, 1, 1, threads, items,
+			0 /* flags */);
+	}
+	pthreadpool_destroy(threadpool);
+
+	/* Do not normalize by thread */
+	state.SetItemsProcessed(int64_t(state.iterations()) * items);
+}
+BENCHMARK(pthreadpool_parallelize_6d)->UseRealTime()->RangeMultiplier(10)->Range(10, 1000000);
+
+
+static void compute_6d_tile_1d(void*, size_t, size_t, size_t, size_t, size_t, size_t, size_t) {
+}
+
+static void pthreadpool_parallelize_6d_tile_1d(benchmark::State& state) {
+	pthreadpool_t threadpool = pthreadpool_create(2);
+	const size_t threads = pthreadpool_get_threads_count(threadpool);
+	const size_t items = static_cast<size_t>(state.range(0));
+	while (state.KeepRunning()) {
+		pthreadpool_parallelize_6d_tile_1d(
+			threadpool,
+			compute_6d_tile_1d,
+			nullptr /* context */,
+			1, 1, 1, 1, threads, items,
+			1,
+			0 /* flags */);
+	}
+	pthreadpool_destroy(threadpool);
+
+	/* Do not normalize by thread */
+	state.SetItemsProcessed(int64_t(state.iterations()) * items);
+}
+BENCHMARK(pthreadpool_parallelize_6d_tile_1d)->UseRealTime()->RangeMultiplier(10)->Range(10, 1000000);
+
+
+static void compute_6d_tile_2d(void*, size_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t) {
+}
+
+static void pthreadpool_parallelize_6d_tile_2d(benchmark::State& state) {
+	pthreadpool_t threadpool = pthreadpool_create(2);
+	const size_t threads = pthreadpool_get_threads_count(threadpool);
+	const size_t items = static_cast<size_t>(state.range(0));
+	while (state.KeepRunning()) {
+		pthreadpool_parallelize_6d_tile_2d(
+			threadpool,
+			compute_6d_tile_2d,
+			nullptr /* context */,
+			1, 1, 1, 1, threads, items,
+			1, 1,
+			0 /* flags */);
+	}
+	pthreadpool_destroy(threadpool);
+
+	/* Do not normalize by thread */
+	state.SetItemsProcessed(int64_t(state.iterations()) * items);
+}
+BENCHMARK(pthreadpool_parallelize_6d_tile_2d)->UseRealTime()->RangeMultiplier(10)->Range(10, 1000000);
+
+
 BENCHMARK_MAIN();
author	Marat Dukhan <maratek@google.com>	2020-12-06 22:09:17 -0800
committer	Marat Dukhan <maratek@google.com>	2020-12-06 22:09:17 -0800
commit	344531b40881b1ee41508a9c70c8fbbef3bd6cad (patch)
tree	ad680cf9e13508397f805dd6b3e1954db23769ce
parent	545ebe9f225aec6dca49109516fac02e973a3de2 (diff)
download	pthreadpool-344531b40881b1ee41508a9c70c8fbbef3bd6cad.tar.gz