about | summary | refs | log | tree | commit | diff
path: root/src/threadpool-object.h
diff options
context:
space:
mode:
author: Marat Dukhan <maratek@google.com> 2020-04-05 21:39:58 -0700
committer: Marat Dukhan <maratek@google.com> 2020-04-05 21:39:58 -0700
commit: 4694c12cac1833653ceb80de5c0564f24cd412be (patch)
tree: 27a788e80d8faac0c4a19429726e33d7534ad944 /src/threadpool-object.h
parent: be9c89379384a261026c8bf517ec3ed651bb171c (diff)
download: pthreadpool-4694c12cac1833653ceb80de5c0564f24cd412be.tar.gz
Optimized pthreadpool_parallelize_* functions
Eliminate function call and division per each processed item in the multi-threaded case
Diffstat (limited to 'src/threadpool-object.h')
-rw-r--r-- src/threadpool-object.h 317
1 files changed, 315 insertions, 2 deletions
diff --git a/src/threadpool-object.h b/src/threadpool-object.h
index 0b52964..81e3515 100644
--- a/src/threadpool-object.h
+++ b/src/threadpool-object.h
@@ -18,6 +18,9 @@
#include <dispatch/dispatch.h>
#endif
+/* Dependencies */
+#include <fxdiv.h>
+
/* Library header */
#include <pthreadpool.h>
@@ -69,15 +72,312 @@ PTHREADPOOL_STATIC_ASSERT(sizeof(struct thread_info) % PTHREADPOOL_CACHELINE_SIZ
struct pthreadpool_1d_with_uarch_params {
/**
- * Copy of the default uarch index argument passed to a microarchitecture-aware parallelization function.
+ * Copy of the default_uarch_index argument passed to the pthreadpool_parallelize_1d_with_uarch function.
*/
uint32_t default_uarch_index;
/**
- * Copy of the max uarch index argument passed to a microarchitecture-aware parallelization function.
+ * Copy of the max_uarch_index argument passed to the pthreadpool_parallelize_1d_with_uarch function.
*/
uint32_t max_uarch_index;
};
+struct pthreadpool_1d_tile_1d_params {
+ /**
+ * Copy of the range argument passed to the pthreadpool_parallelize_1d_tile_1d function.
+ */
+ size_t range;
+ /**
+ * Copy of the tile argument passed to the pthreadpool_parallelize_1d_tile_1d function.
+ */
+ size_t tile;
+};
+
+struct pthreadpool_2d_params {
+ /**
+ * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_2d function.
+ */
+ struct fxdiv_divisor_size_t range_j;
+};
+
+struct pthreadpool_2d_tile_1d_params {
+ /**
+ * Copy of the range_j argument passed to the pthreadpool_parallelize_2d_tile_1d function.
+ */
+ size_t range_j;
+ /**
+ * Copy of the tile_j argument passed to the pthreadpool_parallelize_2d_tile_1d function.
+ */
+ size_t tile_j;
+ /**
+ * FXdiv divisor for the divide_round_up(range_j, tile_j) value.
+ */
+ struct fxdiv_divisor_size_t tile_range_j;
+};
+
+struct pthreadpool_2d_tile_2d_params {
+ /**
+ * Copy of the range_i argument passed to the pthreadpool_parallelize_2d_tile_2d function.
+ */
+ size_t range_i;
+ /**
+ * Copy of the tile_i argument passed to the pthreadpool_parallelize_2d_tile_2d function.
+ */
+ size_t tile_i;
+ /**
+ * Copy of the range_j argument passed to the pthreadpool_parallelize_2d_tile_2d function.
+ */
+ size_t range_j;
+ /**
+ * Copy of the tile_j argument passed to the pthreadpool_parallelize_2d_tile_2d function.
+ */
+ size_t tile_j;
+ /**
+ * FXdiv divisor for the divide_round_up(range_j, tile_j) value.
+ */
+ struct fxdiv_divisor_size_t tile_range_j;
+};
+
+struct pthreadpool_2d_tile_2d_with_uarch_params {
+ /**
+ * Copy of the default_uarch_index argument passed to the pthreadpool_parallelize_2d_tile_2d_with_uarch function.
+ */
+ uint32_t default_uarch_index;
+ /**
+ * Copy of the max_uarch_index argument passed to the pthreadpool_parallelize_2d_tile_2d_with_uarch function.
+ */
+ uint32_t max_uarch_index;
+ /**
+ * Copy of the range_i argument passed to the pthreadpool_parallelize_2d_tile_2d_with_uarch function.
+ */
+ size_t range_i;
+ /**
+ * Copy of the tile_i argument passed to the pthreadpool_parallelize_2d_tile_2d_with_uarch function.
+ */
+ size_t tile_i;
+ /**
+ * Copy of the range_j argument passed to the pthreadpool_parallelize_2d_tile_2d_with_uarch function.
+ */
+ size_t range_j;
+ /**
+ * Copy of the tile_j argument passed to the pthreadpool_parallelize_2d_tile_2d_with_uarch function.
+ */
+ size_t tile_j;
+ /**
+ * FXdiv divisor for the divide_round_up(range_j, tile_j) value.
+ */
+ struct fxdiv_divisor_size_t tile_range_j;
+};
+
+struct pthreadpool_3d_tile_2d_params {
+ /**
+ * Copy of the range_j argument passed to the pthreadpool_parallelize_3d_tile_2d function.
+ */
+ size_t range_j;
+ /**
+ * Copy of the tile_j argument passed to the pthreadpool_parallelize_3d_tile_2d function.
+ */
+ size_t tile_j;
+ /**
+ * Copy of the range_k argument passed to the pthreadpool_parallelize_3d_tile_2d function.
+ */
+ size_t range_k;
+ /**
+ * Copy of the tile_k argument passed to the pthreadpool_parallelize_3d_tile_2d function.
+ */
+ size_t tile_k;
+ /**
+ * FXdiv divisor for the divide_round_up(range_j, tile_j) value.
+ */
+ struct fxdiv_divisor_size_t tile_range_j;
+ /**
+ * FXdiv divisor for the divide_round_up(range_k, tile_k) value.
+ */
+ struct fxdiv_divisor_size_t tile_range_k;
+};
+
+struct pthreadpool_3d_tile_2d_with_uarch_params {
+ /**
+ * Copy of the default_uarch_index argument passed to the pthreadpool_parallelize_3d_tile_2d_with_uarch function.
+ */
+ uint32_t default_uarch_index;
+ /**
+ * Copy of the max_uarch_index argument passed to the pthreadpool_parallelize_3d_tile_2d_with_uarch function.
+ */
+ uint32_t max_uarch_index;
+ /**
+ * Copy of the range_j argument passed to the pthreadpool_parallelize_3d_tile_2d_with_uarch function.
+ */
+ size_t range_j;
+ /**
+ * Copy of the tile_j argument passed to the pthreadpool_parallelize_3d_tile_2d_with_uarch function.
+ */
+ size_t tile_j;
+ /**
+ * Copy of the range_k argument passed to the pthreadpool_parallelize_3d_tile_2d_with_uarch function.
+ */
+ size_t range_k;
+ /**
+ * Copy of the tile_k argument passed to the pthreadpool_parallelize_3d_tile_2d_with_uarch function.
+ */
+ size_t tile_k;
+ /**
+ * FXdiv divisor for the divide_round_up(range_j, tile_j) value.
+ */
+ struct fxdiv_divisor_size_t tile_range_j;
+ /**
+ * FXdiv divisor for the divide_round_up(range_k, tile_k) value.
+ */
+ struct fxdiv_divisor_size_t tile_range_k;
+};
+
+struct pthreadpool_4d_tile_2d_params {
+ /**
+ * Copy of the range_k argument passed to the pthreadpool_parallelize_4d_tile_2d function.
+ */
+ size_t range_k;
+ /**
+ * Copy of the tile_k argument passed to the pthreadpool_parallelize_4d_tile_2d function.
+ */
+ size_t tile_k;
+ /**
+ * Copy of the range_l argument passed to the pthreadpool_parallelize_4d_tile_2d function.
+ */
+ size_t range_l;
+ /**
+ * Copy of the tile_l argument passed to the pthreadpool_parallelize_4d_tile_2d function.
+ */
+ size_t tile_l;
+ /**
+ * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_4d_tile_2d function.
+ */
+ struct fxdiv_divisor_size_t range_j;
+ /**
+ * FXdiv divisor for the divide_round_up(range_k, tile_k) * divide_round_up(range_l, tile_l) value.
+ */
+ struct fxdiv_divisor_size_t tile_range_kl;
+ /**
+ * FXdiv divisor for the divide_round_up(range_l, tile_l) value.
+ */
+ struct fxdiv_divisor_size_t tile_range_l;
+};
+
+struct pthreadpool_4d_tile_2d_with_uarch_params {
+ /**
+ * Copy of the default_uarch_index argument passed to the pthreadpool_parallelize_4d_tile_2d_with_uarch function.
+ */
+ uint32_t default_uarch_index;
+ /**
+ * Copy of the max_uarch_index argument passed to the pthreadpool_parallelize_4d_tile_2d_with_uarch function.
+ */
+ uint32_t max_uarch_index;
+ /**
+ * Copy of the range_k argument passed to the pthreadpool_parallelize_4d_tile_2d_with_uarch function.
+ */
+ size_t range_k;
+ /**
+ * Copy of the tile_k argument passed to the pthreadpool_parallelize_4d_tile_2d_with_uarch function.
+ */
+ size_t tile_k;
+ /**
+ * Copy of the range_l argument passed to the pthreadpool_parallelize_4d_tile_2d_with_uarch function.
+ */
+ size_t range_l;
+ /**
+ * Copy of the tile_l argument passed to the pthreadpool_parallelize_4d_tile_2d_with_uarch function.
+ */
+ size_t tile_l;
+ /**
+ * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_4d_tile_2d_with_uarch function.
+ */
+ struct fxdiv_divisor_size_t range_j;
+ /**
+ * FXdiv divisor for the divide_round_up(range_k, tile_k) * divide_round_up(range_l, tile_l) value.
+ */
+ struct fxdiv_divisor_size_t tile_range_kl;
+ /**
+ * FXdiv divisor for the divide_round_up(range_l, tile_l) value.
+ */
+ struct fxdiv_divisor_size_t tile_range_l;
+};
+
+struct pthreadpool_5d_tile_2d_params {
+ /**
+ * Copy of the range_l argument passed to the pthreadpool_parallelize_5d_tile_2d function.
+ */
+ size_t range_l;
+ /**
+ * Copy of the tile_l argument passed to the pthreadpool_parallelize_5d_tile_2d function.
+ */
+ size_t tile_l;
+ /**
+ * Copy of the range_m argument passed to the pthreadpool_parallelize_5d_tile_2d function.
+ */
+ size_t range_m;
+ /**
+ * Copy of the tile_m argument passed to the pthreadpool_parallelize_5d_tile_2d function.
+ */
+ size_t tile_m;
+ /**
+ * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_5d_tile_2d function.
+ */
+ struct fxdiv_divisor_size_t range_j;
+ /**
+ * FXdiv divisor for the range_k argument passed to the pthreadpool_parallelize_5d_tile_2d function.
+ */
+ struct fxdiv_divisor_size_t range_k;
+ /**
+ * FXdiv divisor for the divide_round_up(range_l, tile_l) * divide_round_up(range_m, tile_m) value.
+ */
+ struct fxdiv_divisor_size_t tile_range_lm;
+ /**
+ * FXdiv divisor for the divide_round_up(range_m, tile_m) value.
+ */
+ struct fxdiv_divisor_size_t tile_range_m;
+};
+
+struct pthreadpool_6d_tile_2d_params {
+ /**
+ * Copy of the range_k argument passed to the pthreadpool_parallelize_6d_tile_2d function.
+ */
+ size_t range_k;
+ /**
+ * Copy of the range_m argument passed to the pthreadpool_parallelize_6d_tile_2d function.
+ */
+ size_t range_m;
+ /**
+ * Copy of the tile_m argument passed to the pthreadpool_parallelize_6d_tile_2d function.
+ */
+ size_t tile_m;
+ /**
+ * Copy of the range_n argument passed to the pthreadpool_parallelize_6d_tile_2d function.
+ */
+ size_t range_n;
+ /**
+ * Copy of the tile_n argument passed to the pthreadpool_parallelize_6d_tile_2d function.
+ */
+ size_t tile_n;
+ /**
+ * FXdiv divisor for the range_j argument passed to the pthreadpool_parallelize_6d_tile_2d function.
+ */
+ struct fxdiv_divisor_size_t range_j;
+ /**
+ * FXdiv divisor for the range_k * range_l value.
+ */
+ struct fxdiv_divisor_size_t range_kl;
+ /**
+ * FXdiv divisor for the range_l argument passed to the pthreadpool_parallelize_6d_tile_2d function.
+ */
+ struct fxdiv_divisor_size_t range_l;
+ /**
+ * FXdiv divisor for the divide_round_up(range_m, tile_m) * divide_round_up(range_n, tile_n) value.
+ */
+ struct fxdiv_divisor_size_t tile_range_mn;
+ /**
+ * FXdiv divisor for the divide_round_up(range_n, tile_n) value.
+ */
+ struct fxdiv_divisor_size_t tile_range_n;
+};
+
struct PTHREADPOOL_CACHELINE_ALIGNED pthreadpool {
#if !PTHREADPOOL_USE_GCD
/**
@@ -94,10 +394,12 @@ struct PTHREADPOOL_CACHELINE_ALIGNED pthreadpool {
*/
pthreadpool_atomic_uint32_t has_active_threads;
#endif
+#if !PTHREADPOOL_USE_GCD
/**
* The last command submitted to the thread pool.
*/
pthreadpool_atomic_uint32_t command;
+#endif
/**
* The entry point function to call for each thread in the thread pool for parallelization tasks.
*/
@@ -116,6 +418,17 @@ struct PTHREADPOOL_CACHELINE_ALIGNED pthreadpool {
*/
union {
struct pthreadpool_1d_with_uarch_params parallelize_1d_with_uarch;
+ struct pthreadpool_1d_tile_1d_params parallelize_1d_tile_1d;
+ struct pthreadpool_2d_params parallelize_2d;
+ struct pthreadpool_2d_tile_1d_params parallelize_2d_tile_1d;
+ struct pthreadpool_2d_tile_2d_params parallelize_2d_tile_2d;
+ struct pthreadpool_2d_tile_2d_with_uarch_params parallelize_2d_tile_2d_with_uarch;
+ struct pthreadpool_3d_tile_2d_params parallelize_3d_tile_2d;
+ struct pthreadpool_3d_tile_2d_with_uarch_params parallelize_3d_tile_2d_with_uarch;
+ struct pthreadpool_4d_tile_2d_params parallelize_4d_tile_2d;
+ struct pthreadpool_4d_tile_2d_with_uarch_params parallelize_4d_tile_2d_with_uarch;
+ struct pthreadpool_5d_tile_2d_params parallelize_5d_tile_2d;
+ struct pthreadpool_6d_tile_2d_params parallelize_6d_tile_2d;
} params;
/**
* Copy of the flags passed to a parallelization function.