aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Marat Dukhan <maratek@gmail.com> 2020-04-10 17:16:42 -0700
committer Marat Dukhan <maratek@gmail.com> 2020-04-10 17:16:42 -0700
commit ddd479bcd8ba87094132ecf438dd76eb08ee3b0d (patch)
tree b10c6742fbc0ea3c83ac014971159360b3933c40 /src
parent fa7261344568f86760231591a7158519fd43f382 (diff)
download pthreadpool-ddd479bcd8ba87094132ecf438dd76eb08ee3b0d.tar.gz
Replace atomic fetch_sub with decrement_fetch primitive
Decrement-fetch is a closer match to the primitive used in the implementation.
Diffstat (limited to 'src')
-rw-r--r-- src/portable-api.c 26
-rw-r--r-- src/pthreads.c 4
-rw-r--r-- src/threadpool-atomics.h 28
-rw-r--r-- src/windows.c 2
4 files changed, 28 insertions, 32 deletions
diff --git a/src/portable-api.c b/src/portable-api.c
index 77d2e9b..84d6eda 100644
--- a/src/portable-api.c
+++ b/src/portable-api.c
@@ -50,7 +50,7 @@ static void thread_parallelize_1d(struct pthreadpool* threadpool, struct thread_
{
struct thread_info* other_thread = &threadpool->threads[tid];
while (pthreadpool_try_decrement_relaxed_size_t(&other_thread->range_length)) {
- const size_t index = pthreadpool_fetch_sub_relaxed_size_t(&other_thread->range_end, 1) - 1;
+ const size_t index = pthreadpool_decrement_fetch_relaxed_size_t(&other_thread->range_end);
task(argument, index);
}
}
@@ -90,7 +90,7 @@ static void thread_parallelize_1d_with_uarch(struct pthreadpool* threadpool, str
{
struct thread_info* other_thread = &threadpool->threads[tid];
while (pthreadpool_try_decrement_relaxed_size_t(&other_thread->range_length)) {
- const size_t index = pthreadpool_fetch_sub_relaxed_size_t(&other_thread->range_end, 1) - 1;
+ const size_t index = pthreadpool_decrement_fetch_relaxed_size_t(&other_thread->range_end);
task(argument, uarch_index, index);
}
}
@@ -126,7 +126,7 @@ static void thread_parallelize_1d_tile_1d(struct pthreadpool* threadpool, struct
{
struct thread_info* other_thread = &threadpool->threads[tid];
while (pthreadpool_try_decrement_relaxed_size_t(&other_thread->range_length)) {
- const size_t tile_index = pthreadpool_fetch_sub_relaxed_size_t(&other_thread->range_end, 1) - 1;
+ const size_t tile_index = pthreadpool_decrement_fetch_relaxed_size_t(&other_thread->range_end);
const size_t tile_start = tile_index * tile;
task(argument, tile_start, min(range - tile_start, tile));
}
@@ -167,7 +167,7 @@ static void thread_parallelize_2d(struct pthreadpool* threadpool, struct thread_
{
struct thread_info* other_thread = &threadpool->threads[tid];
while (pthreadpool_try_decrement_relaxed_size_t(&other_thread->range_length)) {
- const size_t linear_index = pthreadpool_fetch_sub_relaxed_size_t(&other_thread->range_end, 1) - 1;
+ const size_t linear_index = pthreadpool_decrement_fetch_relaxed_size_t(&other_thread->range_end);
const struct fxdiv_result_size_t index_i_j = fxdiv_divide_size_t(linear_index, range_j);
task(argument, index_i_j.quotient, index_i_j.remainder);
}
@@ -211,7 +211,7 @@ static void thread_parallelize_2d_tile_1d(struct pthreadpool* threadpool, struct
{
struct thread_info* other_thread = &threadpool->threads[tid];
while (pthreadpool_try_decrement_relaxed_size_t(&other_thread->range_length)) {
- const size_t linear_index = pthreadpool_fetch_sub_relaxed_size_t(&other_thread->range_end, 1) - 1;
+ const size_t linear_index = pthreadpool_decrement_fetch_relaxed_size_t(&other_thread->range_end);
const struct fxdiv_result_size_t tile_index_i_j = fxdiv_divide_size_t(linear_index, tile_range_j);
const size_t start_j = tile_index_i_j.remainder * tile_j;
task(argument, tile_index_i_j.quotient, start_j, min(range_j - start_j, tile_j));
@@ -258,7 +258,7 @@ static void thread_parallelize_2d_tile_2d(struct pthreadpool* threadpool, struct
{
struct thread_info* other_thread = &threadpool->threads[tid];
while (pthreadpool_try_decrement_relaxed_size_t(&other_thread->range_length)) {
- const size_t linear_index = pthreadpool_fetch_sub_relaxed_size_t(&other_thread->range_end, 1) - 1;
+ const size_t linear_index = pthreadpool_decrement_fetch_relaxed_size_t(&other_thread->range_end);
const struct fxdiv_result_size_t tile_index_i_j = fxdiv_divide_size_t(linear_index, tile_range_j);
const size_t start_i = tile_index_i_j.quotient * tile_i;
const size_t start_j = tile_index_i_j.remainder * tile_j;
@@ -315,7 +315,7 @@ static void thread_parallelize_2d_tile_2d_with_uarch(struct pthreadpool* threadp
{
struct thread_info* other_thread = &threadpool->threads[tid];
while (pthreadpool_try_decrement_relaxed_size_t(&other_thread->range_length)) {
- const size_t linear_index = pthreadpool_fetch_sub_relaxed_size_t(&other_thread->range_end, 1) - 1;
+ const size_t linear_index = pthreadpool_decrement_fetch_relaxed_size_t(&other_thread->range_end);
const struct fxdiv_result_size_t tile_index_i_j = fxdiv_divide_size_t(linear_index, tile_range_j);
const size_t start_i = tile_index_i_j.quotient * tile_i;
const size_t start_j = tile_index_i_j.remainder * tile_j;
@@ -370,7 +370,7 @@ static void thread_parallelize_3d_tile_2d(struct pthreadpool* threadpool, struct
{
struct thread_info* other_thread = &threadpool->threads[tid];
while (pthreadpool_try_decrement_relaxed_size_t(&other_thread->range_length)) {
- const size_t linear_index = pthreadpool_fetch_sub_relaxed_size_t(&other_thread->range_end, 1) - 1;
+ const size_t linear_index = pthreadpool_decrement_fetch_relaxed_size_t(&other_thread->range_end);
const struct fxdiv_result_size_t tile_index_ij_k = fxdiv_divide_size_t(linear_index, tile_range_k);
const struct fxdiv_result_size_t tile_index_i_j = fxdiv_divide_size_t(tile_index_ij_k.quotient, tile_range_j);
const size_t start_j = tile_index_i_j.remainder * tile_j;
@@ -435,7 +435,7 @@ static void thread_parallelize_3d_tile_2d_with_uarch(struct pthreadpool* threadp
{
struct thread_info* other_thread = &threadpool->threads[tid];
while (pthreadpool_try_decrement_relaxed_size_t(&other_thread->range_length)) {
- const size_t linear_index = pthreadpool_fetch_sub_relaxed_size_t(&other_thread->range_end, 1) - 1;
+ const size_t linear_index = pthreadpool_decrement_fetch_relaxed_size_t(&other_thread->range_end);
const struct fxdiv_result_size_t tile_index_ij_k = fxdiv_divide_size_t(linear_index, tile_range_k);
const struct fxdiv_result_size_t tile_index_i_j = fxdiv_divide_size_t(tile_index_ij_k.quotient, tile_range_j);
const size_t start_j = tile_index_i_j.remainder * tile_j;
@@ -497,7 +497,7 @@ static void thread_parallelize_4d_tile_2d(struct pthreadpool* threadpool, struct
{
struct thread_info* other_thread = &threadpool->threads[tid];
while (pthreadpool_try_decrement_relaxed_size_t(&other_thread->range_length)) {
- const size_t linear_index = pthreadpool_fetch_sub_relaxed_size_t(&other_thread->range_end, 1) - 1;
+ const size_t linear_index = pthreadpool_decrement_fetch_relaxed_size_t(&other_thread->range_end);
const struct fxdiv_result_size_t tile_index_ij_kl = fxdiv_divide_size_t(linear_index, tile_range_kl);
const struct fxdiv_result_size_t index_i_j = fxdiv_divide_size_t(tile_index_ij_kl.quotient, range_j);
const struct fxdiv_result_size_t tile_index_k_l = fxdiv_divide_size_t(tile_index_ij_kl.remainder, tile_range_l);
@@ -569,7 +569,7 @@ static void thread_parallelize_4d_tile_2d_with_uarch(struct pthreadpool* threadp
{
struct thread_info* other_thread = &threadpool->threads[tid];
while (pthreadpool_try_decrement_relaxed_size_t(&other_thread->range_length)) {
- const size_t linear_index = pthreadpool_fetch_sub_relaxed_size_t(&other_thread->range_end, 1) - 1;
+ const size_t linear_index = pthreadpool_decrement_fetch_relaxed_size_t(&other_thread->range_end);
const struct fxdiv_result_size_t tile_index_ij_kl = fxdiv_divide_size_t(linear_index, tile_range_kl);
const struct fxdiv_result_size_t index_i_j = fxdiv_divide_size_t(tile_index_ij_kl.quotient, range_j);
const struct fxdiv_result_size_t tile_index_k_l = fxdiv_divide_size_t(tile_index_ij_kl.remainder, tile_range_l);
@@ -638,7 +638,7 @@ static void thread_parallelize_5d_tile_2d(struct pthreadpool* threadpool, struct
{
struct thread_info* other_thread = &threadpool->threads[tid];
while (pthreadpool_try_decrement_relaxed_size_t(&other_thread->range_length)) {
- const size_t linear_index = pthreadpool_fetch_sub_relaxed_size_t(&other_thread->range_end, 1) - 1;
+ const size_t linear_index = pthreadpool_decrement_fetch_relaxed_size_t(&other_thread->range_end);
const struct fxdiv_result_size_t tile_index_ijk_lm = fxdiv_divide_size_t(linear_index, tile_range_lm);
const struct fxdiv_result_size_t index_ij_k = fxdiv_divide_size_t(tile_index_ijk_lm.quotient, range_k);
const struct fxdiv_result_size_t tile_index_l_m = fxdiv_divide_size_t(tile_index_ijk_lm.remainder, tile_range_m);
@@ -716,7 +716,7 @@ static void thread_parallelize_6d_tile_2d(struct pthreadpool* threadpool, struct
{
struct thread_info* other_thread = &threadpool->threads[tid];
while (pthreadpool_try_decrement_relaxed_size_t(&other_thread->range_length)) {
- const size_t linear_index = pthreadpool_fetch_sub_relaxed_size_t(&other_thread->range_end, 1) - 1;
+ const size_t linear_index = pthreadpool_decrement_fetch_relaxed_size_t(&other_thread->range_end);
const struct fxdiv_result_size_t tile_index_ijkl_mn = fxdiv_divide_size_t(linear_index, tile_range_mn);
const struct fxdiv_result_size_t index_ij_kl = fxdiv_divide_size_t(tile_index_ijkl_mn.quotient, range_kl);
const struct fxdiv_result_size_t tile_index_m_n = fxdiv_divide_size_t(tile_index_ijkl_mn.remainder, tile_range_n);
diff --git a/src/pthreads.c b/src/pthreads.c
index e70ab18..a7e4619 100644
--- a/src/pthreads.c
+++ b/src/pthreads.c
@@ -79,13 +79,13 @@
static void checkin_worker_thread(struct pthreadpool* threadpool) {
#if PTHREADPOOL_USE_FUTEX
- if (pthreadpool_fetch_sub_relaxed_size_t(&threadpool->active_threads, 1) == 1) {
+ if (pthreadpool_decrement_fetch_relaxed_size_t(&threadpool->active_threads) == 0) {
pthreadpool_store_relaxed_uint32_t(&threadpool->has_active_threads, 0);
futex_wake_all(&threadpool->has_active_threads);
}
#else
pthread_mutex_lock(&threadpool->completion_mutex);
- if (pthreadpool_fetch_sub_relaxed_size_t(&threadpool->active_threads, 1) == 1) {
+ if (pthreadpool_decrement_fetch_relaxed_size_t(&threadpool->active_threads) == 0) {
pthread_cond_signal(&threadpool->completion_condvar);
}
pthread_mutex_unlock(&threadpool->completion_mutex);
diff --git a/src/threadpool-atomics.h b/src/threadpool-atomics.h
index be6c465..0b3459b 100644
--- a/src/threadpool-atomics.h
+++ b/src/threadpool-atomics.h
@@ -76,11 +76,10 @@
__c11_atomic_store(address, value, __ATOMIC_RELEASE);
}
- static inline size_t pthreadpool_fetch_sub_relaxed_size_t(
- pthreadpool_atomic_size_t* address,
- size_t decrement)
+ static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
+ pthreadpool_atomic_size_t* address)
{
- return __c11_atomic_fetch_sub(address, decrement, __ATOMIC_RELAXED);
+ return __c11_atomic_fetch_sub(address, 1, __ATOMIC_RELAXED) - 1;
}
static inline bool pthreadpool_try_decrement_relaxed_size_t(
@@ -166,11 +165,10 @@
*address = value;
}
- static inline size_t pthreadpool_fetch_sub_relaxed_size_t(
- pthreadpool_atomic_size_t* address,
- size_t decrement)
+ static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
+ pthreadpool_atomic_size_t* address)
{
- return (size_t) _InterlockedExchangeAdd64((__int64 volatile*) address, (__int64) -decrement);
+ return (size_t) _InterlockedDecrement64((__int64 volatile*) address);
}
static inline bool pthreadpool_try_decrement_relaxed_size_t(
@@ -258,11 +256,10 @@
*address = value;
}
- static inline size_t pthreadpool_fetch_sub_relaxed_size_t(
- pthreadpool_atomic_size_t* address,
- size_t decrement)
+ static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
+ pthreadpool_atomic_size_t* address)
{
- return (size_t) _InterlockedExchangeAdd((long volatile*) address, (long) -decrement);
+ return (size_t) _InterlockedDecrement((long volatile*) address);
}
static inline bool pthreadpool_try_decrement_relaxed_size_t(
@@ -348,11 +345,10 @@
atomic_store_explicit(address, value, memory_order_release);
}
- static inline size_t pthreadpool_fetch_sub_relaxed_size_t(
- pthreadpool_atomic_size_t* address,
- size_t decrement)
+ static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
+ pthreadpool_atomic_size_t* address)
{
- return atomic_fetch_sub_explicit(address, decrement, memory_order_relaxed);
+ return atomic_fetch_sub_explicit(address, 1, memory_order_relaxed) - 1;
}
static inline bool pthreadpool_try_decrement_relaxed_size_t(
diff --git a/src/windows.c b/src/windows.c
index 2bea3bc..144da9d 100644
--- a/src/windows.c
+++ b/src/windows.c
@@ -27,7 +27,7 @@
static void checkin_worker_thread(struct pthreadpool* threadpool, uint32_t event_index) {
- if (pthreadpool_fetch_sub_relaxed_size_t(&threadpool->active_threads, 1) == 1) {
+ if (pthreadpool_decrement_fetch_relaxed_size_t(&threadpool->active_threads) == 0) {
SetEvent(threadpool->completion_event[event_index]);
}
}