diff options
-rw-r--r-- | src/portable-api.c | 26 |
-rw-r--r-- | src/pthreads.c | 4 |
-rw-r--r-- | src/threadpool-atomics.h | 28 |
-rw-r--r-- | src/windows.c | 2 |
4 files changed, 28 insertions, 32 deletions
diff --git a/src/portable-api.c b/src/portable-api.c index 77d2e9b..84d6eda 100644 --- a/src/portable-api.c +++ b/src/portable-api.c @@ -50,7 +50,7 @@ static void thread_parallelize_1d(struct pthreadpool* threadpool, struct thread_ { struct thread_info* other_thread = &threadpool->threads[tid]; while (pthreadpool_try_decrement_relaxed_size_t(&other_thread->range_length)) { - const size_t index = pthreadpool_fetch_sub_relaxed_size_t(&other_thread->range_end, 1) - 1; + const size_t index = pthreadpool_decrement_fetch_relaxed_size_t(&other_thread->range_end); task(argument, index); } } @@ -90,7 +90,7 @@ static void thread_parallelize_1d_with_uarch(struct pthreadpool* threadpool, str { struct thread_info* other_thread = &threadpool->threads[tid]; while (pthreadpool_try_decrement_relaxed_size_t(&other_thread->range_length)) { - const size_t index = pthreadpool_fetch_sub_relaxed_size_t(&other_thread->range_end, 1) - 1; + const size_t index = pthreadpool_decrement_fetch_relaxed_size_t(&other_thread->range_end); task(argument, uarch_index, index); } } @@ -126,7 +126,7 @@ static void thread_parallelize_1d_tile_1d(struct pthreadpool* threadpool, struct { struct thread_info* other_thread = &threadpool->threads[tid]; while (pthreadpool_try_decrement_relaxed_size_t(&other_thread->range_length)) { - const size_t tile_index = pthreadpool_fetch_sub_relaxed_size_t(&other_thread->range_end, 1) - 1; + const size_t tile_index = pthreadpool_decrement_fetch_relaxed_size_t(&other_thread->range_end); const size_t tile_start = tile_index * tile; task(argument, tile_start, min(range - tile_start, tile)); } @@ -167,7 +167,7 @@ static void thread_parallelize_2d(struct pthreadpool* threadpool, struct thread_ { struct thread_info* other_thread = &threadpool->threads[tid]; while (pthreadpool_try_decrement_relaxed_size_t(&other_thread->range_length)) { - const size_t linear_index = pthreadpool_fetch_sub_relaxed_size_t(&other_thread->range_end, 1) - 1; + const size_t linear_index = 
pthreadpool_decrement_fetch_relaxed_size_t(&other_thread->range_end); const struct fxdiv_result_size_t index_i_j = fxdiv_divide_size_t(linear_index, range_j); task(argument, index_i_j.quotient, index_i_j.remainder); } @@ -211,7 +211,7 @@ static void thread_parallelize_2d_tile_1d(struct pthreadpool* threadpool, struct { struct thread_info* other_thread = &threadpool->threads[tid]; while (pthreadpool_try_decrement_relaxed_size_t(&other_thread->range_length)) { - const size_t linear_index = pthreadpool_fetch_sub_relaxed_size_t(&other_thread->range_end, 1) - 1; + const size_t linear_index = pthreadpool_decrement_fetch_relaxed_size_t(&other_thread->range_end); const struct fxdiv_result_size_t tile_index_i_j = fxdiv_divide_size_t(linear_index, tile_range_j); const size_t start_j = tile_index_i_j.remainder * tile_j; task(argument, tile_index_i_j.quotient, start_j, min(range_j - start_j, tile_j)); @@ -258,7 +258,7 @@ static void thread_parallelize_2d_tile_2d(struct pthreadpool* threadpool, struct { struct thread_info* other_thread = &threadpool->threads[tid]; while (pthreadpool_try_decrement_relaxed_size_t(&other_thread->range_length)) { - const size_t linear_index = pthreadpool_fetch_sub_relaxed_size_t(&other_thread->range_end, 1) - 1; + const size_t linear_index = pthreadpool_decrement_fetch_relaxed_size_t(&other_thread->range_end); const struct fxdiv_result_size_t tile_index_i_j = fxdiv_divide_size_t(linear_index, tile_range_j); const size_t start_i = tile_index_i_j.quotient * tile_i; const size_t start_j = tile_index_i_j.remainder * tile_j; @@ -315,7 +315,7 @@ static void thread_parallelize_2d_tile_2d_with_uarch(struct pthreadpool* threadp { struct thread_info* other_thread = &threadpool->threads[tid]; while (pthreadpool_try_decrement_relaxed_size_t(&other_thread->range_length)) { - const size_t linear_index = pthreadpool_fetch_sub_relaxed_size_t(&other_thread->range_end, 1) - 1; + const size_t linear_index = 
pthreadpool_decrement_fetch_relaxed_size_t(&other_thread->range_end); const struct fxdiv_result_size_t tile_index_i_j = fxdiv_divide_size_t(linear_index, tile_range_j); const size_t start_i = tile_index_i_j.quotient * tile_i; const size_t start_j = tile_index_i_j.remainder * tile_j; @@ -370,7 +370,7 @@ static void thread_parallelize_3d_tile_2d(struct pthreadpool* threadpool, struct { struct thread_info* other_thread = &threadpool->threads[tid]; while (pthreadpool_try_decrement_relaxed_size_t(&other_thread->range_length)) { - const size_t linear_index = pthreadpool_fetch_sub_relaxed_size_t(&other_thread->range_end, 1) - 1; + const size_t linear_index = pthreadpool_decrement_fetch_relaxed_size_t(&other_thread->range_end); const struct fxdiv_result_size_t tile_index_ij_k = fxdiv_divide_size_t(linear_index, tile_range_k); const struct fxdiv_result_size_t tile_index_i_j = fxdiv_divide_size_t(tile_index_ij_k.quotient, tile_range_j); const size_t start_j = tile_index_i_j.remainder * tile_j; @@ -435,7 +435,7 @@ static void thread_parallelize_3d_tile_2d_with_uarch(struct pthreadpool* threadp { struct thread_info* other_thread = &threadpool->threads[tid]; while (pthreadpool_try_decrement_relaxed_size_t(&other_thread->range_length)) { - const size_t linear_index = pthreadpool_fetch_sub_relaxed_size_t(&other_thread->range_end, 1) - 1; + const size_t linear_index = pthreadpool_decrement_fetch_relaxed_size_t(&other_thread->range_end); const struct fxdiv_result_size_t tile_index_ij_k = fxdiv_divide_size_t(linear_index, tile_range_k); const struct fxdiv_result_size_t tile_index_i_j = fxdiv_divide_size_t(tile_index_ij_k.quotient, tile_range_j); const size_t start_j = tile_index_i_j.remainder * tile_j; @@ -497,7 +497,7 @@ static void thread_parallelize_4d_tile_2d(struct pthreadpool* threadpool, struct { struct thread_info* other_thread = &threadpool->threads[tid]; while (pthreadpool_try_decrement_relaxed_size_t(&other_thread->range_length)) { - const size_t linear_index = 
pthreadpool_fetch_sub_relaxed_size_t(&other_thread->range_end, 1) - 1; + const size_t linear_index = pthreadpool_decrement_fetch_relaxed_size_t(&other_thread->range_end); const struct fxdiv_result_size_t tile_index_ij_kl = fxdiv_divide_size_t(linear_index, tile_range_kl); const struct fxdiv_result_size_t index_i_j = fxdiv_divide_size_t(tile_index_ij_kl.quotient, range_j); const struct fxdiv_result_size_t tile_index_k_l = fxdiv_divide_size_t(tile_index_ij_kl.remainder, tile_range_l); @@ -569,7 +569,7 @@ static void thread_parallelize_4d_tile_2d_with_uarch(struct pthreadpool* threadp { struct thread_info* other_thread = &threadpool->threads[tid]; while (pthreadpool_try_decrement_relaxed_size_t(&other_thread->range_length)) { - const size_t linear_index = pthreadpool_fetch_sub_relaxed_size_t(&other_thread->range_end, 1) - 1; + const size_t linear_index = pthreadpool_decrement_fetch_relaxed_size_t(&other_thread->range_end); const struct fxdiv_result_size_t tile_index_ij_kl = fxdiv_divide_size_t(linear_index, tile_range_kl); const struct fxdiv_result_size_t index_i_j = fxdiv_divide_size_t(tile_index_ij_kl.quotient, range_j); const struct fxdiv_result_size_t tile_index_k_l = fxdiv_divide_size_t(tile_index_ij_kl.remainder, tile_range_l); @@ -638,7 +638,7 @@ static void thread_parallelize_5d_tile_2d(struct pthreadpool* threadpool, struct { struct thread_info* other_thread = &threadpool->threads[tid]; while (pthreadpool_try_decrement_relaxed_size_t(&other_thread->range_length)) { - const size_t linear_index = pthreadpool_fetch_sub_relaxed_size_t(&other_thread->range_end, 1) - 1; + const size_t linear_index = pthreadpool_decrement_fetch_relaxed_size_t(&other_thread->range_end); const struct fxdiv_result_size_t tile_index_ijk_lm = fxdiv_divide_size_t(linear_index, tile_range_lm); const struct fxdiv_result_size_t index_ij_k = fxdiv_divide_size_t(tile_index_ijk_lm.quotient, range_k); const struct fxdiv_result_size_t tile_index_l_m = 
fxdiv_divide_size_t(tile_index_ijk_lm.remainder, tile_range_m); @@ -716,7 +716,7 @@ static void thread_parallelize_6d_tile_2d(struct pthreadpool* threadpool, struct { struct thread_info* other_thread = &threadpool->threads[tid]; while (pthreadpool_try_decrement_relaxed_size_t(&other_thread->range_length)) { - const size_t linear_index = pthreadpool_fetch_sub_relaxed_size_t(&other_thread->range_end, 1) - 1; + const size_t linear_index = pthreadpool_decrement_fetch_relaxed_size_t(&other_thread->range_end); const struct fxdiv_result_size_t tile_index_ijkl_mn = fxdiv_divide_size_t(linear_index, tile_range_mn); const struct fxdiv_result_size_t index_ij_kl = fxdiv_divide_size_t(tile_index_ijkl_mn.quotient, range_kl); const struct fxdiv_result_size_t tile_index_m_n = fxdiv_divide_size_t(tile_index_ijkl_mn.remainder, tile_range_n); diff --git a/src/pthreads.c b/src/pthreads.c index e70ab18..a7e4619 100644 --- a/src/pthreads.c +++ b/src/pthreads.c @@ -79,13 +79,13 @@ static void checkin_worker_thread(struct pthreadpool* threadpool) { #if PTHREADPOOL_USE_FUTEX - if (pthreadpool_fetch_sub_relaxed_size_t(&threadpool->active_threads, 1) == 1) { + if (pthreadpool_decrement_fetch_relaxed_size_t(&threadpool->active_threads) == 0) { pthreadpool_store_relaxed_uint32_t(&threadpool->has_active_threads, 0); futex_wake_all(&threadpool->has_active_threads); } #else pthread_mutex_lock(&threadpool->completion_mutex); - if (pthreadpool_fetch_sub_relaxed_size_t(&threadpool->active_threads, 1) == 1) { + if (pthreadpool_decrement_fetch_relaxed_size_t(&threadpool->active_threads) == 0) { pthread_cond_signal(&threadpool->completion_condvar); } pthread_mutex_unlock(&threadpool->completion_mutex); diff --git a/src/threadpool-atomics.h b/src/threadpool-atomics.h index be6c465..0b3459b 100644 --- a/src/threadpool-atomics.h +++ b/src/threadpool-atomics.h @@ -76,11 +76,10 @@ __c11_atomic_store(address, value, __ATOMIC_RELEASE); } - static inline size_t pthreadpool_fetch_sub_relaxed_size_t( - 
pthreadpool_atomic_size_t* address, - size_t decrement) + static inline size_t pthreadpool_decrement_fetch_relaxed_size_t( + pthreadpool_atomic_size_t* address) { - return __c11_atomic_fetch_sub(address, decrement, __ATOMIC_RELAXED); + return __c11_atomic_fetch_sub(address, 1, __ATOMIC_RELAXED) - 1; } static inline bool pthreadpool_try_decrement_relaxed_size_t( @@ -166,11 +165,10 @@ *address = value; } - static inline size_t pthreadpool_fetch_sub_relaxed_size_t( - pthreadpool_atomic_size_t* address, - size_t decrement) + static inline size_t pthreadpool_decrement_fetch_relaxed_size_t( + pthreadpool_atomic_size_t* address) { - return (size_t) _InterlockedExchangeAdd64((__int64 volatile*) address, (__int64) -decrement); + return (size_t) _InterlockedDecrement64((__int64 volatile*) address); } static inline bool pthreadpool_try_decrement_relaxed_size_t( @@ -258,11 +256,10 @@ *address = value; } - static inline size_t pthreadpool_fetch_sub_relaxed_size_t( - pthreadpool_atomic_size_t* address, - size_t decrement) + static inline size_t pthreadpool_decrement_fetch_relaxed_size_t( + pthreadpool_atomic_size_t* address) { - return (size_t) _InterlockedExchangeAdd((long volatile*) address, (long) -decrement); + return (size_t) _InterlockedDecrement((long volatile*) address); } static inline bool pthreadpool_try_decrement_relaxed_size_t( @@ -348,11 +345,10 @@ atomic_store_explicit(address, value, memory_order_release); } - static inline size_t pthreadpool_fetch_sub_relaxed_size_t( - pthreadpool_atomic_size_t* address, - size_t decrement) + static inline size_t pthreadpool_decrement_fetch_relaxed_size_t( + pthreadpool_atomic_size_t* address) { - return atomic_fetch_sub_explicit(address, decrement, memory_order_relaxed); + return atomic_fetch_sub_explicit(address, 1, memory_order_relaxed) - 1; } static inline bool pthreadpool_try_decrement_relaxed_size_t( diff --git a/src/windows.c b/src/windows.c index 2bea3bc..144da9d 100644 --- a/src/windows.c +++ b/src/windows.c @@ -27,7 
+27,7 @@ static void checkin_worker_thread(struct pthreadpool* threadpool, uint32_t event_index) { - if (pthreadpool_fetch_sub_relaxed_size_t(&threadpool->active_threads, 1) == 1) { + if (pthreadpool_decrement_fetch_relaxed_size_t(&threadpool->active_threads) == 0) { SetEvent(threadpool->completion_event[event_index]); } } |