aboutsummaryrefslogtreecommitdiff
path: root/src/threadpool-utils.h
diff options
context:
space:
mode:
author: Marat Dukhan <maratek@gmail.com> 2020-04-07 19:14:45 -0700
committer: Marat Dukhan <maratek@gmail.com> 2020-04-07 19:15:27 -0700
commit: be1bd8ed45f30ccdc23e5dcbf3896c1ae85f1ef3 (patch)
tree: 8ac2f511c4d3cc540cab4a0a0b716b7595f50ea1 /src/threadpool-utils.h
parent: fa67ff531c0f9999c742d500a4fa061b96937297 (diff)
download: pthreadpool-be1bd8ed45f30ccdc23e5dcbf3896c1ae85f1ef3.tar.gz
Windows implementation using Events
Diffstat (limited to 'src/threadpool-utils.h')
-rw-r--r-- src/threadpool-utils.h 38
1 files changed, 30 insertions, 8 deletions
diff --git a/src/threadpool-utils.h b/src/threadpool-utils.h
index 1d147e0..a86392b 100644
--- a/src/threadpool-utils.h
+++ b/src/threadpool-utils.h
@@ -3,12 +3,20 @@
#include <stdint.h>
#include <stddef.h>
-#if defined(__SSE__) || defined(__x86_64__)
-#include <xmmintrin.h>
+/* SSE-specific headers */
+#if defined(__SSE__) || defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1)
+ #include <xmmintrin.h>
#endif
+/* MSVC-specific headers */
+#if defined(_MSC_VER) && _MSC_VER >= 1920
+ #include <intrin.h>
+ #include <immintrin.h>
+#endif
+
+
struct fpu_state {
-#if defined(__SSE__) || defined(__x86_64__)
+#if defined(__SSE__) || defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1)
uint32_t mxcsr;
#elif defined(__arm__) && defined(__ARM_FP) && (__ARM_FP != 0)
uint32_t fpscr;
@@ -21,7 +29,7 @@ struct fpu_state {
static inline struct fpu_state get_fpu_state() {
struct fpu_state state = { 0 };
-#if defined(__SSE__) || defined(__x86_64__)
+#if defined(__SSE__) || defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1)
state.mxcsr = (uint32_t) _mm_getcsr();
#elif defined(__arm__) && defined(__ARM_FP) && (__ARM_FP != 0)
__asm__ __volatile__("VMRS %[fpscr], fpscr" : [fpscr] "=r" (state.fpscr));
@@ -32,7 +40,7 @@ static inline struct fpu_state get_fpu_state() {
}
static inline void set_fpu_state(const struct fpu_state state) {
-#if defined(__SSE__) || defined(__x86_64__)
+#if defined(__SSE__) || defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1)
_mm_setcsr((unsigned int) state.mxcsr);
#elif defined(__arm__) && defined(__ARM_FP) && (__ARM_FP != 0)
__asm__ __volatile__("VMSR fpscr, %[fpscr]" : : [fpscr] "r" (state.fpscr));
@@ -42,7 +50,7 @@ static inline void set_fpu_state(const struct fpu_state state) {
}
static inline void disable_fpu_denormals() {
-#if defined(__SSE__) || defined(__x86_64__)
+#if defined(__SSE__) || defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1)
_mm_setcsr(_mm_getcsr() | 0x8040);
#elif defined(__arm__) && defined(__ARM_FP) && (__ARM_FP != 0)
uint32_t fpscr;
@@ -65,14 +73,23 @@ static inline void disable_fpu_denormals() {
/*
 * Computes (a * b) / d, forming the product a * b in a type twice as wide as
 * size_t so that the multiplication itself cannot overflow.
 *
 * a, b - factors of the product.
 * d    - divisor; must be non-zero (division by zero is undefined).
 *
 * Returns the quotient converted to size_t. The result is only meaningful
 * when the true quotient fits in size_t.
 */
static inline size_t multiply_divide(size_t a, size_t b, size_t d) {
	#if defined(__SIZEOF_SIZE_T__) && (__SIZEOF_SIZE_T__ == 4)
		/*
		 * A cast binds tighter than '/', so the whole quotient is parenthesized:
		 * narrowing to size_t must happen after the division, not before it.
		 */
		return (size_t) ((((uint64_t) a) * ((uint64_t) b)) / ((uint64_t) d));
	#elif defined(__SIZEOF_SIZE_T__) && (__SIZEOF_SIZE_T__ == 8) && defined(__SIZEOF_INT128__)
		/* Same as above, with a 128-bit intermediate for 64-bit size_t. */
		return (size_t) ((((__uint128_t) a) * ((__uint128_t) b)) / ((__uint128_t) d));
	#elif (defined(_MSC_VER) && _MSC_VER >= 1920) && (defined(_M_AMD64) || defined(_M_X64))
		/* MSVC x64: 64x64 -> 128-bit multiply, then 128/64-bit divide. */
		uint64_t product_hi;
		const uint64_t product_lo = _umul128(a, b, &product_hi);
		uint64_t remainder; /* required by the intrinsic's signature; value unused */
		return (size_t) _udiv128(product_hi, product_lo, d, &remainder);
	#elif (defined(_MSC_VER) && _MSC_VER >= 1920) && defined(_M_IX86)
		/* MSVC x86: 32x32 -> 64-bit multiply, then 64/32-bit divide. */
		const unsigned __int64 product_full = __emulu((unsigned int) a, (unsigned int) b);
		unsigned int remainder; /* required by the intrinsic's signature; value unused */
		return (size_t) _udiv64(product_full, (unsigned int) d, &remainder);
	#else
		#error "Platform-specific implementation of multiply_divide required"
	#endif
}
-static inline size_t modulo_decrement(uint32_t i, uint32_t n) {
+static inline size_t modulo_decrement(size_t i, size_t n) {
/* Wrap modulo n, if needed */
if (i == 0) {
i = n;
@@ -89,6 +106,11 @@ static inline size_t divide_round_up(size_t dividend, size_t divisor) {
}
}
+/* Windows headers define min and max macros; undefine min here so the min() function below can be defined */
+#ifdef min
+ #undef min
+#endif
+
/* Returns the smaller of the two size_t arguments (b when they are equal). */
static inline size_t min(size_t a, size_t b) {
	if (b <= a) {
		return b;
	}
	return a;
}