diff options
author | Marat Dukhan <maratek@gmail.com> | 2020-04-07 19:14:45 -0700 |
---|---|---|
committer | Marat Dukhan <maratek@gmail.com> | 2020-04-07 19:15:27 -0700 |
commit | be1bd8ed45f30ccdc23e5dcbf3896c1ae85f1ef3 (patch) | |
tree | 8ac2f511c4d3cc540cab4a0a0b716b7595f50ea1 /src/threadpool-utils.h | |
parent | fa67ff531c0f9999c742d500a4fa061b96937297 (diff) | |
download | pthreadpool-be1bd8ed45f30ccdc23e5dcbf3896c1ae85f1ef3.tar.gz |
Windows implementation using Events
Diffstat (limited to 'src/threadpool-utils.h')
-rw-r--r-- | src/threadpool-utils.h | 38 |
1 files changed, 30 insertions, 8 deletions
diff --git a/src/threadpool-utils.h b/src/threadpool-utils.h
index 1d147e0..a86392b 100644
--- a/src/threadpool-utils.h
+++ b/src/threadpool-utils.h
@@ -3,12 +3,20 @@
 #include <stdint.h>
 #include <stddef.h>
 
-#if defined(__SSE__) || defined(__x86_64__)
-#include <xmmintrin.h>
+/* SSE-specific headers */
+#if defined(__SSE__) || defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1)
+	#include <xmmintrin.h>
 #endif
 
+/* MSVC-specific headers */
+#if defined(_MSC_VER) && _MSC_VER >= 1920
+	#include <intrin.h>
+	#include <immintrin.h>
+#endif
+
+
 struct fpu_state {
-#if defined(__SSE__) || defined(__x86_64__)
+#if defined(__SSE__) || defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1)
 	uint32_t mxcsr;
 #elif defined(__arm__) && defined(__ARM_FP) && (__ARM_FP != 0)
 	uint32_t fpscr;
@@ -21,7 +29,7 @@ struct fpu_state {
 
 static inline struct fpu_state get_fpu_state() {
 	struct fpu_state state = { 0 };
-#if defined(__SSE__) || defined(__x86_64__)
+#if defined(__SSE__) || defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1)
 	state.mxcsr = (uint32_t) _mm_getcsr();
 #elif defined(__arm__) && defined(__ARM_FP) && (__ARM_FP != 0)
 	__asm__ __volatile__("VMRS %[fpscr], fpscr" : [fpscr] "=r" (state.fpscr));
@@ -32,7 +40,7 @@ static inline struct fpu_state get_fpu_state() {
 }
 
 static inline void set_fpu_state(const struct fpu_state state) {
-#if defined(__SSE__) || defined(__x86_64__)
+#if defined(__SSE__) || defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1)
 	_mm_setcsr((unsigned int) state.mxcsr);
 #elif defined(__arm__) && defined(__ARM_FP) && (__ARM_FP != 0)
 	__asm__ __volatile__("VMSR fpscr, %[fpscr]" : : [fpscr] "r" (state.fpscr));
@@ -42,7 +50,7 @@ static inline void set_fpu_state(const struct fpu_state state) {
 }
 
 static inline void disable_fpu_denormals() {
-#if defined(__SSE__) || defined(__x86_64__)
+#if defined(__SSE__) || defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1)
 	_mm_setcsr(_mm_getcsr() | 0x8040);
 #elif defined(__arm__) && defined(__ARM_FP) && (__ARM_FP != 0)
 	uint32_t fpscr;
@@ -65,14 +73,23 @@ static inline void disable_fpu_denormals() {
 static inline size_t multiply_divide(size_t a, size_t b, size_t d) {
 	#if defined(__SIZEOF_SIZE_T__) && (__SIZEOF_SIZE_T__ == 4)
 		return (size_t) (((uint64_t) a) * ((uint64_t) b)) / ((uint64_t) d);
-	#elif defined(__SIZEOF_SIZE_T__) && (__SIZEOF_SIZE_T__ == 8)
+	#elif defined(__SIZEOF_SIZE_T__) && (__SIZEOF_SIZE_T__ == 8) && defined(__SIZEOF_INT128__)
 		return (size_t) (((__uint128_t) a) * ((__uint128_t) b)) / ((__uint128_t) d);
+	#elif (defined(_MSC_VER) && _MSC_VER >= 1920) && (defined(_M_AMD64) || defined(_M_X64))
+		uint64_t product_hi;
+		const uint64_t product_lo = _umul128(a, b, &product_hi);
+		uint64_t remainder;
+		return (size_t) _udiv128(product_hi, product_lo, d, &remainder);
+	#elif (defined(_MSC_VER) && _MSC_VER >= 1920) && defined(_M_IX86)
+		const unsigned __int64 product_full = __emulu((unsigned int) a, (unsigned int) b);
+		unsigned int remainder;
+		return (size_t) _udiv64(product_full, (unsigned int) d, &remainder);
 	#else
 		#error "Platform-specific implementation of multiply_divide required"
 	#endif
 }
 
-static inline size_t modulo_decrement(uint32_t i, uint32_t n) {
+static inline size_t modulo_decrement(size_t i, size_t n) {
 	/* Wrap modulo n, if needed */
 	if (i == 0) {
 		i = n;
@@ -89,6 +106,11 @@ static inline size_t divide_round_up(size_t dividend, size_t divisor) {
 	}
 }
 
+/* Windows headers define min and max macros; undefine it here */
+#ifdef min
+	#undef min
+#endif
+
 static inline size_t min(size_t a, size_t b) {
 	return a < b ? a : b;
 }