diff options
Diffstat (limited to 'src/threadpool-utils.h')
-rw-r--r-- | src/threadpool-utils.h | 49 |
1 files changed, 43 insertions, 6 deletions
diff --git a/src/threadpool-utils.h b/src/threadpool-utils.h index 65c7fb0..24fee43 100644 --- a/src/threadpool-utils.h +++ b/src/threadpool-utils.h @@ -1,13 +1,24 @@ #pragma once #include <stdint.h> +#include <stddef.h> -#if defined(__SSE__) || defined(__x86_64__) -#include <xmmintrin.h> +/* SSE-specific headers */ +#if defined(__SSE__) || defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1) + #include <xmmintrin.h> #endif +/* MSVC-specific headers */ +#if defined(_MSC_VER) && _MSC_VER >= 1920 + #include <intrin.h> + #if defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64) + #include <immintrin.h> + #endif +#endif + + struct fpu_state { -#if defined(__SSE__) || defined(__x86_64__) +#if defined(__SSE__) || defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1) uint32_t mxcsr; #elif defined(__arm__) && defined(__ARM_FP) && (__ARM_FP != 0) uint32_t fpscr; @@ -20,7 +31,7 @@ struct fpu_state { static inline struct fpu_state get_fpu_state() { struct fpu_state state = { 0 }; -#if defined(__SSE__) || defined(__x86_64__) +#if defined(__SSE__) || defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1) state.mxcsr = (uint32_t) _mm_getcsr(); #elif defined(__arm__) && defined(__ARM_FP) && (__ARM_FP != 0) __asm__ __volatile__("VMRS %[fpscr], fpscr" : [fpscr] "=r" (state.fpscr)); @@ -31,7 +42,7 @@ static inline struct fpu_state get_fpu_state() { } static inline void set_fpu_state(const struct fpu_state state) { -#if defined(__SSE__) || defined(__x86_64__) +#if defined(__SSE__) || defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1) _mm_setcsr((unsigned int) state.mxcsr); #elif defined(__arm__) && defined(__ARM_FP) && (__ARM_FP != 0) __asm__ __volatile__("VMSR fpscr, %[fpscr]" : : [fpscr] "r" (state.fpscr)); @@ -41,7 +52,7 @@ static inline void set_fpu_state(const struct fpu_state state) { } static inline void disable_fpu_denormals() { -#if defined(__SSE__) || defined(__x86_64__) +#if defined(__SSE__) || defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1) _mm_setcsr(_mm_getcsr() | 0x8040); #elif defined(__arm__) && defined(__ARM_FP) && (__ARM_FP != 0) uint32_t fpscr; @@ -60,3 +71,29 @@ static inline void disable_fpu_denormals() { : [fpcr] "=r" (fpcr)); #endif } + +static inline size_t modulo_decrement(size_t i, size_t n) { + /* Wrap modulo n, if needed */ + if (i == 0) { + i = n; + } + /* Decrement input variable */ + return i - 1; +} + +static inline size_t divide_round_up(size_t dividend, size_t divisor) { + if (dividend % divisor == 0) { + return dividend / divisor; + } else { + return dividend / divisor + 1; + } +} + +/* Windows headers define min and max macros; undefine it here */ +#ifdef min + #undef min +#endif + +static inline size_t min(size_t a, size_t b) { + return a < b ? a : b; +} |