aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarat Dukhan <maratek@google.com>2020-05-07 17:23:12 -0700
committerMarat Dukhan <maratek@google.com>2020-05-07 17:23:12 -0700
commit5690b5ceada160444a916d31ef72e381f5e52d67 (patch)
tree4cdd199bf3460b66ad85a0ee76ed33ef117c264e
parentada4eedd7113b82e7d1ab35394b78eaf03a60906 (diff)
downloadpthreadpool-5690b5ceada160444a916d31ef72e381f5e52d67.tar.gz
MSVC-compatible FPU state functions
-rw-r--r--src/threadpool-utils.h35
1 files changed, 26 insertions, 9 deletions
diff --git a/src/threadpool-utils.h b/src/threadpool-utils.h
index 5443c6e..d95e3a5 100644
--- a/src/threadpool-utils.h
+++ b/src/threadpool-utils.h
@@ -9,7 +9,7 @@
#endif
/* MSVC-specific headers */
-#if defined(_MSC_VER) && _MSC_VER >= 1920
+#if defined(_MSC_VER)
#include <intrin.h>
#if defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64)
#include <immintrin.h>
@@ -20,9 +20,9 @@
struct fpu_state {
#if defined(__SSE__) || defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1)
uint32_t mxcsr;
-#elif defined(__arm__) && defined(__ARM_FP) && (__ARM_FP != 0)
+#elif defined(__GNUC__) && defined(__arm__) && defined(__ARM_FP) && (__ARM_FP != 0) || defined(_MSC_VER) && defined(_M_ARM)
uint32_t fpscr;
-#elif defined(__aarch64__)
+#elif defined(__GNUC__) && defined(__aarch64__) || defined(_MSC_VER) && defined(_M_ARM64)
uint64_t fpcr;
#else
char unused;
@@ -33,9 +33,13 @@ static inline struct fpu_state get_fpu_state() {
struct fpu_state state = { 0 };
#if defined(__SSE__) || defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1)
state.mxcsr = (uint32_t) _mm_getcsr();
-#elif defined(__arm__) && defined(__ARM_FP) && (__ARM_FP != 0)
+#elif defined(_MSC_VER) && defined(_M_ARM)
+ state.fpscr = (uint32_t) _MoveFromCoprocessor(10, 7, 1, 0, 0);
+#elif defined(_MSC_VER) && defined(_M_ARM64)
+ state.fpcr = (uint64_t) _ReadStatusReg(0x5A20);
+#elif defined(__GNUC__) && defined(__arm__) && defined(__ARM_FP) && (__ARM_FP != 0)
__asm__ __volatile__("VMRS %[fpscr], fpscr" : [fpscr] "=r" (state.fpscr));
-#elif defined(__aarch64__)
+#elif defined(__GNUC__) && defined(__aarch64__)
__asm__ __volatile__("MRS %[fpcr], fpcr" : [fpcr] "=r" (state.fpcr));
#endif
return state;
@@ -44,9 +48,13 @@ static inline struct fpu_state get_fpu_state() {
static inline void set_fpu_state(const struct fpu_state state) {
#if defined(__SSE__) || defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1)
_mm_setcsr((unsigned int) state.mxcsr);
-#elif defined(__arm__) && defined(__ARM_FP) && (__ARM_FP != 0)
+#elif defined(_MSC_VER) && defined(_M_ARM)
+ _MoveToCoprocessor((int) state.fpscr, 10, 7, 1, 0, 0);
+#elif defined(_MSC_VER) && defined(_M_ARM64)
+ _WriteStatusReg(0x5A20, (__int64) state.fpcr);
+#elif defined(__GNUC__) && defined(__arm__) && defined(__ARM_FP) && (__ARM_FP != 0)
__asm__ __volatile__("VMSR fpscr, %[fpscr]" : : [fpscr] "r" (state.fpscr));
-#elif defined(__aarch64__)
+#elif defined(__GNUC__) && defined(__aarch64__)
__asm__ __volatile__("MSR fpcr, %[fpcr]" : : [fpcr] "r" (state.fpcr));
#endif
}
@@ -54,7 +62,16 @@ static inline void set_fpu_state(const struct fpu_state state) {
static inline void disable_fpu_denormals() {
#if defined(__SSE__) || defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1)
_mm_setcsr(_mm_getcsr() | 0x8040);
-#elif defined(__arm__) && defined(__ARM_FP) && (__ARM_FP != 0)
+#elif defined(_MSC_VER) && defined(_M_ARM)
+ int fpscr = _MoveFromCoprocessor(10, 7, 1, 0, 0);
+ fpscr |= 0x1000000;
+ _MoveToCoprocessor(fpscr, 10, 7, 1, 0, 0);
+#elif defined(_MSC_VER) && defined(_M_ARM64)
+ __int64 fpcr = _ReadStatusReg(0x5A20);
+ fpcr |= 0x1080000;
+ _WriteStatusReg(0x5A20, fpcr);
+#elif defined(__GNUC__) && defined(__arm__) && defined(__ARM_FP) && (__ARM_FP != 0)
+ uint32_t fpscr;
#if defined(__thumb__) && !defined(__thumb2__)
__asm__ __volatile__(
"VMRS %[fpscr], fpscr\n"
@@ -70,7 +87,7 @@ static inline void disable_fpu_denormals() {
"VMSR fpscr, %[fpscr]\n"
: [fpscr] "=r" (fpscr));
#endif
-#elif defined(__aarch64__)
+#elif defined(__GNUC__) && defined(__aarch64__)
uint64_t fpcr;
__asm__ __volatile__(
"MRS %[fpcr], fpcr\n"