From 5690b5ceada160444a916d31ef72e381f5e52d67 Mon Sep 17 00:00:00 2001 From: Marat Dukhan Date: Thu, 7 May 2020 17:23:12 -0700 Subject: MSVC-compatible FPU state functions --- src/threadpool-utils.h | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/threadpool-utils.h b/src/threadpool-utils.h index 5443c6e..d95e3a5 100644 --- a/src/threadpool-utils.h +++ b/src/threadpool-utils.h @@ -9,7 +9,7 @@ #endif /* MSVC-specific headers */ -#if defined(_MSC_VER) && _MSC_VER >= 1920 +#if defined(_MSC_VER) #include #if defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64) #include @@ -20,9 +20,9 @@ struct fpu_state { #if defined(__SSE__) || defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1) uint32_t mxcsr; -#elif defined(__arm__) && defined(__ARM_FP) && (__ARM_FP != 0) +#elif defined(__GNUC__) && defined(__arm__) && defined(__ARM_FP) && (__ARM_FP != 0) || defined(_MSC_VER) && defined(_M_ARM) uint32_t fpscr; -#elif defined(__aarch64__) +#elif defined(__GNUC__) && defined(__aarch64__) || defined(_MSC_VER) && defined(_M_ARM64) uint64_t fpcr; #else char unused; @@ -33,9 +33,13 @@ static inline struct fpu_state get_fpu_state() { struct fpu_state state = { 0 }; #if defined(__SSE__) || defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1) state.mxcsr = (uint32_t) _mm_getcsr(); -#elif defined(__arm__) && defined(__ARM_FP) && (__ARM_FP != 0) +#elif defined(_MSC_VER) && defined(_M_ARM) + state.fpscr = (uint32_t) _MoveFromCoprocessor(10, 7, 1, 0, 0); +#elif defined(_MSC_VER) && defined(_M_ARM64) + state.fpcr = (uint64_t) _ReadStatusReg(0x5A20); +#elif defined(__GNUC__) && defined(__arm__) && defined(__ARM_FP) && (__ARM_FP != 0) __asm__ __volatile__("VMRS %[fpscr], fpscr" : [fpscr] "=r" (state.fpscr)); -#elif defined(__aarch64__) +#elif defined(__GNUC__) && defined(__aarch64__) __asm__ __volatile__("MRS %[fpcr], fpcr" : [fpcr] "=r" (state.fpcr)); #endif return state; @@ -44,9 +48,13 @@ static inline struct fpu_state get_fpu_state() { static inline void set_fpu_state(const struct fpu_state state) { #if defined(__SSE__) || defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1) _mm_setcsr((unsigned int) state.mxcsr); -#elif defined(__arm__) && defined(__ARM_FP) && (__ARM_FP != 0) +#elif defined(_MSC_VER) && defined(_M_ARM) + _MoveToCoprocessor((int) state.fpscr, 10, 7, 1, 0, 0); +#elif defined(_MSC_VER) && defined(_M_ARM64) + _WriteStatusReg(0x5A20, (__int64) state.fpcr); +#elif defined(__GNUC__) && defined(__arm__) && defined(__ARM_FP) && (__ARM_FP != 0) __asm__ __volatile__("VMSR fpscr, %[fpscr]" : : [fpscr] "r" (state.fpscr)); -#elif defined(__aarch64__) +#elif defined(__GNUC__) && defined(__aarch64__) __asm__ __volatile__("MSR fpcr, %[fpcr]" : : [fpcr] "r" (state.fpcr)); #endif } @@ -54,7 +62,16 @@ static inline void set_fpu_state(const struct fpu_state state) { static inline void disable_fpu_denormals() { #if defined(__SSE__) || defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1) _mm_setcsr(_mm_getcsr() | 0x8040); -#elif defined(__arm__) && defined(__ARM_FP) && (__ARM_FP != 0) +#elif defined(_MSC_VER) && defined(_M_ARM) + int fpscr = _MoveFromCoprocessor(10, 7, 1, 0, 0); + fpscr |= 0x1000000; + _MoveToCoprocessor(fpscr, 10, 7, 1, 0, 0); +#elif defined(_MSC_VER) && defined(_M_ARM64) + __int64 fpcr = _ReadStatusReg(0x5A20); + fpcr |= 0x1080000; + _WriteStatusReg(0x5A20, fpcr); +#elif defined(__GNUC__) && defined(__arm__) && defined(__ARM_FP) && (__ARM_FP != 0) + uint32_t fpscr; #if defined(__thumb__) && !defined(__thumb2__) __asm__ __volatile__( "VMRS %[fpscr], fpscr\n" @@ -70,7 +87,7 @@ static inline void disable_fpu_denormals() { "VMSR fpscr, %[fpscr]\n" : [fpscr] "=r" (fpscr)); #endif -#elif defined(__aarch64__) +#elif defined(__GNUC__) && defined(__aarch64__) uint64_t fpcr; __asm__ __volatile__( "MRS %[fpcr], fpcr\n" -- cgit v1.2.3