aboutsummaryrefslogtreecommitdiff
path: root/src/xnnpack/intrinsics-polyfill.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/xnnpack/intrinsics-polyfill.h')
-rw-r--r--src/xnnpack/intrinsics-polyfill.h33
1 files changed, 27 insertions, 6 deletions
diff --git a/src/xnnpack/intrinsics-polyfill.h b/src/xnnpack/intrinsics-polyfill.h
index 51da04e82..caa192d30 100644
--- a/src/xnnpack/intrinsics-polyfill.h
+++ b/src/xnnpack/intrinsics-polyfill.h
@@ -5,6 +5,27 @@
#pragma once
+#include <xnnpack/common.h>
+
+
+#if defined(__SSE__) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 1))
+#include <xmmintrin.h>
+
+static XNN_INTRINSIC XNN_DISABLE_TSAN
+__m128 _mm_loadu_ps_notsan(const float* address) {
+ return _mm_loadu_ps(address);
+}
+#endif
+
+#if defined(__SSE2__) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 2))
+#include <emmintrin.h>
+
+static XNN_INTRINSIC XNN_DISABLE_TSAN
+__m128i _mm_loadu_si128_notsan(const __m128i* address) {
+ return _mm_loadu_si128(address);
+}
+#endif
+
#ifdef __AVX512F__
#include <immintrin.h>
@@ -15,8 +36,8 @@
(defined(__clang__) && defined(__apple_build_version__) && (__apple_build_version__ < 11000000)) || \
(defined(__INTEL_COMPILER) && (__INTEL_COMPILER < 1800))
-static inline __mmask16 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_cvtu32_mask16(unsigned int mask) {
+static XNN_INTRINSIC
+__mmask16 _cvtu32_mask16(unsigned int mask) {
return (__mmask16) mask;
}
@@ -27,8 +48,8 @@ _cvtu32_mask16(unsigned int mask) {
(defined(__clang__) && (__clang_major__ < 4)) || \
(defined(__INTEL_COMPILER) && (__INTEL_COMPILER < 1800))
-static inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_reduce_add_ps(__m512 v) {
+static XNN_INTRINSIC
+float _mm512_reduce_add_ps(__m512 v) {
#if __AVX512DQ__
const __m256 sum2 = _mm256_add_ps(_mm512_castps512_ps256(v), _mm512_extractf32x8_ps(v, 1));
#else
@@ -40,8 +61,8 @@ _mm512_reduce_add_ps(__m512 v) {
return _mm_cvtss_f32(sum16);
}
-static inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_reduce_max_ps(__m512 v) {
+static XNN_INTRINSIC
+float _mm512_reduce_max_ps(__m512 v) {
#if __AVX512DQ__
const __m256 sum2 = _mm256_max_ps(_mm512_castps512_ps256(v), _mm512_extractf32x8_ps(v, 1));
#else