path: root/src/xnnpack/intrinsics-polyfill.h
author     Marat Dukhan <maratek@google.com>  2020-04-28 10:26:13 -0700
committer  XNNPACK Team <xnnpack-github-robot@google.com>  2020-04-28 10:26:50 -0700
commit     8aaf186f756f4ca7937e0ad24468cb1e17f0da65 (patch)
tree       839033e40db2381fd27c8c2d1c527d988a4bb232 /src/xnnpack/intrinsics-polyfill.h
parent     01849010c146d681fbde8a482f94ef9b213c9e64 (diff)
download   XNNPACK-8aaf186f756f4ca7937e0ad24468cb1e17f0da65.tar.gz
Disable TSan in potentially in-place micro-kernels with OOB reads

SIMD micro-kernels read beyond the end of the input, and when the operation is in-place and the out-of-bounds elements are concurrently written by a different thread, TSan reports a data race. This causes no actual errors, because every computation the out-of-bounds elements contribute to is discarded.

PiperOrigin-RevId: 308848666
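For context, here is roughly how a micro-kernel would use the TSan-suppressed load added below (a minimal sketch, not code from this commit; the kernel name and shape are hypothetical, and it assumes the caller guarantees that reads up to 3 floats past input + n stay inside allocated memory, which XNNPACK arranges via buffer padding):

    #include <stddef.h>
    #include <xmmintrin.h>
    #include <xnnpack/intrinsics-polyfill.h>

    // Hypothetical in-place-capable kernel: scales n floats by c, 4 lanes
    // per iteration. The last iteration may load past input + n; the extra
    // lanes are computed but never stored, so the OOB read is benign.
    static void scale_f32(const float* input, float* output, size_t n, float c) {
      const __m128 vc = _mm_set1_ps(c);
      for (size_t i = 0; i < n; i += 4) {
        // Same as _mm_loadu_ps, but invisible to TSan: when input == output,
        // another thread may be writing the lanes beyond n concurrently.
        const __m128 vx = _mm_loadu_ps_notsan(input + i);
        const __m128 vy = _mm_mul_ps(vx, vc);
        if (i + 4 <= n) {
          _mm_storeu_ps(output + i, vy);
        } else {
          float tmp[4];  // spill, then store only the n - i valid lanes
          _mm_storeu_ps(tmp, vy);
          for (size_t j = 0; i + j < n; j++) output[i + j] = tmp[j];
        }
      }
    }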
Diffstat (limited to 'src/xnnpack/intrinsics-polyfill.h')
-rw-r--r--  src/xnnpack/intrinsics-polyfill.h  33
1 file changed, 27 insertions(+), 6 deletions(-)
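The new code leans on two macros from the freshly included <xnnpack/common.h>. Their definitions are not part of this diff; a plausible rendering, judging from the __attribute__ list the macro replaces below and from how Clang exposes sanitizer suppression (both are assumptions, not verbatim XNNPACK code):

    /* Assumed shape of the helpers in src/xnnpack/common.h; the real file
       also covers MSVC and other toolchains. */
    #if defined(__GNUC__)
      #define XNN_INTRINSIC inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    #else
      #define XNN_INTRINSIC inline
    #endif

    /* Suppress ThreadSanitizer instrumentation for one function (Clang). */
    #if defined(__has_feature)
      #if __has_feature(thread_sanitizer)
        #define XNN_DISABLE_TSAN __attribute__((no_sanitize("thread")))
      #endif
    #endif
    #ifndef XNN_DISABLE_TSAN
      #define XNN_DISABLE_TSAN
    #endif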
diff --git a/src/xnnpack/intrinsics-polyfill.h b/src/xnnpack/intrinsics-polyfill.h
index 51da04e82..caa192d30 100644
--- a/src/xnnpack/intrinsics-polyfill.h
+++ b/src/xnnpack/intrinsics-polyfill.h
@@ -5,6 +5,27 @@
#pragma once
+#include <xnnpack/common.h>
+
+
+#if defined(__SSE__) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 1))
+#include <xmmintrin.h>
+
+static XNN_INTRINSIC XNN_DISABLE_TSAN
+__m128 _mm_loadu_ps_notsan(const float* address) {
+ return _mm_loadu_ps(address);
+}
+#endif
+
+#if defined(__SSE2__) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 2))
+#include <emmintrin.h>
+
+static XNN_INTRINSIC XNN_DISABLE_TSAN
+__m128i _mm_loadu_si128_notsan(const __m128i* address) {
+ return _mm_loadu_si128(address);
+}
+#endif
+
#ifdef __AVX512F__
#include <immintrin.h>
@@ -15,8 +36,8 @@
(defined(__clang__) && defined(__apple_build_version__) && (__apple_build_version__ < 11000000)) || \
(defined(__INTEL_COMPILER) && (__INTEL_COMPILER < 1800))
-static inline __mmask16 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_cvtu32_mask16(unsigned int mask) {
+static XNN_INTRINSIC
+__mmask16 _cvtu32_mask16(unsigned int mask) {
return (__mmask16) mask;
}
@@ -27,8 +48,8 @@ _cvtu32_mask16(unsigned int mask) {
(defined(__clang__) && (__clang_major__ < 4)) || \
(defined(__INTEL_COMPILER) && (__INTEL_COMPILER < 1800))
-static inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_reduce_add_ps(__m512 v) {
+static XNN_INTRINSIC
+float _mm512_reduce_add_ps(__m512 v) {
#if __AVX512DQ__
const __m256 sum2 = _mm256_add_ps(_mm512_castps512_ps256(v), _mm512_extractf32x8_ps(v, 1));
#else
@@ -40,8 +61,8 @@ _mm512_reduce_add_ps(__m512 v) {
return _mm_cvtss_f32(sum16);
}
-static inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_reduce_max_ps(__m512 v) {
+static XNN_INTRINSIC
+float _mm512_reduce_max_ps(__m512 v) {
#if __AVX512DQ__
const __m256 sum2 = _mm256_max_ps(_mm512_castps512_ps256(v), _mm512_extractf32x8_ps(v, 1));
#else
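The excerpt cuts off inside _mm512_reduce_max_ps. By analogy with the _mm512_reduce_add_ps body above, which narrows 512 -> 256 -> 128 bits and ends in _mm_cvtss_f32(sum16), the rest of the polyfill plausibly performs the same pairwise narrowing with maxima instead of sums. A hedged reconstruction of the remainder (the variable names and the non-AVX512DQ extract are assumptions, not verbatim file contents):

      /* Sketch only. Without AVX512DQ there is no _mm512_extractf32x8_ps,
         so the upper 256 bits are extracted through a double-precision cast. */
      const __m256 sum2 = _mm256_max_ps(_mm512_castps512_ps256(v),
          _mm256_castpd_ps(_mm512_extractf64x4_pd(_mm512_castps_pd(v), 1)));
    #endif
      const __m128 sum4 = _mm_max_ps(_mm256_castps256_ps128(sum2), _mm256_extractf128_ps(sum2, 1));
      const __m128 sum8 = _mm_max_ps(sum4, _mm_movehl_ps(sum4, sum4));
      const __m128 sum16 = _mm_max_ss(sum8, _mm_movehdup_ps(sum8));
      return _mm_cvtss_f32(sum16);
    }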