diff options
author | Victoria <victoria.zhislina@intel.com> | 2018-02-12 16:48:22 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-02-12 16:48:22 +0300 |
commit | 850115d2e949e9f29ce1a43037f1c92545a61efc (patch) | |
tree | a06d33b290ab6a98b98025862bf85d37ddeb0b14 | |
parent | 8e122fd4e105574da29385f2fbbd981f56679bc8 (diff) | |
parent | b02af7817a02ca4d651f77dfe9ad8f66358e827c (diff) | |
download | neon_2_sse-850115d2e949e9f29ce1a43037f1c92545a61efc.tar.gz |
Merge pull request #12 from sharkfox/master
fix pointer truncation warning when using MSVC2015
-rw-r--r-- | NEON_2_SSE.h | 10 |
1 file changed, 5 insertions, 5 deletions
diff --git a/NEON_2_SSE.h b/NEON_2_SSE.h index e8fd7ab..956c8f7 100644 --- a/NEON_2_SSE.h +++ b/NEON_2_SSE.h @@ -9237,7 +9237,7 @@ poly16x8_t vsliq_n_p16(poly16x8_t a, poly16x8_t b, __constrange(0,15) int c); // // it loads a 32-byte block aligned on a 16-byte boundary and extracts the 16 bytes corresponding to the unaligned access //If the ptr is aligned then could use __m128i _mm_load_si128 ((__m128i*) ptr) instead; #define LOAD_SI128(ptr) \ - ( ((unsigned long)(ptr) & 15) == 0 ) ? _mm_load_si128((__m128i*)(ptr)) : _mm_loadu_si128((__m128i*)(ptr)) + ( ((uintptr_t)(ptr) & 15) == 0 ) ? _mm_load_si128((__m128i*)(ptr)) : _mm_loadu_si128((__m128i*)(ptr)) uint8x16_t vld1q_u8(__transfersize(16) uint8_t const * ptr); // VLD1.8 {d0, d1}, [r0] #define vld1q_u8 LOAD_SI128 @@ -9274,7 +9274,7 @@ f2 = _mm_set_ps (ptr[7], ptr[6], ptr[5], ptr[4]); float32x4_t vld1q_f32(__transfersize(4) float32_t const * ptr); // VLD1.32 {d0, d1}, [r0] _NEON2SSE_INLINE float32x4_t vld1q_f32(__transfersize(4) float32_t const * ptr) { - if( (((unsigned long)(ptr)) & 15 ) == 0 ) //16 bits aligned + if( (((uintptr_t)(ptr)) & 15 ) == 0 ) //16 bits aligned return _mm_load_ps(ptr); else return _mm_loadu_ps(ptr); @@ -9333,7 +9333,7 @@ poly16x4_t vld1_p16(__transfersize(4) poly16_t const * ptr); // VLD1.16 {d0}, [r float64x2_t vld1q_f64(__transfersize(4) float64_t const * ptr); // VLD1.64 {d0, d1}, [r0] _NEON2SSE_INLINE float64x2_t vld1q_f64(__transfersize(4) float64_t const * ptr) { - if ((((unsigned long)(ptr)) & 15) == 0) //16 bits aligned + if ((((uintptr_t)(ptr)) & 15) == 0) //16 bits aligned return _mm_load_pd(ptr); else return _mm_loadu_pd(ptr); @@ -9574,7 +9574,7 @@ poly16x4_t vld1_dup_p16(__transfersize(1) poly16_t const * ptr); // VLD1.16 {d0[ // If ptr is 16bit aligned and you need to store data without cache pollution then use void _mm_stream_si128 ((__m128i*)ptr, val); //here we assume the case of NOT 16bit aligned ptr possible. 
If it is aligned we could to use _mm_store_si128 like shown in the following macro #define STORE_SI128(ptr, val) \ - (((unsigned long)(ptr) & 15) == 0 ) ? _mm_store_si128 ((__m128i*)(ptr), val) : _mm_storeu_si128 ((__m128i*)(ptr), val); + (((uintptr_t)(ptr) & 15) == 0 ) ? _mm_store_si128 ((__m128i*)(ptr), val) : _mm_storeu_si128 ((__m128i*)(ptr), val); void vst1q_u8(__transfersize(16) uint8_t * ptr, uint8x16_t val); // VST1.8 {d0, d1}, [r0] #define vst1q_u8 STORE_SI128 @@ -9606,7 +9606,7 @@ void vst1q_f16(__transfersize(8) __fp16 * ptr, float16x8_t val); // VST1.16 {d0, void vst1q_f32(__transfersize(4) float32_t * ptr, float32x4_t val); // VST1.32 {d0, d1}, [r0] _NEON2SSE_INLINE void vst1q_f32(__transfersize(4) float32_t * ptr, float32x4_t val) { - if( ((unsigned long)(ptr) & 15) == 0 ) //16 bits aligned + if( ((uintptr_t)(ptr) & 15) == 0 ) //16 bits aligned _mm_store_ps (ptr, val); else _mm_storeu_ps (ptr, val); |