diff options
author | Stephen Hines <srhines@google.com> | 2015-03-30 16:53:30 -0700 |
---|---|---|
committer | Stephen Hines <srhines@google.com> | 2015-03-30 16:53:30 -0700 |
commit | 15679301bad171fecfd2393d057bdf6152928f77 (patch) | |
tree | 20d2bb01cf29af9552cd5bbdb94148a8a1a2698a /lib | |
parent | 0d97dd81b09a704f897d300edae3bf96f8c98ec1 (diff) | |
download | 3.6-15679301bad171fecfd2393d057bdf6152928f77.tar.gz |
Update Darwin clang prebuilts for rebase to r230699. (tags: android-wear-5.1.1_r1, android-wear-5.1.0_r1)
Change-Id: Ieacdf028fae7bc6110746be40ffa4061c975bc7b
Diffstat (limited to 'lib')
-rw-r--r-- | lib/clang/3.6/include/Intrin.h | 35 | ||||
-rw-r--r-- | lib/clang/3.6/include/__stddef_max_align_t.h | 9 | ||||
-rw-r--r-- | lib/clang/3.6/include/altivec.h | 64 | ||||
-rw-r--r-- | lib/clang/3.6/include/arm_acle.h | 68 | ||||
-rw-r--r-- | lib/clang/3.6/include/avx512bwintrin.h | 309 | ||||
-rw-r--r-- | lib/clang/3.6/include/avx512erintrin.h | 326 | ||||
-rw-r--r-- | lib/clang/3.6/include/avx512fintrin.h | 634 | ||||
-rw-r--r-- | lib/clang/3.6/include/avx512vlbwintrin.h | 606 | ||||
-rw-r--r-- | lib/clang/3.6/include/avx512vlintrin.h | 610 | ||||
-rw-r--r-- | lib/clang/3.6/include/avxintrin.h | 73 | ||||
-rw-r--r-- | lib/clang/3.6/include/emmintrin.h | 132 | ||||
-rw-r--r-- | lib/clang/3.6/include/module.modulemap | 2 | ||||
-rw-r--r-- | lib/clang/3.6/include/shaintrin.h | 12 | ||||
-rw-r--r-- | lib/clang/3.6/include/unwind.h | 24 | ||||
-rw-r--r-- | lib/clang/3.6/include/xmmintrin.h | 48 | ||||
-rwxr-xr-x | lib/libc++.dylib | bin | 1470444 -> 1475056 bytes |
16 files changed, 2529 insertions, 423 deletions
diff --git a/lib/clang/3.6/include/Intrin.h b/lib/clang/3.6/include/Intrin.h index 016c480..727a55e 100644 --- a/lib/clang/3.6/include/Intrin.h +++ b/lib/clang/3.6/include/Intrin.h @@ -289,6 +289,7 @@ void _WriteBarrier(void); unsigned __int32 xbegin(void); void _xend(void); static __inline__ +#define _XCR_XFEATURE_ENABLED_MASK 0 unsigned __int64 __cdecl _xgetbv(unsigned int); void __cdecl _xrstor(void const *, unsigned __int64); void __cdecl _xsave(void *, unsigned __int64); @@ -330,7 +331,6 @@ unsigned __int64 __shiftright128(unsigned __int64 _LowPart, unsigned char _Shift); static __inline__ void __stosq(unsigned __int64 *, unsigned __int64, size_t); -unsigned __int64 __umulh(unsigned __int64, unsigned __int64); unsigned char __vmx_on(unsigned __int64 *); unsigned char __vmx_vmclear(unsigned __int64 *); unsigned char __vmx_vmlaunch(void); @@ -416,10 +416,25 @@ __int64 _sarx_i64(__int64, unsigned int); int __cdecl _setjmpex(jmp_buf); #endif unsigned __int64 _shlx_u64(unsigned __int64, unsigned int); -unsigned __int64 shrx_u64(unsigned __int64, unsigned int); -unsigned __int64 _umul128(unsigned __int64 _Multiplier, - unsigned __int64 _Multiplicand, - unsigned __int64 *_HighProduct); +unsigned __int64 _shrx_u64(unsigned __int64, unsigned int); +/* + * Multiply two 64-bit integers and obtain a 64-bit result. + * The low-half is returned directly and the high half is in an out parameter. 
+ */ +static __inline__ unsigned __int64 __attribute__((__always_inline__, __nodebug__)) +_umul128(unsigned __int64 _Multiplier, unsigned __int64 _Multiplicand, + unsigned __int64 *_HighProduct) { + unsigned __int128 _FullProduct = + (unsigned __int128)_Multiplier * (unsigned __int128)_Multiplicand; + *_HighProduct = _FullProduct >> 64; + return _FullProduct; +} +static __inline__ unsigned __int64 __attribute__((__always_inline__, __nodebug__)) +__umulh(unsigned __int64 _Multiplier, unsigned __int64 _Multiplicand) { + unsigned __int128 _FullProduct = + (unsigned __int128)_Multiplier * (unsigned __int128)_Multiplicand; + return _FullProduct >> 64; +} void __cdecl _xrstor64(void const *, unsigned __int64); void __cdecl _xsave64(void *, unsigned __int64); void __cdecl _xsaveopt64(void *, unsigned __int64); @@ -766,17 +781,17 @@ _InterlockedCompareExchange64(__int64 volatile *_Destination, \*----------------------------------------------------------------------------*/ #if defined(__i386__) || defined(__x86_64__) static __inline__ void __attribute__((__always_inline__, __nodebug__)) -__attribute__((deprecated("use other intrinsics or C++11 atomics instead"))) +__attribute__((__deprecated__("use other intrinsics or C++11 atomics instead"))) _ReadWriteBarrier(void) { __asm__ volatile ("" : : : "memory"); } static __inline__ void __attribute__((__always_inline__, __nodebug__)) -__attribute__((deprecated("use other intrinsics or C++11 atomics instead"))) +__attribute__((__deprecated__("use other intrinsics or C++11 atomics instead"))) _ReadBarrier(void) { __asm__ volatile ("" : : : "memory"); } static __inline__ void __attribute__((__always_inline__, __nodebug__)) -__attribute__((deprecated("use other intrinsics or C++11 atomics instead"))) +__attribute__((__deprecated__("use other intrinsics or C++11 atomics instead"))) _WriteBarrier(void) { __asm__ volatile ("" : : : "memory"); } @@ -929,14 +944,14 @@ __readmsr(unsigned long __register) { return (((unsigned 
__int64)__edx) << 32) | (unsigned __int64)__eax; } -static __inline__ unsigned long __attribute__((always_inline, __nodebug__)) +static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__)) __readcr3(void) { unsigned long __cr3_val; __asm__ __volatile__ ("mov %%cr3, %0" : "=q"(__cr3_val) : : "memory"); return __cr3_val; } -static __inline__ void __attribute__((always_inline, __nodebug__)) +static __inline__ void __attribute__((__always_inline__, __nodebug__)) __writecr3(unsigned int __cr3_val) { __asm__ ("mov %0, %%cr3" : : "q"(__cr3_val) : "memory"); } diff --git a/lib/clang/3.6/include/__stddef_max_align_t.h b/lib/clang/3.6/include/__stddef_max_align_t.h index a06f412..1e10ca9 100644 --- a/lib/clang/3.6/include/__stddef_max_align_t.h +++ b/lib/clang/3.6/include/__stddef_max_align_t.h @@ -26,15 +26,18 @@ #ifndef __CLANG_MAX_ALIGN_T_DEFINED #define __CLANG_MAX_ALIGN_T_DEFINED -#ifndef _MSC_VER +#if defined(_MSC_VER) +typedef double max_align_t; +#elif defined(__APPLE__) +typedef long double max_align_t; +#else +// Define 'max_align_t' to match the GCC definition. 
typedef struct { long long __clang_max_align_nonce1 __attribute__((__aligned__(__alignof__(long long)))); long double __clang_max_align_nonce2 __attribute__((__aligned__(__alignof__(long double)))); } max_align_t; -#else -typedef double max_align_t; #endif #endif diff --git a/lib/clang/3.6/include/altivec.h b/lib/clang/3.6/include/altivec.h index eded7b2..b8a8869 100644 --- a/lib/clang/3.6/include/altivec.h +++ b/lib/clang/3.6/include/altivec.h @@ -2270,7 +2270,7 @@ vec_vlogefp(vector float __a) #ifdef __LITTLE_ENDIAN__ static vector unsigned char __ATTRS_o_ai -__attribute__((deprecated("use assignment for unaligned little endian \ +__attribute__((__deprecated__("use assignment for unaligned little endian \ loads/stores"))) vec_lvsl(int __a, const signed char *__b) { @@ -2289,7 +2289,7 @@ vec_lvsl(int __a, const signed char *__b) #ifdef __LITTLE_ENDIAN__ static vector unsigned char __ATTRS_o_ai -__attribute__((deprecated("use assignment for unaligned little endian \ +__attribute__((__deprecated__("use assignment for unaligned little endian \ loads/stores"))) vec_lvsl(int __a, const unsigned char *__b) { @@ -2308,7 +2308,7 @@ vec_lvsl(int __a, const unsigned char *__b) #ifdef __LITTLE_ENDIAN__ static vector unsigned char __ATTRS_o_ai -__attribute__((deprecated("use assignment for unaligned little endian \ +__attribute__((__deprecated__("use assignment for unaligned little endian \ loads/stores"))) vec_lvsl(int __a, const short *__b) { @@ -2327,7 +2327,7 @@ vec_lvsl(int __a, const short *__b) #ifdef __LITTLE_ENDIAN__ static vector unsigned char __ATTRS_o_ai -__attribute__((deprecated("use assignment for unaligned little endian \ +__attribute__((__deprecated__("use assignment for unaligned little endian \ loads/stores"))) vec_lvsl(int __a, const unsigned short *__b) { @@ -2346,7 +2346,7 @@ vec_lvsl(int __a, const unsigned short *__b) #ifdef __LITTLE_ENDIAN__ static vector unsigned char __ATTRS_o_ai -__attribute__((deprecated("use assignment for unaligned little endian 
\ +__attribute__((__deprecated__("use assignment for unaligned little endian \ loads/stores"))) vec_lvsl(int __a, const int *__b) { @@ -2365,7 +2365,7 @@ vec_lvsl(int __a, const int *__b) #ifdef __LITTLE_ENDIAN__ static vector unsigned char __ATTRS_o_ai -__attribute__((deprecated("use assignment for unaligned little endian \ +__attribute__((__deprecated__("use assignment for unaligned little endian \ loads/stores"))) vec_lvsl(int __a, const unsigned int *__b) { @@ -2384,7 +2384,7 @@ vec_lvsl(int __a, const unsigned int *__b) #ifdef __LITTLE_ENDIAN__ static vector unsigned char __ATTRS_o_ai -__attribute__((deprecated("use assignment for unaligned little endian \ +__attribute__((__deprecated__("use assignment for unaligned little endian \ loads/stores"))) vec_lvsl(int __a, const float *__b) { @@ -2405,7 +2405,7 @@ vec_lvsl(int __a, const float *__b) #ifdef __LITTLE_ENDIAN__ static vector unsigned char __ATTRS_o_ai -__attribute__((deprecated("use assignment for unaligned little endian \ +__attribute__((__deprecated__("use assignment for unaligned little endian \ loads/stores"))) vec_lvsr(int __a, const signed char *__b) { @@ -2424,7 +2424,7 @@ vec_lvsr(int __a, const signed char *__b) #ifdef __LITTLE_ENDIAN__ static vector unsigned char __ATTRS_o_ai -__attribute__((deprecated("use assignment for unaligned little endian \ +__attribute__((__deprecated__("use assignment for unaligned little endian \ loads/stores"))) vec_lvsr(int __a, const unsigned char *__b) { @@ -2443,7 +2443,7 @@ vec_lvsr(int __a, const unsigned char *__b) #ifdef __LITTLE_ENDIAN__ static vector unsigned char __ATTRS_o_ai -__attribute__((deprecated("use assignment for unaligned little endian \ +__attribute__((__deprecated__("use assignment for unaligned little endian \ loads/stores"))) vec_lvsr(int __a, const short *__b) { @@ -2462,7 +2462,7 @@ vec_lvsr(int __a, const short *__b) #ifdef __LITTLE_ENDIAN__ static vector unsigned char __ATTRS_o_ai -__attribute__((deprecated("use assignment for unaligned 
little endian \ +__attribute__((__deprecated__("use assignment for unaligned little endian \ loads/stores"))) vec_lvsr(int __a, const unsigned short *__b) { @@ -2481,7 +2481,7 @@ vec_lvsr(int __a, const unsigned short *__b) #ifdef __LITTLE_ENDIAN__ static vector unsigned char __ATTRS_o_ai -__attribute__((deprecated("use assignment for unaligned little endian \ +__attribute__((__deprecated__("use assignment for unaligned little endian \ loads/stores"))) vec_lvsr(int __a, const int *__b) { @@ -2500,7 +2500,7 @@ vec_lvsr(int __a, const int *__b) #ifdef __LITTLE_ENDIAN__ static vector unsigned char __ATTRS_o_ai -__attribute__((deprecated("use assignment for unaligned little endian \ +__attribute__((__deprecated__("use assignment for unaligned little endian \ loads/stores"))) vec_lvsr(int __a, const unsigned int *__b) { @@ -2519,7 +2519,7 @@ vec_lvsr(int __a, const unsigned int *__b) #ifdef __LITTLE_ENDIAN__ static vector unsigned char __ATTRS_o_ai -__attribute__((deprecated("use assignment for unaligned little endian \ +__attribute__((__deprecated__("use assignment for unaligned little endian \ loads/stores"))) vec_lvsr(int __a, const float *__b) { @@ -4735,7 +4735,7 @@ vec_vpkswus(vector unsigned int __a, vector unsigned int __b) // in that the vec_xor can be recognized as a vec_nor (and for P8 and // later, possibly a vec_nand). 
-vector signed char __ATTRS_o_ai +static vector signed char __ATTRS_o_ai vec_perm(vector signed char __a, vector signed char __b, vector unsigned char __c) { #ifdef __LITTLE_ENDIAN__ @@ -4750,7 +4750,7 @@ vec_perm(vector signed char __a, vector signed char __b, vector unsigned char __ #endif } -vector unsigned char __ATTRS_o_ai +static vector unsigned char __ATTRS_o_ai vec_perm(vector unsigned char __a, vector unsigned char __b, vector unsigned char __c) @@ -4767,7 +4767,7 @@ vec_perm(vector unsigned char __a, #endif } -vector bool char __ATTRS_o_ai +static vector bool char __ATTRS_o_ai vec_perm(vector bool char __a, vector bool char __b, vector unsigned char __c) { #ifdef __LITTLE_ENDIAN__ @@ -4782,7 +4782,7 @@ vec_perm(vector bool char __a, vector bool char __b, vector unsigned char __c) #endif } -vector short __ATTRS_o_ai +static vector short __ATTRS_o_ai vec_perm(vector short __a, vector short __b, vector unsigned char __c) { #ifdef __LITTLE_ENDIAN__ @@ -4797,7 +4797,7 @@ vec_perm(vector short __a, vector short __b, vector unsigned char __c) #endif } -vector unsigned short __ATTRS_o_ai +static vector unsigned short __ATTRS_o_ai vec_perm(vector unsigned short __a, vector unsigned short __b, vector unsigned char __c) @@ -4814,7 +4814,7 @@ vec_perm(vector unsigned short __a, #endif } -vector bool short __ATTRS_o_ai +static vector bool short __ATTRS_o_ai vec_perm(vector bool short __a, vector bool short __b, vector unsigned char __c) { #ifdef __LITTLE_ENDIAN__ @@ -4829,7 +4829,7 @@ vec_perm(vector bool short __a, vector bool short __b, vector unsigned char __c) #endif } -vector pixel __ATTRS_o_ai +static vector pixel __ATTRS_o_ai vec_perm(vector pixel __a, vector pixel __b, vector unsigned char __c) { #ifdef __LITTLE_ENDIAN__ @@ -4844,7 +4844,7 @@ vec_perm(vector pixel __a, vector pixel __b, vector unsigned char __c) #endif } -vector int __ATTRS_o_ai +static vector int __ATTRS_o_ai vec_perm(vector int __a, vector int __b, vector unsigned char __c) { #ifdef 
__LITTLE_ENDIAN__ @@ -4857,7 +4857,7 @@ vec_perm(vector int __a, vector int __b, vector unsigned char __c) #endif } -vector unsigned int __ATTRS_o_ai +static vector unsigned int __ATTRS_o_ai vec_perm(vector unsigned int __a, vector unsigned int __b, vector unsigned char __c) { #ifdef __LITTLE_ENDIAN__ @@ -4872,7 +4872,7 @@ vec_perm(vector unsigned int __a, vector unsigned int __b, vector unsigned char #endif } -vector bool int __ATTRS_o_ai +static vector bool int __ATTRS_o_ai vec_perm(vector bool int __a, vector bool int __b, vector unsigned char __c) { #ifdef __LITTLE_ENDIAN__ @@ -4887,7 +4887,7 @@ vec_perm(vector bool int __a, vector bool int __b, vector unsigned char __c) #endif } -vector float __ATTRS_o_ai +static vector float __ATTRS_o_ai vec_perm(vector float __a, vector float __b, vector unsigned char __c) { #ifdef __LITTLE_ENDIAN__ @@ -4903,7 +4903,7 @@ vec_perm(vector float __a, vector float __b, vector unsigned char __c) } #ifdef __VSX__ -vector long long __ATTRS_o_ai +static vector long long __ATTRS_o_ai vec_perm(vector long long __a, vector long long __b, vector unsigned char __c) { #ifdef __LITTLE_ENDIAN__ @@ -4916,7 +4916,7 @@ vec_perm(vector long long __a, vector long long __b, vector unsigned char __c) #endif } -vector unsigned long long __ATTRS_o_ai +static vector unsigned long long __ATTRS_o_ai vec_perm(vector unsigned long long __a, vector unsigned long long __b, vector unsigned char __c) { @@ -4932,7 +4932,7 @@ vec_perm(vector unsigned long long __a, vector unsigned long long __b, #endif } -vector double __ATTRS_o_ai +static vector double __ATTRS_o_ai vec_perm(vector double __a, vector double __b, vector unsigned char __c) { #ifdef __LITTLE_ENDIAN__ @@ -8664,11 +8664,11 @@ vec_sum2s(vector int __a, vector int __b) #ifdef __LITTLE_ENDIAN__ vector int __c = (vector signed int) vec_perm(__b, __b, (vector unsigned char) - (4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11)); + (4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11)); __c = __builtin_altivec_vsum2sws(__a, __c); 
return (vector signed int) vec_perm(__c, __c, (vector unsigned char) - (4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11)); + (4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11)); #else return __builtin_altivec_vsum2sws(__a, __b); #endif @@ -8682,11 +8682,11 @@ vec_vsum2sws(vector int __a, vector int __b) #ifdef __LITTLE_ENDIAN__ vector int __c = (vector signed int) vec_perm(__b, __b, (vector unsigned char) - (4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11)); + (4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11)); __c = __builtin_altivec_vsum2sws(__a, __c); return (vector signed int) vec_perm(__c, __c, (vector unsigned char) - (4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11)); + (4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11)); #else return __builtin_altivec_vsum2sws(__a, __b); #endif diff --git a/lib/clang/3.6/include/arm_acle.h b/lib/clang/3.6/include/arm_acle.h index 814df2c..6c56f3b 100644 --- a/lib/clang/3.6/include/arm_acle.h +++ b/lib/clang/3.6/include/arm_acle.h @@ -45,23 +45,23 @@ extern "C" { /* 8.4 Hints */ #if !defined(_MSC_VER) -static __inline__ void __attribute__((always_inline, nodebug)) __wfi(void) { +static __inline__ void __attribute__((__always_inline__, __nodebug__)) __wfi(void) { __builtin_arm_wfi(); } -static __inline__ void __attribute__((always_inline, nodebug)) __wfe(void) { +static __inline__ void __attribute__((__always_inline__, __nodebug__)) __wfe(void) { __builtin_arm_wfe(); } -static __inline__ void __attribute__((always_inline, nodebug)) __sev(void) { +static __inline__ void __attribute__((__always_inline__, __nodebug__)) __sev(void) { __builtin_arm_sev(); } -static __inline__ void __attribute__((always_inline, nodebug)) __sevl(void) { +static __inline__ void __attribute__((__always_inline__, __nodebug__)) __sevl(void) { __builtin_arm_sevl(); } -static __inline__ void __attribute__((always_inline, nodebug)) __yield(void) { +static __inline__ void __attribute__((__always_inline__, __nodebug__)) __yield(void) { __builtin_arm_yield(); } #endif @@ -71,7 +71,7 @@ static __inline__ void 
__attribute__((always_inline, nodebug)) __yield(void) { #endif /* 8.5 Swap */ -static __inline__ uint32_t __attribute__((always_inline, nodebug)) +static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __swp(uint32_t x, volatile uint32_t *p) { uint32_t v; do v = __builtin_arm_ldrex(p); while (__builtin_arm_strex(x, p)); @@ -102,28 +102,28 @@ static __inline__ uint32_t __attribute__((always_inline, nodebug)) #endif /* 8.7 NOP */ -static __inline__ void __attribute__((always_inline, nodebug)) __nop(void) { +static __inline__ void __attribute__((__always_inline__, __nodebug__)) __nop(void) { __builtin_arm_nop(); } /* 9 DATA-PROCESSING INTRINSICS */ /* 9.2 Miscellaneous data-processing intrinsics */ /* ROR */ -static __inline__ uint32_t __attribute__((always_inline, nodebug)) +static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __ror(uint32_t x, uint32_t y) { y %= 32; if (y == 0) return x; return (x >> y) | (x << (32 - y)); } -static __inline__ uint64_t __attribute__((always_inline, nodebug)) +static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) __rorll(uint64_t x, uint32_t y) { y %= 64; if (y == 0) return x; return (x >> y) | (x << (64 - y)); } -static __inline__ unsigned long __attribute__((always_inline, nodebug)) +static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__)) __rorl(unsigned long x, uint32_t y) { #if __SIZEOF_LONG__ == 4 return __ror(x, y); @@ -134,28 +134,28 @@ static __inline__ unsigned long __attribute__((always_inline, nodebug)) /* CLZ */ -static __inline__ uint32_t __attribute__((always_inline, nodebug)) +static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __clz(uint32_t t) { return __builtin_clz(t); } -static __inline__ unsigned long __attribute__((always_inline, nodebug)) +static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__)) __clzl(unsigned long t) { return __builtin_clzl(t); } -static __inline__ uint64_t 
__attribute__((always_inline, nodebug)) +static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) __clzll(uint64_t t) { return __builtin_clzll(t); } /* REV */ -static __inline__ uint32_t __attribute__((always_inline, nodebug)) +static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __rev(uint32_t t) { return __builtin_bswap32(t); } -static __inline__ unsigned long __attribute__((always_inline, nodebug)) +static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__)) __revl(unsigned long t) { #if __SIZEOF_LONG__ == 4 return __builtin_bswap32(t); @@ -164,40 +164,40 @@ static __inline__ unsigned long __attribute__((always_inline, nodebug)) #endif } -static __inline__ uint64_t __attribute__((always_inline, nodebug)) +static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) __revll(uint64_t t) { return __builtin_bswap64(t); } /* REV16 */ -static __inline__ uint32_t __attribute__((always_inline, nodebug)) +static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __rev16(uint32_t t) { return __ror(__rev(t), 16); } -static __inline__ unsigned long __attribute__((always_inline, nodebug)) +static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__)) __rev16l(unsigned long t) { return __rorl(__revl(t), sizeof(long) / 2); } -static __inline__ uint64_t __attribute__((always_inline, nodebug)) +static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) __rev16ll(uint64_t t) { return __rorll(__revll(t), 32); } /* REVSH */ -static __inline__ int16_t __attribute__((always_inline, nodebug)) +static __inline__ int16_t __attribute__((__always_inline__, __nodebug__)) __revsh(int16_t t) { return __builtin_bswap16(t); } /* RBIT */ -static __inline__ uint32_t __attribute__((always_inline, nodebug)) +static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __rbit(uint32_t t) { return __builtin_arm_rbit(t); } -static __inline__ uint64_t 
__attribute__((always_inline, nodebug)) +static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) __rbitll(uint64_t t) { #if __ARM_32BIT_STATE return (((uint64_t) __builtin_arm_rbit(t)) << 32) | @@ -207,7 +207,7 @@ static __inline__ uint64_t __attribute__((always_inline, nodebug)) #endif } -static __inline__ unsigned long __attribute__((always_inline, nodebug)) +static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__)) __rbitl(unsigned long t) { #if __SIZEOF_LONG__ == 4 return __rbit(t); @@ -230,17 +230,17 @@ static __inline__ unsigned long __attribute__((always_inline, nodebug)) /* 9.4.2 Saturating addition and subtraction intrinsics */ #if __ARM_32BIT_STATE -static __inline__ int32_t __attribute__((always_inline, nodebug)) +static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) __qadd(int32_t t, int32_t v) { return __builtin_arm_qadd(t, v); } -static __inline__ int32_t __attribute__((always_inline, nodebug)) +static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) __qsub(int32_t t, int32_t v) { return __builtin_arm_qsub(t, v); } -static __inline__ int32_t __attribute__((always_inline, nodebug)) +static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) __qdbl(int32_t t) { return __builtin_arm_qadd(t, t); } @@ -248,42 +248,42 @@ __qdbl(int32_t t) { /* 9.7 CRC32 intrinsics */ #if __ARM_FEATURE_CRC32 -static __inline__ uint32_t __attribute__((always_inline, nodebug)) +static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __crc32b(uint32_t a, uint8_t b) { return __builtin_arm_crc32b(a, b); } -static __inline__ uint32_t __attribute__((always_inline, nodebug)) +static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __crc32h(uint32_t a, uint16_t b) { return __builtin_arm_crc32h(a, b); } -static __inline__ uint32_t __attribute__((always_inline, nodebug)) +static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) 
__crc32w(uint32_t a, uint32_t b) { return __builtin_arm_crc32w(a, b); } -static __inline__ uint32_t __attribute__((always_inline, nodebug)) +static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __crc32d(uint32_t a, uint64_t b) { return __builtin_arm_crc32d(a, b); } -static __inline__ uint32_t __attribute__((always_inline, nodebug)) +static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __crc32cb(uint32_t a, uint8_t b) { return __builtin_arm_crc32cb(a, b); } -static __inline__ uint32_t __attribute__((always_inline, nodebug)) +static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __crc32ch(uint32_t a, uint16_t b) { return __builtin_arm_crc32ch(a, b); } -static __inline__ uint32_t __attribute__((always_inline, nodebug)) +static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __crc32cw(uint32_t a, uint32_t b) { return __builtin_arm_crc32cw(a, b); } -static __inline__ uint32_t __attribute__((always_inline, nodebug)) +static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __crc32cd(uint32_t a, uint64_t b) { return __builtin_arm_crc32cd(a, b); } diff --git a/lib/clang/3.6/include/avx512bwintrin.h b/lib/clang/3.6/include/avx512bwintrin.h index bc4d4ac..acc3da2 100644 --- a/lib/clang/3.6/include/avx512bwintrin.h +++ b/lib/clang/3.6/include/avx512bwintrin.h @@ -21,13 +21,16 @@ * *===-----------------------------------------------------------------------=== */ +#ifndef __IMMINTRIN_H +#error "Never use <avx512bwintrin.h> directly; include <immintrin.h> instead." 
+#endif #ifndef __AVX512BWINTRIN_H #define __AVX512BWINTRIN_H typedef unsigned int __mmask32; typedef unsigned long long __mmask64; -typedef char __v64qi __attribute__ ((vector_size (64))); +typedef char __v64qi __attribute__ ((__vector_size__ (64))); typedef short __v32hi __attribute__ ((__vector_size__ (64))); @@ -45,6 +48,18 @@ _mm512_mask_cmpeq_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) { __u); } +static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) +_mm512_cmpeq_epu8_mask(__m512i __a, __m512i __b) { + return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 0, + (__mmask64)-1); +} + +static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) +_mm512_mask_cmpeq_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) { + return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 0, + __u); +} + static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) _mm512_cmpeq_epi16_mask(__m512i __a, __m512i __b) { return (__mmask32)__builtin_ia32_pcmpeqw512_mask((__v32hi)__a, (__v32hi)__b, @@ -57,4 +72,296 @@ _mm512_mask_cmpeq_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) { __u); } +static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) +_mm512_cmpeq_epu16_mask(__m512i __a, __m512i __b) { + return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 0, + (__mmask32)-1); +} + +static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) +_mm512_mask_cmpeq_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) { + return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 0, + __u); +} + +static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) +_mm512_cmpge_epi8_mask(__m512i __a, __m512i __b) { + return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 5, + (__mmask64)-1); +} + +static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 
+_mm512_mask_cmpge_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) { + return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 5, + __u); +} + +static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) +_mm512_cmpge_epu8_mask(__m512i __a, __m512i __b) { + return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 5, + (__mmask64)-1); +} + +static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) +_mm512_mask_cmpge_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) { + return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 5, + __u); +} + +static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) +_mm512_cmpge_epi16_mask(__m512i __a, __m512i __b) { + return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 5, + (__mmask32)-1); +} + +static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) +_mm512_mask_cmpge_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) { + return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 5, + __u); +} + +static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) +_mm512_cmpge_epu16_mask(__m512i __a, __m512i __b) { + return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 5, + (__mmask32)-1); +} + +static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) +_mm512_mask_cmpge_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) { + return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 5, + __u); +} + +static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) +_mm512_cmpgt_epi8_mask(__m512i __a, __m512i __b) { + return (__mmask64)__builtin_ia32_pcmpgtb512_mask((__v64qi)__a, (__v64qi)__b, + (__mmask64)-1); +} + +static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) +_mm512_mask_cmpgt_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) { + return 
(__mmask64)__builtin_ia32_pcmpgtb512_mask((__v64qi)__a, (__v64qi)__b, + __u); +} + +static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) +_mm512_cmpgt_epu8_mask(__m512i __a, __m512i __b) { + return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 6, + (__mmask64)-1); +} + +static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) +_mm512_mask_cmpgt_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) { + return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 6, + __u); +} + +static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) +_mm512_cmpgt_epi16_mask(__m512i __a, __m512i __b) { + return (__mmask32)__builtin_ia32_pcmpgtw512_mask((__v32hi)__a, (__v32hi)__b, + (__mmask32)-1); +} + +static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) +_mm512_mask_cmpgt_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) { + return (__mmask32)__builtin_ia32_pcmpgtw512_mask((__v32hi)__a, (__v32hi)__b, + __u); +} + +static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) +_mm512_cmpgt_epu16_mask(__m512i __a, __m512i __b) { + return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 6, + (__mmask32)-1); +} + +static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) +_mm512_mask_cmpgt_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) { + return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 6, + __u); +} + +static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) +_mm512_cmple_epi8_mask(__m512i __a, __m512i __b) { + return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 2, + (__mmask64)-1); +} + +static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) +_mm512_mask_cmple_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) { + return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 2, + __u); +} + +static __inline__ 
__mmask64 __attribute__((__always_inline__, __nodebug__)) +_mm512_cmple_epu8_mask(__m512i __a, __m512i __b) { + return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 2, + (__mmask64)-1); +} + +static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) +_mm512_mask_cmple_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) { + return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 2, + __u); +} + +static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) +_mm512_cmple_epi16_mask(__m512i __a, __m512i __b) { + return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 2, + (__mmask32)-1); +} + +static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) +_mm512_mask_cmple_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) { + return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 2, + __u); +} + +static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) +_mm512_cmple_epu16_mask(__m512i __a, __m512i __b) { + return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 2, + (__mmask32)-1); +} + +static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) +_mm512_mask_cmple_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) { + return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 2, + __u); +} + +static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) +_mm512_cmplt_epi8_mask(__m512i __a, __m512i __b) { + return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 1, + (__mmask64)-1); +} + +static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) +_mm512_mask_cmplt_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) { + return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 1, + __u); +} + +static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) +_mm512_cmplt_epu8_mask(__m512i __a, __m512i 
__b) { + return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 1, + (__mmask64)-1); +} + +static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) +_mm512_mask_cmplt_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) { + return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 1, + __u); +} + +static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) +_mm512_cmplt_epi16_mask(__m512i __a, __m512i __b) { + return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 1, + (__mmask32)-1); +} + +static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) +_mm512_mask_cmplt_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) { + return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 1, + __u); +} + +static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) +_mm512_cmplt_epu16_mask(__m512i __a, __m512i __b) { + return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 1, + (__mmask32)-1); +} + +static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) +_mm512_mask_cmplt_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) { + return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 1, + __u); +} + +static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) +_mm512_cmpneq_epi8_mask(__m512i __a, __m512i __b) { + return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 4, + (__mmask64)-1); +} + +static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) +_mm512_mask_cmpneq_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) { + return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 4, + __u); +} + +static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) +_mm512_cmpneq_epu8_mask(__m512i __a, __m512i __b) { + return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 4, + 
(__mmask64)-1); +} + +static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) +_mm512_mask_cmpneq_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) { + return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 4, + __u); +} + +static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) +_mm512_cmpneq_epi16_mask(__m512i __a, __m512i __b) { + return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 4, + (__mmask32)-1); +} + +static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) +_mm512_mask_cmpneq_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) { + return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 4, + __u); +} + +static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) +_mm512_cmpneq_epu16_mask(__m512i __a, __m512i __b) { + return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 4, + (__mmask32)-1); +} + +static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) +_mm512_mask_cmpneq_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) { + return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 4, + __u); +} + +#define _mm512_cmp_epi8_mask(a, b, p) __extension__ ({ \ + (__mmask16)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \ + (__v64qi)(__m512i)(b), \ + (p), (__mmask64)-1); }) + +#define _mm512_mask_cmp_epi8_mask(m, a, b, p) __extension__ ({ \ + (__mmask16)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \ + (__v64qi)(__m512i)(b), \ + (p), (__mmask64)(m)); }) + +#define _mm512_cmp_epu8_mask(a, b, p) __extension__ ({ \ + (__mmask16)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \ + (__v64qi)(__m512i)(b), \ + (p), (__mmask64)-1); }) + +#define _mm512_mask_cmp_epu8_mask(m, a, b, p) __extension__ ({ \ + (__mmask16)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \ + (__v64qi)(__m512i)(b), \ + (p), (__mmask64)(m)); }) + +#define _mm512_cmp_epi16_mask(a, b, p) 
__extension__ ({ \ + (__mmask16)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \ + (__v32hi)(__m512i)(b), \ + (p), (__mmask32)-1); }) + +#define _mm512_mask_cmp_epi16_mask(m, a, b, p) __extension__ ({ \ + (__mmask16)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \ + (__v32hi)(__m512i)(b), \ + (p), (__mmask32)(m)); }) + +#define _mm512_cmp_epu16_mask(a, b, p) __extension__ ({ \ + (__mmask16)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \ + (__v32hi)(__m512i)(b), \ + (p), (__mmask32)-1); }) + +#define _mm512_mask_cmp_epu16_mask(m, a, b, p) __extension__ ({ \ + (__mmask16)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \ + (__v32hi)(__m512i)(b), \ + (p), (__mmask32)(m)); }) + #endif diff --git a/lib/clang/3.6/include/avx512erintrin.h b/lib/clang/3.6/include/avx512erintrin.h index 1a5ea15..57c61aa 100644 --- a/lib/clang/3.6/include/avx512erintrin.h +++ b/lib/clang/3.6/include/avx512erintrin.h @@ -28,85 +28,259 @@ #define __AVX512ERINTRIN_H +// exp2a23 +#define _mm512_exp2a23_round_pd(A, R) __extension__ ({ \ + (__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \ + (__v8df)_mm512_setzero_pd(), \ + (__mmask8)-1, (R)); }) + +#define _mm512_mask_exp2a23_round_pd(S, M, A, R) __extension__ ({ \ + (__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(S), \ + (__mmask8)(M), (R)); }) + +#define _mm512_maskz_exp2a23_round_pd(M, A, R) __extension__ ({ \ + (__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \ + (__v8df)_mm512_setzero_pd(), \ + (__mmask8)(M), (R)); }) + +#define _mm512_exp2a23_pd(A) \ + _mm512_exp2a23_round_pd((A), _MM_FROUND_CUR_DIRECTION) + +#define _mm512_mask_exp2a23_pd(S, M, A) \ + _mm512_mask_exp2a23_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION) + +#define _mm512_maskz_exp2a23_pd(M, A) \ + _mm512_maskz_exp2a23_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION) + +#define _mm512_exp2a23_round_ps(A, R) __extension__ ({ \ + (__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \ + 
(__v16sf)_mm512_setzero_ps(), \ + (__mmask8)-1, (R)); }) + +#define _mm512_mask_exp2a23_round_ps(S, M, A, R) __extension__ ({ \ + (__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(S), \ + (__mmask8)(M), (R)); }) + +#define _mm512_maskz_exp2a23_round_ps(M, A, R) __extension__ ({ \ + (__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \ + (__v16sf)_mm512_setzero_ps(), \ + (__mmask8)(M), (R)); }) + +#define _mm512_exp2a23_ps(A) \ + _mm512_exp2a23_round_ps((A), _MM_FROUND_CUR_DIRECTION) + +#define _mm512_mask_exp2a23_ps(S, M, A) \ + _mm512_mask_exp2a23_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION) + +#define _mm512_maskz_exp2a23_ps(M, A) \ + _mm512_maskz_exp2a23_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION) + // rsqrt28 -static __inline__ __m512d __attribute__((__always_inline__, __nodebug__)) -_mm512_rsqrt28_round_pd (__m512d __A, int __R) -{ - return (__m512d)__builtin_ia32_rsqrt28pd_mask ((__v8df)__A, - (__v8df)_mm512_setzero_pd(), - (__mmask8)-1, - __R); -} -static __inline__ __m512 __attribute__((__always_inline__, __nodebug__)) -_mm512_rsqrt28_round_ps(__m512 __A, int __R) -{ - return (__m512)__builtin_ia32_rsqrt28ps_mask ((__v16sf)__A, - (__v16sf)_mm512_setzero_ps(), - (__mmask16)-1, - __R); -} - -static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) -_mm_rsqrt28_round_ss(__m128 __A, __m128 __B, int __R) -{ - return (__m128) __builtin_ia32_rsqrt28ss_mask ((__v4sf) __A, - (__v4sf) __B, - (__v4sf) - _mm_setzero_ps (), - (__mmask8) -1, - __R); -} - -static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) -_mm_rsqrt28_round_sd (__m128d __A, __m128d __B, int __R) -{ - return (__m128d) __builtin_ia32_rsqrt28sd_mask ((__v2df) __A, - (__v2df) __B, - (__v2df) - _mm_setzero_pd (), - (__mmask8) -1, - __R); -} +#define _mm512_rsqrt28_round_pd(A, R) __extension__ ({ \ + (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \ + (__v8df)_mm512_setzero_pd(), \ + (__mmask8)-1, (R)); }) + +#define 
_mm512_mask_rsqrt28_round_pd(S, M, A, R) __extension__ ({ \ + (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(S), \ + (__mmask8)(M), (R)); }) + +#define _mm512_maskz_rsqrt28_round_pd(M, A, R) __extension__ ({ \ + (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \ + (__v8df)_mm512_setzero_pd(), \ + (__mmask8)(M), (R)); }) + +#define _mm512_rsqrt28_pd(A) \ + _mm512_rsqrt28_round_pd((A), _MM_FROUND_CUR_DIRECTION) + +#define _mm512_mask_rsqrt28_pd(S, M, A) \ + _mm512_mask_rsqrt28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION) + +#define _mm512_maskz_rsqrt28_pd(M, A) \ + _mm512_maskz_rsqrt28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION) + +#define _mm512_rsqrt28_round_ps(A, R) __extension__ ({ \ + (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \ + (__v16sf)_mm512_setzero_ps(), \ + (__mmask16)-1, (R)); }) + +#define _mm512_mask_rsqrt28_round_ps(S, M, A, R) __extension__ ({ \ + (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(S), \ + (__mmask16)(M), (R)); }) + +#define _mm512_maskz_rsqrt28_round_ps(M, A, R) __extension__ ({ \ + (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \ + (__v16sf)_mm512_setzero_ps(), \ + (__mmask16)(M), (R)); }) + +#define _mm512_rsqrt28_ps(A) \ + _mm512_rsqrt28_round_ps((A), _MM_FROUND_CUR_DIRECTION) + +#define _mm512_mask_rsqrt28_ps(S, M, A) \ + _mm512_mask_rsqrt28_round_ps((S), (M), A, _MM_FROUND_CUR_DIRECTION) + +#define _mm512_maskz_rsqrt28_ps(M, A) \ + _mm512_maskz_rsqrt28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION) + +#define _mm_rsqrt28_round_ss(A, B, R) __extension__ ({ \ + (__m128)__builtin_ia32_rsqrt28ss_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)-1, (R)); }) + +#define _mm_mask_rsqrt28_round_ss(S, M, A, B, R) __extension__ ({ \ + (__m128)__builtin_ia32_rsqrt28ss_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)(__m128)(S), \ + (__mmask8)(M), (R)); }) + +#define 
_mm_maskz_rsqrt28_round_ss(M, A, B, R) __extension__ ({ \ + (__m128)__builtin_ia32_rsqrt28ss_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)(M), (R)); }) + +#define _mm_rsqrt28_ss(A, B) \ + _mm_rsqrt28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION) + +#define _mm_mask_rsqrt28_ss(S, M, A, B) \ + _mm_mask_rsqrt28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION) + +#define _mm_maskz_rsqrt28_ss(M, A, B) \ + _mm_maskz_rsqrt28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION) +#define _mm_rsqrt28_round_sd(A, B, R) __extension__ ({ \ + (__m128d)__builtin_ia32_rsqrt28sd_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)-1, (R)); }) + +#define _mm_mask_rsqrt28_round_sd(S, M, A, B, R) __extension__ ({ \ + (__m128d)__builtin_ia32_rsqrt28sd_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)(__m128d)(S), \ + (__mmask8)(M), (R)); }) + +#define _mm_maskz_rsqrt28_round_sd(M, A, B, R) __extension__ ({ \ + (__m128d)__builtin_ia32_rsqrt28sd_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)(M), (R)); }) + +#define _mm_rsqrt28_sd(A, B) \ + _mm_rsqrt28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION) + +#define _mm_mask_rsqrt28_sd(S, M, A, B) \ + _mm_mask_rsqrt28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION) + +#define _mm_maskz_rsqrt28_sd(M, A, B) \ + _mm_mask_rsqrt28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION) // rcp28 -static __inline__ __m512d __attribute__((__always_inline__, __nodebug__)) -_mm512_rcp28_round_pd (__m512d __A, int __R) -{ - return (__m512d)__builtin_ia32_rcp28pd_mask ((__v8df)__A, - (__v8df)_mm512_setzero_pd(), - (__mmask8)-1, - __R); -} - -static __inline__ __m512 __attribute__((__always_inline__, __nodebug__)) -_mm512_rcp28_round_ps (__m512 __A, int __R) -{ - return (__m512)__builtin_ia32_rcp28ps_mask ((__v16sf)__A, - (__v16sf)_mm512_setzero_ps (), - (__mmask16)-1, - __R); -} - -static 
__inline__ __m128 __attribute__((__always_inline__, __nodebug__)) -_mm_rcp28_round_ss (__m128 __A, __m128 __B, int __R) -{ - return (__m128) __builtin_ia32_rcp28ss_mask ((__v4sf) __A, - (__v4sf) __B, - (__v4sf) - _mm_setzero_ps (), - (__mmask8) -1, - __R); -} -static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) -_mm_rcp28_round_sd (__m128d __A, __m128d __B, int __R) -{ - return (__m128d) __builtin_ia32_rcp28sd_mask ((__v2df) __A, - (__v2df) __B, - (__v2df) - _mm_setzero_pd (), - (__mmask8) -1, - __R); -} +#define _mm512_rcp28_round_pd(A, R) __extension__ ({ \ + (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \ + (__v8df)_mm512_setzero_pd(), \ + (__mmask8)-1, (R)); }) + +#define _mm512_mask_rcp28_round_pd(S, M, A, R) __extension__ ({ \ + (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(S), \ + (__mmask8)(M), (R)); }) + +#define _mm512_maskz_rcp28_round_pd(M, A, R) __extension__ ({ \ + (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \ + (__v8df)_mm512_setzero_pd(), \ + (__mmask8)(M), (R)); }) + +#define _mm512_rcp28_pd(A) \ + _mm512_rcp28_round_pd((A), _MM_FROUND_CUR_DIRECTION) + +#define _mm512_mask_rcp28_pd(S, M, A) \ + _mm512_mask_rcp28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION) + +#define _mm512_maskz_rcp28_pd(M, A) \ + _mm512_maskz_rcp28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION) + +#define _mm512_rcp28_round_ps(A, R) __extension__ ({ \ + (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \ + (__v16sf)_mm512_setzero_ps(), \ + (__mmask16)-1, (R)); }) + +#define _mm512_mask_rcp28_round_ps(S, M, A, R) __extension__ ({ \ + (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(S), \ + (__mmask16)(M), (R)); }) + +#define _mm512_maskz_rcp28_round_ps(M, A, R) __extension__ ({ \ + (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \ + (__v16sf)_mm512_setzero_ps(), \ + (__mmask16)(M), (R)); }) + +#define _mm512_rcp28_ps(A) \ + 
_mm512_rcp28_round_ps((A), _MM_FROUND_CUR_DIRECTION) + +#define _mm512_mask_rcp28_ps(S, M, A) \ + _mm512_mask_rcp28_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION) + +#define _mm512_maskz_rcp28_ps(M, A) \ + _mm512_maskz_rcp28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION) + +#define _mm_rcp28_round_ss(A, B, R) __extension__ ({ \ + (__m128)__builtin_ia32_rcp28ss_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)-1, (R)); }) + +#define _mm_mask_rcp28_round_ss(S, M, A, B, R) __extension__ ({ \ + (__m128)__builtin_ia32_rcp28ss_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)(__m128)(S), \ + (__mmask8)(M), (R)); }) + +#define _mm_maskz_rcp28_round_ss(M, A, B, R) __extension__ ({ \ + (__m128)__builtin_ia32_rcp28ss_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)(M), (R)); }) + +#define _mm_rcp28_ss(A, B) \ + _mm_rcp28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION) + +#define _mm_mask_rcp28_ss(S, M, A, B) \ + _mm_mask_rcp28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION) + +#define _mm_maskz_rcp28_ss(M, A, B) \ + _mm_maskz_rcp28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION) + +#define _mm_rcp28_round_sd(A, B, R) __extension__ ({ \ + (__m128d)__builtin_ia32_rcp28sd_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)-1, (R)); }) + +#define _mm_mask_rcp28_round_sd(S, M, A, B, R) __extension__ ({ \ + (__m128d)__builtin_ia32_rcp28sd_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)(__m128d)(S), \ + (__mmask8)(M), (R)); }) + +#define _mm_maskz_rcp28_round_sd(M, A, B, R) __extension__ ({ \ + (__m128d)__builtin_ia32_rcp28sd_mask((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), \ + (__v2df)_mm_setzero_pd(), \ + (__mmask8)(M), (R)); }) + +#define _mm_rcp28_sd(A, B) \ + _mm_rcp28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION) + +#define _mm_mask_rcp28_sd(S, M, A, B) \ + _mm_mask_rcp28_round_sd((S), (M), (A), 
(B), _MM_FROUND_CUR_DIRECTION) + +#define _mm_maskz_rcp28_sd(M, A, B) \ + _mm_maskz_rcp28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION) #endif // __AVX512ERINTRIN_H diff --git a/lib/clang/3.6/include/avx512fintrin.h b/lib/clang/3.6/include/avx512fintrin.h index 9591dcf..c6d46cb 100644 --- a/lib/clang/3.6/include/avx512fintrin.h +++ b/lib/clang/3.6/include/avx512fintrin.h @@ -492,20 +492,13 @@ _mm512_abs_epi32(__m512i __A) (__mmask16) -1); } -static __inline __m512 __attribute__ ((__always_inline__, __nodebug__)) -_mm512_roundscale_ps(__m512 __A, const int __imm) -{ - return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm, - (__v16sf) __A, -1, - _MM_FROUND_CUR_DIRECTION); -} -static __inline __m512d __attribute__ ((__always_inline__, __nodebug__)) -_mm512_roundscale_pd(__m512d __A, const int __imm) -{ - return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm, - (__v8df) __A, -1, - _MM_FROUND_CUR_DIRECTION); -} +#define _mm512_roundscale_ps(A, B) __extension__ ({ \ + (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(A), (B), (__v16sf)(A), \ + -1, _MM_FROUND_CUR_DIRECTION); }) + +#define _mm512_roundscale_pd(A, B) __extension__ ({ \ + (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(A), (B), (__v8df)(A), \ + -1, _MM_FROUND_CUR_DIRECTION); }) static __inline__ __m512d __attribute__((__always_inline__, __nodebug__)) _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C) @@ -613,25 +606,35 @@ _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B) (__mmask16) -1); } -static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) -_mm512_valign_epi64(__m512i __A, __m512i __B, const int __I) -{ - return (__m512i) __builtin_ia32_alignq512_mask((__v8di)__A, - (__v8di)__B, - __I, - (__v8di)_mm512_setzero_si512(), - (__mmask8) -1); -} - -static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) -_mm512_valign_epi32(__m512i __A, __m512i __B, const int __I) -{ - return (__m512i)__builtin_ia32_alignd512_mask((__v16si)__A, 
- (__v16si)__B, - __I, - (__v16si)_mm512_setzero_si512(), - (__mmask16) -1); -} +#define _mm512_alignr_epi64(A, B, I) __extension__ ({ \ + (__m512i)__builtin_ia32_alignq512_mask((__v8di)(__m512i)(A), \ + (__v8di)(__m512i)(B), \ + (I), (__v8di)_mm512_setzero_si512(), \ + (__mmask8)-1); }) + +#define _mm512_alignr_epi32(A, B, I) __extension__ ({ \ + (__m512i)__builtin_ia32_alignd512_mask((__v16si)(__m512i)(A), \ + (__v16si)(__m512i)(B), \ + (I), (__v16si)_mm512_setzero_si512(), \ + (__mmask16)-1); }) + +/* Vector Extract */ + +#define _mm512_extractf64x4_pd(A, I) __extension__ ({ \ + __m512d __A = (A); \ + (__m256d) \ + __builtin_ia32_extractf64x4_mask((__v8df)__A, \ + (I), \ + (__v4df)_mm256_setzero_si256(), \ + (__mmask8) -1); }) + +#define _mm512_extractf32x4_ps(A, I) __extension__ ({ \ + __m512 __A = (A); \ + (__m128) \ + __builtin_ia32_extractf32x4_mask((__v16sf)__A, \ + (I), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8) -1); }) /* Vector Blend */ @@ -669,22 +672,37 @@ _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) /* Compare */ -static __inline __mmask16 __attribute__ ((__always_inline__, __nodebug__)) -_mm512_cmp_ps_mask(__m512 a, __m512 b, const int p) -{ - return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) a, - (__v16sf) b, p, (__mmask16) -1, - _MM_FROUND_CUR_DIRECTION); -} +#define _mm512_cmp_round_ps_mask(A, B, P, R) __extension__ ({ \ + (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), \ + (P), (__mmask16)-1, (R)); }) -static __inline __mmask8 __attribute__ ((__always_inline__, __nodebug__)) -_mm512_cmp_pd_mask(__m512d __X, __m512d __Y, const int __P) -{ - return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, - (__v8df) __Y, __P, - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); -} +#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) __extension__ ({ \ + (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), \ + (P), (__mmask16)(U), (R)); }) + +#define 
_mm512_cmp_ps_mask(A, B, P) \ + _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION) + +#define _mm512_mask_cmp_ps_mask(U, A, B, P) \ + _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION) + +#define _mm512_cmp_round_pd_mask(A, B, P, R) __extension__ ({ \ + (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), \ + (P), (__mmask8)-1, (R)); }) + +#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) __extension__ ({ \ + (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \ + (__v8df)(__m512d)(B), \ + (P), (__mmask8)(U), (R)); }) + +#define _mm512_cmp_pd_mask(A, B, P) \ + _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION) + +#define _mm512_mask_cmp_pd_mask(U, A, B, P) \ + _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION) /* Conversion */ @@ -698,25 +716,15 @@ _mm512_cvttps_epu32(__m512 __A) _MM_FROUND_CUR_DIRECTION); } -static __inline __m512 __attribute__ (( __always_inline__, __nodebug__)) -_mm512_cvt_roundepi32_ps(__m512i __A, const int __R) -{ - return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A, - (__v16sf) - _mm512_setzero_ps (), - (__mmask16) -1, - __R); -} +#define _mm512_cvt_roundepi32_ps(A, R) __extension__ ({ \ + (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), \ + (__v16sf)_mm512_setzero_ps(), \ + (__mmask16)-1, (R)); }) -static __inline __m512 __attribute__ (( __always_inline__, __nodebug__)) -_mm512_cvt_roundepu32_ps(__m512i __A, const int __R) -{ - return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A, - (__v16sf) - _mm512_setzero_ps (), - (__mmask16) -1, - __R); -} +#define _mm512_cvt_roundepu32_ps(A, R) __extension__ ({ \ + (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), \ + (__v16sf)_mm512_setzero_ps(), \ + (__mmask16)-1, (R)); }) static __inline __m512d __attribute__ (( __always_inline__, __nodebug__)) _mm512_cvtepi32_pd(__m256i __A) @@ -735,25 +743,16 @@ _mm512_cvtepu32_pd(__m256i __A) 
_mm512_setzero_pd (), (__mmask8) -1); } -static __inline __m256 __attribute__ (( __always_inline__, __nodebug__)) -_mm512_cvt_roundpd_ps(__m512d __A, const int __R) -{ - return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, - (__v8sf) - _mm256_setzero_ps (), - (__mmask8) -1, - __R); -} -static __inline __m256i __attribute__ ((__always_inline__, __nodebug__)) -_mm512_cvtps_ph(__m512 __A, const int __I) -{ - return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A, - __I, - (__v16hi) - _mm256_setzero_si256 (), - -1); -} +#define _mm512_cvt_roundpd_ps(A, R) __extension__ ({ \ + (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(A), \ + (__v8sf)_mm256_setzero_ps(), \ + (__mmask8)-1, (R)); }) + +#define _mm512_cvtps_ph(A, I) __extension__ ({ \ + (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(A), (I), \ + (__v16hi)_mm256_setzero_si256(), \ + -1); }) static __inline __m512 __attribute__ ((__always_inline__, __nodebug__)) _mm512_cvtph_ps(__m256i __A) @@ -783,60 +782,67 @@ _mm512_cvttpd_epi32(__m512d a) _MM_FROUND_CUR_DIRECTION); } -static __inline __m256i __attribute__ ((__always_inline__, __nodebug__)) -_mm512_cvtt_roundpd_epi32(__m512d __A, const int __R) -{ - return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, - (__v8si) - _mm256_setzero_si256 (), - (__mmask8) -1, - __R); -} -static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) -_mm512_cvtt_roundps_epi32(__m512 __A, const int __R) -{ - return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) -1, - __R); -} +#define _mm512_cvtt_roundpd_epi32(A, R) __extension__ ({ \ + (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(A), \ + (__v8si)_mm256_setzero_si256(), \ + (__mmask8)-1, (R)); }) -static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) -_mm512_cvt_roundps_epi32(__m512 __A, const int __R) +#define _mm512_cvtt_roundps_epi32(A, R) __extension__ ({ \ + 
(__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(A), \ + (__v16si)_mm512_setzero_si512(), \ + (__mmask16)-1, (R)); }) + +#define _mm512_cvt_roundps_epi32(A, R) __extension__ ({ \ + (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(A), \ + (__v16si)_mm512_setzero_si512(), \ + (__mmask16)-1, (R)); }) + +#define _mm512_cvt_roundpd_epi32(A, R) __extension__ ({ \ + (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(A), \ + (__v8si)_mm256_setzero_si256(), \ + (__mmask8)-1, (R)); }) + +#define _mm512_cvt_roundps_epu32(A, R) __extension__ ({ \ + (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(A), \ + (__v16si)_mm512_setzero_si512(), \ + (__mmask16)-1, (R)); }) + +#define _mm512_cvt_roundpd_epu32(A, R) __extension__ ({ \ + (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(A), \ + (__v8si)_mm256_setzero_si256(), \ + (__mmask8) -1, (R)); }) + +/* Unpack and Interleave */ +static __inline __m512d __attribute__((__always_inline__, __nodebug__)) +_mm512_unpackhi_pd(__m512d __a, __m512d __b) { - return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) -1, - __R); + return __builtin_shufflevector(__a, __b, 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6); } -static __inline __m256i __attribute__ ((__always_inline__, __nodebug__)) -_mm512_cvt_roundpd_epi32(__m512d __A, const int __R) + +static __inline __m512d __attribute__((__always_inline__, __nodebug__)) +_mm512_unpacklo_pd(__m512d __a, __m512d __b) { - return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, - (__v8si) - _mm256_setzero_si256 (), - (__mmask8) -1, - __R); + return __builtin_shufflevector(__a, __b, 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6); } -static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) -_mm512_cvt_roundps_epu32(__m512 __A, const int __R) + +static __inline __m512 __attribute__((__always_inline__, __nodebug__)) +_mm512_unpackhi_ps(__m512 __a, __m512 __b) { - return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, - 
(__v16si) - _mm512_setzero_si512 (), - (__mmask16) -1, - __R); + return __builtin_shufflevector(__a, __b, + 2, 18, 3, 19, + 2+4, 18+4, 3+4, 19+4, + 2+8, 18+8, 3+8, 19+8, + 2+12, 18+12, 3+12, 19+12); } -static __inline __m256i __attribute__ ((__always_inline__, __nodebug__)) -_mm512_cvt_roundpd_epu32(__m512d __A, const int __R) + +static __inline __m512 __attribute__((__always_inline__, __nodebug__)) +_mm512_unpacklo_ps(__m512 __a, __m512 __b) { - return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, - (__v8si) - _mm256_setzero_si256 (), - (__mmask8) -1, - __R); + return __builtin_shufflevector(__a, __b, + 0, 16, 1, 17, + 0+4, 16+4, 1+4, 17+4, + 0+8, 16+8, 1+8, 17+8, + 0+12, 16+12, 1+12, 17+12); } /* Bit Test */ @@ -895,12 +901,30 @@ _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P) (__mmask8) __U); } +static __inline __m512 __attribute__ ((__always_inline__, __nodebug__)) +_mm512_maskz_load_ps(__mmask16 __U, void const *__P) +{ + return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U); +} + +static __inline __m512d __attribute__ ((__always_inline__, __nodebug__)) +_mm512_maskz_load_pd(__mmask8 __U, void const *__P) +{ + return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U); +} + static __inline __m512d __attribute__((__always_inline__, __nodebug__)) _mm512_loadu_pd(double const *__p) { struct __loadu_pd { __m512d __v; - } __attribute__((packed, may_alias)); + } __attribute__((__packed__, __may_alias__)); return ((struct __loadu_pd*)__p)->__v; } @@ -909,10 +933,28 @@ _mm512_loadu_ps(float const *__p) { struct __loadu_ps { __m512 __v; - } __attribute__((packed, may_alias)); + } __attribute__((__packed__, __may_alias__)); return ((struct __loadu_ps*)__p)->__v; } +static __inline __m512 __attribute__((__always_inline__, __nodebug__)) +_mm512_load_ps(double const *__p) +{ + return (__m512) __builtin_ia32_loadaps512_mask 
((const __v16sf *)__p, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1); +} + +static __inline __m512d __attribute__((__always_inline__, __nodebug__)) +_mm512_load_pd(float const *__p) +{ + return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__p, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1); +} + /* SIMD store ops */ static __inline void __attribute__ ((__always_inline__, __nodebug__)) @@ -955,9 +997,9 @@ _mm512_storeu_ps(void *__P, __m512 __A) } static __inline void __attribute__ ((__always_inline__, __nodebug__)) -_mm512_store_ps(void *__P, __m512 __A) +_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A) { - *(__m512*)__P = __A; + __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U); } static __inline void __attribute__ ((__always_inline__, __nodebug__)) @@ -966,6 +1008,19 @@ _mm512_store_pd(void *__P, __m512d __A) *(__m512d*)__P = __A; } +static __inline void __attribute__ ((__always_inline__, __nodebug__)) +_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A) +{ + __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A, + (__mmask16) __U); +} + +static __inline void __attribute__ ((__always_inline__, __nodebug__)) +_mm512_store_ps(void *__P, __m512 __A) +{ + *(__m512*)__P = __A; +} + /* Mask ops */ static __inline __mmask16 __attribute__ ((__always_inline__, __nodebug__)) @@ -988,6 +1043,18 @@ _mm512_mask_cmpeq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { __u); } +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm512_cmpeq_epu32_mask(__m512i __a, __m512i __b) { + return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0, + (__mmask16)-1); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm512_mask_cmpeq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { + return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0, + __u); +} + static __inline__ __mmask8 
__attribute__((__always_inline__, __nodebug__)) _mm512_mask_cmpeq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b, @@ -1000,4 +1067,303 @@ _mm512_cmpeq_epi64_mask(__m512i __a, __m512i __b) { (__mmask8)-1); } +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm512_cmpeq_epu64_mask(__m512i __a, __m512i __b) { + return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm512_mask_cmpeq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { + return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0, + __u); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm512_cmpge_epi32_mask(__m512i __a, __m512i __b) { + return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5, + (__mmask16)-1); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm512_mask_cmpge_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { + return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5, + __u); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm512_cmpge_epu32_mask(__m512i __a, __m512i __b) { + return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5, + (__mmask16)-1); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm512_mask_cmpge_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { + return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm512_cmpge_epi64_mask(__m512i __a, __m512i __b) { + return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, 
__nodebug__)) +_mm512_mask_cmpge_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { + return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm512_cmpge_epu64_mask(__m512i __a, __m512i __b) { + return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm512_mask_cmpge_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { + return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5, + __u); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm512_cmpgt_epi32_mask(__m512i __a, __m512i __b) { + return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b, + (__mmask16)-1); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm512_mask_cmpgt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { + return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b, + __u); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm512_cmpgt_epu32_mask(__m512i __a, __m512i __b) { + return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6, + (__mmask16)-1); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm512_mask_cmpgt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { + return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm512_mask_cmpgt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { + return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm512_cmpgt_epi64_mask(__m512i __a, __m512i __b) { + return 
(__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm512_cmpgt_epu64_mask(__m512i __a, __m512i __b) { + return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm512_mask_cmpgt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { + return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6, + __u); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm512_cmple_epi32_mask(__m512i __a, __m512i __b) { + return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2, + (__mmask16)-1); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm512_mask_cmple_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { + return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2, + __u); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm512_cmple_epu32_mask(__m512i __a, __m512i __b) { + return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2, + (__mmask16)-1); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm512_mask_cmple_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { + return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm512_cmple_epi64_mask(__m512i __a, __m512i __b) { + return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm512_mask_cmple_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { + return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2, + __u); +} + +static __inline__ __mmask8 
__attribute__((__always_inline__, __nodebug__)) +_mm512_cmple_epu64_mask(__m512i __a, __m512i __b) { + return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm512_mask_cmple_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { + return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2, + __u); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm512_cmplt_epi32_mask(__m512i __a, __m512i __b) { + return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1, + (__mmask16)-1); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm512_mask_cmplt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { + return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1, + __u); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm512_cmplt_epu32_mask(__m512i __a, __m512i __b) { + return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1, + (__mmask16)-1); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm512_mask_cmplt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { + return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm512_cmplt_epi64_mask(__m512i __a, __m512i __b) { + return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm512_mask_cmplt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { + return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm512_cmplt_epu64_mask(__m512i __a, __m512i __b) { + return 
(__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm512_mask_cmplt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { + return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1, + __u); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm512_cmpneq_epi32_mask(__m512i __a, __m512i __b) { + return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4, + (__mmask16)-1); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm512_mask_cmpneq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { + return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4, + __u); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm512_cmpneq_epu32_mask(__m512i __a, __m512i __b) { + return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4, + (__mmask16)-1); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm512_mask_cmpneq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { + return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm512_cmpneq_epi64_mask(__m512i __a, __m512i __b) { + return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm512_mask_cmpneq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { + return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm512_cmpneq_epu64_mask(__m512i __a, __m512i __b) { + return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4, + (__mmask8)-1); +} + +static __inline__ 
__mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm512_mask_cmpneq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { + return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4, + __u); +} + +#define _mm512_cmp_epi32_mask(a, b, p) __extension__ ({ \ + __m512i __a = (a); \ + __m512i __b = (b); \ + (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, (p), \ + (__mmask16)-1); }) + +#define _mm512_cmp_epu32_mask(a, b, p) __extension__ ({ \ + __m512i __a = (a); \ + __m512i __b = (b); \ + (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, (p), \ + (__mmask16)-1); }) + +#define _mm512_cmp_epi64_mask(a, b, p) __extension__ ({ \ + __m512i __a = (a); \ + __m512i __b = (b); \ + (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, (p), \ + (__mmask8)-1); }) + +#define _mm512_cmp_epu64_mask(a, b, p) __extension__ ({ \ + __m512i __a = (a); \ + __m512i __b = (b); \ + (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, (p), \ + (__mmask8)-1); }) + +#define _mm512_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \ + __m512i __a = (a); \ + __m512i __b = (b); \ + (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, (p), \ + (__mmask16)(m)); }) + +#define _mm512_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \ + __m512i __a = (a); \ + __m512i __b = (b); \ + (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, (p), \ + (__mmask16)(m)); }) + +#define _mm512_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \ + __m512i __a = (a); \ + __m512i __b = (b); \ + (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, (p), \ + (__mmask8)(m)); }) + +#define _mm512_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \ + __m512i __a = (a); \ + __m512i __b = (b); \ + (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, (p), \ + (__mmask8)(m)); }) #endif // __AVX512FINTRIN_H diff --git a/lib/clang/3.6/include/avx512vlbwintrin.h b/lib/clang/3.6/include/avx512vlbwintrin.h 
index 11333f8..0746f43 100644 --- a/lib/clang/3.6/include/avx512vlbwintrin.h +++ b/lib/clang/3.6/include/avx512vlbwintrin.h @@ -42,6 +42,17 @@ _mm_mask_cmpeq_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) { __u); } +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpeq_epu8_mask(__m128i __a, __m128i __b) { + return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 0, + (__mmask16)-1); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm_mask_cmpeq_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) { + return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 0, + __u); +} static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) _mm256_cmpeq_epi8_mask(__m256i __a, __m256i __b) { @@ -55,6 +66,18 @@ _mm256_mask_cmpeq_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) { __u); } +static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) +_mm256_cmpeq_epu8_mask(__m256i __a, __m256i __b) { + return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 0, + (__mmask32)-1); +} + +static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) +_mm256_mask_cmpeq_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) { + return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 0, + __u); +} + static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) _mm_cmpeq_epi16_mask(__m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_pcmpeqw128_mask((__v8hi)__a, (__v8hi)__b, @@ -67,6 +90,17 @@ _mm_mask_cmpeq_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) { __u); } +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpeq_epu16_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 0, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) 
+_mm_mask_cmpeq_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 0, + __u); +} static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) _mm256_cmpeq_epi16_mask(__m256i __a, __m256i __b) { @@ -80,4 +114,576 @@ _mm256_mask_cmpeq_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) { __u); } +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm256_cmpeq_epu16_mask(__m256i __a, __m256i __b) { + return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 0, + (__mmask16)-1); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm256_mask_cmpeq_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) { + return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 0, + __u); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpge_epi8_mask(__m128i __a, __m128i __b) { + return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 5, + (__mmask16)-1); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm_mask_cmpge_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) { + return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 5, + __u); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpge_epu8_mask(__m128i __a, __m128i __b) { + return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 5, + (__mmask16)-1); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm_mask_cmpge_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) { + return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 5, + __u); +} + +static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) +_mm256_cmpge_epi8_mask(__m256i __a, __m256i __b) { + return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, 
(__v32qi)__b, 5, + (__mmask32)-1); +} + +static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) +_mm256_mask_cmpge_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) { + return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 5, + __u); +} + +static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) +_mm256_cmpge_epu8_mask(__m256i __a, __m256i __b) { + return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 5, + (__mmask32)-1); +} + +static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) +_mm256_mask_cmpge_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) { + return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 5, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpge_epi16_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 5, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_mask_cmpge_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 5, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpge_epu16_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 5, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_mask_cmpge_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 5, + __u); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm256_cmpge_epi16_mask(__m256i __a, __m256i __b) { + return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 5, + (__mmask16)-1); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) 
+_mm256_mask_cmpge_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) { + return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 5, + __u); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm256_cmpge_epu16_mask(__m256i __a, __m256i __b) { + return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 5, + (__mmask16)-1); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm256_mask_cmpge_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) { + return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 5, + __u); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpgt_epi8_mask(__m128i __a, __m128i __b) { + return (__mmask16)__builtin_ia32_pcmpgtb128_mask((__v16qi)__a, (__v16qi)__b, + (__mmask16)-1); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm_mask_cmpgt_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) { + return (__mmask16)__builtin_ia32_pcmpgtb128_mask((__v16qi)__a, (__v16qi)__b, + __u); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpgt_epu8_mask(__m128i __a, __m128i __b) { + return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 6, + (__mmask16)-1); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm_mask_cmpgt_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) { + return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 6, + __u); +} + +static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) +_mm256_cmpgt_epi8_mask(__m256i __a, __m256i __b) { + return (__mmask32)__builtin_ia32_pcmpgtb256_mask((__v32qi)__a, (__v32qi)__b, + (__mmask32)-1); +} + +static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) +_mm256_mask_cmpgt_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) { + return 
(__mmask32)__builtin_ia32_pcmpgtb256_mask((__v32qi)__a, (__v32qi)__b, + __u); +} + +static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) +_mm256_cmpgt_epu8_mask(__m256i __a, __m256i __b) { + return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 6, + (__mmask32)-1); +} + +static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) +_mm256_mask_cmpgt_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) { + return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 6, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpgt_epi16_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_pcmpgtw128_mask((__v8hi)__a, (__v8hi)__b, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_mask_cmpgt_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_pcmpgtw128_mask((__v8hi)__a, (__v8hi)__b, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpgt_epu16_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 6, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_mask_cmpgt_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 6, + __u); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm256_cmpgt_epi16_mask(__m256i __a, __m256i __b) { + return (__mmask16)__builtin_ia32_pcmpgtw256_mask((__v16hi)__a, (__v16hi)__b, + (__mmask16)-1); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm256_mask_cmpgt_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) { + return (__mmask16)__builtin_ia32_pcmpgtw256_mask((__v16hi)__a, (__v16hi)__b, + __u); +} + +static __inline__ __mmask16 
__attribute__((__always_inline__, __nodebug__)) +_mm256_cmpgt_epu16_mask(__m256i __a, __m256i __b) { + return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 6, + (__mmask16)-1); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm256_mask_cmpgt_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) { + return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 6, + __u); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm_cmple_epi8_mask(__m128i __a, __m128i __b) { + return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 2, + (__mmask16)-1); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm_mask_cmple_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) { + return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 2, + __u); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm_cmple_epu8_mask(__m128i __a, __m128i __b) { + return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 2, + (__mmask16)-1); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm_mask_cmple_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) { + return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 2, + __u); +} + +static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) +_mm256_cmple_epi8_mask(__m256i __a, __m256i __b) { + return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 2, + (__mmask32)-1); +} + +static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) +_mm256_mask_cmple_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) { + return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 2, + __u); +} + +static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) +_mm256_cmple_epu8_mask(__m256i __a, __m256i __b) { + return 
(__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 2, + (__mmask32)-1); +} + +static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) +_mm256_mask_cmple_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) { + return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 2, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_cmple_epi16_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 2, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_mask_cmple_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 2, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_cmple_epu16_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 2, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_mask_cmple_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 2, + __u); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm256_cmple_epi16_mask(__m256i __a, __m256i __b) { + return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 2, + (__mmask16)-1); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm256_mask_cmple_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) { + return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 2, + __u); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm256_cmple_epu16_mask(__m256i __a, __m256i __b) { + return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 2, + (__mmask16)-1); +} + +static __inline__ __mmask16 
__attribute__((__always_inline__, __nodebug__)) +_mm256_mask_cmple_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) { + return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 2, + __u); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm_cmplt_epi8_mask(__m128i __a, __m128i __b) { + return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 1, + (__mmask16)-1); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm_mask_cmplt_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) { + return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 1, + __u); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm_cmplt_epu8_mask(__m128i __a, __m128i __b) { + return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 1, + (__mmask16)-1); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm_mask_cmplt_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) { + return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 1, + __u); +} + +static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) +_mm256_cmplt_epi8_mask(__m256i __a, __m256i __b) { + return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 1, + (__mmask32)-1); +} + +static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) +_mm256_mask_cmplt_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) { + return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 1, + __u); +} + +static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) +_mm256_cmplt_epu8_mask(__m256i __a, __m256i __b) { + return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 1, + (__mmask32)-1); +} + +static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) +_mm256_mask_cmplt_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) 
{ + return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 1, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_cmplt_epi16_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 1, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_mask_cmplt_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 1, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_cmplt_epu16_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 1, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_mask_cmplt_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 1, + __u); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm256_cmplt_epi16_mask(__m256i __a, __m256i __b) { + return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 1, + (__mmask16)-1); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm256_mask_cmplt_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) { + return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 1, + __u); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm256_cmplt_epu16_mask(__m256i __a, __m256i __b) { + return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 1, + (__mmask16)-1); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm256_mask_cmplt_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) { + return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 1, + __u); +} + +static __inline__ __mmask16 
__attribute__((__always_inline__, __nodebug__)) +_mm_cmpneq_epi8_mask(__m128i __a, __m128i __b) { + return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 4, + (__mmask16)-1); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm_mask_cmpneq_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) { + return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 4, + __u); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpneq_epu8_mask(__m128i __a, __m128i __b) { + return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 4, + (__mmask16)-1); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm_mask_cmpneq_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) { + return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 4, + __u); +} + +static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) +_mm256_cmpneq_epi8_mask(__m256i __a, __m256i __b) { + return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 4, + (__mmask32)-1); +} + +static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) +_mm256_mask_cmpneq_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) { + return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 4, + __u); +} + +static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) +_mm256_cmpneq_epu8_mask(__m256i __a, __m256i __b) { + return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 4, + (__mmask32)-1); +} + +static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) +_mm256_mask_cmpneq_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) { + return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 4, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpneq_epi16_mask(__m128i __a, __m128i __b) { + return 
(__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 4, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_mask_cmpneq_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 4, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpneq_epu16_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 4, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_mask_cmpneq_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 4, + __u); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm256_cmpneq_epi16_mask(__m256i __a, __m256i __b) { + return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 4, + (__mmask16)-1); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm256_mask_cmpneq_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) { + return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 4, + __u); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm256_cmpneq_epu16_mask(__m256i __a, __m256i __b) { + return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 4, + (__mmask16)-1); +} + +static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__)) +_mm256_mask_cmpneq_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) { + return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 4, + __u); +} + +#define _mm_cmp_epi8_mask(a, b, p) __extension__ ({ \ + (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \ + (__v16qi)(__m128i)(b), \ + (p), (__mmask16)-1); }) + +#define _mm_mask_cmp_epi8_mask(m, a, b, p) __extension__ ({ \ + 
(__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \ + (__v16qi)(__m128i)(b), \ + (p), (__mmask16)(m)); }) + +#define _mm_cmp_epu8_mask(a, b, p) __extension__ ({ \ + (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \ + (__v16qi)(__m128i)(b), \ + (p), (__mmask16)-1); }) + +#define _mm_mask_cmp_epu8_mask(m, a, b, p) __extension__ ({ \ + (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \ + (__v16qi)(__m128i)(b), \ + (p), (__mmask16)(m)); }) + +#define _mm256_cmp_epi8_mask(a, b, p) __extension__ ({ \ + (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \ + (__v32qi)(__m256i)(b), \ + (p), (__mmask32)-1); }) + +#define _mm256_mask_cmp_epi8_mask(m, a, b, p) __extension__ ({ \ + (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \ + (__v32qi)(__m256i)(b), \ + (p), (__mmask32)(m)); }) + +#define _mm256_cmp_epu8_mask(a, b, p) __extension__ ({ \ + (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \ + (__v32qi)(__m256i)(b), \ + (p), (__mmask32)-1); }) + +#define _mm256_mask_cmp_epu8_mask(m, a, b, p) __extension__ ({ \ + (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \ + (__v32qi)(__m256i)(b), \ + (p), (__mmask32)(m)); }) + +#define _mm_cmp_epi16_mask(a, b, p) __extension__ ({ \ + (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \ + (__v8hi)(__m128i)(b), \ + (p), (__mmask8)-1); }) + +#define _mm_mask_cmp_epi16_mask(m, a, b, p) __extension__ ({ \ + (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \ + (__v8hi)(__m128i)(b), \ + (p), (__mmask8)(m)); }) + +#define _mm_cmp_epu16_mask(a, b, p) __extension__ ({ \ + (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \ + (__v8hi)(__m128i)(b), \ + (p), (__mmask8)-1); }) + +#define _mm_mask_cmp_epu16_mask(m, a, b, p) __extension__ ({ \ + (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \ + (__v8hi)(__m128i)(b), \ + (p), (__mmask8)(m)); }) + +#define _mm256_cmp_epi16_mask(a, b, p) __extension__ ({ \ + 
(__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \ + (__v16hi)(__m256i)(b), \ + (p), (__mmask16)-1); }) + +#define _mm256_mask_cmp_epi16_mask(m, a, b, p) __extension__ ({ \ + (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \ + (__v16hi)(__m256i)(b), \ + (p), (__mmask16)(m)); }) + +#define _mm256_cmp_epu16_mask(a, b, p) __extension__ ({ \ + (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \ + (__v16hi)(__m256i)(b), \ + (p), (__mmask16)-1); }) + +#define _mm256_mask_cmp_epu16_mask(m, a, b, p) __extension__ ({ \ + (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \ + (__v16hi)(__m256i)(b), \ + (p), (__mmask16)(m)); }) + #endif /* __AVX512VLBWINTRIN_H */ diff --git a/lib/clang/3.6/include/avx512vlintrin.h b/lib/clang/3.6/include/avx512vlintrin.h index 8a374b1..b460992 100644 --- a/lib/clang/3.6/include/avx512vlintrin.h +++ b/lib/clang/3.6/include/avx512vlintrin.h @@ -42,6 +42,17 @@ _mm_mask_cmpeq_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { __u); } +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpeq_epu32_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 0, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_mask_cmpeq_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 0, + __u); +} static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) _mm256_cmpeq_epi32_mask(__m256i __a, __m256i __b) { @@ -56,6 +67,18 @@ _mm256_mask_cmpeq_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { } static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm256_cmpeq_epu32_mask(__m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 0, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, 
__nodebug__)) +_mm256_mask_cmpeq_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 0, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) _mm_cmpeq_epi64_mask(__m128i __a, __m128i __b) { return (__mmask8)__builtin_ia32_pcmpeqq128_mask((__v2di)__a, (__v2di)__b, (__mmask8)-1); @@ -67,6 +90,17 @@ _mm_mask_cmpeq_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { __u); } +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpeq_epu64_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 0, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_mask_cmpeq_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 0, + __u); +} static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) _mm256_cmpeq_epi64_mask(__m256i __a, __m256i __b) { @@ -80,4 +114,580 @@ _mm256_mask_cmpeq_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { __u); } +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm256_cmpeq_epu64_mask(__m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 0, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm256_mask_cmpeq_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 0, + __u); +} + + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpge_epi32_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 5, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_mask_cmpge_epi32_mask(__mmask8 __u, __m128i __a, __m128i 
__b) { + return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 5, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpge_epu32_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 5, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_mask_cmpge_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 5, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm256_cmpge_epi32_mask(__m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 5, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm256_mask_cmpge_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 5, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm256_cmpge_epu32_mask(__m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 5, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm256_mask_cmpge_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 5, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpge_epi64_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 5, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_mask_cmpge_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 5, + __u); +} + +static __inline__ __mmask8 
__attribute__((__always_inline__, __nodebug__)) +_mm_cmpge_epu64_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 5, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_mask_cmpge_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 5, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm256_cmpge_epi64_mask(__m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 5, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm256_mask_cmpge_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 5, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm256_cmpge_epu64_mask(__m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 5, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm256_mask_cmpge_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 5, + __u); +} + + + + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpgt_epi32_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_pcmpgtd128_mask((__v4si)__a, (__v4si)__b, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_mask_cmpgt_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_pcmpgtd128_mask((__v4si)__a, (__v4si)__b, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpgt_epu32_mask(__m128i __a, __m128i __b) { + return 
(__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 6, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_mask_cmpgt_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 6, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm256_cmpgt_epi32_mask(__m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_pcmpgtd256_mask((__v8si)__a, (__v8si)__b, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm256_mask_cmpgt_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_pcmpgtd256_mask((__v8si)__a, (__v8si)__b, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm256_cmpgt_epu32_mask(__m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 6, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm256_mask_cmpgt_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 6, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpgt_epi64_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_pcmpgtq128_mask((__v2di)__a, (__v2di)__b, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_mask_cmpgt_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_pcmpgtq128_mask((__v2di)__a, (__v2di)__b, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpgt_epu64_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 6, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, 
__nodebug__)) +_mm_mask_cmpgt_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 6, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm256_cmpgt_epi64_mask(__m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_pcmpgtq256_mask((__v4di)__a, (__v4di)__b, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm256_mask_cmpgt_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_pcmpgtq256_mask((__v4di)__a, (__v4di)__b, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm256_cmpgt_epu64_mask(__m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 6, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm256_mask_cmpgt_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 6, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_cmple_epi32_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 2, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_mask_cmple_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 2, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_cmple_epu32_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 2, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_mask_cmple_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, 
(__v4si)__b, 2, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm256_cmple_epi32_mask(__m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 2, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm256_mask_cmple_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 2, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm256_cmple_epu32_mask(__m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 2, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm256_mask_cmple_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 2, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_cmple_epi64_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 2, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_mask_cmple_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 2, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_cmple_epu64_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 2, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_mask_cmple_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 2, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm256_cmple_epi64_mask(__m256i __a, __m256i 
__b) { + return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 2, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm256_mask_cmple_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 2, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm256_cmple_epu64_mask(__m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 2, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm256_mask_cmple_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 2, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_cmplt_epi32_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 1, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_mask_cmplt_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 1, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_cmplt_epu32_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 1, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_mask_cmplt_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 1, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm256_cmplt_epi32_mask(__m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 1, + (__mmask8)-1); +} + +static __inline__ __mmask8 
__attribute__((__always_inline__, __nodebug__)) +_mm256_mask_cmplt_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 1, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm256_cmplt_epu32_mask(__m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 1, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm256_mask_cmplt_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 1, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_cmplt_epi64_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 1, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_mask_cmplt_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 1, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_cmplt_epu64_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 1, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_mask_cmplt_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 1, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm256_cmplt_epi64_mask(__m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 1, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm256_mask_cmplt_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { + return 
(__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 1, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm256_cmplt_epu64_mask(__m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 1, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm256_mask_cmplt_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 1, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpneq_epi32_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 4, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_mask_cmpneq_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 4, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpneq_epu32_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 4, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_mask_cmpneq_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 4, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm256_cmpneq_epi32_mask(__m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 4, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm256_mask_cmpneq_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 4, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, 
__nodebug__)) +_mm256_cmpneq_epu32_mask(__m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 4, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm256_mask_cmpneq_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 4, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpneq_epi64_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 4, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_mask_cmpneq_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 4, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpneq_epu64_mask(__m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 4, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm_mask_cmpneq_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { + return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 4, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm256_cmpneq_epi64_mask(__m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 4, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm256_mask_cmpneq_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 4, + __u); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm256_cmpneq_epu64_mask(__m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 
4, + (__mmask8)-1); +} + +static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__)) +_mm256_mask_cmpneq_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { + return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 4, + __u); +} + +#define _mm_cmp_epi32_mask(a, b, p) __extension__ ({ \ + (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \ + (__v4si)(__m128i)(b), \ + (p), (__mmask8)-1); }) + +#define _mm_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \ + (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \ + (__v4si)(__m128i)(b), \ + (p), (__mmask8)(m)); }) + +#define _mm_cmp_epu32_mask(a, b, p) __extension__ ({ \ + (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \ + (__v4si)(__m128i)(b), \ + (p), (__mmask8)-1); }) + +#define _mm_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \ + (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \ + (__v4si)(__m128i)(b), \ + (p), (__mmask8)(m)); }) + +#define _mm256_cmp_epi32_mask(a, b, p) __extension__ ({ \ + (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \ + (__v8si)(__m256i)(b), \ + (p), (__mmask8)-1); }) + +#define _mm256_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \ + (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \ + (__v8si)(__m256i)(b), \ + (p), (__mmask8)(m)); }) + +#define _mm256_cmp_epu32_mask(a, b, p) __extension__ ({ \ + (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \ + (__v8si)(__m256i)(b), \ + (p), (__mmask8)-1); }) + +#define _mm256_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \ + (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \ + (__v8si)(__m256i)(b), \ + (p), (__mmask8)(m)); }) + +#define _mm_cmp_epi64_mask(a, b, p) __extension__ ({ \ + (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \ + (__v2di)(__m128i)(b), \ + (p), (__mmask8)-1); }) + +#define _mm_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \ + (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), 
\ + (__v2di)(__m128i)(b), \ + (p), (__mmask8)(m)); }) + +#define _mm_cmp_epu64_mask(a, b, p) __extension__ ({ \ + (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \ + (__v2di)(__m128i)(b), \ + (p), (__mmask8)-1); }) + +#define _mm_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \ + (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \ + (__v2di)(__m128i)(b), \ + (p), (__mmask8)(m)); }) + +#define _mm256_cmp_epi64_mask(a, b, p) __extension__ ({ \ + (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \ + (__v4di)(__m256i)(b), \ + (p), (__mmask8)-1); }) + +#define _mm256_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \ + (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \ + (__v4di)(__m256i)(b), \ + (p), (__mmask8)(m)); }) + +#define _mm256_cmp_epu64_mask(a, b, p) __extension__ ({ \ + (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \ + (__v4di)(__m256i)(b), \ + (p), (__mmask8)-1); }) + +#define _mm256_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \ + (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \ + (__v4di)(__m256i)(b), \ + (p), (__mmask8)(m)); }) + #endif /* __AVX512VLINTRIN_H */ diff --git a/lib/clang/3.6/include/avxintrin.h b/lib/clang/3.6/include/avxintrin.h index 4e1044a..d7c7f46 100644 --- a/lib/clang/3.6/include/avxintrin.h +++ b/lib/clang/3.6/include/avxintrin.h @@ -257,8 +257,7 @@ _mm_permutevar_ps(__m128 __a, __m128i __c) static __inline __m256 __attribute__((__always_inline__, __nodebug__)) _mm256_permutevar_ps(__m256 __a, __m256i __c) { - return (__m256)__builtin_ia32_vpermilvarps256((__v8sf)__a, - (__v8si)__c); + return (__m256)__builtin_ia32_vpermilvarps256((__v8sf)__a, (__v8si)__c); } #define _mm_permute_pd(A, C) __extension__ ({ \ @@ -444,21 +443,21 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) (__m128i)__builtin_ia32_vextractf128_si256((__v8si)__A, (O)); }) static __inline int __attribute__((__always_inline__, __nodebug__)) -_mm256_extract_epi32(__m256i __a, int const 
__imm) +_mm256_extract_epi32(__m256i __a, const int __imm) { __v8si __b = (__v8si)__a; return __b[__imm & 7]; } static __inline int __attribute__((__always_inline__, __nodebug__)) -_mm256_extract_epi16(__m256i __a, int const __imm) +_mm256_extract_epi16(__m256i __a, const int __imm) { __v16hi __b = (__v16hi)__a; return __b[__imm & 15]; } static __inline int __attribute__((__always_inline__, __nodebug__)) -_mm256_extract_epi8(__m256i __a, int const __imm) +_mm256_extract_epi8(__m256i __a, const int __imm) { __v32qi __b = (__v32qi)__a; return __b[__imm & 31]; @@ -515,7 +514,7 @@ _mm256_insert_epi8(__m256i __a, int __b, int const __imm) #ifdef __x86_64__ static __inline __m256i __attribute__((__always_inline__, __nodebug__)) -_mm256_insert_epi64(__m256i __a, int __b, int const __imm) +_mm256_insert_epi64(__m256i __a, long long __b, int const __imm) { __v4di __c = (__v4di)__a; __c[__imm & 3] = __b; @@ -785,7 +784,7 @@ _mm256_loadu_pd(double const *__p) { struct __loadu_pd { __m256d __v; - } __attribute__((packed, may_alias)); + } __attribute__((__packed__, __may_alias__)); return ((struct __loadu_pd*)__p)->__v; } @@ -794,7 +793,7 @@ _mm256_loadu_ps(float const *__p) { struct __loadu_ps { __m256 __v; - } __attribute__((packed, may_alias)); + } __attribute__((__packed__, __may_alias__)); return ((struct __loadu_ps*)__p)->__v; } @@ -809,7 +808,7 @@ _mm256_loadu_si256(__m256i const *__p) { struct __loadu_si256 { __m256i __v; - } __attribute__((packed, may_alias)); + } __attribute__((__packed__, __may_alias__)); return ((struct __loadu_si256*)__p)->__v; } @@ -935,23 +934,23 @@ _mm256_set_pd(double __a, double __b, double __c, double __d) static __inline __m256 __attribute__((__always_inline__, __nodebug__)) _mm256_set_ps(float __a, float __b, float __c, float __d, - float __e, float __f, float __g, float __h) + float __e, float __f, float __g, float __h) { return (__m256){ __h, __g, __f, __e, __d, __c, __b, __a }; } static __inline __m256i __attribute__((__always_inline__, 
__nodebug__)) _mm256_set_epi32(int __i0, int __i1, int __i2, int __i3, - int __i4, int __i5, int __i6, int __i7) + int __i4, int __i5, int __i6, int __i7) { return (__m256i)(__v8si){ __i7, __i6, __i5, __i4, __i3, __i2, __i1, __i0 }; } static __inline __m256i __attribute__((__always_inline__, __nodebug__)) _mm256_set_epi16(short __w15, short __w14, short __w13, short __w12, - short __w11, short __w10, short __w09, short __w08, - short __w07, short __w06, short __w05, short __w04, - short __w03, short __w02, short __w01, short __w00) + short __w11, short __w10, short __w09, short __w08, + short __w07, short __w06, short __w05, short __w04, + short __w03, short __w02, short __w01, short __w00) { return (__m256i)(__v16hi){ __w00, __w01, __w02, __w03, __w04, __w05, __w06, __w07, __w08, __w09, __w10, __w11, __w12, __w13, __w14, __w15 }; @@ -959,13 +958,13 @@ _mm256_set_epi16(short __w15, short __w14, short __w13, short __w12, static __inline __m256i __attribute__((__always_inline__, __nodebug__)) _mm256_set_epi8(char __b31, char __b30, char __b29, char __b28, - char __b27, char __b26, char __b25, char __b24, - char __b23, char __b22, char __b21, char __b20, - char __b19, char __b18, char __b17, char __b16, - char __b15, char __b14, char __b13, char __b12, - char __b11, char __b10, char __b09, char __b08, - char __b07, char __b06, char __b05, char __b04, - char __b03, char __b02, char __b01, char __b00) + char __b27, char __b26, char __b25, char __b24, + char __b23, char __b22, char __b21, char __b20, + char __b19, char __b18, char __b17, char __b16, + char __b15, char __b14, char __b13, char __b12, + char __b11, char __b10, char __b09, char __b08, + char __b07, char __b06, char __b05, char __b04, + char __b03, char __b02, char __b01, char __b00) { return (__m256i)(__v32qi){ __b00, __b01, __b02, __b03, __b04, __b05, __b06, __b07, @@ -990,23 +989,23 @@ _mm256_setr_pd(double __a, double __b, double __c, double __d) static __inline __m256 __attribute__((__always_inline__, 
__nodebug__)) _mm256_setr_ps(float __a, float __b, float __c, float __d, - float __e, float __f, float __g, float __h) + float __e, float __f, float __g, float __h) { return (__m256){ __a, __b, __c, __d, __e, __f, __g, __h }; } static __inline __m256i __attribute__((__always_inline__, __nodebug__)) _mm256_setr_epi32(int __i0, int __i1, int __i2, int __i3, - int __i4, int __i5, int __i6, int __i7) + int __i4, int __i5, int __i6, int __i7) { return (__m256i)(__v8si){ __i0, __i1, __i2, __i3, __i4, __i5, __i6, __i7 }; } static __inline __m256i __attribute__((__always_inline__, __nodebug__)) _mm256_setr_epi16(short __w15, short __w14, short __w13, short __w12, - short __w11, short __w10, short __w09, short __w08, - short __w07, short __w06, short __w05, short __w04, - short __w03, short __w02, short __w01, short __w00) + short __w11, short __w10, short __w09, short __w08, + short __w07, short __w06, short __w05, short __w04, + short __w03, short __w02, short __w01, short __w00) { return (__m256i)(__v16hi){ __w15, __w14, __w13, __w12, __w11, __w10, __w09, __w08, __w07, __w06, __w05, __w04, __w03, __w02, __w01, __w00 }; @@ -1014,19 +1013,19 @@ _mm256_setr_epi16(short __w15, short __w14, short __w13, short __w12, static __inline __m256i __attribute__((__always_inline__, __nodebug__)) _mm256_setr_epi8(char __b31, char __b30, char __b29, char __b28, - char __b27, char __b26, char __b25, char __b24, - char __b23, char __b22, char __b21, char __b20, - char __b19, char __b18, char __b17, char __b16, - char __b15, char __b14, char __b13, char __b12, - char __b11, char __b10, char __b09, char __b08, - char __b07, char __b06, char __b05, char __b04, - char __b03, char __b02, char __b01, char __b00) + char __b27, char __b26, char __b25, char __b24, + char __b23, char __b22, char __b21, char __b20, + char __b19, char __b18, char __b17, char __b16, + char __b15, char __b14, char __b13, char __b12, + char __b11, char __b10, char __b09, char __b08, + char __b07, char __b06, char __b05, 
char __b04, + char __b03, char __b02, char __b01, char __b00) { return (__m256i)(__v32qi){ __b31, __b30, __b29, __b28, __b27, __b26, __b25, __b24, - __b23, __b22, __b21, __b20, __b19, __b18, __b17, __b16, - __b15, __b14, __b13, __b12, __b11, __b10, __b09, __b08, - __b07, __b06, __b05, __b04, __b03, __b02, __b01, __b00 }; + __b23, __b22, __b21, __b20, __b19, __b18, __b17, __b16, + __b15, __b14, __b13, __b12, __b11, __b10, __b09, __b08, + __b07, __b06, __b05, __b04, __b03, __b02, __b01, __b00 }; } static __inline __m256i __attribute__((__always_inline__, __nodebug__)) @@ -1195,7 +1194,7 @@ _mm256_loadu2_m128i(__m128i const *__addr_hi, __m128i const *__addr_lo) { struct __loadu_si128 { __m128i __v; - } __attribute__((packed, may_alias)); + } __attribute__((__packed__, __may_alias__)); __m256i __v256 = _mm256_castsi128_si256( ((struct __loadu_si128*)__addr_lo)->__v); return _mm256_insertf128_si256(__v256, diff --git a/lib/clang/3.6/include/emmintrin.h b/lib/clang/3.6/include/emmintrin.h index b3f8569..c764d68 100644 --- a/lib/clang/3.6/include/emmintrin.h +++ b/lib/clang/3.6/include/emmintrin.h @@ -155,148 +155,148 @@ _mm_xor_pd(__m128d __a, __m128d __b) static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpeq_pd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmppd(__a, __b, 0); + return (__m128d)__builtin_ia32_cmpeqpd(__a, __b); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmplt_pd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmppd(__a, __b, 1); + return (__m128d)__builtin_ia32_cmpltpd(__a, __b); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmple_pd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmppd(__a, __b, 2); + return (__m128d)__builtin_ia32_cmplepd(__a, __b); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpgt_pd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmppd(__b, __a, 
1); + return (__m128d)__builtin_ia32_cmpltpd(__b, __a); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpge_pd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmppd(__b, __a, 2); + return (__m128d)__builtin_ia32_cmplepd(__b, __a); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpord_pd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmppd(__a, __b, 7); + return (__m128d)__builtin_ia32_cmpordpd(__a, __b); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpunord_pd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmppd(__a, __b, 3); + return (__m128d)__builtin_ia32_cmpunordpd(__a, __b); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpneq_pd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmppd(__a, __b, 4); + return (__m128d)__builtin_ia32_cmpneqpd(__a, __b); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpnlt_pd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmppd(__a, __b, 5); + return (__m128d)__builtin_ia32_cmpnltpd(__a, __b); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpnle_pd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmppd(__a, __b, 6); + return (__m128d)__builtin_ia32_cmpnlepd(__a, __b); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpngt_pd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmppd(__b, __a, 5); + return (__m128d)__builtin_ia32_cmpnltpd(__b, __a); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpnge_pd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmppd(__b, __a, 6); + return (__m128d)__builtin_ia32_cmpnlepd(__b, __a); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpeq_sd(__m128d __a, __m128d __b) { - return 
(__m128d)__builtin_ia32_cmpsd(__a, __b, 0); + return (__m128d)__builtin_ia32_cmpeqsd(__a, __b); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmplt_sd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmpsd(__a, __b, 1); + return (__m128d)__builtin_ia32_cmpltsd(__a, __b); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmple_sd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmpsd(__a, __b, 2); + return (__m128d)__builtin_ia32_cmplesd(__a, __b); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpgt_sd(__m128d __a, __m128d __b) { - __m128d __c = __builtin_ia32_cmpsd(__b, __a, 1); + __m128d __c = __builtin_ia32_cmpltsd(__b, __a); return (__m128d) { __c[0], __a[1] }; } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpge_sd(__m128d __a, __m128d __b) { - __m128d __c = __builtin_ia32_cmpsd(__b, __a, 2); + __m128d __c = __builtin_ia32_cmplesd(__b, __a); return (__m128d) { __c[0], __a[1] }; } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpord_sd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmpsd(__a, __b, 7); + return (__m128d)__builtin_ia32_cmpordsd(__a, __b); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpunord_sd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmpsd(__a, __b, 3); + return (__m128d)__builtin_ia32_cmpunordsd(__a, __b); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpneq_sd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmpsd(__a, __b, 4); + return (__m128d)__builtin_ia32_cmpneqsd(__a, __b); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpnlt_sd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmpsd(__a, __b, 5); + return (__m128d)__builtin_ia32_cmpnltsd(__a, __b); } static __inline__ __m128d 
__attribute__((__always_inline__, __nodebug__)) _mm_cmpnle_sd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmpsd(__a, __b, 6); + return (__m128d)__builtin_ia32_cmpnlesd(__a, __b); } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpngt_sd(__m128d __a, __m128d __b) { - __m128d __c = __builtin_ia32_cmpsd(__b, __a, 5); + __m128d __c = __builtin_ia32_cmpnltsd(__b, __a); return (__m128d) { __c[0], __a[1] }; } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpnge_sd(__m128d __a, __m128d __b) { - __m128d __c = __builtin_ia32_cmpsd(__b, __a, 6); + __m128d __c = __builtin_ia32_cmpnlesd(__b, __a); return (__m128d) { __c[0], __a[1] }; } @@ -489,7 +489,7 @@ _mm_loadu_pd(double const *__dp) { struct __loadu_pd { __m128d __v; - } __attribute__((packed, may_alias)); + } __attribute__((__packed__, __may_alias__)); return ((struct __loadu_pd*)__dp)->__v; } @@ -825,11 +825,28 @@ _mm_xor_si128(__m128i __a, __m128i __b) return __a ^ __b; } -#define _mm_slli_si128(a, count) __extension__ ({ \ - _Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \ - __m128i __a = (a); \ - _Pragma("clang diagnostic pop"); \ - (__m128i)__builtin_ia32_pslldqi128(__a, (count)*8); }) +#define _mm_slli_si128(a, imm) __extension__ ({ \ + (__m128i)__builtin_shufflevector((__v16qi)_mm_setzero_si128(), \ + (__v16qi)(__m128i)(a), \ + ((imm)&0xF0) ? 0 : 16 - ((imm)&0xF), \ + ((imm)&0xF0) ? 0 : 17 - ((imm)&0xF), \ + ((imm)&0xF0) ? 0 : 18 - ((imm)&0xF), \ + ((imm)&0xF0) ? 0 : 19 - ((imm)&0xF), \ + ((imm)&0xF0) ? 0 : 20 - ((imm)&0xF), \ + ((imm)&0xF0) ? 0 : 21 - ((imm)&0xF), \ + ((imm)&0xF0) ? 0 : 22 - ((imm)&0xF), \ + ((imm)&0xF0) ? 0 : 23 - ((imm)&0xF), \ + ((imm)&0xF0) ? 0 : 24 - ((imm)&0xF), \ + ((imm)&0xF0) ? 0 : 25 - ((imm)&0xF), \ + ((imm)&0xF0) ? 0 : 26 - ((imm)&0xF), \ + ((imm)&0xF0) ? 0 : 27 - ((imm)&0xF), \ + ((imm)&0xF0) ? 0 : 28 - ((imm)&0xF), \ + ((imm)&0xF0) ? 
0 : 29 - ((imm)&0xF), \ + ((imm)&0xF0) ? 0 : 30 - ((imm)&0xF), \ + ((imm)&0xF0) ? 0 : 31 - ((imm)&0xF)); }) + +#define _mm_bslli_si128(a, imm) \ + _mm_slli_si128((a), (imm)) static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) _mm_slli_epi16(__m128i __a, int __count) @@ -891,12 +908,28 @@ _mm_sra_epi32(__m128i __a, __m128i __count) return (__m128i)__builtin_ia32_psrad128((__v4si)__a, (__v4si)__count); } - -#define _mm_srli_si128(a, count) __extension__ ({ \ - _Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \ - __m128i __a = (a); \ - _Pragma("clang diagnostic pop"); \ - (__m128i)__builtin_ia32_psrldqi128(__a, (count)*8); }) +#define _mm_srli_si128(a, imm) __extension__ ({ \ + (__m128i)__builtin_shufflevector((__v16qi)(__m128i)(a), \ + (__v16qi)_mm_setzero_si128(), \ + ((imm)&0xF0) ? 16 : ((imm)&0xF) + 0, \ + ((imm)&0xF0) ? 16 : ((imm)&0xF) + 1, \ + ((imm)&0xF0) ? 16 : ((imm)&0xF) + 2, \ + ((imm)&0xF0) ? 16 : ((imm)&0xF) + 3, \ + ((imm)&0xF0) ? 16 : ((imm)&0xF) + 4, \ + ((imm)&0xF0) ? 16 : ((imm)&0xF) + 5, \ + ((imm)&0xF0) ? 16 : ((imm)&0xF) + 6, \ + ((imm)&0xF0) ? 16 : ((imm)&0xF) + 7, \ + ((imm)&0xF0) ? 16 : ((imm)&0xF) + 8, \ + ((imm)&0xF0) ? 16 : ((imm)&0xF) + 9, \ + ((imm)&0xF0) ? 16 : ((imm)&0xF) + 10, \ + ((imm)&0xF0) ? 16 : ((imm)&0xF) + 11, \ + ((imm)&0xF0) ? 16 : ((imm)&0xF) + 12, \ + ((imm)&0xF0) ? 16 : ((imm)&0xF) + 13, \ + ((imm)&0xF0) ? 16 : ((imm)&0xF) + 14, \ + ((imm)&0xF0) ? 
16 : ((imm)&0xF) + 15); }) + +#define _mm_bsrli_si128(a, imm) \ + _mm_srli_si128((a), (imm)) static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) _mm_srli_epi16(__m128i __a, int __count) @@ -1070,7 +1103,7 @@ _mm_loadu_si128(__m128i const *__p) { struct __loadu_si128 { __m128i __v; - } __attribute__((packed, may_alias)); + } __attribute__((__packed__, __may_alias__)); return ((struct __loadu_si128*)__p)->__v; } @@ -1284,27 +1317,21 @@ _mm_movemask_epi8(__m128i __a) } #define _mm_shuffle_epi32(a, imm) __extension__ ({ \ - _Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \ - __m128i __a = (a); \ - _Pragma("clang diagnostic pop"); \ - (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si) _mm_set1_epi32(0), \ + (__m128i)__builtin_shufflevector((__v4si)(__m128i)(a), \ + (__v4si)_mm_set1_epi32(0), \ (imm) & 0x3, ((imm) & 0xc) >> 2, \ ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6); }) #define _mm_shufflelo_epi16(a, imm) __extension__ ({ \ - _Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \ - __m128i __a = (a); \ - _Pragma("clang diagnostic pop"); \ - (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi) _mm_set1_epi16(0), \ + (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(a), \ + (__v8hi)_mm_set1_epi16(0), \ (imm) & 0x3, ((imm) & 0xc) >> 2, \ ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6, \ 4, 5, 6, 7); }) #define _mm_shufflehi_epi16(a, imm) __extension__ ({ \ - _Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \ - __m128i __a = (a); \ - _Pragma("clang diagnostic pop"); \ - (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi) _mm_set1_epi16(0), \ + (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(a), \ + (__v8hi)_mm_set1_epi16(0), \ 0, 1, 2, 3, \ 4 + (((imm) & 0x03) >> 0), \ 4 + (((imm) & 0x0c) >> 2), \ @@ -1396,11 +1423,8 @@ _mm_movemask_pd(__m128d __a) } #define _mm_shuffle_pd(a, b, i) __extension__ ({ \ - _Pragma("clang diagnostic push") 
_Pragma("clang diagnostic ignored \"-Wshadow\""); \ - __m128d __a = (a); \ - __m128d __b = (b); \ - _Pragma("clang diagnostic pop"); \ - __builtin_shufflevector(__a, __b, (i) & 1, (((i) & 2) >> 1) + 2); }) + __builtin_shufflevector((__m128d)(a), (__m128d)(b), \ + (i) & 1, (((i) & 2) >> 1) + 2); }) static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_castpd_ps(__m128d __a) diff --git a/lib/clang/3.6/include/module.modulemap b/lib/clang/3.6/include/module.modulemap index 3c42477..062464e 100644 --- a/lib/clang/3.6/include/module.modulemap +++ b/lib/clang/3.6/include/module.modulemap @@ -1,4 +1,4 @@ -module _Builtin_intrinsics [system] { +module _Builtin_intrinsics [system] [extern_c] { explicit module altivec { requires altivec header "altivec.h" diff --git a/lib/clang/3.6/include/shaintrin.h b/lib/clang/3.6/include/shaintrin.h index 66ed055..391a4bb 100644 --- a/lib/clang/3.6/include/shaintrin.h +++ b/lib/clang/3.6/include/shaintrin.h @@ -38,37 +38,37 @@ static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) _mm_sha1nexte_epu32(__m128i __X, __m128i __Y) { - return __builtin_ia32_sha1nexte(__X, __Y); + return (__m128i)__builtin_ia32_sha1nexte((__v4si)__X, (__v4si)__Y); } static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) _mm_sha1msg1_epu32(__m128i __X, __m128i __Y) { - return __builtin_ia32_sha1msg1(__X, __Y); + return (__m128i)__builtin_ia32_sha1msg1((__v4si)__X, (__v4si)__Y); } static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) _mm_sha1msg2_epu32(__m128i __X, __m128i __Y) { - return __builtin_ia32_sha1msg2(__X, __Y); + return (__m128i)__builtin_ia32_sha1msg2((__v4si)__X, (__v4si)__Y); } static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) _mm_sha256rnds2_epu32(__m128i __X, __m128i __Y, __m128i __Z) { - return __builtin_ia32_sha256rnds2(__X, __Y, __Z); + return (__m128i)__builtin_ia32_sha256rnds2((__v4si)__X, (__v4si)__Y, (__v4si)__Z); } static __inline__ 
__m128i __attribute__((__always_inline__, __nodebug__)) _mm_sha256msg1_epu32(__m128i __X, __m128i __Y) { - return __builtin_ia32_sha256msg1(__X, __Y); + return (__m128i)__builtin_ia32_sha256msg1((__v4si)__X, (__v4si)__Y); } static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) _mm_sha256msg2_epu32(__m128i __X, __m128i __Y) { - return __builtin_ia32_sha256msg2(__X, __Y); + return (__m128i)__builtin_ia32_sha256msg2((__v4si)__X, (__v4si)__Y); } #endif /* __SHAINTRIN_H */ diff --git a/lib/clang/3.6/include/unwind.h b/lib/clang/3.6/include/unwind.h index 685c1df..303d792 100644 --- a/lib/clang/3.6/include/unwind.h +++ b/lib/clang/3.6/include/unwind.h @@ -26,8 +26,8 @@ #ifndef __CLANG_UNWIND_H #define __CLANG_UNWIND_H -#if __has_include_next(<unwind.h>) -/* Darwin (from 11.x on) and libunwind provide an unwind.h. If that's available, +#if defined(__APPLE__) && __has_include_next(<unwind.h>) +/* Darwin (from 11.x on) provide an unwind.h. If that's available, * use it. libunwind wraps some of its definitions in #ifdef _GNU_SOURCE, * so define that around the include.*/ # ifndef _GNU_SOURCE @@ -199,6 +199,8 @@ _Unwind_Word _Unwind_GetIPInfo(struct _Unwind_Context *, int *); _Unwind_Word _Unwind_GetCFA(struct _Unwind_Context *); +_Unwind_Word _Unwind_GetBSP(struct _Unwind_Context *); + void *_Unwind_GetLanguageSpecificData(struct _Unwind_Context *); _Unwind_Ptr _Unwind_GetRegionStart(struct _Unwind_Context *); @@ -233,9 +235,9 @@ void *_Unwind_FindEnclosingFunction(void *); #ifdef __APPLE__ _Unwind_Ptr _Unwind_GetDataRelBase(struct _Unwind_Context *) - __attribute__((unavailable)); + __attribute__((__unavailable__)); _Unwind_Ptr _Unwind_GetTextRelBase(struct _Unwind_Context *) - __attribute__((unavailable)); + __attribute__((__unavailable__)); /* Darwin-specific functions */ void __register_frame(const void *); @@ -249,15 +251,15 @@ struct dwarf_eh_bases { void *_Unwind_Find_FDE(const void *, struct dwarf_eh_bases *); void 
__register_frame_info_bases(const void *, void *, void *, void *) - __attribute__((unavailable)); -void __register_frame_info(const void *, void *) __attribute__((unavailable)); + __attribute__((__unavailable__)); +void __register_frame_info(const void *, void *) __attribute__((__unavailable__)); void __register_frame_info_table_bases(const void *, void*, void *, void *) - __attribute__((unavailable)); + __attribute__((__unavailable__)); void __register_frame_info_table(const void *, void *) - __attribute__((unavailable)); -void __register_frame_table(const void *) __attribute__((unavailable)); -void __deregister_frame_info(const void *) __attribute__((unavailable)); -void __deregister_frame_info_bases(const void *)__attribute__((unavailable)); + __attribute__((__unavailable__)); +void __register_frame_table(const void *) __attribute__((__unavailable__)); +void __deregister_frame_info(const void *) __attribute__((__unavailable__)); +void __deregister_frame_info_bases(const void *)__attribute__((__unavailable__)); #else diff --git a/lib/clang/3.6/include/xmmintrin.h b/lib/clang/3.6/include/xmmintrin.h index c9befcb..d1afe81 100644 --- a/lib/clang/3.6/include/xmmintrin.h +++ b/lib/clang/3.6/include/xmmintrin.h @@ -182,153 +182,153 @@ _mm_xor_ps(__m128 __a, __m128 __b) static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpeq_ss(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpss(__a, __b, 0); + return (__m128)__builtin_ia32_cmpeqss(__a, __b); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpeq_ps(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpps(__a, __b, 0); + return (__m128)__builtin_ia32_cmpeqps(__a, __b); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmplt_ss(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpss(__a, __b, 1); + return (__m128)__builtin_ia32_cmpltss(__a, __b); } static __inline__ __m128 __attribute__((__always_inline__, 
__nodebug__)) _mm_cmplt_ps(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpps(__a, __b, 1); + return (__m128)__builtin_ia32_cmpltps(__a, __b); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmple_ss(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpss(__a, __b, 2); + return (__m128)__builtin_ia32_cmpless(__a, __b); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmple_ps(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpps(__a, __b, 2); + return (__m128)__builtin_ia32_cmpleps(__a, __b); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpgt_ss(__m128 __a, __m128 __b) { return (__m128)__builtin_shufflevector(__a, - __builtin_ia32_cmpss(__b, __a, 1), + __builtin_ia32_cmpltss(__b, __a), 4, 1, 2, 3); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpgt_ps(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpps(__b, __a, 1); + return (__m128)__builtin_ia32_cmpltps(__b, __a); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpge_ss(__m128 __a, __m128 __b) { return (__m128)__builtin_shufflevector(__a, - __builtin_ia32_cmpss(__b, __a, 2), + __builtin_ia32_cmpless(__b, __a), 4, 1, 2, 3); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpge_ps(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpps(__b, __a, 2); + return (__m128)__builtin_ia32_cmpleps(__b, __a); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpneq_ss(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpss(__a, __b, 4); + return (__m128)__builtin_ia32_cmpneqss(__a, __b); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpneq_ps(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpps(__a, __b, 4); + return (__m128)__builtin_ia32_cmpneqps(__a, __b); } static __inline__ __m128 
__attribute__((__always_inline__, __nodebug__)) _mm_cmpnlt_ss(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpss(__a, __b, 5); + return (__m128)__builtin_ia32_cmpnltss(__a, __b); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpnlt_ps(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpps(__a, __b, 5); + return (__m128)__builtin_ia32_cmpnltps(__a, __b); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpnle_ss(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpss(__a, __b, 6); + return (__m128)__builtin_ia32_cmpnless(__a, __b); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpnle_ps(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpps(__a, __b, 6); + return (__m128)__builtin_ia32_cmpnleps(__a, __b); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpngt_ss(__m128 __a, __m128 __b) { return (__m128)__builtin_shufflevector(__a, - __builtin_ia32_cmpss(__b, __a, 5), + __builtin_ia32_cmpnltss(__b, __a), 4, 1, 2, 3); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpngt_ps(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpps(__b, __a, 5); + return (__m128)__builtin_ia32_cmpnltps(__b, __a); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpnge_ss(__m128 __a, __m128 __b) { return (__m128)__builtin_shufflevector(__a, - __builtin_ia32_cmpss(__b, __a, 6), + __builtin_ia32_cmpnless(__b, __a), 4, 1, 2, 3); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpnge_ps(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpps(__b, __a, 6); + return (__m128)__builtin_ia32_cmpnleps(__b, __a); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpord_ss(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpss(__a, __b, 7); + return (__m128)__builtin_ia32_cmpordss(__a, __b); } 
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpord_ps(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpps(__a, __b, 7); + return (__m128)__builtin_ia32_cmpordps(__a, __b); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpunord_ss(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpss(__a, __b, 3); + return (__m128)__builtin_ia32_cmpunordss(__a, __b); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpunord_ps(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpps(__a, __b, 3); + return (__m128)__builtin_ia32_cmpunordps(__a, __b); } static __inline__ int __attribute__((__always_inline__, __nodebug__)) diff --git a/lib/libc++.dylib b/lib/libc++.dylib Binary files differindex 5732e72..ea5dbee 100755 --- a/lib/libc++.dylib +++ b/lib/libc++.dylib |