summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStephen Hines <srhines@google.com>2015-03-30 16:53:30 -0700
committerStephen Hines <srhines@google.com>2015-03-30 16:53:30 -0700
commit15679301bad171fecfd2393d057bdf6152928f77 (patch)
tree20d2bb01cf29af9552cd5bbdb94148a8a1a2698a
parent0d97dd81b09a704f897d300edae3bf96f8c98ec1 (diff)
download3.6-15679301bad171fecfd2393d057bdf6152928f77.tar.gz
Update Darwin clang prebuilts for rebase to r230699.android-wear-5.1.1_r1android-wear-5.1.0_r1
Change-Id: Ieacdf028fae7bc6110746be40ffa4061c975bc7b
-rwxr-xr-xbin/clangbin37083948 -> 39291408 bytes
-rwxr-xr-xbin/llvm-asbin2459684 -> 2888728 bytes
-rwxr-xr-xbin/llvm-linkbin2655772 -> 3117760 bytes
-rw-r--r--lib/clang/3.6/include/Intrin.h35
-rw-r--r--lib/clang/3.6/include/__stddef_max_align_t.h9
-rw-r--r--lib/clang/3.6/include/altivec.h64
-rw-r--r--lib/clang/3.6/include/arm_acle.h68
-rw-r--r--lib/clang/3.6/include/avx512bwintrin.h309
-rw-r--r--lib/clang/3.6/include/avx512erintrin.h326
-rw-r--r--lib/clang/3.6/include/avx512fintrin.h634
-rw-r--r--lib/clang/3.6/include/avx512vlbwintrin.h606
-rw-r--r--lib/clang/3.6/include/avx512vlintrin.h610
-rw-r--r--lib/clang/3.6/include/avxintrin.h73
-rw-r--r--lib/clang/3.6/include/emmintrin.h132
-rw-r--r--lib/clang/3.6/include/module.modulemap2
-rw-r--r--lib/clang/3.6/include/shaintrin.h12
-rw-r--r--lib/clang/3.6/include/unwind.h24
-rw-r--r--lib/clang/3.6/include/xmmintrin.h48
-rwxr-xr-xlib/libc++.dylibbin1470444 -> 1475056 bytes
-rwxr-xr-xlib64/libc++.dylibbin1693488 -> 1694024 bytes
20 files changed, 2529 insertions, 423 deletions
diff --git a/bin/clang b/bin/clang
index c1565e4..6c160d8 100755
--- a/bin/clang
+++ b/bin/clang
Binary files differ
diff --git a/bin/llvm-as b/bin/llvm-as
index c3c70f5..0488bdc 100755
--- a/bin/llvm-as
+++ b/bin/llvm-as
Binary files differ
diff --git a/bin/llvm-link b/bin/llvm-link
index 85eea9d..8e6178f 100755
--- a/bin/llvm-link
+++ b/bin/llvm-link
Binary files differ
diff --git a/lib/clang/3.6/include/Intrin.h b/lib/clang/3.6/include/Intrin.h
index 016c480..727a55e 100644
--- a/lib/clang/3.6/include/Intrin.h
+++ b/lib/clang/3.6/include/Intrin.h
@@ -289,6 +289,7 @@ void _WriteBarrier(void);
unsigned __int32 xbegin(void);
void _xend(void);
static __inline__
+#define _XCR_XFEATURE_ENABLED_MASK 0
unsigned __int64 __cdecl _xgetbv(unsigned int);
void __cdecl _xrstor(void const *, unsigned __int64);
void __cdecl _xsave(void *, unsigned __int64);
@@ -330,7 +331,6 @@ unsigned __int64 __shiftright128(unsigned __int64 _LowPart,
unsigned char _Shift);
static __inline__
void __stosq(unsigned __int64 *, unsigned __int64, size_t);
-unsigned __int64 __umulh(unsigned __int64, unsigned __int64);
unsigned char __vmx_on(unsigned __int64 *);
unsigned char __vmx_vmclear(unsigned __int64 *);
unsigned char __vmx_vmlaunch(void);
@@ -416,10 +416,25 @@ __int64 _sarx_i64(__int64, unsigned int);
int __cdecl _setjmpex(jmp_buf);
#endif
unsigned __int64 _shlx_u64(unsigned __int64, unsigned int);
-unsigned __int64 shrx_u64(unsigned __int64, unsigned int);
-unsigned __int64 _umul128(unsigned __int64 _Multiplier,
- unsigned __int64 _Multiplicand,
- unsigned __int64 *_HighProduct);
+unsigned __int64 _shrx_u64(unsigned __int64, unsigned int);
+/*
+ * Multiply two 64-bit integers and obtain a 64-bit result.
+ * The low-half is returned directly and the high half is in an out parameter.
+ */
+static __inline__ unsigned __int64 __attribute__((__always_inline__, __nodebug__))
+_umul128(unsigned __int64 _Multiplier, unsigned __int64 _Multiplicand,
+ unsigned __int64 *_HighProduct) {
+ unsigned __int128 _FullProduct =
+ (unsigned __int128)_Multiplier * (unsigned __int128)_Multiplicand;
+ *_HighProduct = _FullProduct >> 64;
+ return _FullProduct;
+}
+static __inline__ unsigned __int64 __attribute__((__always_inline__, __nodebug__))
+__umulh(unsigned __int64 _Multiplier, unsigned __int64 _Multiplicand) {
+ unsigned __int128 _FullProduct =
+ (unsigned __int128)_Multiplier * (unsigned __int128)_Multiplicand;
+ return _FullProduct >> 64;
+}
void __cdecl _xrstor64(void const *, unsigned __int64);
void __cdecl _xsave64(void *, unsigned __int64);
void __cdecl _xsaveopt64(void *, unsigned __int64);
@@ -766,17 +781,17 @@ _InterlockedCompareExchange64(__int64 volatile *_Destination,
\*----------------------------------------------------------------------------*/
#if defined(__i386__) || defined(__x86_64__)
static __inline__ void __attribute__((__always_inline__, __nodebug__))
-__attribute__((deprecated("use other intrinsics or C++11 atomics instead")))
+__attribute__((__deprecated__("use other intrinsics or C++11 atomics instead")))
_ReadWriteBarrier(void) {
__asm__ volatile ("" : : : "memory");
}
static __inline__ void __attribute__((__always_inline__, __nodebug__))
-__attribute__((deprecated("use other intrinsics or C++11 atomics instead")))
+__attribute__((__deprecated__("use other intrinsics or C++11 atomics instead")))
_ReadBarrier(void) {
__asm__ volatile ("" : : : "memory");
}
static __inline__ void __attribute__((__always_inline__, __nodebug__))
-__attribute__((deprecated("use other intrinsics or C++11 atomics instead")))
+__attribute__((__deprecated__("use other intrinsics or C++11 atomics instead")))
_WriteBarrier(void) {
__asm__ volatile ("" : : : "memory");
}
@@ -929,14 +944,14 @@ __readmsr(unsigned long __register) {
return (((unsigned __int64)__edx) << 32) | (unsigned __int64)__eax;
}
-static __inline__ unsigned long __attribute__((always_inline, __nodebug__))
+static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__readcr3(void) {
unsigned long __cr3_val;
__asm__ __volatile__ ("mov %%cr3, %0" : "=q"(__cr3_val) : : "memory");
return __cr3_val;
}
-static __inline__ void __attribute__((always_inline, __nodebug__))
+static __inline__ void __attribute__((__always_inline__, __nodebug__))
__writecr3(unsigned int __cr3_val) {
__asm__ ("mov %0, %%cr3" : : "q"(__cr3_val) : "memory");
}
diff --git a/lib/clang/3.6/include/__stddef_max_align_t.h b/lib/clang/3.6/include/__stddef_max_align_t.h
index a06f412..1e10ca9 100644
--- a/lib/clang/3.6/include/__stddef_max_align_t.h
+++ b/lib/clang/3.6/include/__stddef_max_align_t.h
@@ -26,15 +26,18 @@
#ifndef __CLANG_MAX_ALIGN_T_DEFINED
#define __CLANG_MAX_ALIGN_T_DEFINED
-#ifndef _MSC_VER
+#if defined(_MSC_VER)
+typedef double max_align_t;
+#elif defined(__APPLE__)
+typedef long double max_align_t;
+#else
+// Define 'max_align_t' to match the GCC definition.
typedef struct {
long long __clang_max_align_nonce1
__attribute__((__aligned__(__alignof__(long long))));
long double __clang_max_align_nonce2
__attribute__((__aligned__(__alignof__(long double))));
} max_align_t;
-#else
-typedef double max_align_t;
#endif
#endif
diff --git a/lib/clang/3.6/include/altivec.h b/lib/clang/3.6/include/altivec.h
index eded7b2..b8a8869 100644
--- a/lib/clang/3.6/include/altivec.h
+++ b/lib/clang/3.6/include/altivec.h
@@ -2270,7 +2270,7 @@ vec_vlogefp(vector float __a)
#ifdef __LITTLE_ENDIAN__
static vector unsigned char __ATTRS_o_ai
-__attribute__((deprecated("use assignment for unaligned little endian \
+__attribute__((__deprecated__("use assignment for unaligned little endian \
loads/stores")))
vec_lvsl(int __a, const signed char *__b)
{
@@ -2289,7 +2289,7 @@ vec_lvsl(int __a, const signed char *__b)
#ifdef __LITTLE_ENDIAN__
static vector unsigned char __ATTRS_o_ai
-__attribute__((deprecated("use assignment for unaligned little endian \
+__attribute__((__deprecated__("use assignment for unaligned little endian \
loads/stores")))
vec_lvsl(int __a, const unsigned char *__b)
{
@@ -2308,7 +2308,7 @@ vec_lvsl(int __a, const unsigned char *__b)
#ifdef __LITTLE_ENDIAN__
static vector unsigned char __ATTRS_o_ai
-__attribute__((deprecated("use assignment for unaligned little endian \
+__attribute__((__deprecated__("use assignment for unaligned little endian \
loads/stores")))
vec_lvsl(int __a, const short *__b)
{
@@ -2327,7 +2327,7 @@ vec_lvsl(int __a, const short *__b)
#ifdef __LITTLE_ENDIAN__
static vector unsigned char __ATTRS_o_ai
-__attribute__((deprecated("use assignment for unaligned little endian \
+__attribute__((__deprecated__("use assignment for unaligned little endian \
loads/stores")))
vec_lvsl(int __a, const unsigned short *__b)
{
@@ -2346,7 +2346,7 @@ vec_lvsl(int __a, const unsigned short *__b)
#ifdef __LITTLE_ENDIAN__
static vector unsigned char __ATTRS_o_ai
-__attribute__((deprecated("use assignment for unaligned little endian \
+__attribute__((__deprecated__("use assignment for unaligned little endian \
loads/stores")))
vec_lvsl(int __a, const int *__b)
{
@@ -2365,7 +2365,7 @@ vec_lvsl(int __a, const int *__b)
#ifdef __LITTLE_ENDIAN__
static vector unsigned char __ATTRS_o_ai
-__attribute__((deprecated("use assignment for unaligned little endian \
+__attribute__((__deprecated__("use assignment for unaligned little endian \
loads/stores")))
vec_lvsl(int __a, const unsigned int *__b)
{
@@ -2384,7 +2384,7 @@ vec_lvsl(int __a, const unsigned int *__b)
#ifdef __LITTLE_ENDIAN__
static vector unsigned char __ATTRS_o_ai
-__attribute__((deprecated("use assignment for unaligned little endian \
+__attribute__((__deprecated__("use assignment for unaligned little endian \
loads/stores")))
vec_lvsl(int __a, const float *__b)
{
@@ -2405,7 +2405,7 @@ vec_lvsl(int __a, const float *__b)
#ifdef __LITTLE_ENDIAN__
static vector unsigned char __ATTRS_o_ai
-__attribute__((deprecated("use assignment for unaligned little endian \
+__attribute__((__deprecated__("use assignment for unaligned little endian \
loads/stores")))
vec_lvsr(int __a, const signed char *__b)
{
@@ -2424,7 +2424,7 @@ vec_lvsr(int __a, const signed char *__b)
#ifdef __LITTLE_ENDIAN__
static vector unsigned char __ATTRS_o_ai
-__attribute__((deprecated("use assignment for unaligned little endian \
+__attribute__((__deprecated__("use assignment for unaligned little endian \
loads/stores")))
vec_lvsr(int __a, const unsigned char *__b)
{
@@ -2443,7 +2443,7 @@ vec_lvsr(int __a, const unsigned char *__b)
#ifdef __LITTLE_ENDIAN__
static vector unsigned char __ATTRS_o_ai
-__attribute__((deprecated("use assignment for unaligned little endian \
+__attribute__((__deprecated__("use assignment for unaligned little endian \
loads/stores")))
vec_lvsr(int __a, const short *__b)
{
@@ -2462,7 +2462,7 @@ vec_lvsr(int __a, const short *__b)
#ifdef __LITTLE_ENDIAN__
static vector unsigned char __ATTRS_o_ai
-__attribute__((deprecated("use assignment for unaligned little endian \
+__attribute__((__deprecated__("use assignment for unaligned little endian \
loads/stores")))
vec_lvsr(int __a, const unsigned short *__b)
{
@@ -2481,7 +2481,7 @@ vec_lvsr(int __a, const unsigned short *__b)
#ifdef __LITTLE_ENDIAN__
static vector unsigned char __ATTRS_o_ai
-__attribute__((deprecated("use assignment for unaligned little endian \
+__attribute__((__deprecated__("use assignment for unaligned little endian \
loads/stores")))
vec_lvsr(int __a, const int *__b)
{
@@ -2500,7 +2500,7 @@ vec_lvsr(int __a, const int *__b)
#ifdef __LITTLE_ENDIAN__
static vector unsigned char __ATTRS_o_ai
-__attribute__((deprecated("use assignment for unaligned little endian \
+__attribute__((__deprecated__("use assignment for unaligned little endian \
loads/stores")))
vec_lvsr(int __a, const unsigned int *__b)
{
@@ -2519,7 +2519,7 @@ vec_lvsr(int __a, const unsigned int *__b)
#ifdef __LITTLE_ENDIAN__
static vector unsigned char __ATTRS_o_ai
-__attribute__((deprecated("use assignment for unaligned little endian \
+__attribute__((__deprecated__("use assignment for unaligned little endian \
loads/stores")))
vec_lvsr(int __a, const float *__b)
{
@@ -4735,7 +4735,7 @@ vec_vpkswus(vector unsigned int __a, vector unsigned int __b)
// in that the vec_xor can be recognized as a vec_nor (and for P8 and
// later, possibly a vec_nand).
-vector signed char __ATTRS_o_ai
+static vector signed char __ATTRS_o_ai
vec_perm(vector signed char __a, vector signed char __b, vector unsigned char __c)
{
#ifdef __LITTLE_ENDIAN__
@@ -4750,7 +4750,7 @@ vec_perm(vector signed char __a, vector signed char __b, vector unsigned char __
#endif
}
-vector unsigned char __ATTRS_o_ai
+static vector unsigned char __ATTRS_o_ai
vec_perm(vector unsigned char __a,
vector unsigned char __b,
vector unsigned char __c)
@@ -4767,7 +4767,7 @@ vec_perm(vector unsigned char __a,
#endif
}
-vector bool char __ATTRS_o_ai
+static vector bool char __ATTRS_o_ai
vec_perm(vector bool char __a, vector bool char __b, vector unsigned char __c)
{
#ifdef __LITTLE_ENDIAN__
@@ -4782,7 +4782,7 @@ vec_perm(vector bool char __a, vector bool char __b, vector unsigned char __c)
#endif
}
-vector short __ATTRS_o_ai
+static vector short __ATTRS_o_ai
vec_perm(vector short __a, vector short __b, vector unsigned char __c)
{
#ifdef __LITTLE_ENDIAN__
@@ -4797,7 +4797,7 @@ vec_perm(vector short __a, vector short __b, vector unsigned char __c)
#endif
}
-vector unsigned short __ATTRS_o_ai
+static vector unsigned short __ATTRS_o_ai
vec_perm(vector unsigned short __a,
vector unsigned short __b,
vector unsigned char __c)
@@ -4814,7 +4814,7 @@ vec_perm(vector unsigned short __a,
#endif
}
-vector bool short __ATTRS_o_ai
+static vector bool short __ATTRS_o_ai
vec_perm(vector bool short __a, vector bool short __b, vector unsigned char __c)
{
#ifdef __LITTLE_ENDIAN__
@@ -4829,7 +4829,7 @@ vec_perm(vector bool short __a, vector bool short __b, vector unsigned char __c)
#endif
}
-vector pixel __ATTRS_o_ai
+static vector pixel __ATTRS_o_ai
vec_perm(vector pixel __a, vector pixel __b, vector unsigned char __c)
{
#ifdef __LITTLE_ENDIAN__
@@ -4844,7 +4844,7 @@ vec_perm(vector pixel __a, vector pixel __b, vector unsigned char __c)
#endif
}
-vector int __ATTRS_o_ai
+static vector int __ATTRS_o_ai
vec_perm(vector int __a, vector int __b, vector unsigned char __c)
{
#ifdef __LITTLE_ENDIAN__
@@ -4857,7 +4857,7 @@ vec_perm(vector int __a, vector int __b, vector unsigned char __c)
#endif
}
-vector unsigned int __ATTRS_o_ai
+static vector unsigned int __ATTRS_o_ai
vec_perm(vector unsigned int __a, vector unsigned int __b, vector unsigned char __c)
{
#ifdef __LITTLE_ENDIAN__
@@ -4872,7 +4872,7 @@ vec_perm(vector unsigned int __a, vector unsigned int __b, vector unsigned char
#endif
}
-vector bool int __ATTRS_o_ai
+static vector bool int __ATTRS_o_ai
vec_perm(vector bool int __a, vector bool int __b, vector unsigned char __c)
{
#ifdef __LITTLE_ENDIAN__
@@ -4887,7 +4887,7 @@ vec_perm(vector bool int __a, vector bool int __b, vector unsigned char __c)
#endif
}
-vector float __ATTRS_o_ai
+static vector float __ATTRS_o_ai
vec_perm(vector float __a, vector float __b, vector unsigned char __c)
{
#ifdef __LITTLE_ENDIAN__
@@ -4903,7 +4903,7 @@ vec_perm(vector float __a, vector float __b, vector unsigned char __c)
}
#ifdef __VSX__
-vector long long __ATTRS_o_ai
+static vector long long __ATTRS_o_ai
vec_perm(vector long long __a, vector long long __b, vector unsigned char __c)
{
#ifdef __LITTLE_ENDIAN__
@@ -4916,7 +4916,7 @@ vec_perm(vector long long __a, vector long long __b, vector unsigned char __c)
#endif
}
-vector unsigned long long __ATTRS_o_ai
+static vector unsigned long long __ATTRS_o_ai
vec_perm(vector unsigned long long __a, vector unsigned long long __b,
vector unsigned char __c)
{
@@ -4932,7 +4932,7 @@ vec_perm(vector unsigned long long __a, vector unsigned long long __b,
#endif
}
-vector double __ATTRS_o_ai
+static vector double __ATTRS_o_ai
vec_perm(vector double __a, vector double __b, vector unsigned char __c)
{
#ifdef __LITTLE_ENDIAN__
@@ -8664,11 +8664,11 @@ vec_sum2s(vector int __a, vector int __b)
#ifdef __LITTLE_ENDIAN__
vector int __c = (vector signed int)
vec_perm(__b, __b, (vector unsigned char)
- (4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11));
+ (4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11));
__c = __builtin_altivec_vsum2sws(__a, __c);
return (vector signed int)
vec_perm(__c, __c, (vector unsigned char)
- (4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11));
+ (4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11));
#else
return __builtin_altivec_vsum2sws(__a, __b);
#endif
@@ -8682,11 +8682,11 @@ vec_vsum2sws(vector int __a, vector int __b)
#ifdef __LITTLE_ENDIAN__
vector int __c = (vector signed int)
vec_perm(__b, __b, (vector unsigned char)
- (4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11));
+ (4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11));
__c = __builtin_altivec_vsum2sws(__a, __c);
return (vector signed int)
vec_perm(__c, __c, (vector unsigned char)
- (4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11));
+ (4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11));
#else
return __builtin_altivec_vsum2sws(__a, __b);
#endif
diff --git a/lib/clang/3.6/include/arm_acle.h b/lib/clang/3.6/include/arm_acle.h
index 814df2c..6c56f3b 100644
--- a/lib/clang/3.6/include/arm_acle.h
+++ b/lib/clang/3.6/include/arm_acle.h
@@ -45,23 +45,23 @@ extern "C" {
/* 8.4 Hints */
#if !defined(_MSC_VER)
-static __inline__ void __attribute__((always_inline, nodebug)) __wfi(void) {
+static __inline__ void __attribute__((__always_inline__, __nodebug__)) __wfi(void) {
__builtin_arm_wfi();
}
-static __inline__ void __attribute__((always_inline, nodebug)) __wfe(void) {
+static __inline__ void __attribute__((__always_inline__, __nodebug__)) __wfe(void) {
__builtin_arm_wfe();
}
-static __inline__ void __attribute__((always_inline, nodebug)) __sev(void) {
+static __inline__ void __attribute__((__always_inline__, __nodebug__)) __sev(void) {
__builtin_arm_sev();
}
-static __inline__ void __attribute__((always_inline, nodebug)) __sevl(void) {
+static __inline__ void __attribute__((__always_inline__, __nodebug__)) __sevl(void) {
__builtin_arm_sevl();
}
-static __inline__ void __attribute__((always_inline, nodebug)) __yield(void) {
+static __inline__ void __attribute__((__always_inline__, __nodebug__)) __yield(void) {
__builtin_arm_yield();
}
#endif
@@ -71,7 +71,7 @@ static __inline__ void __attribute__((always_inline, nodebug)) __yield(void) {
#endif
/* 8.5 Swap */
-static __inline__ uint32_t __attribute__((always_inline, nodebug))
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__swp(uint32_t x, volatile uint32_t *p) {
uint32_t v;
do v = __builtin_arm_ldrex(p); while (__builtin_arm_strex(x, p));
@@ -102,28 +102,28 @@ static __inline__ uint32_t __attribute__((always_inline, nodebug))
#endif
/* 8.7 NOP */
-static __inline__ void __attribute__((always_inline, nodebug)) __nop(void) {
+static __inline__ void __attribute__((__always_inline__, __nodebug__)) __nop(void) {
__builtin_arm_nop();
}
/* 9 DATA-PROCESSING INTRINSICS */
/* 9.2 Miscellaneous data-processing intrinsics */
/* ROR */
-static __inline__ uint32_t __attribute__((always_inline, nodebug))
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__ror(uint32_t x, uint32_t y) {
y %= 32;
if (y == 0) return x;
return (x >> y) | (x << (32 - y));
}
-static __inline__ uint64_t __attribute__((always_inline, nodebug))
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__rorll(uint64_t x, uint32_t y) {
y %= 64;
if (y == 0) return x;
return (x >> y) | (x << (64 - y));
}
-static __inline__ unsigned long __attribute__((always_inline, nodebug))
+static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__rorl(unsigned long x, uint32_t y) {
#if __SIZEOF_LONG__ == 4
return __ror(x, y);
@@ -134,28 +134,28 @@ static __inline__ unsigned long __attribute__((always_inline, nodebug))
/* CLZ */
-static __inline__ uint32_t __attribute__((always_inline, nodebug))
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__clz(uint32_t t) {
return __builtin_clz(t);
}
-static __inline__ unsigned long __attribute__((always_inline, nodebug))
+static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__clzl(unsigned long t) {
return __builtin_clzl(t);
}
-static __inline__ uint64_t __attribute__((always_inline, nodebug))
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__clzll(uint64_t t) {
return __builtin_clzll(t);
}
/* REV */
-static __inline__ uint32_t __attribute__((always_inline, nodebug))
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__rev(uint32_t t) {
return __builtin_bswap32(t);
}
-static __inline__ unsigned long __attribute__((always_inline, nodebug))
+static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__revl(unsigned long t) {
#if __SIZEOF_LONG__ == 4
return __builtin_bswap32(t);
@@ -164,40 +164,40 @@ static __inline__ unsigned long __attribute__((always_inline, nodebug))
#endif
}
-static __inline__ uint64_t __attribute__((always_inline, nodebug))
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__revll(uint64_t t) {
return __builtin_bswap64(t);
}
/* REV16 */
-static __inline__ uint32_t __attribute__((always_inline, nodebug))
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__rev16(uint32_t t) {
return __ror(__rev(t), 16);
}
-static __inline__ unsigned long __attribute__((always_inline, nodebug))
+static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__rev16l(unsigned long t) {
return __rorl(__revl(t), sizeof(long) / 2);
}
-static __inline__ uint64_t __attribute__((always_inline, nodebug))
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__rev16ll(uint64_t t) {
return __rorll(__revll(t), 32);
}
/* REVSH */
-static __inline__ int16_t __attribute__((always_inline, nodebug))
+static __inline__ int16_t __attribute__((__always_inline__, __nodebug__))
__revsh(int16_t t) {
return __builtin_bswap16(t);
}
/* RBIT */
-static __inline__ uint32_t __attribute__((always_inline, nodebug))
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__rbit(uint32_t t) {
return __builtin_arm_rbit(t);
}
-static __inline__ uint64_t __attribute__((always_inline, nodebug))
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
__rbitll(uint64_t t) {
#if __ARM_32BIT_STATE
return (((uint64_t) __builtin_arm_rbit(t)) << 32) |
@@ -207,7 +207,7 @@ static __inline__ uint64_t __attribute__((always_inline, nodebug))
#endif
}
-static __inline__ unsigned long __attribute__((always_inline, nodebug))
+static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
__rbitl(unsigned long t) {
#if __SIZEOF_LONG__ == 4
return __rbit(t);
@@ -230,17 +230,17 @@ static __inline__ unsigned long __attribute__((always_inline, nodebug))
/* 9.4.2 Saturating addition and subtraction intrinsics */
#if __ARM_32BIT_STATE
-static __inline__ int32_t __attribute__((always_inline, nodebug))
+static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__qadd(int32_t t, int32_t v) {
return __builtin_arm_qadd(t, v);
}
-static __inline__ int32_t __attribute__((always_inline, nodebug))
+static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__qsub(int32_t t, int32_t v) {
return __builtin_arm_qsub(t, v);
}
-static __inline__ int32_t __attribute__((always_inline, nodebug))
+static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
__qdbl(int32_t t) {
return __builtin_arm_qadd(t, t);
}
@@ -248,42 +248,42 @@ __qdbl(int32_t t) {
/* 9.7 CRC32 intrinsics */
#if __ARM_FEATURE_CRC32
-static __inline__ uint32_t __attribute__((always_inline, nodebug))
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32b(uint32_t a, uint8_t b) {
return __builtin_arm_crc32b(a, b);
}
-static __inline__ uint32_t __attribute__((always_inline, nodebug))
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32h(uint32_t a, uint16_t b) {
return __builtin_arm_crc32h(a, b);
}
-static __inline__ uint32_t __attribute__((always_inline, nodebug))
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32w(uint32_t a, uint32_t b) {
return __builtin_arm_crc32w(a, b);
}
-static __inline__ uint32_t __attribute__((always_inline, nodebug))
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32d(uint32_t a, uint64_t b) {
return __builtin_arm_crc32d(a, b);
}
-static __inline__ uint32_t __attribute__((always_inline, nodebug))
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32cb(uint32_t a, uint8_t b) {
return __builtin_arm_crc32cb(a, b);
}
-static __inline__ uint32_t __attribute__((always_inline, nodebug))
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32ch(uint32_t a, uint16_t b) {
return __builtin_arm_crc32ch(a, b);
}
-static __inline__ uint32_t __attribute__((always_inline, nodebug))
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32cw(uint32_t a, uint32_t b) {
return __builtin_arm_crc32cw(a, b);
}
-static __inline__ uint32_t __attribute__((always_inline, nodebug))
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
__crc32cd(uint32_t a, uint64_t b) {
return __builtin_arm_crc32cd(a, b);
}
diff --git a/lib/clang/3.6/include/avx512bwintrin.h b/lib/clang/3.6/include/avx512bwintrin.h
index bc4d4ac..acc3da2 100644
--- a/lib/clang/3.6/include/avx512bwintrin.h
+++ b/lib/clang/3.6/include/avx512bwintrin.h
@@ -21,13 +21,16 @@
*
*===-----------------------------------------------------------------------===
*/
+#ifndef __IMMINTRIN_H
+#error "Never use <avx512bwintrin.h> directly; include <immintrin.h> instead."
+#endif
#ifndef __AVX512BWINTRIN_H
#define __AVX512BWINTRIN_H
typedef unsigned int __mmask32;
typedef unsigned long long __mmask64;
-typedef char __v64qi __attribute__ ((vector_size (64)));
+typedef char __v64qi __attribute__ ((__vector_size__ (64)));
typedef short __v32hi __attribute__ ((__vector_size__ (64)));
@@ -45,6 +48,18 @@ _mm512_mask_cmpeq_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
__u);
}
+static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpeq_epu8_mask(__m512i __a, __m512i __b) {
+ return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 0,
+ (__mmask64)-1);
+}
+
+static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpeq_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
+ return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 0,
+ __u);
+}
+
static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
_mm512_cmpeq_epi16_mask(__m512i __a, __m512i __b) {
return (__mmask32)__builtin_ia32_pcmpeqw512_mask((__v32hi)__a, (__v32hi)__b,
@@ -57,4 +72,296 @@ _mm512_mask_cmpeq_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
__u);
}
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpeq_epu16_mask(__m512i __a, __m512i __b) {
+ return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 0,
+ (__mmask32)-1);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpeq_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
+ return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 0,
+ __u);
+}
+
+static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpge_epi8_mask(__m512i __a, __m512i __b) {
+ return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 5,
+ (__mmask64)-1);
+}
+
+static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpge_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
+ return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 5,
+ __u);
+}
+
+static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpge_epu8_mask(__m512i __a, __m512i __b) {
+ return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 5,
+ (__mmask64)-1);
+}
+
+static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpge_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
+ return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 5,
+ __u);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpge_epi16_mask(__m512i __a, __m512i __b) {
+ return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 5,
+ (__mmask32)-1);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpge_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
+ return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 5,
+ __u);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpge_epu16_mask(__m512i __a, __m512i __b) {
+ return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 5,
+ (__mmask32)-1);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpge_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
+ return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 5,
+ __u);
+}
+
+static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpgt_epi8_mask(__m512i __a, __m512i __b) {
+ return (__mmask64)__builtin_ia32_pcmpgtb512_mask((__v64qi)__a, (__v64qi)__b,
+ (__mmask64)-1);
+}
+
+static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpgt_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
+ return (__mmask64)__builtin_ia32_pcmpgtb512_mask((__v64qi)__a, (__v64qi)__b,
+ __u);
+}
+
+static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpgt_epu8_mask(__m512i __a, __m512i __b) {
+ return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 6,
+ (__mmask64)-1);
+}
+
+static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpgt_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
+ return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 6,
+ __u);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpgt_epi16_mask(__m512i __a, __m512i __b) {
+ return (__mmask32)__builtin_ia32_pcmpgtw512_mask((__v32hi)__a, (__v32hi)__b,
+ (__mmask32)-1);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpgt_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
+ return (__mmask32)__builtin_ia32_pcmpgtw512_mask((__v32hi)__a, (__v32hi)__b,
+ __u);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpgt_epu16_mask(__m512i __a, __m512i __b) {
+ return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 6,
+ (__mmask32)-1);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpgt_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
+ return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 6,
+ __u);
+}
+
+static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmple_epi8_mask(__m512i __a, __m512i __b) {
+ return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 2,
+ (__mmask64)-1);
+}
+
+static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmple_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
+ return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 2,
+ __u);
+}
+
+static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmple_epu8_mask(__m512i __a, __m512i __b) {
+ return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 2,
+ (__mmask64)-1);
+}
+
+static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmple_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
+ return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 2,
+ __u);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmple_epi16_mask(__m512i __a, __m512i __b) {
+ return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 2,
+ (__mmask32)-1);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmple_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
+ return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 2,
+ __u);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmple_epu16_mask(__m512i __a, __m512i __b) {
+ return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 2,
+ (__mmask32)-1);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmple_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
+ return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 2,
+ __u);
+}
+
+static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmplt_epi8_mask(__m512i __a, __m512i __b) {
+ return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 1,
+ (__mmask64)-1);
+}
+
+static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmplt_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
+ return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 1,
+ __u);
+}
+
+static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmplt_epu8_mask(__m512i __a, __m512i __b) {
+ return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 1,
+ (__mmask64)-1);
+}
+
+static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmplt_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
+ return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 1,
+ __u);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmplt_epi16_mask(__m512i __a, __m512i __b) {
+ return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 1,
+ (__mmask32)-1);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmplt_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
+ return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 1,
+ __u);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmplt_epu16_mask(__m512i __a, __m512i __b) {
+ return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 1,
+ (__mmask32)-1);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmplt_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
+ return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 1,
+ __u);
+}
+
+static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpneq_epi8_mask(__m512i __a, __m512i __b) {
+ return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 4,
+ (__mmask64)-1);
+}
+
+static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpneq_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
+ return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 4,
+ __u);
+}
+
+static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpneq_epu8_mask(__m512i __a, __m512i __b) {
+ return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 4,
+ (__mmask64)-1);
+}
+
+static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpneq_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
+ return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 4,
+ __u);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpneq_epi16_mask(__m512i __a, __m512i __b) {
+ return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 4,
+ (__mmask32)-1);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpneq_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
+ return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 4,
+ __u);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpneq_epu16_mask(__m512i __a, __m512i __b) {
+ return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 4,
+ (__mmask32)-1);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpneq_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
+ return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 4,
+ __u);
+}
+
+#define _mm512_cmp_epi8_mask(a, b, p) __extension__ ({ \
+ (__mmask16)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
+ (__v64qi)(__m512i)(b), \
+ (p), (__mmask64)-1); })
+
+#define _mm512_mask_cmp_epi8_mask(m, a, b, p) __extension__ ({ \
+ (__mmask16)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
+ (__v64qi)(__m512i)(b), \
+ (p), (__mmask64)(m)); })
+
+#define _mm512_cmp_epu8_mask(a, b, p) __extension__ ({ \
+ (__mmask16)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
+ (__v64qi)(__m512i)(b), \
+ (p), (__mmask64)-1); })
+
+#define _mm512_mask_cmp_epu8_mask(m, a, b, p) __extension__ ({ \
+ (__mmask16)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
+ (__v64qi)(__m512i)(b), \
+ (p), (__mmask64)(m)); })
+
+#define _mm512_cmp_epi16_mask(a, b, p) __extension__ ({ \
+ (__mmask16)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
+ (__v32hi)(__m512i)(b), \
+ (p), (__mmask32)-1); })
+
+#define _mm512_mask_cmp_epi16_mask(m, a, b, p) __extension__ ({ \
+ (__mmask16)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
+ (__v32hi)(__m512i)(b), \
+ (p), (__mmask32)(m)); })
+
+#define _mm512_cmp_epu16_mask(a, b, p) __extension__ ({ \
+ (__mmask16)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
+ (__v32hi)(__m512i)(b), \
+ (p), (__mmask32)-1); })
+
+#define _mm512_mask_cmp_epu16_mask(m, a, b, p) __extension__ ({ \
+ (__mmask16)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
+ (__v32hi)(__m512i)(b), \
+ (p), (__mmask32)(m)); })
+
#endif
diff --git a/lib/clang/3.6/include/avx512erintrin.h b/lib/clang/3.6/include/avx512erintrin.h
index 1a5ea15..57c61aa 100644
--- a/lib/clang/3.6/include/avx512erintrin.h
+++ b/lib/clang/3.6/include/avx512erintrin.h
@@ -28,85 +28,259 @@
#define __AVX512ERINTRIN_H
+// exp2a23
+#define _mm512_exp2a23_round_pd(A, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)-1, (R)); })
+
+#define _mm512_mask_exp2a23_round_pd(S, M, A, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(S), \
+ (__mmask8)(M), (R)); })
+
+#define _mm512_maskz_exp2a23_round_pd(M, A, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(M), (R)); })
+
+#define _mm512_exp2a23_pd(A) \
+ _mm512_exp2a23_round_pd((A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_mask_exp2a23_pd(S, M, A) \
+ _mm512_mask_exp2a23_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_maskz_exp2a23_pd(M, A) \
+ _mm512_maskz_exp2a23_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_exp2a23_round_ps(A, R) __extension__ ({ \
+ (__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask8)-1, (R)); })
+
+#define _mm512_mask_exp2a23_round_ps(S, M, A, R) __extension__ ({ \
+ (__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(S), \
+ (__mmask8)(M), (R)); })
+
+#define _mm512_maskz_exp2a23_round_ps(M, A, R) __extension__ ({ \
+ (__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask8)(M), (R)); })
+
+#define _mm512_exp2a23_ps(A) \
+ _mm512_exp2a23_round_ps((A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_mask_exp2a23_ps(S, M, A) \
+ _mm512_mask_exp2a23_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_maskz_exp2a23_ps(M, A) \
+ _mm512_maskz_exp2a23_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
+
// rsqrt28
-static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
-_mm512_rsqrt28_round_pd (__m512d __A, int __R)
-{
- return (__m512d)__builtin_ia32_rsqrt28pd_mask ((__v8df)__A,
- (__v8df)_mm512_setzero_pd(),
- (__mmask8)-1,
- __R);
-}
-static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
-_mm512_rsqrt28_round_ps(__m512 __A, int __R)
-{
- return (__m512)__builtin_ia32_rsqrt28ps_mask ((__v16sf)__A,
- (__v16sf)_mm512_setzero_ps(),
- (__mmask16)-1,
- __R);
-}
-
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
-_mm_rsqrt28_round_ss(__m128 __A, __m128 __B, int __R)
-{
- return (__m128) __builtin_ia32_rsqrt28ss_mask ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf)
- _mm_setzero_ps (),
- (__mmask8) -1,
- __R);
-}
-
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
-_mm_rsqrt28_round_sd (__m128d __A, __m128d __B, int __R)
-{
- return (__m128d) __builtin_ia32_rsqrt28sd_mask ((__v2df) __A,
- (__v2df) __B,
- (__v2df)
- _mm_setzero_pd (),
- (__mmask8) -1,
- __R);
-}
+#define _mm512_rsqrt28_round_pd(A, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)-1, (R)); })
+
+#define _mm512_mask_rsqrt28_round_pd(S, M, A, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(S), \
+ (__mmask8)(M), (R)); })
+
+#define _mm512_maskz_rsqrt28_round_pd(M, A, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(M), (R)); })
+
+#define _mm512_rsqrt28_pd(A) \
+ _mm512_rsqrt28_round_pd((A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_mask_rsqrt28_pd(S, M, A) \
+ _mm512_mask_rsqrt28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_maskz_rsqrt28_pd(M, A) \
+ _mm512_maskz_rsqrt28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_rsqrt28_round_ps(A, R) __extension__ ({ \
+ (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)-1, (R)); })
+
+#define _mm512_mask_rsqrt28_round_ps(S, M, A, R) __extension__ ({ \
+ (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(S), \
+ (__mmask16)(M), (R)); })
+
+#define _mm512_maskz_rsqrt28_round_ps(M, A, R) __extension__ ({ \
+ (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(M), (R)); })
+
+#define _mm512_rsqrt28_ps(A) \
+ _mm512_rsqrt28_round_ps((A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_mask_rsqrt28_ps(S, M, A) \
+ _mm512_mask_rsqrt28_round_ps((S), (M), A, _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_maskz_rsqrt28_ps(M, A) \
+ _mm512_maskz_rsqrt28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_rsqrt28_round_ss(A, B, R) __extension__ ({ \
+ (__m128)__builtin_ia32_rsqrt28ss_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)-1, (R)); })
+
+#define _mm_mask_rsqrt28_round_ss(S, M, A, B, R) __extension__ ({ \
+ (__m128)__builtin_ia32_rsqrt28ss_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(S), \
+ (__mmask8)(M), (R)); })
+
+#define _mm_maskz_rsqrt28_round_ss(M, A, B, R) __extension__ ({ \
+ (__m128)__builtin_ia32_rsqrt28ss_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(M), (R)); })
+
+#define _mm_rsqrt28_ss(A, B) \
+ _mm_rsqrt28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_mask_rsqrt28_ss(S, M, A, B) \
+ _mm_mask_rsqrt28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_maskz_rsqrt28_ss(M, A, B) \
+ _mm_maskz_rsqrt28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
+#define _mm_rsqrt28_round_sd(A, B, R) __extension__ ({ \
+ (__m128d)__builtin_ia32_rsqrt28sd_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1, (R)); })
+
+#define _mm_mask_rsqrt28_round_sd(S, M, A, B, R) __extension__ ({ \
+ (__m128d)__builtin_ia32_rsqrt28sd_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(S), \
+ (__mmask8)(M), (R)); })
+
+#define _mm_maskz_rsqrt28_round_sd(M, A, B, R) __extension__ ({ \
+ (__m128d)__builtin_ia32_rsqrt28sd_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(M), (R)); })
+
+#define _mm_rsqrt28_sd(A, B) \
+ _mm_rsqrt28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_mask_rsqrt28_sd(S, M, A, B) \
+ _mm_mask_rsqrt28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_maskz_rsqrt28_sd(M, A, B) \
+ _mm_mask_rsqrt28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
// rcp28
-static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
-_mm512_rcp28_round_pd (__m512d __A, int __R)
-{
- return (__m512d)__builtin_ia32_rcp28pd_mask ((__v8df)__A,
- (__v8df)_mm512_setzero_pd(),
- (__mmask8)-1,
- __R);
-}
-
-static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
-_mm512_rcp28_round_ps (__m512 __A, int __R)
-{
- return (__m512)__builtin_ia32_rcp28ps_mask ((__v16sf)__A,
- (__v16sf)_mm512_setzero_ps (),
- (__mmask16)-1,
- __R);
-}
-
-static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
-_mm_rcp28_round_ss (__m128 __A, __m128 __B, int __R)
-{
- return (__m128) __builtin_ia32_rcp28ss_mask ((__v4sf) __A,
- (__v4sf) __B,
- (__v4sf)
- _mm_setzero_ps (),
- (__mmask8) -1,
- __R);
-}
-static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
-_mm_rcp28_round_sd (__m128d __A, __m128d __B, int __R)
-{
- return (__m128d) __builtin_ia32_rcp28sd_mask ((__v2df) __A,
- (__v2df) __B,
- (__v2df)
- _mm_setzero_pd (),
- (__mmask8) -1,
- __R);
-}
+#define _mm512_rcp28_round_pd(A, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)-1, (R)); })
+
+#define _mm512_mask_rcp28_round_pd(S, M, A, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(S), \
+ (__mmask8)(M), (R)); })
+
+#define _mm512_maskz_rcp28_round_pd(M, A, R) __extension__ ({ \
+ (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(M), (R)); })
+
+#define _mm512_rcp28_pd(A) \
+ _mm512_rcp28_round_pd((A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_mask_rcp28_pd(S, M, A) \
+ _mm512_mask_rcp28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_maskz_rcp28_pd(M, A) \
+ _mm512_maskz_rcp28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_rcp28_round_ps(A, R) __extension__ ({ \
+ (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)-1, (R)); })
+
+#define _mm512_mask_rcp28_round_ps(S, M, A, R) __extension__ ({ \
+ (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(S), \
+ (__mmask16)(M), (R)); })
+
+#define _mm512_maskz_rcp28_round_ps(M, A, R) __extension__ ({ \
+ (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(M), (R)); })
+
+#define _mm512_rcp28_ps(A) \
+ _mm512_rcp28_round_ps((A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_mask_rcp28_ps(S, M, A) \
+ _mm512_mask_rcp28_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_maskz_rcp28_ps(M, A) \
+ _mm512_maskz_rcp28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_rcp28_round_ss(A, B, R) __extension__ ({ \
+ (__m128)__builtin_ia32_rcp28ss_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)-1, (R)); })
+
+#define _mm_mask_rcp28_round_ss(S, M, A, B, R) __extension__ ({ \
+ (__m128)__builtin_ia32_rcp28ss_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(S), \
+ (__mmask8)(M), (R)); })
+
+#define _mm_maskz_rcp28_round_ss(M, A, B, R) __extension__ ({ \
+ (__m128)__builtin_ia32_rcp28ss_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(M), (R)); })
+
+#define _mm_rcp28_ss(A, B) \
+ _mm_rcp28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_mask_rcp28_ss(S, M, A, B) \
+ _mm_mask_rcp28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_maskz_rcp28_ss(M, A, B) \
+ _mm_maskz_rcp28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_rcp28_round_sd(A, B, R) __extension__ ({ \
+ (__m128d)__builtin_ia32_rcp28sd_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1, (R)); })
+
+#define _mm_mask_rcp28_round_sd(S, M, A, B, R) __extension__ ({ \
+ (__m128d)__builtin_ia32_rcp28sd_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(S), \
+ (__mmask8)(M), (R)); })
+
+#define _mm_maskz_rcp28_round_sd(M, A, B, R) __extension__ ({ \
+ (__m128d)__builtin_ia32_rcp28sd_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(M), (R)); })
+
+#define _mm_rcp28_sd(A, B) \
+ _mm_rcp28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_mask_rcp28_sd(S, M, A, B) \
+ _mm_mask_rcp28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_maskz_rcp28_sd(M, A, B) \
+ _mm_maskz_rcp28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
#endif // __AVX512ERINTRIN_H
diff --git a/lib/clang/3.6/include/avx512fintrin.h b/lib/clang/3.6/include/avx512fintrin.h
index 9591dcf..c6d46cb 100644
--- a/lib/clang/3.6/include/avx512fintrin.h
+++ b/lib/clang/3.6/include/avx512fintrin.h
@@ -492,20 +492,13 @@ _mm512_abs_epi32(__m512i __A)
(__mmask16) -1);
}
-static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
-_mm512_roundscale_ps(__m512 __A, const int __imm)
-{
- return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
- (__v16sf) __A, -1,
- _MM_FROUND_CUR_DIRECTION);
-}
-static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
-_mm512_roundscale_pd(__m512d __A, const int __imm)
-{
- return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
- (__v8df) __A, -1,
- _MM_FROUND_CUR_DIRECTION);
-}
+#define _mm512_roundscale_ps(A, B) __extension__ ({ \
+ (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(A), (B), (__v16sf)(A), \
+ -1, _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm512_roundscale_pd(A, B) __extension__ ({ \
+ (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(A), (B), (__v8df)(A), \
+ -1, _MM_FROUND_CUR_DIRECTION); })
static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
@@ -613,25 +606,35 @@ _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
(__mmask16) -1);
}
-static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
-_mm512_valign_epi64(__m512i __A, __m512i __B, const int __I)
-{
- return (__m512i) __builtin_ia32_alignq512_mask((__v8di)__A,
- (__v8di)__B,
- __I,
- (__v8di)_mm512_setzero_si512(),
- (__mmask8) -1);
-}
-
-static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
-_mm512_valign_epi32(__m512i __A, __m512i __B, const int __I)
-{
- return (__m512i)__builtin_ia32_alignd512_mask((__v16si)__A,
- (__v16si)__B,
- __I,
- (__v16si)_mm512_setzero_si512(),
- (__mmask16) -1);
-}
+#define _mm512_alignr_epi64(A, B, I) __extension__ ({ \
+ (__m512i)__builtin_ia32_alignq512_mask((__v8di)(__m512i)(A), \
+ (__v8di)(__m512i)(B), \
+ (I), (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)-1); })
+
+#define _mm512_alignr_epi32(A, B, I) __extension__ ({ \
+ (__m512i)__builtin_ia32_alignd512_mask((__v16si)(__m512i)(A), \
+ (__v16si)(__m512i)(B), \
+ (I), (__v16si)_mm512_setzero_si512(), \
+ (__mmask16)-1); })
+
+/* Vector Extract */
+
+#define _mm512_extractf64x4_pd(A, I) __extension__ ({ \
+ __m512d __A = (A); \
+ (__m256d) \
+ __builtin_ia32_extractf64x4_mask((__v8df)__A, \
+ (I), \
+ (__v4df)_mm256_setzero_si256(), \
+ (__mmask8) -1); })
+
+#define _mm512_extractf32x4_ps(A, I) __extension__ ({ \
+ __m512 __A = (A); \
+ (__m128) \
+ __builtin_ia32_extractf32x4_mask((__v16sf)__A, \
+ (I), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8) -1); })
/* Vector Blend */
@@ -669,22 +672,37 @@ _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
/* Compare */
-static __inline __mmask16 __attribute__ ((__always_inline__, __nodebug__))
-_mm512_cmp_ps_mask(__m512 a, __m512 b, const int p)
-{
- return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) a,
- (__v16sf) b, p, (__mmask16) -1,
- _MM_FROUND_CUR_DIRECTION);
-}
+#define _mm512_cmp_round_ps_mask(A, B, P, R) __extension__ ({ \
+ (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (P), (__mmask16)-1, (R)); })
-static __inline __mmask8 __attribute__ ((__always_inline__, __nodebug__))
-_mm512_cmp_pd_mask(__m512d __X, __m512d __Y, const int __P)
-{
- return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
- (__v8df) __Y, __P,
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
-}
+#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) __extension__ ({ \
+ (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (P), (__mmask16)(U), (R)); })
+
+#define _mm512_cmp_ps_mask(A, B, P) \
+ _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
+ _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_cmp_round_pd_mask(A, B, P, R) __extension__ ({ \
+ (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (P), (__mmask8)-1, (R)); })
+
+#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) __extension__ ({ \
+ (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (P), (__mmask8)(U), (R)); })
+
+#define _mm512_cmp_pd_mask(A, B, P) \
+ _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
+ _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)
/* Conversion */
@@ -698,25 +716,15 @@ _mm512_cvttps_epu32(__m512 __A)
_MM_FROUND_CUR_DIRECTION);
}
-static __inline __m512 __attribute__ (( __always_inline__, __nodebug__))
-_mm512_cvt_roundepi32_ps(__m512i __A, const int __R)
-{
- return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
- (__v16sf)
- _mm512_setzero_ps (),
- (__mmask16) -1,
- __R);
-}
+#define _mm512_cvt_roundepi32_ps(A, R) __extension__ ({ \
+ (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)-1, (R)); })
-static __inline __m512 __attribute__ (( __always_inline__, __nodebug__))
-_mm512_cvt_roundepu32_ps(__m512i __A, const int __R)
-{
- return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
- (__v16sf)
- _mm512_setzero_ps (),
- (__mmask16) -1,
- __R);
-}
+#define _mm512_cvt_roundepu32_ps(A, R) __extension__ ({ \
+ (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)-1, (R)); })
static __inline __m512d __attribute__ (( __always_inline__, __nodebug__))
_mm512_cvtepi32_pd(__m256i __A)
@@ -735,25 +743,16 @@ _mm512_cvtepu32_pd(__m256i __A)
_mm512_setzero_pd (),
(__mmask8) -1);
}
-static __inline __m256 __attribute__ (( __always_inline__, __nodebug__))
-_mm512_cvt_roundpd_ps(__m512d __A, const int __R)
-{
- return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
- (__v8sf)
- _mm256_setzero_ps (),
- (__mmask8) -1,
- __R);
-}
-static __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
-_mm512_cvtps_ph(__m512 __A, const int __I)
-{
- return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
- __I,
- (__v16hi)
- _mm256_setzero_si256 (),
- -1);
-}
+#define _mm512_cvt_roundpd_ps(A, R) __extension__ ({ \
+ (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(A), \
+ (__v8sf)_mm256_setzero_ps(), \
+ (__mmask8)-1, (R)); })
+
+#define _mm512_cvtps_ph(A, I) __extension__ ({ \
+ (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(A), (I), \
+ (__v16hi)_mm256_setzero_si256(), \
+ -1); })
static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
_mm512_cvtph_ps(__m256i __A)
@@ -783,60 +782,67 @@ _mm512_cvttpd_epi32(__m512d a)
_MM_FROUND_CUR_DIRECTION);
}
-static __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
-_mm512_cvtt_roundpd_epi32(__m512d __A, const int __R)
-{
- return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
- (__v8si)
- _mm256_setzero_si256 (),
- (__mmask8) -1,
- __R);
-}
-static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
-_mm512_cvtt_roundps_epi32(__m512 __A, const int __R)
-{
- return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) -1,
- __R);
-}
+#define _mm512_cvtt_roundpd_epi32(A, R) __extension__ ({ \
+ (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(A), \
+ (__v8si)_mm256_setzero_si256(), \
+ (__mmask8)-1, (R)); })
-static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
-_mm512_cvt_roundps_epi32(__m512 __A, const int __R)
+#define _mm512_cvtt_roundps_epi32(A, R) __extension__ ({ \
+ (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(A), \
+ (__v16si)_mm512_setzero_si512(), \
+ (__mmask16)-1, (R)); })
+
+#define _mm512_cvt_roundps_epi32(A, R) __extension__ ({ \
+ (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(A), \
+ (__v16si)_mm512_setzero_si512(), \
+ (__mmask16)-1, (R)); })
+
+#define _mm512_cvt_roundpd_epi32(A, R) __extension__ ({ \
+ (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(A), \
+ (__v8si)_mm256_setzero_si256(), \
+ (__mmask8)-1, (R)); })
+
+#define _mm512_cvt_roundps_epu32(A, R) __extension__ ({ \
+ (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(A), \
+ (__v16si)_mm512_setzero_si512(), \
+ (__mmask16)-1, (R)); })
+
+#define _mm512_cvt_roundpd_epu32(A, R) __extension__ ({ \
+ (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(A), \
+ (__v8si)_mm256_setzero_si256(), \
+ (__mmask8) -1, (R)); })
+
+/* Unpack and Interleave */
+static __inline __m512d __attribute__((__always_inline__, __nodebug__))
+_mm512_unpackhi_pd(__m512d __a, __m512d __b)
{
- return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) -1,
- __R);
+ return __builtin_shufflevector(__a, __b, 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
}
-static __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
-_mm512_cvt_roundpd_epi32(__m512d __A, const int __R)
+
+static __inline __m512d __attribute__((__always_inline__, __nodebug__))
+_mm512_unpacklo_pd(__m512d __a, __m512d __b)
{
- return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
- (__v8si)
- _mm256_setzero_si256 (),
- (__mmask8) -1,
- __R);
+ return __builtin_shufflevector(__a, __b, 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
}
-static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
-_mm512_cvt_roundps_epu32(__m512 __A, const int __R)
+
+static __inline __m512 __attribute__((__always_inline__, __nodebug__))
+_mm512_unpackhi_ps(__m512 __a, __m512 __b)
{
- return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) -1,
- __R);
+ return __builtin_shufflevector(__a, __b,
+ 2, 18, 3, 19,
+ 2+4, 18+4, 3+4, 19+4,
+ 2+8, 18+8, 3+8, 19+8,
+ 2+12, 18+12, 3+12, 19+12);
}
-static __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
-_mm512_cvt_roundpd_epu32(__m512d __A, const int __R)
+
+static __inline __m512 __attribute__((__always_inline__, __nodebug__))
+_mm512_unpacklo_ps(__m512 __a, __m512 __b)
{
- return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
- (__v8si)
- _mm256_setzero_si256 (),
- (__mmask8) -1,
- __R);
+ return __builtin_shufflevector(__a, __b,
+ 0, 16, 1, 17,
+ 0+4, 16+4, 1+4, 17+4,
+ 0+8, 16+8, 1+8, 17+8,
+ 0+12, 16+12, 1+12, 17+12);
}
/* Bit Test */
@@ -895,12 +901,30 @@ _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
(__mmask8) __U);
}
+static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
+_mm512_maskz_load_ps(__mmask16 __U, void const *__P)
+{
+ return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U);
+}
+
+static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
+_mm512_maskz_load_pd(__mmask8 __U, void const *__P)
+{
+ return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U);
+}
+
static __inline __m512d __attribute__((__always_inline__, __nodebug__))
_mm512_loadu_pd(double const *__p)
{
struct __loadu_pd {
__m512d __v;
- } __attribute__((packed, may_alias));
+ } __attribute__((__packed__, __may_alias__));
return ((struct __loadu_pd*)__p)->__v;
}
@@ -909,10 +933,28 @@ _mm512_loadu_ps(float const *__p)
{
struct __loadu_ps {
__m512 __v;
- } __attribute__((packed, may_alias));
+ } __attribute__((__packed__, __may_alias__));
return ((struct __loadu_ps*)__p)->__v;
}
+static __inline __m512 __attribute__((__always_inline__, __nodebug__))
+_mm512_load_ps(double const *__p)
+{
+ return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__p,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1);
+}
+
+static __inline __m512d __attribute__((__always_inline__, __nodebug__))
+_mm512_load_pd(float const *__p)
+{
+ return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__p,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1);
+}
+
/* SIMD store ops */
static __inline void __attribute__ ((__always_inline__, __nodebug__))
@@ -955,9 +997,9 @@ _mm512_storeu_ps(void *__P, __m512 __A)
}
static __inline void __attribute__ ((__always_inline__, __nodebug__))
-_mm512_store_ps(void *__P, __m512 __A)
+_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
{
- *(__m512*)__P = __A;
+ __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
}
static __inline void __attribute__ ((__always_inline__, __nodebug__))
@@ -966,6 +1008,19 @@ _mm512_store_pd(void *__P, __m512d __A)
*(__m512d*)__P = __A;
}
+static __inline void __attribute__ ((__always_inline__, __nodebug__))
+_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
+{
+ __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
+ (__mmask16) __U);
+}
+
+static __inline void __attribute__ ((__always_inline__, __nodebug__))
+_mm512_store_ps(void *__P, __m512 __A)
+{
+ *(__m512*)__P = __A;
+}
+
/* Mask ops */
static __inline __mmask16 __attribute__ ((__always_inline__, __nodebug__))
@@ -988,6 +1043,18 @@ _mm512_mask_cmpeq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
__u);
}
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpeq_epu32_mask(__m512i __a, __m512i __b) {
+ return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
+ (__mmask16)-1);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpeq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
+ return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
+ __u);
+}
+
static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
_mm512_mask_cmpeq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
@@ -1000,4 +1067,303 @@ _mm512_cmpeq_epi64_mask(__m512i __a, __m512i __b) {
(__mmask8)-1);
}
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpeq_epu64_mask(__m512i __a, __m512i __b) {
+ return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpeq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
+ return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
+ __u);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpge_epi32_mask(__m512i __a, __m512i __b) {
+ return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
+ (__mmask16)-1);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpge_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
+ return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
+ __u);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpge_epu32_mask(__m512i __a, __m512i __b) {
+ return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
+ (__mmask16)-1);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpge_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
+ return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpge_epi64_mask(__m512i __a, __m512i __b) {
+ return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpge_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
+ return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpge_epu64_mask(__m512i __a, __m512i __b) {
+ return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpge_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
+ return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
+ __u);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpgt_epi32_mask(__m512i __a, __m512i __b) {
+ return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
+ (__mmask16)-1);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpgt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
+ return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
+ __u);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpgt_epu32_mask(__m512i __a, __m512i __b) {
+ return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
+ (__mmask16)-1);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpgt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
+ return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpgt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
+ return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpgt_epi64_mask(__m512i __a, __m512i __b) {
+ return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpgt_epu64_mask(__m512i __a, __m512i __b) {
+ return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpgt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
+ return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
+ __u);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmple_epi32_mask(__m512i __a, __m512i __b) {
+ return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
+ (__mmask16)-1);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmple_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
+ return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
+ __u);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmple_epu32_mask(__m512i __a, __m512i __b) {
+ return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
+ (__mmask16)-1);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmple_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
+ return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmple_epi64_mask(__m512i __a, __m512i __b) {
+ return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmple_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
+ return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmple_epu64_mask(__m512i __a, __m512i __b) {
+ return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmple_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
+ return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
+ __u);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmplt_epi32_mask(__m512i __a, __m512i __b) {
+ return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
+ (__mmask16)-1);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmplt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
+ return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
+ __u);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmplt_epu32_mask(__m512i __a, __m512i __b) {
+ return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
+ (__mmask16)-1);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmplt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
+ return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmplt_epi64_mask(__m512i __a, __m512i __b) {
+ return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmplt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
+ return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmplt_epu64_mask(__m512i __a, __m512i __b) {
+ return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmplt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
+ return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
+ __u);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpneq_epi32_mask(__m512i __a, __m512i __b) {
+ return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
+ (__mmask16)-1);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpneq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
+ return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
+ __u);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpneq_epu32_mask(__m512i __a, __m512i __b) {
+ return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
+ (__mmask16)-1);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpneq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
+ return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpneq_epi64_mask(__m512i __a, __m512i __b) {
+ return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpneq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
+ return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpneq_epu64_mask(__m512i __a, __m512i __b) {
+ return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpneq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
+ return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
+ __u);
+}
+
+#define _mm512_cmp_epi32_mask(a, b, p) __extension__ ({ \
+ __m512i __a = (a); \
+ __m512i __b = (b); \
+ (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, (p), \
+ (__mmask16)-1); })
+
+#define _mm512_cmp_epu32_mask(a, b, p) __extension__ ({ \
+ __m512i __a = (a); \
+ __m512i __b = (b); \
+ (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, (p), \
+ (__mmask16)-1); })
+
+#define _mm512_cmp_epi64_mask(a, b, p) __extension__ ({ \
+ __m512i __a = (a); \
+ __m512i __b = (b); \
+ (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, (p), \
+ (__mmask8)-1); })
+
+#define _mm512_cmp_epu64_mask(a, b, p) __extension__ ({ \
+ __m512i __a = (a); \
+ __m512i __b = (b); \
+ (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, (p), \
+ (__mmask8)-1); })
+
+#define _mm512_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \
+ __m512i __a = (a); \
+ __m512i __b = (b); \
+ (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, (p), \
+ (__mmask16)(m)); })
+
+#define _mm512_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \
+ __m512i __a = (a); \
+ __m512i __b = (b); \
+ (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, (p), \
+ (__mmask16)(m)); })
+
+#define _mm512_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \
+ __m512i __a = (a); \
+ __m512i __b = (b); \
+ (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, (p), \
+ (__mmask8)(m)); })
+
+#define _mm512_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \
+ __m512i __a = (a); \
+ __m512i __b = (b); \
+ (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, (p), \
+ (__mmask8)(m)); })
#endif // __AVX512FINTRIN_H
diff --git a/lib/clang/3.6/include/avx512vlbwintrin.h b/lib/clang/3.6/include/avx512vlbwintrin.h
index 11333f8..0746f43 100644
--- a/lib/clang/3.6/include/avx512vlbwintrin.h
+++ b/lib/clang/3.6/include/avx512vlbwintrin.h
@@ -42,6 +42,17 @@ _mm_mask_cmpeq_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
__u);
}
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpeq_epu8_mask(__m128i __a, __m128i __b) {
+ return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 0,
+ (__mmask16)-1);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpeq_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
+ return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 0,
+ __u);
+}
static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
_mm256_cmpeq_epi8_mask(__m256i __a, __m256i __b) {
@@ -55,6 +66,18 @@ _mm256_mask_cmpeq_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
__u);
}
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpeq_epu8_mask(__m256i __a, __m256i __b) {
+ return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 0,
+ (__mmask32)-1);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpeq_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
+ return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 0,
+ __u);
+}
+
static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
_mm_cmpeq_epi16_mask(__m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_pcmpeqw128_mask((__v8hi)__a, (__v8hi)__b,
@@ -67,6 +90,17 @@ _mm_mask_cmpeq_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
__u);
}
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpeq_epu16_mask(__m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 0,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpeq_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 0,
+ __u);
+}
static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
_mm256_cmpeq_epi16_mask(__m256i __a, __m256i __b) {
@@ -80,4 +114,576 @@ _mm256_mask_cmpeq_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
__u);
}
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpeq_epu16_mask(__m256i __a, __m256i __b) {
+ return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 0,
+ (__mmask16)-1);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpeq_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
+ return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 0,
+ __u);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpge_epi8_mask(__m128i __a, __m128i __b) {
+ return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 5,
+ (__mmask16)-1);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpge_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
+ return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 5,
+ __u);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpge_epu8_mask(__m128i __a, __m128i __b) {
+ return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 5,
+ (__mmask16)-1);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpge_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
+ return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 5,
+ __u);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpge_epi8_mask(__m256i __a, __m256i __b) {
+ return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 5,
+ (__mmask32)-1);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpge_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
+ return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 5,
+ __u);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpge_epu8_mask(__m256i __a, __m256i __b) {
+ return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 5,
+ (__mmask32)-1);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpge_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
+ return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 5,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpge_epi16_mask(__m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 5,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpge_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 5,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpge_epu16_mask(__m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 5,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpge_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 5,
+ __u);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpge_epi16_mask(__m256i __a, __m256i __b) {
+ return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 5,
+ (__mmask16)-1);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpge_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
+ return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 5,
+ __u);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpge_epu16_mask(__m256i __a, __m256i __b) {
+ return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 5,
+ (__mmask16)-1);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpge_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
+ return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 5,
+ __u);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpgt_epi8_mask(__m128i __a, __m128i __b) {
+ return (__mmask16)__builtin_ia32_pcmpgtb128_mask((__v16qi)__a, (__v16qi)__b,
+ (__mmask16)-1);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpgt_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
+ return (__mmask16)__builtin_ia32_pcmpgtb128_mask((__v16qi)__a, (__v16qi)__b,
+ __u);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpgt_epu8_mask(__m128i __a, __m128i __b) {
+ return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 6,
+ (__mmask16)-1);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpgt_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
+ return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 6,
+ __u);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpgt_epi8_mask(__m256i __a, __m256i __b) {
+ return (__mmask32)__builtin_ia32_pcmpgtb256_mask((__v32qi)__a, (__v32qi)__b,
+ (__mmask32)-1);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpgt_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
+ return (__mmask32)__builtin_ia32_pcmpgtb256_mask((__v32qi)__a, (__v32qi)__b,
+ __u);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpgt_epu8_mask(__m256i __a, __m256i __b) {
+ return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 6,
+ (__mmask32)-1);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpgt_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
+ return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 6,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpgt_epi16_mask(__m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_pcmpgtw128_mask((__v8hi)__a, (__v8hi)__b,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpgt_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_pcmpgtw128_mask((__v8hi)__a, (__v8hi)__b,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpgt_epu16_mask(__m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 6,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpgt_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 6,
+ __u);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpgt_epi16_mask(__m256i __a, __m256i __b) {
+ return (__mmask16)__builtin_ia32_pcmpgtw256_mask((__v16hi)__a, (__v16hi)__b,
+ (__mmask16)-1);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpgt_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
+ return (__mmask16)__builtin_ia32_pcmpgtw256_mask((__v16hi)__a, (__v16hi)__b,
+ __u);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpgt_epu16_mask(__m256i __a, __m256i __b) {
+ return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 6,
+ (__mmask16)-1);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpgt_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
+ return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 6,
+ __u);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm_cmple_epi8_mask(__m128i __a, __m128i __b) {
+ return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 2,
+ (__mmask16)-1);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmple_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
+ return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 2,
+ __u);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm_cmple_epu8_mask(__m128i __a, __m128i __b) {
+ return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 2,
+ (__mmask16)-1);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmple_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
+ return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 2,
+ __u);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmple_epi8_mask(__m256i __a, __m256i __b) {
+ return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 2,
+ (__mmask32)-1);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmple_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
+ return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 2,
+ __u);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmple_epu8_mask(__m256i __a, __m256i __b) {
+ return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 2,
+ (__mmask32)-1);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmple_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
+ return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 2,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmple_epi16_mask(__m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 2,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmple_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 2,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmple_epu16_mask(__m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 2,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmple_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 2,
+ __u);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmple_epi16_mask(__m256i __a, __m256i __b) {
+ return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 2,
+ (__mmask16)-1);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmple_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
+ return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 2,
+ __u);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmple_epu16_mask(__m256i __a, __m256i __b) {
+ return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 2,
+ (__mmask16)-1);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmple_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
+ return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 2,
+ __u);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm_cmplt_epi8_mask(__m128i __a, __m128i __b) {
+ return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 1,
+ (__mmask16)-1);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmplt_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
+ return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 1,
+ __u);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm_cmplt_epu8_mask(__m128i __a, __m128i __b) {
+ return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 1,
+ (__mmask16)-1);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmplt_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
+ return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 1,
+ __u);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmplt_epi8_mask(__m256i __a, __m256i __b) {
+ return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 1,
+ (__mmask32)-1);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmplt_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
+ return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 1,
+ __u);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmplt_epu8_mask(__m256i __a, __m256i __b) {
+ return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 1,
+ (__mmask32)-1);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmplt_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
+ return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 1,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmplt_epi16_mask(__m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 1,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmplt_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 1,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmplt_epu16_mask(__m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 1,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmplt_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 1,
+ __u);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmplt_epi16_mask(__m256i __a, __m256i __b) {
+ return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 1,
+ (__mmask16)-1);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmplt_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
+ return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 1,
+ __u);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmplt_epu16_mask(__m256i __a, __m256i __b) {
+ return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 1,
+ (__mmask16)-1);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmplt_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
+ return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 1,
+ __u);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpneq_epi8_mask(__m128i __a, __m128i __b) {
+ return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 4,
+ (__mmask16)-1);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpneq_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
+ return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 4,
+ __u);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpneq_epu8_mask(__m128i __a, __m128i __b) {
+ return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 4,
+ (__mmask16)-1);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpneq_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
+ return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 4,
+ __u);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpneq_epi8_mask(__m256i __a, __m256i __b) {
+ return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 4,
+ (__mmask32)-1);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpneq_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
+ return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 4,
+ __u);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpneq_epu8_mask(__m256i __a, __m256i __b) {
+ return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 4,
+ (__mmask32)-1);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpneq_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
+ return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 4,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpneq_epi16_mask(__m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 4,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpneq_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 4,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpneq_epu16_mask(__m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 4,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpneq_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 4,
+ __u);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpneq_epi16_mask(__m256i __a, __m256i __b) {
+ return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 4,
+ (__mmask16)-1);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpneq_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
+ return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 4,
+ __u);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpneq_epu16_mask(__m256i __a, __m256i __b) {
+ return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 4,
+ (__mmask16)-1);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpneq_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
+ return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 4,
+ __u);
+}
+
+#define _mm_cmp_epi8_mask(a, b, p) __extension__ ({ \
+ (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \
+ (__v16qi)(__m128i)(b), \
+ (p), (__mmask16)-1); })
+
+#define _mm_mask_cmp_epi8_mask(m, a, b, p) __extension__ ({ \
+ (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \
+ (__v16qi)(__m128i)(b), \
+ (p), (__mmask16)(m)); })
+
+#define _mm_cmp_epu8_mask(a, b, p) __extension__ ({ \
+ (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \
+ (__v16qi)(__m128i)(b), \
+ (p), (__mmask16)-1); })
+
+#define _mm_mask_cmp_epu8_mask(m, a, b, p) __extension__ ({ \
+ (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \
+ (__v16qi)(__m128i)(b), \
+ (p), (__mmask16)(m)); })
+
+#define _mm256_cmp_epi8_mask(a, b, p) __extension__ ({ \
+ (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \
+ (__v32qi)(__m256i)(b), \
+ (p), (__mmask32)-1); })
+
+#define _mm256_mask_cmp_epi8_mask(m, a, b, p) __extension__ ({ \
+ (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \
+ (__v32qi)(__m256i)(b), \
+ (p), (__mmask32)(m)); })
+
+#define _mm256_cmp_epu8_mask(a, b, p) __extension__ ({ \
+ (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \
+ (__v32qi)(__m256i)(b), \
+ (p), (__mmask32)-1); })
+
+#define _mm256_mask_cmp_epu8_mask(m, a, b, p) __extension__ ({ \
+ (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \
+ (__v32qi)(__m256i)(b), \
+ (p), (__mmask32)(m)); })
+
+#define _mm_cmp_epi16_mask(a, b, p) __extension__ ({ \
+ (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \
+ (__v8hi)(__m128i)(b), \
+ (p), (__mmask8)-1); })
+
+#define _mm_mask_cmp_epi16_mask(m, a, b, p) __extension__ ({ \
+ (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \
+ (__v8hi)(__m128i)(b), \
+ (p), (__mmask8)(m)); })
+
+#define _mm_cmp_epu16_mask(a, b, p) __extension__ ({ \
+ (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \
+ (__v8hi)(__m128i)(b), \
+ (p), (__mmask8)-1); })
+
+#define _mm_mask_cmp_epu16_mask(m, a, b, p) __extension__ ({ \
+ (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \
+ (__v8hi)(__m128i)(b), \
+ (p), (__mmask8)(m)); })
+
+#define _mm256_cmp_epi16_mask(a, b, p) __extension__ ({ \
+ (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \
+ (__v16hi)(__m256i)(b), \
+ (p), (__mmask16)-1); })
+
+#define _mm256_mask_cmp_epi16_mask(m, a, b, p) __extension__ ({ \
+ (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \
+ (__v16hi)(__m256i)(b), \
+ (p), (__mmask16)(m)); })
+
+#define _mm256_cmp_epu16_mask(a, b, p) __extension__ ({ \
+ (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \
+ (__v16hi)(__m256i)(b), \
+ (p), (__mmask16)-1); })
+
+#define _mm256_mask_cmp_epu16_mask(m, a, b, p) __extension__ ({ \
+ (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \
+ (__v16hi)(__m256i)(b), \
+ (p), (__mmask16)(m)); })
+
#endif /* __AVX512VLBWINTRIN_H */
diff --git a/lib/clang/3.6/include/avx512vlintrin.h b/lib/clang/3.6/include/avx512vlintrin.h
index 8a374b1..b460992 100644
--- a/lib/clang/3.6/include/avx512vlintrin.h
+++ b/lib/clang/3.6/include/avx512vlintrin.h
@@ -42,6 +42,17 @@ _mm_mask_cmpeq_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
__u);
}
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpeq_epu32_mask(__m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 0,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpeq_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 0,
+ __u);
+}
static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
_mm256_cmpeq_epi32_mask(__m256i __a, __m256i __b) {
@@ -56,6 +67,18 @@ _mm256_mask_cmpeq_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
}
static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpeq_epu32_mask(__m256i __a, __m256i __b) {
+ return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 0,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpeq_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
+ return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 0,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
_mm_cmpeq_epi64_mask(__m128i __a, __m128i __b) {
return (__mmask8)__builtin_ia32_pcmpeqq128_mask((__v2di)__a, (__v2di)__b,
(__mmask8)-1);
@@ -67,6 +90,17 @@ _mm_mask_cmpeq_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
__u);
}
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpeq_epu64_mask(__m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 0,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpeq_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 0,
+ __u);
+}
static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
_mm256_cmpeq_epi64_mask(__m256i __a, __m256i __b) {
@@ -80,4 +114,580 @@ _mm256_mask_cmpeq_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
__u);
}
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpeq_epu64_mask(__m256i __a, __m256i __b) {
+ return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 0,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpeq_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
+ return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 0,
+ __u);
+}
+
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpge_epi32_mask(__m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 5,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpge_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 5,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpge_epu32_mask(__m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 5,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpge_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 5,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpge_epi32_mask(__m256i __a, __m256i __b) {
+ return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 5,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpge_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
+ return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 5,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpge_epu32_mask(__m256i __a, __m256i __b) {
+ return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 5,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpge_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
+ return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 5,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpge_epi64_mask(__m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 5,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpge_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 5,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpge_epu64_mask(__m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 5,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpge_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 5,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpge_epi64_mask(__m256i __a, __m256i __b) {
+ return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 5,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpge_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
+ return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 5,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpge_epu64_mask(__m256i __a, __m256i __b) {
+ return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 5,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpge_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
+ return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 5,
+ __u);
+}
+
+
+
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpgt_epi32_mask(__m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_pcmpgtd128_mask((__v4si)__a, (__v4si)__b,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpgt_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_pcmpgtd128_mask((__v4si)__a, (__v4si)__b,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpgt_epu32_mask(__m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 6,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpgt_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 6,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpgt_epi32_mask(__m256i __a, __m256i __b) {
+ return (__mmask8)__builtin_ia32_pcmpgtd256_mask((__v8si)__a, (__v8si)__b,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpgt_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
+ return (__mmask8)__builtin_ia32_pcmpgtd256_mask((__v8si)__a, (__v8si)__b,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpgt_epu32_mask(__m256i __a, __m256i __b) {
+ return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 6,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpgt_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
+ return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 6,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpgt_epi64_mask(__m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_pcmpgtq128_mask((__v2di)__a, (__v2di)__b,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpgt_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_pcmpgtq128_mask((__v2di)__a, (__v2di)__b,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpgt_epu64_mask(__m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 6,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpgt_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 6,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpgt_epi64_mask(__m256i __a, __m256i __b) {
+ return (__mmask8)__builtin_ia32_pcmpgtq256_mask((__v4di)__a, (__v4di)__b,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpgt_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
+ return (__mmask8)__builtin_ia32_pcmpgtq256_mask((__v4di)__a, (__v4di)__b,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpgt_epu64_mask(__m256i __a, __m256i __b) {
+ return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 6,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpgt_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
+ return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 6,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmple_epi32_mask(__m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 2,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmple_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 2,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmple_epu32_mask(__m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 2,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmple_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 2,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmple_epi32_mask(__m256i __a, __m256i __b) {
+ return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 2,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmple_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
+ return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 2,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmple_epu32_mask(__m256i __a, __m256i __b) {
+ return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 2,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmple_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
+ return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 2,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmple_epi64_mask(__m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 2,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmple_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 2,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmple_epu64_mask(__m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 2,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmple_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 2,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmple_epi64_mask(__m256i __a, __m256i __b) {
+ return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 2,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmple_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
+ return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 2,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmple_epu64_mask(__m256i __a, __m256i __b) {
+ return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 2,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmple_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
+ return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 2,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmplt_epi32_mask(__m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 1,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmplt_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 1,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmplt_epu32_mask(__m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 1,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmplt_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 1,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmplt_epi32_mask(__m256i __a, __m256i __b) {
+ return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 1,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmplt_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
+ return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 1,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmplt_epu32_mask(__m256i __a, __m256i __b) {
+ return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 1,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmplt_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
+ return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 1,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmplt_epi64_mask(__m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 1,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmplt_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 1,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmplt_epu64_mask(__m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 1,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmplt_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 1,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmplt_epi64_mask(__m256i __a, __m256i __b) {
+ return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 1,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmplt_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
+ return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 1,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmplt_epu64_mask(__m256i __a, __m256i __b) {
+ return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 1,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmplt_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
+ return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 1,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpneq_epi32_mask(__m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 4,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpneq_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 4,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpneq_epu32_mask(__m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 4,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpneq_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 4,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpneq_epi32_mask(__m256i __a, __m256i __b) {
+ return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 4,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpneq_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
+ return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 4,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpneq_epu32_mask(__m256i __a, __m256i __b) {
+ return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 4,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpneq_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
+ return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 4,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpneq_epi64_mask(__m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 4,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpneq_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 4,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpneq_epu64_mask(__m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 4,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpneq_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
+ return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 4,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpneq_epi64_mask(__m256i __a, __m256i __b) {
+ return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 4,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpneq_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
+ return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 4,
+ __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpneq_epu64_mask(__m256i __a, __m256i __b) {
+ return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 4,
+ (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpneq_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
+ return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 4,
+ __u);
+}
+
+#define _mm_cmp_epi32_mask(a, b, p) __extension__ ({ \
+ (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
+ (__v4si)(__m128i)(b), \
+ (p), (__mmask8)-1); })
+
+#define _mm_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \
+ (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
+ (__v4si)(__m128i)(b), \
+ (p), (__mmask8)(m)); })
+
+#define _mm_cmp_epu32_mask(a, b, p) __extension__ ({ \
+ (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
+ (__v4si)(__m128i)(b), \
+ (p), (__mmask8)-1); })
+
+#define _mm_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \
+ (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
+ (__v4si)(__m128i)(b), \
+ (p), (__mmask8)(m)); })
+
+#define _mm256_cmp_epi32_mask(a, b, p) __extension__ ({ \
+ (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
+ (__v8si)(__m256i)(b), \
+ (p), (__mmask8)-1); })
+
+#define _mm256_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \
+ (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
+ (__v8si)(__m256i)(b), \
+ (p), (__mmask8)(m)); })
+
+#define _mm256_cmp_epu32_mask(a, b, p) __extension__ ({ \
+ (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
+ (__v8si)(__m256i)(b), \
+ (p), (__mmask8)-1); })
+
+#define _mm256_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \
+ (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
+ (__v8si)(__m256i)(b), \
+ (p), (__mmask8)(m)); })
+
+#define _mm_cmp_epi64_mask(a, b, p) __extension__ ({ \
+ (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
+ (__v2di)(__m128i)(b), \
+ (p), (__mmask8)-1); })
+
+#define _mm_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \
+ (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
+ (__v2di)(__m128i)(b), \
+ (p), (__mmask8)(m)); })
+
+#define _mm_cmp_epu64_mask(a, b, p) __extension__ ({ \
+ (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
+ (__v2di)(__m128i)(b), \
+ (p), (__mmask8)-1); })
+
+#define _mm_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \
+ (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
+ (__v2di)(__m128i)(b), \
+ (p), (__mmask8)(m)); })
+
+#define _mm256_cmp_epi64_mask(a, b, p) __extension__ ({ \
+ (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
+ (__v4di)(__m256i)(b), \
+ (p), (__mmask8)-1); })
+
+#define _mm256_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \
+ (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
+ (__v4di)(__m256i)(b), \
+ (p), (__mmask8)(m)); })
+
+#define _mm256_cmp_epu64_mask(a, b, p) __extension__ ({ \
+ (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
+ (__v4di)(__m256i)(b), \
+ (p), (__mmask8)-1); })
+
+#define _mm256_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \
+ (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
+ (__v4di)(__m256i)(b), \
+ (p), (__mmask8)(m)); })
+
#endif /* __AVX512VLINTRIN_H */
diff --git a/lib/clang/3.6/include/avxintrin.h b/lib/clang/3.6/include/avxintrin.h
index 4e1044a..d7c7f46 100644
--- a/lib/clang/3.6/include/avxintrin.h
+++ b/lib/clang/3.6/include/avxintrin.h
@@ -257,8 +257,7 @@ _mm_permutevar_ps(__m128 __a, __m128i __c)
static __inline __m256 __attribute__((__always_inline__, __nodebug__))
_mm256_permutevar_ps(__m256 __a, __m256i __c)
{
- return (__m256)__builtin_ia32_vpermilvarps256((__v8sf)__a,
- (__v8si)__c);
+ return (__m256)__builtin_ia32_vpermilvarps256((__v8sf)__a, (__v8si)__c);
}
#define _mm_permute_pd(A, C) __extension__ ({ \
@@ -444,21 +443,21 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
(__m128i)__builtin_ia32_vextractf128_si256((__v8si)__A, (O)); })
static __inline int __attribute__((__always_inline__, __nodebug__))
-_mm256_extract_epi32(__m256i __a, int const __imm)
+_mm256_extract_epi32(__m256i __a, const int __imm)
{
__v8si __b = (__v8si)__a;
return __b[__imm & 7];
}
static __inline int __attribute__((__always_inline__, __nodebug__))
-_mm256_extract_epi16(__m256i __a, int const __imm)
+_mm256_extract_epi16(__m256i __a, const int __imm)
{
__v16hi __b = (__v16hi)__a;
return __b[__imm & 15];
}
static __inline int __attribute__((__always_inline__, __nodebug__))
-_mm256_extract_epi8(__m256i __a, int const __imm)
+_mm256_extract_epi8(__m256i __a, const int __imm)
{
__v32qi __b = (__v32qi)__a;
return __b[__imm & 31];
@@ -515,7 +514,7 @@ _mm256_insert_epi8(__m256i __a, int __b, int const __imm)
#ifdef __x86_64__
static __inline __m256i __attribute__((__always_inline__, __nodebug__))
-_mm256_insert_epi64(__m256i __a, int __b, int const __imm)
+_mm256_insert_epi64(__m256i __a, long long __b, int const __imm)
{
__v4di __c = (__v4di)__a;
__c[__imm & 3] = __b;
@@ -785,7 +784,7 @@ _mm256_loadu_pd(double const *__p)
{
struct __loadu_pd {
__m256d __v;
- } __attribute__((packed, may_alias));
+ } __attribute__((__packed__, __may_alias__));
return ((struct __loadu_pd*)__p)->__v;
}
@@ -794,7 +793,7 @@ _mm256_loadu_ps(float const *__p)
{
struct __loadu_ps {
__m256 __v;
- } __attribute__((packed, may_alias));
+ } __attribute__((__packed__, __may_alias__));
return ((struct __loadu_ps*)__p)->__v;
}
@@ -809,7 +808,7 @@ _mm256_loadu_si256(__m256i const *__p)
{
struct __loadu_si256 {
__m256i __v;
- } __attribute__((packed, may_alias));
+ } __attribute__((__packed__, __may_alias__));
return ((struct __loadu_si256*)__p)->__v;
}
@@ -935,23 +934,23 @@ _mm256_set_pd(double __a, double __b, double __c, double __d)
static __inline __m256 __attribute__((__always_inline__, __nodebug__))
_mm256_set_ps(float __a, float __b, float __c, float __d,
- float __e, float __f, float __g, float __h)
+ float __e, float __f, float __g, float __h)
{
return (__m256){ __h, __g, __f, __e, __d, __c, __b, __a };
}
static __inline __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_set_epi32(int __i0, int __i1, int __i2, int __i3,
- int __i4, int __i5, int __i6, int __i7)
+ int __i4, int __i5, int __i6, int __i7)
{
return (__m256i)(__v8si){ __i7, __i6, __i5, __i4, __i3, __i2, __i1, __i0 };
}
static __inline __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_set_epi16(short __w15, short __w14, short __w13, short __w12,
- short __w11, short __w10, short __w09, short __w08,
- short __w07, short __w06, short __w05, short __w04,
- short __w03, short __w02, short __w01, short __w00)
+ short __w11, short __w10, short __w09, short __w08,
+ short __w07, short __w06, short __w05, short __w04,
+ short __w03, short __w02, short __w01, short __w00)
{
return (__m256i)(__v16hi){ __w00, __w01, __w02, __w03, __w04, __w05, __w06,
__w07, __w08, __w09, __w10, __w11, __w12, __w13, __w14, __w15 };
@@ -959,13 +958,13 @@ _mm256_set_epi16(short __w15, short __w14, short __w13, short __w12,
static __inline __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_set_epi8(char __b31, char __b30, char __b29, char __b28,
- char __b27, char __b26, char __b25, char __b24,
- char __b23, char __b22, char __b21, char __b20,
- char __b19, char __b18, char __b17, char __b16,
- char __b15, char __b14, char __b13, char __b12,
- char __b11, char __b10, char __b09, char __b08,
- char __b07, char __b06, char __b05, char __b04,
- char __b03, char __b02, char __b01, char __b00)
+ char __b27, char __b26, char __b25, char __b24,
+ char __b23, char __b22, char __b21, char __b20,
+ char __b19, char __b18, char __b17, char __b16,
+ char __b15, char __b14, char __b13, char __b12,
+ char __b11, char __b10, char __b09, char __b08,
+ char __b07, char __b06, char __b05, char __b04,
+ char __b03, char __b02, char __b01, char __b00)
{
return (__m256i)(__v32qi){
__b00, __b01, __b02, __b03, __b04, __b05, __b06, __b07,
@@ -990,23 +989,23 @@ _mm256_setr_pd(double __a, double __b, double __c, double __d)
static __inline __m256 __attribute__((__always_inline__, __nodebug__))
_mm256_setr_ps(float __a, float __b, float __c, float __d,
- float __e, float __f, float __g, float __h)
+ float __e, float __f, float __g, float __h)
{
return (__m256){ __a, __b, __c, __d, __e, __f, __g, __h };
}
static __inline __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_setr_epi32(int __i0, int __i1, int __i2, int __i3,
- int __i4, int __i5, int __i6, int __i7)
+ int __i4, int __i5, int __i6, int __i7)
{
return (__m256i)(__v8si){ __i0, __i1, __i2, __i3, __i4, __i5, __i6, __i7 };
}
static __inline __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_setr_epi16(short __w15, short __w14, short __w13, short __w12,
- short __w11, short __w10, short __w09, short __w08,
- short __w07, short __w06, short __w05, short __w04,
- short __w03, short __w02, short __w01, short __w00)
+ short __w11, short __w10, short __w09, short __w08,
+ short __w07, short __w06, short __w05, short __w04,
+ short __w03, short __w02, short __w01, short __w00)
{
return (__m256i)(__v16hi){ __w15, __w14, __w13, __w12, __w11, __w10, __w09,
__w08, __w07, __w06, __w05, __w04, __w03, __w02, __w01, __w00 };
@@ -1014,19 +1013,19 @@ _mm256_setr_epi16(short __w15, short __w14, short __w13, short __w12,
static __inline __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_setr_epi8(char __b31, char __b30, char __b29, char __b28,
- char __b27, char __b26, char __b25, char __b24,
- char __b23, char __b22, char __b21, char __b20,
- char __b19, char __b18, char __b17, char __b16,
- char __b15, char __b14, char __b13, char __b12,
- char __b11, char __b10, char __b09, char __b08,
- char __b07, char __b06, char __b05, char __b04,
- char __b03, char __b02, char __b01, char __b00)
+ char __b27, char __b26, char __b25, char __b24,
+ char __b23, char __b22, char __b21, char __b20,
+ char __b19, char __b18, char __b17, char __b16,
+ char __b15, char __b14, char __b13, char __b12,
+ char __b11, char __b10, char __b09, char __b08,
+ char __b07, char __b06, char __b05, char __b04,
+ char __b03, char __b02, char __b01, char __b00)
{
return (__m256i)(__v32qi){
__b31, __b30, __b29, __b28, __b27, __b26, __b25, __b24,
- __b23, __b22, __b21, __b20, __b19, __b18, __b17, __b16,
- __b15, __b14, __b13, __b12, __b11, __b10, __b09, __b08,
- __b07, __b06, __b05, __b04, __b03, __b02, __b01, __b00 };
+ __b23, __b22, __b21, __b20, __b19, __b18, __b17, __b16,
+ __b15, __b14, __b13, __b12, __b11, __b10, __b09, __b08,
+ __b07, __b06, __b05, __b04, __b03, __b02, __b01, __b00 };
}
static __inline __m256i __attribute__((__always_inline__, __nodebug__))
@@ -1195,7 +1194,7 @@ _mm256_loadu2_m128i(__m128i const *__addr_hi, __m128i const *__addr_lo)
{
struct __loadu_si128 {
__m128i __v;
- } __attribute__((packed, may_alias));
+ } __attribute__((__packed__, __may_alias__));
__m256i __v256 = _mm256_castsi128_si256(
((struct __loadu_si128*)__addr_lo)->__v);
return _mm256_insertf128_si256(__v256,
diff --git a/lib/clang/3.6/include/emmintrin.h b/lib/clang/3.6/include/emmintrin.h
index b3f8569..c764d68 100644
--- a/lib/clang/3.6/include/emmintrin.h
+++ b/lib/clang/3.6/include/emmintrin.h
@@ -155,148 +155,148 @@ _mm_xor_pd(__m128d __a, __m128d __b)
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpeq_pd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmppd(__a, __b, 0);
+ return (__m128d)__builtin_ia32_cmpeqpd(__a, __b);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmplt_pd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmppd(__a, __b, 1);
+ return (__m128d)__builtin_ia32_cmpltpd(__a, __b);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmple_pd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmppd(__a, __b, 2);
+ return (__m128d)__builtin_ia32_cmplepd(__a, __b);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpgt_pd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmppd(__b, __a, 1);
+ return (__m128d)__builtin_ia32_cmpltpd(__b, __a);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpge_pd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmppd(__b, __a, 2);
+ return (__m128d)__builtin_ia32_cmplepd(__b, __a);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpord_pd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmppd(__a, __b, 7);
+ return (__m128d)__builtin_ia32_cmpordpd(__a, __b);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpunord_pd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmppd(__a, __b, 3);
+ return (__m128d)__builtin_ia32_cmpunordpd(__a, __b);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpneq_pd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmppd(__a, __b, 4);
+ return (__m128d)__builtin_ia32_cmpneqpd(__a, __b);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpnlt_pd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmppd(__a, __b, 5);
+ return (__m128d)__builtin_ia32_cmpnltpd(__a, __b);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpnle_pd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmppd(__a, __b, 6);
+ return (__m128d)__builtin_ia32_cmpnlepd(__a, __b);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpngt_pd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmppd(__b, __a, 5);
+ return (__m128d)__builtin_ia32_cmpnltpd(__b, __a);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpnge_pd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmppd(__b, __a, 6);
+ return (__m128d)__builtin_ia32_cmpnlepd(__b, __a);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpeq_sd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmpsd(__a, __b, 0);
+ return (__m128d)__builtin_ia32_cmpeqsd(__a, __b);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmplt_sd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmpsd(__a, __b, 1);
+ return (__m128d)__builtin_ia32_cmpltsd(__a, __b);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmple_sd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmpsd(__a, __b, 2);
+ return (__m128d)__builtin_ia32_cmplesd(__a, __b);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpgt_sd(__m128d __a, __m128d __b)
{
- __m128d __c = __builtin_ia32_cmpsd(__b, __a, 1);
+ __m128d __c = __builtin_ia32_cmpltsd(__b, __a);
return (__m128d) { __c[0], __a[1] };
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpge_sd(__m128d __a, __m128d __b)
{
- __m128d __c = __builtin_ia32_cmpsd(__b, __a, 2);
+ __m128d __c = __builtin_ia32_cmplesd(__b, __a);
return (__m128d) { __c[0], __a[1] };
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpord_sd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmpsd(__a, __b, 7);
+ return (__m128d)__builtin_ia32_cmpordsd(__a, __b);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpunord_sd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmpsd(__a, __b, 3);
+ return (__m128d)__builtin_ia32_cmpunordsd(__a, __b);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpneq_sd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmpsd(__a, __b, 4);
+ return (__m128d)__builtin_ia32_cmpneqsd(__a, __b);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpnlt_sd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmpsd(__a, __b, 5);
+ return (__m128d)__builtin_ia32_cmpnltsd(__a, __b);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpnle_sd(__m128d __a, __m128d __b)
{
- return (__m128d)__builtin_ia32_cmpsd(__a, __b, 6);
+ return (__m128d)__builtin_ia32_cmpnlesd(__a, __b);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpngt_sd(__m128d __a, __m128d __b)
{
- __m128d __c = __builtin_ia32_cmpsd(__b, __a, 5);
+ __m128d __c = __builtin_ia32_cmpnltsd(__b, __a);
return (__m128d) { __c[0], __a[1] };
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpnge_sd(__m128d __a, __m128d __b)
{
- __m128d __c = __builtin_ia32_cmpsd(__b, __a, 6);
+ __m128d __c = __builtin_ia32_cmpnlesd(__b, __a);
return (__m128d) { __c[0], __a[1] };
}
@@ -489,7 +489,7 @@ _mm_loadu_pd(double const *__dp)
{
struct __loadu_pd {
__m128d __v;
- } __attribute__((packed, may_alias));
+ } __attribute__((__packed__, __may_alias__));
return ((struct __loadu_pd*)__dp)->__v;
}
@@ -825,11 +825,28 @@ _mm_xor_si128(__m128i __a, __m128i __b)
return __a ^ __b;
}
-#define _mm_slli_si128(a, count) __extension__ ({ \
- _Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \
- __m128i __a = (a); \
- _Pragma("clang diagnostic pop"); \
- (__m128i)__builtin_ia32_pslldqi128(__a, (count)*8); })
+#define _mm_slli_si128(a, imm) __extension__ ({ \
+ (__m128i)__builtin_shufflevector((__v16qi)_mm_setzero_si128(), \
+ (__v16qi)(__m128i)(a), \
+ ((imm)&0xF0) ? 0 : 16 - ((imm)&0xF), \
+ ((imm)&0xF0) ? 0 : 17 - ((imm)&0xF), \
+ ((imm)&0xF0) ? 0 : 18 - ((imm)&0xF), \
+ ((imm)&0xF0) ? 0 : 19 - ((imm)&0xF), \
+ ((imm)&0xF0) ? 0 : 20 - ((imm)&0xF), \
+ ((imm)&0xF0) ? 0 : 21 - ((imm)&0xF), \
+ ((imm)&0xF0) ? 0 : 22 - ((imm)&0xF), \
+ ((imm)&0xF0) ? 0 : 23 - ((imm)&0xF), \
+ ((imm)&0xF0) ? 0 : 24 - ((imm)&0xF), \
+ ((imm)&0xF0) ? 0 : 25 - ((imm)&0xF), \
+ ((imm)&0xF0) ? 0 : 26 - ((imm)&0xF), \
+ ((imm)&0xF0) ? 0 : 27 - ((imm)&0xF), \
+ ((imm)&0xF0) ? 0 : 28 - ((imm)&0xF), \
+ ((imm)&0xF0) ? 0 : 29 - ((imm)&0xF), \
+ ((imm)&0xF0) ? 0 : 30 - ((imm)&0xF), \
+ ((imm)&0xF0) ? 0 : 31 - ((imm)&0xF)); })
+
+#define _mm_bslli_si128(a, imm) \
+ _mm_slli_si128((a), (imm))
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_slli_epi16(__m128i __a, int __count)
@@ -891,12 +908,28 @@ _mm_sra_epi32(__m128i __a, __m128i __count)
return (__m128i)__builtin_ia32_psrad128((__v4si)__a, (__v4si)__count);
}
-
-#define _mm_srli_si128(a, count) __extension__ ({ \
- _Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \
- __m128i __a = (a); \
- _Pragma("clang diagnostic pop"); \
- (__m128i)__builtin_ia32_psrldqi128(__a, (count)*8); })
+#define _mm_srli_si128(a, imm) __extension__ ({ \
+ (__m128i)__builtin_shufflevector((__v16qi)(__m128i)(a), \
+ (__v16qi)_mm_setzero_si128(), \
+ ((imm)&0xF0) ? 16 : ((imm)&0xF) + 0, \
+ ((imm)&0xF0) ? 16 : ((imm)&0xF) + 1, \
+ ((imm)&0xF0) ? 16 : ((imm)&0xF) + 2, \
+ ((imm)&0xF0) ? 16 : ((imm)&0xF) + 3, \
+ ((imm)&0xF0) ? 16 : ((imm)&0xF) + 4, \
+ ((imm)&0xF0) ? 16 : ((imm)&0xF) + 5, \
+ ((imm)&0xF0) ? 16 : ((imm)&0xF) + 6, \
+ ((imm)&0xF0) ? 16 : ((imm)&0xF) + 7, \
+ ((imm)&0xF0) ? 16 : ((imm)&0xF) + 8, \
+ ((imm)&0xF0) ? 16 : ((imm)&0xF) + 9, \
+ ((imm)&0xF0) ? 16 : ((imm)&0xF) + 10, \
+ ((imm)&0xF0) ? 16 : ((imm)&0xF) + 11, \
+ ((imm)&0xF0) ? 16 : ((imm)&0xF) + 12, \
+ ((imm)&0xF0) ? 16 : ((imm)&0xF) + 13, \
+ ((imm)&0xF0) ? 16 : ((imm)&0xF) + 14, \
+ ((imm)&0xF0) ? 16 : ((imm)&0xF) + 15); })
+
+#define _mm_bsrli_si128(a, imm) \
+ _mm_srli_si128((a), (imm))
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_srli_epi16(__m128i __a, int __count)
@@ -1070,7 +1103,7 @@ _mm_loadu_si128(__m128i const *__p)
{
struct __loadu_si128 {
__m128i __v;
- } __attribute__((packed, may_alias));
+ } __attribute__((__packed__, __may_alias__));
return ((struct __loadu_si128*)__p)->__v;
}
@@ -1284,27 +1317,21 @@ _mm_movemask_epi8(__m128i __a)
}
#define _mm_shuffle_epi32(a, imm) __extension__ ({ \
- _Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \
- __m128i __a = (a); \
- _Pragma("clang diagnostic pop"); \
- (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si) _mm_set1_epi32(0), \
+ (__m128i)__builtin_shufflevector((__v4si)(__m128i)(a), \
+ (__v4si)_mm_set1_epi32(0), \
(imm) & 0x3, ((imm) & 0xc) >> 2, \
((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6); })
#define _mm_shufflelo_epi16(a, imm) __extension__ ({ \
- _Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \
- __m128i __a = (a); \
- _Pragma("clang diagnostic pop"); \
- (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi) _mm_set1_epi16(0), \
+ (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(a), \
+ (__v8hi)_mm_set1_epi16(0), \
(imm) & 0x3, ((imm) & 0xc) >> 2, \
((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6, \
4, 5, 6, 7); })
#define _mm_shufflehi_epi16(a, imm) __extension__ ({ \
- _Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \
- __m128i __a = (a); \
- _Pragma("clang diagnostic pop"); \
- (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi) _mm_set1_epi16(0), \
+ (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(a), \
+ (__v8hi)_mm_set1_epi16(0), \
0, 1, 2, 3, \
4 + (((imm) & 0x03) >> 0), \
4 + (((imm) & 0x0c) >> 2), \
@@ -1396,11 +1423,8 @@ _mm_movemask_pd(__m128d __a)
}
#define _mm_shuffle_pd(a, b, i) __extension__ ({ \
- _Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \
- __m128d __a = (a); \
- __m128d __b = (b); \
- _Pragma("clang diagnostic pop"); \
- __builtin_shufflevector(__a, __b, (i) & 1, (((i) & 2) >> 1) + 2); })
+ __builtin_shufflevector((__m128d)(a), (__m128d)(b), \
+ (i) & 1, (((i) & 2) >> 1) + 2); })
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_castpd_ps(__m128d __a)
diff --git a/lib/clang/3.6/include/module.modulemap b/lib/clang/3.6/include/module.modulemap
index 3c42477..062464e 100644
--- a/lib/clang/3.6/include/module.modulemap
+++ b/lib/clang/3.6/include/module.modulemap
@@ -1,4 +1,4 @@
-module _Builtin_intrinsics [system] {
+module _Builtin_intrinsics [system] [extern_c] {
explicit module altivec {
requires altivec
header "altivec.h"
diff --git a/lib/clang/3.6/include/shaintrin.h b/lib/clang/3.6/include/shaintrin.h
index 66ed055..391a4bb 100644
--- a/lib/clang/3.6/include/shaintrin.h
+++ b/lib/clang/3.6/include/shaintrin.h
@@ -38,37 +38,37 @@
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_sha1nexte_epu32(__m128i __X, __m128i __Y)
{
- return __builtin_ia32_sha1nexte(__X, __Y);
+ return (__m128i)__builtin_ia32_sha1nexte((__v4si)__X, (__v4si)__Y);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_sha1msg1_epu32(__m128i __X, __m128i __Y)
{
- return __builtin_ia32_sha1msg1(__X, __Y);
+ return (__m128i)__builtin_ia32_sha1msg1((__v4si)__X, (__v4si)__Y);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_sha1msg2_epu32(__m128i __X, __m128i __Y)
{
- return __builtin_ia32_sha1msg2(__X, __Y);
+ return (__m128i)__builtin_ia32_sha1msg2((__v4si)__X, (__v4si)__Y);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_sha256rnds2_epu32(__m128i __X, __m128i __Y, __m128i __Z)
{
- return __builtin_ia32_sha256rnds2(__X, __Y, __Z);
+ return (__m128i)__builtin_ia32_sha256rnds2((__v4si)__X, (__v4si)__Y, (__v4si)__Z);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_sha256msg1_epu32(__m128i __X, __m128i __Y)
{
- return __builtin_ia32_sha256msg1(__X, __Y);
+ return (__m128i)__builtin_ia32_sha256msg1((__v4si)__X, (__v4si)__Y);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_sha256msg2_epu32(__m128i __X, __m128i __Y)
{
- return __builtin_ia32_sha256msg2(__X, __Y);
+ return (__m128i)__builtin_ia32_sha256msg2((__v4si)__X, (__v4si)__Y);
}
#endif /* __SHAINTRIN_H */
diff --git a/lib/clang/3.6/include/unwind.h b/lib/clang/3.6/include/unwind.h
index 685c1df..303d792 100644
--- a/lib/clang/3.6/include/unwind.h
+++ b/lib/clang/3.6/include/unwind.h
@@ -26,8 +26,8 @@
#ifndef __CLANG_UNWIND_H
#define __CLANG_UNWIND_H
-#if __has_include_next(<unwind.h>)
-/* Darwin (from 11.x on) and libunwind provide an unwind.h. If that's available,
+#if defined(__APPLE__) && __has_include_next(<unwind.h>)
+/* Darwin (from 11.x on) provide an unwind.h. If that's available,
* use it. libunwind wraps some of its definitions in #ifdef _GNU_SOURCE,
* so define that around the include.*/
# ifndef _GNU_SOURCE
@@ -199,6 +199,8 @@ _Unwind_Word _Unwind_GetIPInfo(struct _Unwind_Context *, int *);
_Unwind_Word _Unwind_GetCFA(struct _Unwind_Context *);
+_Unwind_Word _Unwind_GetBSP(struct _Unwind_Context *);
+
void *_Unwind_GetLanguageSpecificData(struct _Unwind_Context *);
_Unwind_Ptr _Unwind_GetRegionStart(struct _Unwind_Context *);
@@ -233,9 +235,9 @@ void *_Unwind_FindEnclosingFunction(void *);
#ifdef __APPLE__
_Unwind_Ptr _Unwind_GetDataRelBase(struct _Unwind_Context *)
- __attribute__((unavailable));
+ __attribute__((__unavailable__));
_Unwind_Ptr _Unwind_GetTextRelBase(struct _Unwind_Context *)
- __attribute__((unavailable));
+ __attribute__((__unavailable__));
/* Darwin-specific functions */
void __register_frame(const void *);
@@ -249,15 +251,15 @@ struct dwarf_eh_bases {
void *_Unwind_Find_FDE(const void *, struct dwarf_eh_bases *);
void __register_frame_info_bases(const void *, void *, void *, void *)
- __attribute__((unavailable));
-void __register_frame_info(const void *, void *) __attribute__((unavailable));
+ __attribute__((__unavailable__));
+void __register_frame_info(const void *, void *) __attribute__((__unavailable__));
void __register_frame_info_table_bases(const void *, void*, void *, void *)
- __attribute__((unavailable));
+ __attribute__((__unavailable__));
void __register_frame_info_table(const void *, void *)
- __attribute__((unavailable));
-void __register_frame_table(const void *) __attribute__((unavailable));
-void __deregister_frame_info(const void *) __attribute__((unavailable));
-void __deregister_frame_info_bases(const void *)__attribute__((unavailable));
+ __attribute__((__unavailable__));
+void __register_frame_table(const void *) __attribute__((__unavailable__));
+void __deregister_frame_info(const void *) __attribute__((__unavailable__));
+void __deregister_frame_info_bases(const void *)__attribute__((__unavailable__));
#else
diff --git a/lib/clang/3.6/include/xmmintrin.h b/lib/clang/3.6/include/xmmintrin.h
index c9befcb..d1afe81 100644
--- a/lib/clang/3.6/include/xmmintrin.h
+++ b/lib/clang/3.6/include/xmmintrin.h
@@ -182,153 +182,153 @@ _mm_xor_ps(__m128 __a, __m128 __b)
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpeq_ss(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpss(__a, __b, 0);
+ return (__m128)__builtin_ia32_cmpeqss(__a, __b);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpeq_ps(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpps(__a, __b, 0);
+ return (__m128)__builtin_ia32_cmpeqps(__a, __b);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmplt_ss(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpss(__a, __b, 1);
+ return (__m128)__builtin_ia32_cmpltss(__a, __b);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmplt_ps(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpps(__a, __b, 1);
+ return (__m128)__builtin_ia32_cmpltps(__a, __b);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmple_ss(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpss(__a, __b, 2);
+ return (__m128)__builtin_ia32_cmpless(__a, __b);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmple_ps(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpps(__a, __b, 2);
+ return (__m128)__builtin_ia32_cmpleps(__a, __b);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpgt_ss(__m128 __a, __m128 __b)
{
return (__m128)__builtin_shufflevector(__a,
- __builtin_ia32_cmpss(__b, __a, 1),
+ __builtin_ia32_cmpltss(__b, __a),
4, 1, 2, 3);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpgt_ps(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpps(__b, __a, 1);
+ return (__m128)__builtin_ia32_cmpltps(__b, __a);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpge_ss(__m128 __a, __m128 __b)
{
return (__m128)__builtin_shufflevector(__a,
- __builtin_ia32_cmpss(__b, __a, 2),
+ __builtin_ia32_cmpless(__b, __a),
4, 1, 2, 3);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpge_ps(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpps(__b, __a, 2);
+ return (__m128)__builtin_ia32_cmpleps(__b, __a);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpneq_ss(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpss(__a, __b, 4);
+ return (__m128)__builtin_ia32_cmpneqss(__a, __b);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpneq_ps(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpps(__a, __b, 4);
+ return (__m128)__builtin_ia32_cmpneqps(__a, __b);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpnlt_ss(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpss(__a, __b, 5);
+ return (__m128)__builtin_ia32_cmpnltss(__a, __b);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpnlt_ps(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpps(__a, __b, 5);
+ return (__m128)__builtin_ia32_cmpnltps(__a, __b);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpnle_ss(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpss(__a, __b, 6);
+ return (__m128)__builtin_ia32_cmpnless(__a, __b);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpnle_ps(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpps(__a, __b, 6);
+ return (__m128)__builtin_ia32_cmpnleps(__a, __b);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpngt_ss(__m128 __a, __m128 __b)
{
return (__m128)__builtin_shufflevector(__a,
- __builtin_ia32_cmpss(__b, __a, 5),
+ __builtin_ia32_cmpnltss(__b, __a),
4, 1, 2, 3);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpngt_ps(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpps(__b, __a, 5);
+ return (__m128)__builtin_ia32_cmpnltps(__b, __a);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpnge_ss(__m128 __a, __m128 __b)
{
return (__m128)__builtin_shufflevector(__a,
- __builtin_ia32_cmpss(__b, __a, 6),
+ __builtin_ia32_cmpnless(__b, __a),
4, 1, 2, 3);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpnge_ps(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpps(__b, __a, 6);
+ return (__m128)__builtin_ia32_cmpnleps(__b, __a);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpord_ss(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpss(__a, __b, 7);
+ return (__m128)__builtin_ia32_cmpordss(__a, __b);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpord_ps(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpps(__a, __b, 7);
+ return (__m128)__builtin_ia32_cmpordps(__a, __b);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpunord_ss(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpss(__a, __b, 3);
+ return (__m128)__builtin_ia32_cmpunordss(__a, __b);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpunord_ps(__m128 __a, __m128 __b)
{
- return (__m128)__builtin_ia32_cmpps(__a, __b, 3);
+ return (__m128)__builtin_ia32_cmpunordps(__a, __b);
}
static __inline__ int __attribute__((__always_inline__, __nodebug__))
diff --git a/lib/libc++.dylib b/lib/libc++.dylib
index 5732e72..ea5dbee 100755
--- a/lib/libc++.dylib
+++ b/lib/libc++.dylib
Binary files differ
diff --git a/lib64/libc++.dylib b/lib64/libc++.dylib
index 300f9c7..2b732f3 100755
--- a/lib64/libc++.dylib
+++ b/lib64/libc++.dylib
Binary files differ