-rw-r--r--  include/clang/Basic/BuiltinsAArch64.def            7
-rw-r--r--  include/clang/Basic/TargetBuiltins.h                3
-rw-r--r--  include/clang/Basic/arm_neon.td                   110
-rw-r--r--  include/clang/Sema/Sema.h                           2
-rw-r--r--  lib/AST/ItaniumMangle.cpp                          75
-rw-r--r--  lib/Basic/Targets.cpp                              39
-rw-r--r--  lib/CodeGen/CGBuiltin.cpp                         197
-rw-r--r--  lib/Driver/Tools.cpp                               14
-rw-r--r--  lib/Driver/Tools.h                                  2
-rw-r--r--  lib/Sema/SemaChecking.cpp                          87
-rw-r--r--  lib/Sema/SemaType.cpp                              47
-rw-r--r--  test/CodeGen/aarch64-neon-intrinsics.c           3023
-rw-r--r--  test/CodeGenCXX/aarch64-mangle-neon-vectors.cpp    85
-rw-r--r--  test/CodeGenCXX/mangle-neon-vectors.cpp            17
-rw-r--r--  test/Preprocessor/aarch64-target-features.c         3
-rw-r--r--  test/Sema/aarch64-neon-vector-types.c              34
-rw-r--r--  utils/TableGen/NeonEmitter.cpp                    574
17 files changed, 4169 insertions, 150 deletions
diff --git a/include/clang/Basic/BuiltinsAArch64.def b/include/clang/Basic/BuiltinsAArch64.def
index 768e4bb26c..aafd202aae 100644
--- a/include/clang/Basic/BuiltinsAArch64.def
+++ b/include/clang/Basic/BuiltinsAArch64.def
@@ -16,3 +16,10 @@
// In libgcc
BUILTIN(__clear_cache, "vv*v*", "i")
+// NEON
+#define GET_NEON_AARCH64_BUILTINS
+#include "clang/Basic/arm_neon.inc"
+#undef GET_NEON_AARCH64_BUILTINS
+#undef GET_NEON_BUILTINS
+
+#undef BUILTIN
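
Note: the entries pulled in from arm_neon.inc under GET_NEON_AARCH64_BUILTINS
follow the same BUILTIN(...) shape as the hand-written __clear_cache entry
above. A hypothetical example of the generated form (illustrative only; the
exact signature strings come from utils/TableGen/NeonEmitter.cpp):

    // Illustrative only -- shape of a NeonEmitter-generated entry. The
    // trailing 'i' parameter carries the NeonTypeFlags value that Sema and
    // CodeGen later use to pick the concrete overload.
    BUILTIN(__builtin_neon_vmulx_v, "V8ScV8ScV8Sci", "n")
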
diff --git a/include/clang/Basic/TargetBuiltins.h b/include/clang/Basic/TargetBuiltins.h
index 66e378fa9b..4202a4a502 100644
--- a/include/clang/Basic/TargetBuiltins.h
+++ b/include/clang/Basic/TargetBuiltins.h
@@ -91,7 +91,8 @@ namespace clang {
Poly8,
Poly16,
Float16,
- Float32
+ Float32,
+ Float64
};
NeonTypeFlags(unsigned F) : Flags(F) {}
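
Note: a minimal sketch of how a flags value decodes, assuming the bitfield
layout declared elsewhere in this class (low 4 bits = element type, 0x10 =
unsigned, 0x20 = quad). Float64 plus the quad bit therefore names a
float64x2_t:

    #include "clang/Basic/TargetBuiltins.h"

    // Sketch: decode a NeonTypeFlags word (layout assumed as described above).
    unsigned Flags = clang::NeonTypeFlags::Float64 | 0x20; // quad f64 vector
    bool IsQuad   = Flags & 0x20; // true -> 128-bit variant (2 x f64)
    unsigned Elt  = Flags & 0xf;  // == NeonTypeFlags::Float64
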
diff --git a/include/clang/Basic/arm_neon.td b/include/clang/Basic/arm_neon.td
index 77bc797c50..ea03e8fe8a 100644
--- a/include/clang/Basic/arm_neon.td
+++ b/include/clang/Basic/arm_neon.td
@@ -69,6 +69,7 @@ def OP_REINT : Op;
def OP_ABDL : Op;
def OP_ABA : Op;
def OP_ABAL : Op;
+def OP_DIV : Op;
class Inst <string n, string p, string t, Op o> {
string Name = n;
@@ -77,6 +78,7 @@ class Inst <string n, string p, string t, Op o> {
Op Operand = o;
bit isShift = 0;
bit isVCVT_N = 0;
+ bit isA64 = 0;
// Certain intrinsics have different names than their representative
// instructions. This field allows us to handle this correctly when we
@@ -145,6 +147,7 @@ class NoTestOpInst<string n, string p, string t, Op o> : Inst<n, p, t, o> {}
// l: long
// f: float
// h: half-float
+// d: double
// size modifiers:
// U: unsigned
@@ -452,3 +455,110 @@ def VREINTERPRET
// Vector fused multiply-add operations
def VFMA : SInst<"vfma", "dddd", "fQf">;
+
+////////////////////////////////////////////////////////////////////////////////
+// AArch64 Intrinsics
+
+let isA64 = 1 in {
+
+////////////////////////////////////////////////////////////////////////////////
+// Addition
+// With additional Qd type.
+def ADD : IOpInst<"vadd", "ddd", "csilfUcUsUiUlQcQsQiQlQfQUcQUsQUiQUlQd", OP_ADD>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Subtraction
+// With additional Qd type.
+def SUB : IOpInst<"vsub", "ddd", "csilfUcUsUiUlQcQsQiQlQfQUcQUsQUiQUlQd", OP_SUB>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Multiplication
+// With additional Qd type.
+def MUL : IOpInst<"vmul", "ddd", "csifUcUsUiQcQsQiQfQUcQUsQUiQd", OP_MUL>;
+def MLA : IOpInst<"vmla", "dddd", "csifUcUsUiQcQsQiQfQUcQUsQUiQd", OP_MLA>;
+def MLS : IOpInst<"vmls", "dddd", "csifUcUsUiQcQsQiQfQUcQUsQUiQd", OP_MLS>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Multiplication Extended
+def MULX : SInst<"vmulx", "ddd", "fQfQd">;
+
+////////////////////////////////////////////////////////////////////////////////
+// Division
+def FDIV : IOpInst<"vdiv", "ddd", "fQfQd", OP_DIV>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Vector fused multiply-add operations
+// With additional Qd type.
+def FMLA : SInst<"vfma", "dddd", "fQfQd">;
+def FMLS : SInst<"vfms", "dddd", "fQfQd">;
+
+////////////////////////////////////////////////////////////////////////////////
+// Logical operations
+// With additional Qd type.
+def BSL : SInst<"vbsl", "dudd", "csilUcUsUiUlfPcPsQcQsQiQlQUcQUsQUiQUlQfQPcQPsQd">;
+
+////////////////////////////////////////////////////////////////////////////////
+// Absolute Difference
+// With additional Qd type.
+def ABD : SInst<"vabd", "ddd", "csiUcUsUifQcQsQiQUcQUsQUiQfQd">;
+
+////////////////////////////////////////////////////////////////////////////////
+// Reciprocal/Sqrt
+// With additional Qd type.
+def FRECPS : IInst<"vrecps", "ddd", "fQfQd">;
+def FRSQRTS : IInst<"vrsqrts", "ddd", "fQfQd">;
+
+////////////////////////////////////////////////////////////////////////////////
+// Comparison
+// With additional Qd type.
+def FCAGE : IInst<"vcage", "udd", "fQfQd">;
+def FCAGT : IInst<"vcagt", "udd", "fQfQd">;
+def FCALE : IInst<"vcale", "udd", "fQfQd">;
+def FCALT : IInst<"vcalt", "udd", "fQfQd">;
+// With additional Ql, QUl, Qd types.
+def CMTST : WInst<"vtst", "udd", "csiUcUsUiPcQcQsQiQlQUcQUsQUiQUlQPc">;
+def CFMEQ : SOpInst<"vceq", "udd",
+ "csifUcUsUiPcQcQsQiQlQfQUcQUsQUiQUlQPcQd", OP_EQ>;
+def CFMGE : SOpInst<"vcge", "udd", "csifUcUsUiQcQsQiQlQfQUcQUsQUiQUlQd", OP_GE>;
+def CFMLE : SOpInst<"vcle", "udd", "csifUcUsUiQcQsQiQlQfQUcQUsQUiQUlQd", OP_LE>;
+def CFMGT : SOpInst<"vcgt", "udd", "csifUcUsUiQcQsQiQlQfQUcQUsQUiQUlQd", OP_GT>;
+def CFMLT : SOpInst<"vclt", "udd", "csifUcUsUiQcQsQiQlQfQUcQUsQUiQUlQd", OP_LT>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Max/Min Integer
+// With additional Qd type.
+def MAX : SInst<"vmax", "ddd", "csiUcUsUifQcQsQiQUcQUsQUiQfQd">;
+def MIN : SInst<"vmin", "ddd", "csiUcUsUifQcQsQiQUcQUsQUiQfQd">;
+
+////////////////////////////////////////////////////////////////////////////////
+// MaxNum/MinNum Floating Point
+def FMAXNM : SInst<"vmaxnm", "ddd", "fQfQd">;
+def FMINNM : SInst<"vminnm", "ddd", "fQfQd">;
+
+////////////////////////////////////////////////////////////////////////////////
+// Pairwise Max/Min
+// With additional Qc Qs Qi QUc QUs QUi Qf Qd types.
+def MAXP : SInst<"vpmax", "ddd", "csiUcUsUifQcQsQiQUcQUsQUiQfQd">;
+def MINP : SInst<"vpmin", "ddd", "csiUcUsUifQcQsQiQUcQUsQUiQfQd">;
+
+////////////////////////////////////////////////////////////////////////////////
+// Pairwise MaxNum/MinNum Floating Point
+def FMAXNMP : SInst<"vpmaxnm", "ddd", "fQfQd">;
+def FMINNMP : SInst<"vpminnm", "ddd", "fQfQd">;
+
+////////////////////////////////////////////////////////////////////////////////
+// Pairwise Addition
+// With additional Qc Qs Qi QUc QUs QUi Qf Qd types.
+def ADDP : IInst<"vpadd", "ddd", "csiUcUsUifQcQsQiQUcQUsQUiQfQd">;
+
+////////////////////////////////////////////////////////////////////////////////
+// Scalar Arithmetic
+
+// Scalar Addition
+
+def SCALAR_ADD : Inst<"vaddd", "ddd", "lUl", OP_ADD>;
+
+// Scalar Subtraction
+def SCALAR_SUB : Inst<"vsubd", "ddd", "lUl", OP_SUB>;
+
+}
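
Note: as a usage-level illustration of the new scalar defs, the "ddd"
prototype over type string "lUl" produces 64-bit scalar intrinsics, which the
tests added later in this patch exercise (see test_vaddd_s64 and
test_vsubd_u64 in aarch64-neon-intrinsics.c):

    #include <arm_neon.h>

    // Sketch: the scalar forms generated from SCALAR_ADD/SCALAR_SUB above.
    int64x1_t  add_d(int64x1_t a, int64x1_t b)   { return vaddd_s64(a, b); } // add d, d, d
    uint64x1_t sub_d(uint64x1_t a, uint64x1_t b) { return vsubd_u64(a, b); } // sub d, d, d
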
diff --git a/include/clang/Sema/Sema.h b/include/clang/Sema/Sema.h
index 752dd985a9..d14b38eb0c 100644
--- a/include/clang/Sema/Sema.h
+++ b/include/clang/Sema/Sema.h
@@ -7557,7 +7557,7 @@ private:
bool CheckARMBuiltinExclusiveCall(unsigned BuiltinID, CallExpr *TheCall);
bool CheckARMBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
-
+ bool CheckAArch64BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
bool CheckMipsBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
bool SemaBuiltinVAStart(CallExpr *TheCall);
diff --git a/lib/AST/ItaniumMangle.cpp b/lib/AST/ItaniumMangle.cpp
index 76a8bf4ecc..7016ee1d65 100644
--- a/lib/AST/ItaniumMangle.cpp
+++ b/lib/AST/ItaniumMangle.cpp
@@ -360,6 +360,7 @@ private:
void mangleBareFunctionType(const FunctionType *T,
bool MangleReturnType);
void mangleNeonVectorType(const VectorType *T);
+ void mangleAArch64NeonVectorType(const VectorType *T);
void mangleIntegerLiteral(QualType T, const llvm::APSInt &Value);
void mangleMemberExpr(const Expr *base, bool isArrow,
@@ -2174,7 +2175,9 @@ void CXXNameMangler::mangleNeonVectorType(const VectorType *T) {
case BuiltinType::LongLong: EltName = "int64_t"; break;
case BuiltinType::ULongLong: EltName = "uint64_t"; break;
case BuiltinType::Float: EltName = "float32_t"; break;
- default: llvm_unreachable("unexpected Neon vector element type");
+    case BuiltinType::Half: EltName = "float16_t"; break;
+ default:
+ llvm_unreachable("unexpected Neon vector element type");
}
}
const char *BaseName = 0;
@@ -2190,6 +2193,70 @@ void CXXNameMangler::mangleNeonVectorType(const VectorType *T) {
Out << BaseName << EltName;
}
+static StringRef mangleAArch64VectorBase(const BuiltinType *EltType) {
+ switch (EltType->getKind()) {
+ case BuiltinType::SChar:
+ return "Int8";
+ case BuiltinType::Short:
+ return "Int16";
+ case BuiltinType::Int:
+ return "Int32";
+ case BuiltinType::LongLong:
+ return "Int64";
+ case BuiltinType::UChar:
+ return "Uint8";
+ case BuiltinType::UShort:
+ return "Uint16";
+ case BuiltinType::UInt:
+ return "Uint32";
+ case BuiltinType::ULongLong:
+ return "Uint64";
+ case BuiltinType::Half:
+ return "Float16";
+ case BuiltinType::Float:
+ return "Float32";
+ case BuiltinType::Double:
+ return "Float64";
+ default:
+ llvm_unreachable("Unexpected vector element base type");
+ }
+}
+
+// AArch64's ABI for Neon vector types specifies that they should be mangled as
+// the equivalent internal name. The vector type must be one of the special
+// types predefined by ARM.
+void CXXNameMangler::mangleAArch64NeonVectorType(const VectorType *T) {
+ QualType EltType = T->getElementType();
+ assert(EltType->isBuiltinType() && "Neon vector element not a BuiltinType");
+ unsigned BitSize =
+ (T->getNumElements() * getASTContext().getTypeSize(EltType));
+
+ assert((BitSize == 64 || BitSize == 128) &&
+ "Neon vector type not 64 or 128 bits");
+
+ assert(getASTContext().getTypeSize(EltType) != BitSize &&
+ "Vector of 1 element not permitted");
+
+ StringRef EltName;
+ if (T->getVectorKind() == VectorType::NeonPolyVector) {
+ switch (cast<BuiltinType>(EltType)->getKind()) {
+ case BuiltinType::UChar:
+ EltName = "Poly8";
+ break;
+ case BuiltinType::UShort:
+ EltName = "Poly16";
+ break;
+ default:
+ llvm_unreachable("unexpected Neon polynomial vector element type");
+ }
+ } else
+ EltName = mangleAArch64VectorBase(cast<BuiltinType>(EltType));
+
+ std::string TypeName =
+ ("__" + EltName + "x" + llvm::utostr(T->getNumElements()) + "_t").str();
+ Out << TypeName.length() << TypeName;
+}
+
// GNU extension: vector types
// <type> ::= <vector-type>
// <vector-type> ::= Dv <positive dimension number> _
@@ -2201,7 +2268,11 @@ void CXXNameMangler::mangleNeonVectorType(const VectorType *T) {
void CXXNameMangler::mangleType(const VectorType *T) {
if ((T->getVectorKind() == VectorType::NeonVector ||
T->getVectorKind() == VectorType::NeonPolyVector)) {
- mangleNeonVectorType(T);
+ if (getASTContext().getTargetInfo().getTriple().getArch() ==
+ llvm::Triple::aarch64)
+ mangleAArch64NeonVectorType(T);
+ else
+ mangleNeonVectorType(T);
return;
}
Out << "Dv" << T->getNumElements() << '_';
diff --git a/lib/Basic/Targets.cpp b/lib/Basic/Targets.cpp
index 596eb8cb16..718f3bb223 100644
--- a/lib/Basic/Targets.cpp
+++ b/lib/Basic/Targets.cpp
@@ -3177,7 +3177,14 @@ class AArch64TargetInfo : public TargetInfo {
static const char * const GCCRegNames[];
static const TargetInfo::GCCRegAlias GCCRegAliases[];
+ enum FPUModeEnum {
+ FPUMode,
+ NeonMode
+ };
+
+ unsigned FPU;
static const Builtin::Info BuiltinInfo[];
+
public:
AArch64TargetInfo(const llvm::Triple &Triple) : TargetInfo(Triple) {
BigEndian = false;
@@ -3242,7 +3249,14 @@ public:
Opts.ShortEnums ? "1" : "4");
if (BigEndian)
- Builder.defineMacro("__ARM_BIG_ENDIAN");
+ Builder.defineMacro("__AARCH_BIG_ENDIAN");
+
+ if (FPU == NeonMode) {
+ Builder.defineMacro("__AARCH_FEATURE_ADVSIMD");
+
+ // 64-bit NEON supports half, single and double precision operations.
+ Builder.defineMacro("__AARCH_ADVSIMD_FP", "0xe");
+ }
}
virtual void getTargetBuiltins(const Builtin::Info *&Records,
unsigned &NumRecords) const {
@@ -3250,9 +3264,28 @@ public:
NumRecords = clang::AArch64::LastTSBuiltin-Builtin::FirstTSBuiltin;
}
virtual bool hasFeature(StringRef Feature) const {
- return Feature == "aarch64";
+ return Feature == "aarch64" || (Feature == "neon" && FPU == NeonMode);
}
- virtual void getGCCRegNames(const char * const *&Names,
+
+ virtual bool setFeatureEnabled(llvm::StringMap<bool> &Features,
+ StringRef Name, bool Enabled) const {
+ if (Name == "neon") {
+ Features[Name] = Enabled;
+ return true;
+ }
+
+ return false;
+ }
+
+ virtual void HandleTargetFeatures(std::vector<std::string> &Features) {
+ FPU = FPUMode;
+ for (unsigned i = 0, e = Features.size(); i != e; ++i) {
+ if (Features[i] == "+neon")
+ FPU = NeonMode;
+ }
+ }
+
+ virtual void getGCCRegNames(const char *const *&Names,
unsigned &NumNames) const;
virtual void getGCCRegAliases(const GCCRegAlias *&Aliases,
unsigned &NumAliases) const;
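
Note: sketch of the user-visible effect. When cc1 is passed
-target-feature +neon, HandleTargetFeatures sets FPU = NeonMode and the
macros above get defined, so code can gate NEON usage on them (macro names
as spelled by this patch):

    // Sketch: guard NEON code on the feature macro this patch defines.
    #if defined(__AARCH_FEATURE_ADVSIMD)
    #include <arm_neon.h>
    #endif
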
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index 5b41237585..d1dd7a0958 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -1614,6 +1614,8 @@ static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
return llvm::VectorType::get(CGF->Int64Ty, 1 << IsQuad);
case NeonTypeFlags::Float32:
return llvm::VectorType::get(CGF->FloatTy, 2 << IsQuad);
+ case NeonTypeFlags::Float64:
+ return llvm::VectorType::get(CGF->DoubleTy, 1 << IsQuad);
}
llvm_unreachable("Invalid NeonTypeFlags element type!");
}
@@ -1718,7 +1720,200 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
}
- return 0;
+ SmallVector<Value *, 4> Ops;
+ for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
+ Ops.push_back(EmitScalarExpr(E->getArg(i)));
+ }
+
+ // Get the last argument, which specifies the vector type.
+ llvm::APSInt Result;
+ const Expr *Arg = E->getArg(E->getNumArgs() - 1);
+ if (!Arg->isIntegerConstantExpr(Result, getContext()))
+ return 0;
+
+ // Determine the type of this overloaded NEON intrinsic.
+ NeonTypeFlags Type(Result.getZExtValue());
+ bool usgn = Type.isUnsigned();
+
+ llvm::VectorType *VTy = GetNeonType(this, Type);
+ llvm::Type *Ty = VTy;
+ if (!Ty)
+ return 0;
+
+ unsigned Int;
+ switch (BuiltinID) {
+ default:
+ return 0;
+
+ // AArch64 builtins mapping to legacy ARM v7 builtins.
+ // FIXME: the mapped builtins listed correspond to what has been tested
+ // in aarch64-neon-intrinsics.c so far.
+ case AArch64::BI__builtin_neon_vmul_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmul_v, E);
+ case AArch64::BI__builtin_neon_vmulq_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmulq_v, E);
+ case AArch64::BI__builtin_neon_vabd_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vabd_v, E);
+ case AArch64::BI__builtin_neon_vabdq_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vabdq_v, E);
+ case AArch64::BI__builtin_neon_vfma_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vfma_v, E);
+ case AArch64::BI__builtin_neon_vfmaq_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vfmaq_v, E);
+ case AArch64::BI__builtin_neon_vbsl_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vbsl_v, E);
+ case AArch64::BI__builtin_neon_vbslq_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vbslq_v, E);
+ case AArch64::BI__builtin_neon_vrsqrts_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrsqrts_v, E);
+ case AArch64::BI__builtin_neon_vrsqrtsq_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrsqrtsq_v, E);
+ case AArch64::BI__builtin_neon_vrecps_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrecps_v, E);
+ case AArch64::BI__builtin_neon_vrecpsq_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrecpsq_v, E);
+ case AArch64::BI__builtin_neon_vcage_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcage_v, E);
+ case AArch64::BI__builtin_neon_vcale_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcale_v, E);
+ case AArch64::BI__builtin_neon_vcaleq_v:
+ std::swap(Ops[0], Ops[1]);
+ case AArch64::BI__builtin_neon_vcageq_v: {
+ Function *F;
+ if (VTy->getElementType()->isIntegerTy(64))
+ F = CGM.getIntrinsic(Intrinsic::aarch64_neon_vacgeq);
+ else
+ F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgeq);
+ return EmitNeonCall(F, Ops, "vcage");
+ }
+ case AArch64::BI__builtin_neon_vcalt_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcalt_v, E);
+ case AArch64::BI__builtin_neon_vcagt_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcagt_v, E);
+ case AArch64::BI__builtin_neon_vcaltq_v:
+ std::swap(Ops[0], Ops[1]);
+ case AArch64::BI__builtin_neon_vcagtq_v: {
+ Function *F;
+ if (VTy->getElementType()->isIntegerTy(64))
+ F = CGM.getIntrinsic(Intrinsic::aarch64_neon_vacgtq);
+ else
+ F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtq);
+ return EmitNeonCall(F, Ops, "vcagt");
+ }
+ case AArch64::BI__builtin_neon_vtst_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vtst_v, E);
+ case AArch64::BI__builtin_neon_vtstq_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vtstq_v, E);
+ case AArch64::BI__builtin_neon_vhadd_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vhadd_v, E);
+ case AArch64::BI__builtin_neon_vhaddq_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vhaddq_v, E);
+ case AArch64::BI__builtin_neon_vhsub_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vhsub_v, E);
+ case AArch64::BI__builtin_neon_vhsubq_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vhsubq_v, E);
+ case AArch64::BI__builtin_neon_vrhadd_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrhadd_v, E);
+ case AArch64::BI__builtin_neon_vrhaddq_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrhaddq_v, E);
+ case AArch64::BI__builtin_neon_vqadd_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqadd_v, E);
+ case AArch64::BI__builtin_neon_vqaddq_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqaddq_v, E);
+ case AArch64::BI__builtin_neon_vqsub_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqsub_v, E);
+ case AArch64::BI__builtin_neon_vqsubq_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqsubq_v, E);
+ case AArch64::BI__builtin_neon_vshl_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshl_v, E);
+ case AArch64::BI__builtin_neon_vshlq_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshlq_v, E);
+ case AArch64::BI__builtin_neon_vqshl_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqshl_v, E);
+ case AArch64::BI__builtin_neon_vqshlq_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqshlq_v, E);
+ case AArch64::BI__builtin_neon_vrshl_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrshl_v, E);
+ case AArch64::BI__builtin_neon_vrshlq_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrshlq_v, E);
+ case AArch64::BI__builtin_neon_vqrshl_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrshl_v, E);
+ case AArch64::BI__builtin_neon_vqrshlq_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrshlq_v, E);
+ case AArch64::BI__builtin_neon_vmax_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmax_v, E);
+ case AArch64::BI__builtin_neon_vmaxq_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmaxq_v, E);
+ case AArch64::BI__builtin_neon_vmin_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmin_v, E);
+ case AArch64::BI__builtin_neon_vminq_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vminq_v, E);
+ case AArch64::BI__builtin_neon_vpmax_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vpmax_v, E);
+ case AArch64::BI__builtin_neon_vpmin_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vpmin_v, E);
+ case AArch64::BI__builtin_neon_vpadd_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vpadd_v, E);
+ case AArch64::BI__builtin_neon_vqdmulh_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqdmulh_v, E);
+ case AArch64::BI__builtin_neon_vqdmulhq_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqdmulhq_v, E);
+ case AArch64::BI__builtin_neon_vqrdmulh_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrdmulh_v, E);
+ case AArch64::BI__builtin_neon_vqrdmulhq_v:
+ return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrdmulhq_v, E);
+
+ // AArch64-only builtins
+ case AArch64::BI__builtin_neon_vfms_v:
+ case AArch64::BI__builtin_neon_vfmsq_v: {
+ Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
+ Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
+ Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
+ Ops[1] = Builder.CreateFNeg(Ops[1]);
+ Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
+
+ // LLVM's fma intrinsic puts the accumulator in the last position, but the
+ // AArch64 intrinsic has it first.
+ return Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]);
+ }
+ case AArch64::BI__builtin_neon_vmaxnm_v:
+ case AArch64::BI__builtin_neon_vmaxnmq_v: {
+ Int = Intrinsic::aarch64_neon_vmaxnm;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
+ }
+ case AArch64::BI__builtin_neon_vminnm_v:
+ case AArch64::BI__builtin_neon_vminnmq_v: {
+ Int = Intrinsic::aarch64_neon_vminnm;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
+ }
+ case AArch64::BI__builtin_neon_vpmaxnm_v:
+ case AArch64::BI__builtin_neon_vpmaxnmq_v: {
+ Int = Intrinsic::aarch64_neon_vpmaxnm;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
+ }
+ case AArch64::BI__builtin_neon_vpminnm_v:
+ case AArch64::BI__builtin_neon_vpminnmq_v: {
+ Int = Intrinsic::aarch64_neon_vpminnm;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
+ }
+ case AArch64::BI__builtin_neon_vpmaxq_v: {
+ Int = usgn ? Intrinsic::arm_neon_vpmaxu : Intrinsic::arm_neon_vpmaxs;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
+ }
+ case AArch64::BI__builtin_neon_vpminq_v: {
+ Int = usgn ? Intrinsic::arm_neon_vpminu : Intrinsic::arm_neon_vpmins;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
+ }
+ case AArch64::BI__builtin_neon_vpaddq_v: {
+ Int = Intrinsic::arm_neon_vpadd;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpadd");
+ }
+ case AArch64::BI__builtin_neon_vmulx_v:
+ case AArch64::BI__builtin_neon_vmulxq_v: {
+ Int = Intrinsic::aarch64_neon_vmulx;
+ return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
+ }
+ }
}
Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
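
Note: to make the accumulator-position comment in the vfms case concrete,
here is a per-lane sketch of the emitted operation, using std::fma as a
stand-in for llvm.fma:

    #include <cmath>

    // Sketch: vfms(a, b, c) lowers to fma(-b, c, a), i.e. a - b*c; the
    // accumulator (Ops[0]) is moved to llvm.fma's last operand.
    float vfms_lane(float a, float b, float c) {
      return std::fma(-b, c, a);
    }
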
diff --git a/lib/Driver/Tools.cpp b/lib/Driver/Tools.cpp
index 50630b6989..bc003facba 100644
--- a/lib/Driver/Tools.cpp
+++ b/lib/Driver/Tools.cpp
@@ -1407,6 +1407,14 @@ void Clang::AddHexagonTargetArgs(const ArgList &Args,
CmdArgs.push_back ("-machine-sink-split=0");
}
+void Clang::AddAArch64TargetArgs(const ArgList &Args,
+ ArgStringList &CmdArgs) const {
+ const Driver &D = getToolChain().getDriver();
+ // Honor -mfpu=.
+ if (const Arg *A = Args.getLastArg(options::OPT_mfpu_EQ))
+ addFPUArgs(D, A, Args, CmdArgs);
+}
+
static bool
shouldUseExceptionTablesForObjCExceptions(const ObjCRuntime &runtime,
const llvm::Triple &Triple) {
@@ -2498,9 +2506,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
case llvm::Triple::hexagon:
AddHexagonTargetArgs(Args, CmdArgs);
break;
- }
-
+ case llvm::Triple::aarch64:
+ AddAArch64TargetArgs(Args, CmdArgs);
+ break;
+ }
// Pass the linker version in use.
if (Arg *A = Args.getLastArg(options::OPT_mlinker_version_EQ)) {
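
Note: assumed behaviour, mirroring the existing ARM path through addFPUArgs
(the helper itself is not shown in this hunk): the driver lowers -mfpu=neon
into a cc1 target feature.

    // Assumption: addFPUArgs maps -mfpu=neon to a cc1 feature flag, roughly:
    //   clang -target aarch64-none-linux-gnu -mfpu=neon -c foo.c
    //     => ... -cc1 ... -target-feature +neon ...
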
diff --git a/lib/Driver/Tools.h b/lib/Driver/Tools.h
index 1dd4d5edfe..9138ddf88b 100644
--- a/lib/Driver/Tools.h
+++ b/lib/Driver/Tools.h
@@ -65,6 +65,8 @@ using llvm::opt::ArgStringList;
llvm::opt::ArgStringList &CmdArgs) const;
void AddHexagonTargetArgs(const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs) const;
+ void AddAArch64TargetArgs(const llvm::opt::ArgList &Args,
+ llvm::opt::ArgStringList &CmdArgs) const;
enum RewriteKind { RK_None, RK_Fragile, RK_NonFragile };
diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp
index e417949013..9f2dc34575 100644
--- a/lib/Sema/SemaChecking.cpp
+++ b/lib/Sema/SemaChecking.cpp
@@ -306,6 +306,10 @@ Sema::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
if (CheckARMBuiltinFunctionCall(BuiltinID, TheCall))
return ExprError();
break;
+ case llvm::Triple::aarch64:
+ if (CheckAArch64BuiltinFunctionCall(BuiltinID, TheCall))
+ return ExprError();
+ break;
case llvm::Triple::mips:
case llvm::Triple::mipsel:
case llvm::Triple::mips64:
@@ -342,6 +346,9 @@ static unsigned RFT(unsigned t, bool shift = false) {
case NeonTypeFlags::Float32:
assert(!shift && "cannot shift float types!");
return (2 << IsQuad) - 1;
+ case NeonTypeFlags::Float64:
+ assert(!shift && "cannot shift float types!");
+ return (1 << IsQuad) - 1;
}
llvm_unreachable("Invalid NeonTypeFlag!");
}
@@ -367,10 +374,90 @@ static QualType getNeonEltType(NeonTypeFlags Flags, ASTContext &Context) {
return Context.UnsignedShortTy;
case NeonTypeFlags::Float32:
return Context.FloatTy;
+ case NeonTypeFlags::Float64:
+ return Context.DoubleTy;
}
llvm_unreachable("Invalid NeonTypeFlag!");
}
+bool Sema::CheckAArch64BuiltinFunctionCall(unsigned BuiltinID,
+ CallExpr *TheCall) {
+
+ llvm::APSInt Result;
+
+ uint64_t mask = 0;
+ unsigned TV = 0;
+ int PtrArgNum = -1;
+ bool HasConstPtr = false;
+ switch (BuiltinID) {
+#define GET_NEON_AARCH64_OVERLOAD_CHECK
+#include "clang/Basic/arm_neon.inc"
+#undef GET_NEON_AARCH64_OVERLOAD_CHECK
+ }
+
+ // For NEON intrinsics which are overloaded on vector element type, validate
+ // the immediate which specifies which variant to emit.
+ unsigned ImmArg = TheCall->getNumArgs() - 1;
+ if (mask) {
+ if (SemaBuiltinConstantArg(TheCall, ImmArg, Result))
+ return true;
+
+ TV = Result.getLimitedValue(64);
+ if ((TV > 63) || (mask & (1ULL << TV)) == 0)
+ return Diag(TheCall->getLocStart(), diag::err_invalid_neon_type_code)
+ << TheCall->getArg(ImmArg)->getSourceRange();
+ }
+
+ if (PtrArgNum >= 0) {
+ // Check that pointer arguments have the specified type.
+ Expr *Arg = TheCall->getArg(PtrArgNum);
+ if (ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(Arg))
+ Arg = ICE->getSubExpr();
+ ExprResult RHS = DefaultFunctionArrayLvalueConversion(Arg);
+ QualType RHSTy = RHS.get()->getType();
+ QualType EltTy = getNeonEltType(NeonTypeFlags(TV), Context);
+ if (HasConstPtr)
+ EltTy = EltTy.withConst();
+ QualType LHSTy = Context.getPointerType(EltTy);
+ AssignConvertType ConvTy;
+ ConvTy = CheckSingleAssignmentConstraints(LHSTy, RHS);
+ if (RHS.isInvalid())
+ return true;
+ if (DiagnoseAssignmentResult(ConvTy, Arg->getLocStart(), LHSTy, RHSTy,
+ RHS.get(), AA_Assigning))
+ return true;
+ }
+
+ // For NEON intrinsics which take an immediate value as part of the
+ // instruction, range check them here.
+ unsigned i = 0, l = 0, u = 0;
+ switch (BuiltinID) {
+ default:
+ return false;
+#define GET_NEON_AARCH64_IMMEDIATE_CHECK
+#include "clang/Basic/arm_neon.inc"
+#undef GET_NEON_AARCH64_IMMEDIATE_CHECK
+  }
+
+ // We can't check the value of a dependent argument.
+ if (TheCall->getArg(i)->isTypeDependent() ||
+ TheCall->getArg(i)->isValueDependent())
+ return false;
+
+ // Check that the immediate argument is actually a constant.
+ if (SemaBuiltinConstantArg(TheCall, i, Result))
+ return true;
+
+  // Range check against the upper/lower values for this instruction.
+ unsigned Val = Result.getZExtValue();
+ if (Val < l || Val > (u + l))
+ return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range)
+ << l << u + l << TheCall->getArg(i)->getSourceRange();
+
+ return false;
+}
+
bool Sema::CheckARMBuiltinExclusiveCall(unsigned BuiltinID, CallExpr *TheCall) {
assert((BuiltinID == ARM::BI__builtin_arm_ldrex ||
BuiltinID == ARM::BI__builtin_arm_strex) &&
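
Note: a small sketch of the range test above; l is the lower bound and u the
width of the accepted range, so a valid immediate satisfies l <= Val <= l + u:

    // Sketch of the immediate range check performed by the code above.
    static bool immInRange(unsigned Val, unsigned l, unsigned u) {
      return Val >= l && Val <= u + l;
    }
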
diff --git a/lib/Sema/SemaType.cpp b/lib/Sema/SemaType.cpp
index 0106a679b9..bc6b6a52fa 100644
--- a/lib/Sema/SemaType.cpp
+++ b/lib/Sema/SemaType.cpp
@@ -4603,6 +4603,42 @@ static void HandleExtVectorTypeAttr(QualType &CurType,
CurType = T;
}
+static bool isPermittedNeonBaseType(QualType &Ty,
+ VectorType::VectorKind VecKind,
+ bool IsAArch64) {
+ const BuiltinType *BTy = Ty->getAs<BuiltinType>();
+ if (!BTy)
+ return false;
+
+ if (VecKind == VectorType::NeonPolyVector) {
+ if (IsAArch64) {
+ // AArch64 polynomial vectors are unsigned
+ return BTy->getKind() == BuiltinType::UChar ||
+ BTy->getKind() == BuiltinType::UShort;
+ } else {
+      // AArch32 polynomial vectors are signed.
+ return BTy->getKind() == BuiltinType::SChar ||
+ BTy->getKind() == BuiltinType::Short;
+ }
+ }
+
+ // Non-polynomial vector types: the usual suspects are allowed, as well as
+ // float64_t on AArch64.
+ if (IsAArch64 && BTy->getKind() == BuiltinType::Double)
+ return true;
+
+ return BTy->getKind() == BuiltinType::SChar ||
+ BTy->getKind() == BuiltinType::UChar ||
+ BTy->getKind() == BuiltinType::Short ||
+ BTy->getKind() == BuiltinType::UShort ||
+ BTy->getKind() == BuiltinType::Int ||
+ BTy->getKind() == BuiltinType::UInt ||
+ BTy->getKind() == BuiltinType::LongLong ||
+ BTy->getKind() == BuiltinType::ULongLong ||
+ BTy->getKind() == BuiltinType::Float ||
+ BTy->getKind() == BuiltinType::Half;
+}
+
/// HandleNeonVectorTypeAttr - The "neon_vector_type" and
/// "neon_polyvector_type" attributes are used to create vector types that
/// are mangled according to ARM's ABI. Otherwise, these types are identical
@@ -4646,9 +4682,14 @@ static void HandleNeonVectorTypeAttr(QualType& CurType,
BTy->getKind() != BuiltinType::LongLong &&
BTy->getKind() != BuiltinType::ULongLong &&
BTy->getKind() != BuiltinType::Float)) {
- S.Diag(Attr.getLoc(), diag::err_attribute_invalid_vector_type) <<CurType;
- Attr.setInvalid();
- return;
+ llvm::Triple::ArchType Arch =
+ S.Context.getTargetInfo().getTriple().getArch();
+ if (!isPermittedNeonBaseType(CurType, VecKind,
+ Arch == llvm::Triple::aarch64)) {
+ S.Diag(Attr.getLoc(), diag::err_attribute_invalid_vector_type) << CurType;
+ Attr.setInvalid();
+ return;
+ }
}
// The total size of the vector must be 64 or 128 bits.
unsigned typeSize = static_cast<unsigned>(S.Context.getTypeSize(CurType));
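
Note: a minimal example of what the relaxed base-type check now accepts on
AArch64; this matches the attribute spelling arm_neon.h uses for the new
double-element vector types:

    // Accepted on AArch64 after this patch (total size must be 64 or 128 bits).
    typedef __attribute__((neon_vector_type(2))) double float64x2_t; // 128 bits
    typedef __attribute__((neon_vector_type(1))) double float64x1_t; //  64 bits
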
diff --git a/test/CodeGen/aarch64-neon-intrinsics.c b/test/CodeGen/aarch64-neon-intrinsics.c
new file mode 100644
index 0000000000..7ed8a39a02
--- /dev/null
+++ b/test/CodeGen/aarch64-neon-intrinsics.c
@@ -0,0 +1,3023 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
+// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s
+
+// Test new AArch64 intrinsics and types
+
+#include <arm_neon.h>
+
+int8x8_t test_vadd_s8(int8x8_t v1, int8x8_t v2) {
+ // CHECK: test_vadd_s8
+ return vadd_s8(v1, v2);
+ // CHECK: add {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int16x4_t test_vadd_s16(int16x4_t v1, int16x4_t v2) {
+ // CHECK: test_vadd_s16
+ return vadd_s16(v1, v2);
+ // CHECK: add {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+int32x2_t test_vadd_s32(int32x2_t v1, int32x2_t v2) {
+ // CHECK: test_vadd_s32
+ return vadd_s32(v1, v2);
+ // CHECK: add {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+int64x1_t test_vadd_s64(int64x1_t v1, int64x1_t v2) {
+ // CHECK: test_vadd_s64
+ return vadd_s64(v1, v2);
+ // CHECK: add {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+float32x2_t test_vadd_f32(float32x2_t v1, float32x2_t v2) {
+ // CHECK: test_vadd_f32
+ return vadd_f32(v1, v2);
+ // CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+uint8x8_t test_vadd_u8(uint8x8_t v1, uint8x8_t v2) {
+ // CHECK: test_vadd_u8
+ return vadd_u8(v1, v2);
+ // CHECK: add {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint16x4_t test_vadd_u16(uint16x4_t v1, uint16x4_t v2) {
+ // CHECK: test_vadd_u16
+ return vadd_u16(v1, v2);
+ // CHECK: add {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+uint32x2_t test_vadd_u32(uint32x2_t v1, uint32x2_t v2) {
+ // CHECK: test_vadd_u32
+ return vadd_u32(v1, v2);
+ // CHECK: add {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+uint64x1_t test_vadd_u64(uint64x1_t v1, uint64x1_t v2) {
+ // CHECK: test_vadd_u64
+ return vadd_u64(v1, v2);
+ // CHECK: add {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+int8x16_t test_vaddq_s8(int8x16_t v1, int8x16_t v2) {
+ // CHECK: test_vaddq_s8
+ return vaddq_s8(v1, v2);
+ // CHECK: add {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+int16x8_t test_vaddq_s16(int16x8_t v1, int16x8_t v2) {
+ // CHECK: test_vaddq_s16
+ return vaddq_s16(v1, v2);
+ // CHECK: add {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int32x4_t test_vaddq_s32(int32x4_t v1, int32x4_t v2) {
+ // CHECK: test_vaddq_s32
+ return vaddq_s32(v1, v2);
+ // CHECK: add {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+int64x2_t test_vaddq_s64(int64x2_t v1, int64x2_t v2) {
+ // CHECK: test_vaddq_s64
+ return vaddq_s64(v1, v2);
+ // CHECK: add {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+float32x4_t test_vaddq_f32(float32x4_t v1, float32x4_t v2) {
+ // CHECK: test_vaddq_f32
+ return vaddq_f32(v1, v2);
+ // CHECK: fadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+float64x2_t test_vaddq_f64(float64x2_t v1, float64x2_t v2) {
+ // CHECK: test_vaddq_f64
+ return vaddq_f64(v1, v2);
+ // CHECK: fadd {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+uint8x16_t test_vaddq_u8(uint8x16_t v1, uint8x16_t v2) {
+ // CHECK: test_vaddq_u8
+ return vaddq_u8(v1, v2);
+ // CHECK: add {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint16x8_t test_vaddq_u16(uint16x8_t v1, uint16x8_t v2) {
+ // CHECK: test_vaddq_u16
+ return vaddq_u16(v1, v2);
+ // CHECK: add {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+uint32x4_t test_vaddq_u32(uint32x4_t v1, uint32x4_t v2) {
+  // CHECK: test_vaddq_u32
+ return vaddq_u32(v1, v2);
+ // CHECK: add {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+uint64x2_t test_vaddq_u64(uint64x2_t v1, uint64x2_t v2) {
+ // CHECK: test_vaddq_u64
+ return vaddq_u64(v1, v2);
+ // CHECK: add {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+int8x8_t test_vsub_s8(int8x8_t v1, int8x8_t v2) {
+ // CHECK: test_vsub_s8
+ return vsub_s8(v1, v2);
+ // CHECK: sub {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+int16x4_t test_vsub_s16(int16x4_t v1, int16x4_t v2) {
+ // CHECK: test_vsub_s16
+ return vsub_s16(v1, v2);
+ // CHECK: sub {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+int32x2_t test_vsub_s32(int32x2_t v1, int32x2_t v2) {
+ // CHECK: test_vsub_s32
+ return vsub_s32(v1, v2);
+ // CHECK: sub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+int64x1_t test_vsub_s64(int64x1_t v1, int64x1_t v2) {
+ // CHECK: test_vsub_s64
+ return vsub_s64(v1, v2);
+ // CHECK: sub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+float32x2_t test_vsub_f32(float32x2_t v1, float32x2_t v2) {
+ // CHECK: test_vsub_f32
+ return vsub_f32(v1, v2);
+ // CHECK: fsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+uint8x8_t test_vsub_u8(uint8x8_t v1, uint8x8_t v2) {
+ // CHECK: test_vsub_u8
+ return vsub_u8(v1, v2);
+ // CHECK: sub {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint16x4_t test_vsub_u16(uint16x4_t v1, uint16x4_t v2) {
+ // CHECK: test_vsub_u16
+ return vsub_u16(v1, v2);
+ // CHECK: sub {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+uint32x2_t test_vsub_u32(uint32x2_t v1, uint32x2_t v2) {
+ // CHECK: test_vsub_u32
+ return vsub_u32(v1, v2);
+ // CHECK: sub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+uint64x1_t test_vsub_u64(uint64x1_t v1, uint64x1_t v2) {
+ // CHECK: test_vsub_u64
+ return vsub_u64(v1, v2);
+ // CHECK: sub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+int8x16_t test_vsubq_s8(int8x16_t v1, int8x16_t v2) {
+ // CHECK: test_vsubq_s8
+ return vsubq_s8(v1, v2);
+ // CHECK: sub {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+int16x8_t test_vsubq_s16(int16x8_t v1, int16x8_t v2) {
+ // CHECK: test_vsubq_s16
+ return vsubq_s16(v1, v2);
+ // CHECK: sub {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int32x4_t test_vsubq_s32(int32x4_t v1, int32x4_t v2) {
+ // CHECK: test_vsubq_s32
+ return vsubq_s32(v1, v2);
+ // CHECK: sub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+int64x2_t test_vsubq_s64(int64x2_t v1, int64x2_t v2) {
+ // CHECK: test_vsubq_s64
+ return vsubq_s64(v1, v2);
+ // CHECK: sub {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+float32x4_t test_vsubq_f32(float32x4_t v1, float32x4_t v2) {
+ // CHECK: test_vsubq_f32
+ return vsubq_f32(v1, v2);
+ // CHECK: fsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+float64x2_t test_vsubq_f64(float64x2_t v1, float64x2_t v2) {
+ // CHECK: test_vsubq_f64
+ return vsubq_f64(v1, v2);
+ // CHECK: fsub {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+uint8x16_t test_vsubq_u8(uint8x16_t v1, uint8x16_t v2) {
+ // CHECK: test_vsubq_u8
+ return vsubq_u8(v1, v2);
+ // CHECK: sub {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint16x8_t test_vsubq_u16(uint16x8_t v1, uint16x8_t v2) {
+ // CHECK: test_vsubq_u16
+ return vsubq_u16(v1, v2);
+ // CHECK: sub {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+uint32x4_t test_vsubq_u32(uint32x4_t v1, uint32x4_t v2) {
+  // CHECK: test_vsubq_u32
+ return vsubq_u32(v1, v2);
+ // CHECK: sub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+uint64x2_t test_vsubq_u64(uint64x2_t v1, uint64x2_t v2) {
+ // CHECK: test_vsubq_u64
+ return vsubq_u64(v1, v2);
+ // CHECK: sub {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+int8x8_t test_vmul_s8(int8x8_t v1, int8x8_t v2) {
+ // CHECK: test_vmul_s8
+ return vmul_s8(v1, v2);
+ // CHECK: mul {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int16x4_t test_vmul_s16(int16x4_t v1, int16x4_t v2) {
+ // CHECK: test_vmul_s16
+ return vmul_s16(v1, v2);
+ // CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+int32x2_t test_vmul_s32(int32x2_t v1, int32x2_t v2) {
+ // CHECK: test_vmul_s32
+ return vmul_s32(v1, v2);
+ // CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+float32x2_t test_vmul_f32(float32x2_t v1, float32x2_t v2) {
+ // CHECK: test_vmul_f32
+ return vmul_f32(v1, v2);
+ // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+uint8x8_t test_vmul_u8(uint8x8_t v1, uint8x8_t v2) {
+ // CHECK: test_vmul_u8
+ return vmul_u8(v1, v2);
+ // CHECK: mul {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint16x4_t test_vmul_u16(uint16x4_t v1, uint16x4_t v2) {
+ // CHECK: test_vmul_u16
+ return vmul_u16(v1, v2);
+ // CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+uint32x2_t test_vmul_u32(uint32x2_t v1, uint32x2_t v2) {
+ // CHECK: test_vmul_u32
+ return vmul_u32(v1, v2);
+ // CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+int8x16_t test_vmulq_s8(int8x16_t v1, int8x16_t v2) {
+ // CHECK: test_vmulq_s8
+ return vmulq_s8(v1, v2);
+ // CHECK: mul {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+int16x8_t test_vmulq_s16(int16x8_t v1, int16x8_t v2) {
+ // CHECK: test_vmulq_s16
+ return vmulq_s16(v1, v2);
+ // CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int32x4_t test_vmulq_s32(int32x4_t v1, int32x4_t v2) {
+ // CHECK: test_vmulq_s32
+ return vmulq_s32(v1, v2);
+ // CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+uint8x16_t test_vmulq_u8(uint8x16_t v1, uint8x16_t v2) {
+ // CHECK: test_vmulq_u8
+ return vmulq_u8(v1, v2);
+ // CHECK: mul {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint16x8_t test_vmulq_u16(uint16x8_t v1, uint16x8_t v2) {
+ // CHECK: test_vmulq_u16
+ return vmulq_u16(v1, v2);
+ // CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+uint32x4_t test_vmulq_u32(uint32x4_t v1, uint32x4_t v2) {
+ // CHECK: test_vmulq_u32
+ return vmulq_u32(v1, v2);
+ // CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+float32x4_t test_vmulq_f32(float32x4_t v1, float32x4_t v2) {
+ // CHECK: test_vmulq_f32
+ return vmulq_f32(v1, v2);
+ // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+float64x2_t test_vmulq_f64(float64x2_t v1, float64x2_t v2) {
+ // CHECK: test_vmulq_f64
+ return vmulq_f64(v1, v2);
+ // CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+poly8x8_t test_vmul_p8(poly8x8_t v1, poly8x8_t v2) {
+  // CHECK: test_vmul_p8
+  return vmul_p8(v1, v2);
+  // CHECK: pmul {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+poly8x16_t test_vmulq_p8(poly8x16_t v1, poly8x16_t v2) {
+  // CHECK: test_vmulq_p8
+  return vmulq_p8(v1, v2);
+  // CHECK: pmul {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+int8x8_t test_vmla_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
+ // CHECK: test_vmla_s8
+ return vmla_s8(v1, v2, v3);
+ // CHECK: mla {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int16x4_t test_vmla_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
+ // CHECK: test_vmla_s16
+ return vmla_s16(v1, v2, v3);
+ // CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+int32x2_t test_vmla_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
+ // CHECK: test_vmla_s32
+ return vmla_s32(v1, v2, v3);
+ // CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+float32x2_t test_vmla_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
+ // CHECK: test_vmla_f32
+ return vmla_f32(v1, v2, v3);
+ // CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+uint8x8_t test_vmla_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
+ // CHECK: test_vmla_u8
+ return vmla_u8(v1, v2, v3);
+ // CHECK: mla {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint16x4_t test_vmla_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
+ // CHECK: test_vmla_u16
+ return vmla_u16(v1, v2, v3);
+ // CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+uint32x2_t test_vmla_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
+ // CHECK: test_vmla_u32
+ return vmla_u32(v1, v2, v3);
+ // CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+int8x16_t test_vmlaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
+ // CHECK: test_vmlaq_s8
+ return vmlaq_s8(v1, v2, v3);
+ // CHECK: mla {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+int16x8_t test_vmlaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
+ // CHECK: test_vmlaq_s16
+ return vmlaq_s16(v1, v2, v3);
+ // CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int32x4_t test_vmlaq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
+ // CHECK: test_vmlaq_s32
+ return vmlaq_s32(v1, v2, v3);
+ // CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+float32x4_t test_vmlaq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
+ // CHECK: test_vmlaq_f32
+ return vmlaq_f32(v1, v2, v3);
+ // CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+uint8x16_t test_vmlaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
+ // CHECK: test_vmlaq_u8
+ return vmlaq_u8(v1, v2, v3);
+ // CHECK: mla {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint16x8_t test_vmlaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
+ // CHECK: test_vmlaq_u16
+ return vmlaq_u16(v1, v2, v3);
+ // CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+uint32x4_t test_vmlaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
+ // CHECK: test_vmlaq_u32
+ return vmlaq_u32(v1, v2, v3);
+ // CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+float64x2_t test_vmlaq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
+ // CHECK: test_vmlaq_f64
+ return vmlaq_f64(v1, v2, v3);
+ // CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+int8x8_t test_vmls_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
+ // CHECK: test_vmls_s8
+ return vmls_s8(v1, v2, v3);
+ // CHECK: mls {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int16x4_t test_vmls_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
+ // CHECK: test_vmls_s16
+ return vmls_s16(v1, v2, v3);
+ // CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+int32x2_t test_vmls_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
+ // CHECK: test_vmls_s32
+ return vmls_s32(v1, v2, v3);
+ // CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+float32x2_t test_vmls_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
+ // CHECK: test_vmls_f32
+ return vmls_f32(v1, v2, v3);
+ // CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+uint8x8_t test_vmls_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
+ // CHECK: test_vmls_u8
+ return vmls_u8(v1, v2, v3);
+ // CHECK: mls {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint16x4_t test_vmls_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
+ // CHECK: test_vmls_u16
+ return vmls_u16(v1, v2, v3);
+ // CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+uint32x2_t test_vmls_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
+ // CHECK: test_vmls_u32
+ return vmls_u32(v1, v2, v3);
+ // CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+int8x16_t test_vmlsq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
+ // CHECK: test_vmlsq_s8
+ return vmlsq_s8(v1, v2, v3);
+ // CHECK: mls {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+int16x8_t test_vmlsq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
+ // CHECK: test_vmlsq_s16
+ return vmlsq_s16(v1, v2, v3);
+ // CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int32x4_t test_vmlsq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
+ // CHECK: test_vmlsq_s32
+ return vmlsq_s32(v1, v2, v3);
+ // CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+float32x4_t test_vmlsq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
+ // CHECK: test_vmlsq_f32
+ return vmlsq_f32(v1, v2, v3);
+ // CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+uint8x16_t test_vmlsq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
+ // CHECK: test_vmlsq_u8
+ return vmlsq_u8(v1, v2, v3);
+ // CHECK: mls {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint16x8_t test_vmlsq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
+ // CHECK: test_vmlsq_u16
+ return vmlsq_u16(v1, v2, v3);
+ // CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+uint32x4_t test_vmlsq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
+ // CHECK: test_vmlsq_u32
+ return vmlsq_u32(v1, v2, v3);
+ // CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+float64x2_t test_vmlsq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
+ // CHECK: test_vmlsq_f64
+ return vmlsq_f64(v1, v2, v3);
+ // CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+float32x2_t test_vfma_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
+ // CHECK: test_vfma_f32
+ return vfma_f32(v1, v2, v3);
+ // CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+float32x4_t test_vfmaq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
+ // CHECK: test_vfmaq_f32
+ return vfmaq_f32(v1, v2, v3);
+ // CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+float64x2_t test_vfmaq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
+ // CHECK: test_vfmaq_f64
+ return vfmaq_f64(v1, v2, v3);
+ // CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+float32x2_t test_vfms_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
+ // CHECK: test_vfms_f32
+ return vfms_f32(v1, v2, v3);
+ // CHECK: fmls v0.2s, v1.2s, v2.2s
+}
+
+float32x4_t test_vfmsq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
+ // CHECK: test_vfmsq_f32
+ return vfmsq_f32(v1, v2, v3);
+ // CHECK: fmls v0.4s, v1.4s, v2.4s
+}
+
+float64x2_t test_vfmsq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
+  // CHECK: test_vfmsq_f64
+ return vfmsq_f64(v1, v2, v3);
+ // CHECK: fmls v0.2d, v1.2d, v2.2d
+}
+
+float64x2_t test_vdivq_f64(float64x2_t v1, float64x2_t v2) {
+ // CHECK: test_vdivq_f64
+ return vdivq_f64(v1, v2);
+ // CHECK: fdiv {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+float32x4_t test_vdivq_f32(float32x4_t v1, float32x4_t v2) {
+ // CHECK: test_vdivq_f32
+ return vdivq_f32(v1, v2);
+ // CHECK: fdiv {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+float32x2_t test_vdiv_f32(float32x2_t v1, float32x2_t v2) {
+ // CHECK: test_vdiv_f32
+ return vdiv_f32(v1, v2);
+ // CHECK: fdiv {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+uint64x1_t test_vaddd_u64(uint64x1_t v1, uint64x1_t v2) {
+ // CHECK: test_vaddd_u64
+ return vaddd_u64(v1, v2);
+ // CHECK: add {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+int64x1_t test_vaddd_s64(int64x1_t v1, int64x1_t v2) {
+ // CHECK: test_vaddd_s64
+ return vaddd_s64(v1, v2);
+ // CHECK: add {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+uint64x1_t test_vsubd_u64(uint64x1_t v1, uint64x1_t v2) {
+ // CHECK: test_vsubd_u64
+ return vsubd_u64(v1, v2);
+ // CHECK: sub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+int64x1_t test_vsubd_s64(int64x1_t v1, int64x1_t v2) {
+ // CHECK: test_vsubd_s64
+ return vsubd_s64(v1, v2);
+ // CHECK: sub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+int8x8_t test_vaba_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
+ // CHECK: test_vaba_s8
+ return vaba_s8(v1, v2, v3);
+ // CHECK: saba {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int16x4_t test_vaba_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
+ // CHECK: test_vaba_s16
+ return vaba_s16(v1, v2, v3);
+ // CHECK: saba {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+int32x2_t test_vaba_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
+ // CHECK: test_vaba_s32
+ return vaba_s32(v1, v2, v3);
+ // CHECK: saba {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+uint8x8_t test_vaba_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
+ // CHECK: test_vaba_u8
+ return vaba_u8(v1, v2, v3);
+ // CHECK: uaba {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint16x4_t test_vaba_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
+ // CHECK: test_vaba_u16
+ return vaba_u16(v1, v2, v3);
+ // CHECK: uaba {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+uint32x2_t test_vaba_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
+ // CHECK: test_vaba_u32
+ return vaba_u32(v1, v2, v3);
+ // CHECK: uaba {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+int8x16_t test_vabaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
+ // CHECK: test_vabaq_s8
+ return vabaq_s8(v1, v2, v3);
+ // CHECK: saba {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+int16x8_t test_vabaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
+ // CHECK: test_vabaq_s16
+ return vabaq_s16(v1, v2, v3);
+ // CHECK: saba {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int32x4_t test_vabaq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
+ // CHECK: test_vabaq_s32
+ return vabaq_s32(v1, v2, v3);
+ // CHECK: saba {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+uint8x16_t test_vabaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
+ // CHECK: test_vabaq_u8
+ return vabaq_u8(v1, v2, v3);
+ // CHECK: uaba {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint16x8_t test_vabaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
+ // CHECK: test_vabaq_u16
+ return vabaq_u16(v1, v2, v3);
+ // CHECK: uaba {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+uint32x4_t test_vabaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
+ // CHECK: test_vabaq_u32
+ return vabaq_u32(v1, v2, v3);
+ // CHECK: uaba {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+int8x8_t test_vabd_s8(int8x8_t v1, int8x8_t v2) {
+ // CHECK: test_vabd_s8
+ return vabd_s8(v1, v2);
+ // CHECK: sabd {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int16x4_t test_vabd_s16(int16x4_t v1, int16x4_t v2) {
+ // CHECK: test_vabd_s16
+ return vabd_s16(v1, v2);
+ // CHECK: sabd {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+int32x2_t test_vabd_s32(int32x2_t v1, int32x2_t v2) {
+ // CHECK: test_vabd_s32
+ return vabd_s32(v1, v2);
+ // CHECK: sabd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+uint8x8_t test_vabd_u8(uint8x8_t v1, uint8x8_t v2) {
+ // CHECK: test_vabd_u8
+ return vabd_u8(v1, v2);
+ // CHECK: uabd {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint16x4_t test_vabd_u16(uint16x4_t v1, uint16x4_t v2) {
+ // CHECK: test_vabd_u16
+ return vabd_u16(v1, v2);
+ // CHECK: uabd {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+uint32x2_t test_vabd_u32(uint32x2_t v1, uint32x2_t v2) {
+ // CHECK: test_vabd_u32
+ return vabd_u32(v1, v2);
+ // CHECK: uabd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+float32x2_t test_vabd_f32(float32x2_t v1, float32x2_t v2) {
+ // CHECK: test_vabd_f32
+ return vabd_f32(v1, v2);
+ // CHECK: fabd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+int8x16_t test_vabdq_s8(int8x16_t v1, int8x16_t v2) {
+ // CHECK: test_vabdq_s8
+ return vabdq_s8(v1, v2);
+ // CHECK: sabd {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+int16x8_t test_vabdq_s16(int16x8_t v1, int16x8_t v2) {
+ // CHECK: test_vabdq_s16
+ return vabdq_s16(v1, v2);
+ // CHECK: sabd {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int32x4_t test_vabdq_s32(int32x4_t v1, int32x4_t v2) {
+ // CHECK: test_vabdq_s32
+ return vabdq_s32(v1, v2);
+ // CHECK: sabd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+uint8x16_t test_vabdq_u8(uint8x16_t v1, uint8x16_t v2) {
+ // CHECK: test_vabdq_u8
+ return vabdq_u8(v1, v2);
+ // CHECK: uabd {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint16x8_t test_vabdq_u16(uint16x8_t v1, uint16x8_t v2) {
+ // CHECK: test_vabdq_u16
+ return vabdq_u16(v1, v2);
+ // CHECK: uabd {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+uint32x4_t test_vabdq_u32(uint32x4_t v1, uint32x4_t v2) {
+ // CHECK: test_vabdq_u32
+ return vabdq_u32(v1, v2);
+ // CHECK: uabd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+float32x4_t test_vabdq_f32(float32x4_t v1, float32x4_t v2) {
+ // CHECK: test_vabdq_f32
+ return vabdq_f32(v1, v2);
+ // CHECK: fabd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+float64x2_t test_vabdq_f64(float64x2_t v1, float64x2_t v2) {
+ // CHECK: test_vabdq_f64
+ return vabdq_f64(v1, v2);
+ // CHECK: fabd {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+int8x8_t test_vbsl_s8(uint8x8_t v1, int8x8_t v2, int8x8_t v3) {
+ // CHECK: test_vbsl_s8
+ return vbsl_s8(v1, v2, v3);
+ // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int16x4_t test_vbsl_s16(uint16x4_t v1, int16x4_t v2, int16x4_t v3) {
+ // CHECK: test_vbsl_s16
+ return vbsl_s16(v1, v2, v3);
+ // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int32x2_t test_vbsl_s32(uint32x2_t v1, int32x2_t v2, int32x2_t v3) {
+ // CHECK: test_vbsl_s32
+ return vbsl_s32(v1, v2, v3);
+ // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int64x1_t test_vbsl_s64(uint64x1_t v1, int64x1_t v2, int64x1_t v3) {
+ // CHECK: test_vbsl_s64
+ return vbsl_s64(v1, v2, v3);
+ // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint8x8_t test_vbsl_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
+ // CHECK: test_vbsl_u8
+ return vbsl_u8(v1, v2, v3);
+ // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint16x4_t test_vbsl_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
+ // CHECK: test_vbsl_u16
+ return vbsl_u16(v1, v2, v3);
+ // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint32x2_t test_vbsl_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
+ // CHECK: test_vbsl_u32
+ return vbsl_u32(v1, v2, v3);
+ // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint64x1_t test_vbsl_u64(uint64x1_t v1, uint64x1_t v2, uint64x1_t v3) {
+ // CHECK: test_vbsl_u64
+ return vbsl_u64(v1, v2, v3);
+ // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+float32x2_t test_vbsl_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
+ // CHECK: test_vbsl_f32
+ return vbsl_f32(v1, v2, v3);
+ // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+poly8x8_t test_vbsl_p8(uint8x8_t v1, poly8x8_t v2, poly8x8_t v3) {
+ // CHECK: test_vbsl_p8
+ return vbsl_p8(v1, v2, v3);
+ // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+poly16x4_t test_vbsl_p16(uint16x4_t v1, poly16x4_t v2, poly16x4_t v3) {
+ // CHECK: test_vbsl_p16
+ return vbsl_p16(v1, v2, v3);
+ // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int8x16_t test_vbslq_s8(uint8x16_t v1, int8x16_t v2, int8x16_t v3) {
+ // CHECK: test_vbslq_s8
+ return vbslq_s8(v1, v2, v3);
+ // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+int16x8_t test_vbslq_s16(uint16x8_t v1, int16x8_t v2, int16x8_t v3) {
+ // CHECK: test_vbslq_s16
+ return vbslq_s16(v1, v2, v3);
+ // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+int32x4_t test_vbslq_s32(uint32x4_t v1, int32x4_t v2, int32x4_t v3) {
+ // CHECK: test_vbslq_s32
+ return vbslq_s32(v1, v2, v3);
+ // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+int64x2_t test_vbslq_s64(uint64x2_t v1, int64x2_t v2, int64x2_t v3) {
+ // CHECK: test_vbslq_s64
+ return vbslq_s64(v1, v2, v3);
+ // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint8x16_t test_vbslq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
+ // CHECK: test_vbslq_u8
+ return vbslq_u8(v1, v2, v3);
+ // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint16x8_t test_vbslq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
+ // CHECK: test_vbslq_u16
+ return vbslq_u16(v1, v2, v3);
+ // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint32x4_t test_vbslq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
+  // CHECK: test_vbslq_u32
+  return vbslq_u32(v1, v2, v3);
+ // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint64x2_t test_vbslq_u64(uint64x2_t v1, uint64x2_t v2, uint64x2_t v3) {
+ // CHECK: test_vbslq_u64
+ return vbslq_u64(v1, v2, v3);
+ // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+float32x4_t test_vbslq_f32(uint32x4_t v1, float32x4_t v2, float32x4_t v3) {
+ // CHECK: test_vbslq_f32
+ return vbslq_f32(v1, v2, v3);
+ // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+poly8x16_t test_vbslq_p8(uint8x16_t v1, poly8x16_t v2, poly8x16_t v3) {
+ // CHECK: test_vbslq_p8
+ return vbslq_p8(v1, v2, v3);
+ // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+poly16x8_t test_vbslq_p16(uint16x8_t v1, poly16x8_t v2, poly16x8_t v3) {
+ // CHECK: test_vbslq_p16
+ return vbslq_p16(v1, v2, v3);
+ // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+float64x2_t test_vbslq_f64(uint64x2_t v1, float64x2_t v2, float64x2_t v3) {
+ // CHECK: test_vbslq_f64
+ return vbslq_f64(v1, v2, v3);
+ // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+float32x2_t test_vrecps_f32(float32x2_t v1, float32x2_t v2) {
+ // CHECK: test_vrecps_f32
+ return vrecps_f32(v1, v2);
+ // CHECK: frecps {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+float32x4_t test_vrecpsq_f32(float32x4_t v1, float32x4_t v2) {
+ // CHECK: test_vrecpsq_f32
+ return vrecpsq_f32(v1, v2);
+ // CHECK: frecps {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+float64x2_t test_vrecpsq_f64(float64x2_t v1, float64x2_t v2) {
+ // CHECK: test_vrecpsq_f64
+ return vrecpsq_f64(v1, v2);
+ // CHECK: frecps {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+float32x2_t test_vrsqrts_f32(float32x2_t v1, float32x2_t v2) {
+ // CHECK: test_vrsqrts_f32
+ return vrsqrts_f32(v1, v2);
+ // CHECK: frsqrts {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+float32x4_t test_vrsqrtsq_f32(float32x4_t v1, float32x4_t v2) {
+ // CHECK: test_vrsqrtsq_f32
+ return vrsqrtsq_f32(v1, v2);
+ // CHECK: frsqrts {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+float64x2_t test_vrsqrtsq_f64(float64x2_t v1, float64x2_t v2) {
+ // CHECK: test_vrsqrtsq_f64
+ return vrsqrtsq_f64(v1, v2);
+ // CHECK: frsqrts {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+uint32x2_t test_vcage_f32(float32x2_t v1, float32x2_t v2) {
+ // CHECK: test_vcage_f32
+ return vcage_f32(v1, v2);
+ // CHECK: facge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+uint32x4_t test_vcageq_f32(float32x4_t v1, float32x4_t v2) {
+ // CHECK: test_vcageq_f32
+ return vcageq_f32(v1, v2);
+ // CHECK: facge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+uint64x2_t test_vcageq_f64(float64x2_t v1, float64x2_t v2) {
+ // CHECK: test_vcageq_f64
+ return vcageq_f64(v1, v2);
+ // CHECK: facge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+uint32x2_t test_vcagt_f32(float32x2_t v1, float32x2_t v2) {
+ // CHECK: test_vcagt_f32
+ return vcagt_f32(v1, v2);
+ // CHECK: facgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+uint32x4_t test_vcagtq_f32(float32x4_t v1, float32x4_t v2) {
+ // CHECK: test_vcagtq_f32
+ return vcagtq_f32(v1, v2);
+ // CHECK: facgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+uint64x2_t test_vcagtq_f64(float64x2_t v1, float64x2_t v2) {
+ // CHECK: test_vcagtq_f64
+ return vcagtq_f64(v1, v2);
+ // CHECK: facgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
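+// Notes about vcale/vcalt:
+// The absolute less-than comparisons are implemented as facge/facgt with
+// reversed operands, so the checks below expect the source registers swapped.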
+uint32x2_t test_vcale_f32(float32x2_t v1, float32x2_t v2) {
+ // CHECK: test_vcale_f32
+ return vcale_f32(v1, v2);
+  // Using registers other than v0 and v1 is possible, but would be odd.
+ // CHECK: facge {{v[0-9]+}}.2s, v1.2s, v0.2s
+}
+
+uint32x4_t test_vcaleq_f32(float32x4_t v1, float32x4_t v2) {
+ // CHECK: test_vcaleq_f32
+ return vcaleq_f32(v1, v2);
+  // Using registers other than v0 and v1 is possible, but would be odd.
+ // CHECK: facge {{v[0-9]+}}.4s, v1.4s, v0.4s
+}
+
+uint64x2_t test_vcaleq_f64(float64x2_t v1, float64x2_t v2) {
+ // CHECK: test_vcaleq_f64
+ return vcaleq_f64(v1, v2);
+  // Using registers other than v0 and v1 is possible, but would be odd.
+ // CHECK: facge {{v[0-9]+}}.2d, v1.2d, v0.2d
+}
+
+uint32x2_t test_vcalt_f32(float32x2_t v1, float32x2_t v2) {
+ // CHECK: test_vcalt_f32
+ return vcalt_f32(v1, v2);
+  // Using registers other than v0 and v1 is possible, but would be odd.
+ // CHECK: facgt {{v[0-9]+}}.2s, v1.2s, v0.2s
+}
+
+uint32x4_t test_vcaltq_f32(float32x4_t v1, float32x4_t v2) {
+ // CHECK: test_vcaltq_f32
+ return vcaltq_f32(v1, v2);
+  // Using registers other than v0 and v1 is possible, but would be odd.
+ // CHECK: facgt {{v[0-9]+}}.4s, v1.4s, v0.4s
+}
+
+uint64x2_t test_vcaltq_f64(float64x2_t v1, float64x2_t v2) {
+ // CHECK: test_vcaltq_f64
+ return vcaltq_f64(v1, v2);
+  // Using registers other than v0 and v1 is possible, but would be odd.
+ // CHECK: facgt {{v[0-9]+}}.2d, v1.2d, v0.2d
+}
+
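+// Notes about vtst:
+// cmtst sets each result element to all ones if the bitwise AND of the
+// corresponding source elements is non-zero, and to zero otherwise.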
+uint8x8_t test_vtst_s8(int8x8_t v1, int8x8_t v2) {
+ // CHECK: test_vtst_s8
+ return vtst_s8(v1, v2);
+ // CHECK: cmtst {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint16x4_t test_vtst_s16(int16x4_t v1, int16x4_t v2) {
+ // CHECK: test_vtst_s16
+ return vtst_s16(v1, v2);
+ // CHECK: cmtst {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+uint32x2_t test_vtst_s32(int32x2_t v1, int32x2_t v2) {
+ // CHECK: test_vtst_s32
+ return vtst_s32(v1, v2);
+ // CHECK: cmtst {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+uint8x8_t test_vtst_u8(uint8x8_t v1, uint8x8_t v2) {
+ // CHECK: test_vtst_u8
+ return vtst_u8(v1, v2);
+ // CHECK: cmtst {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint16x4_t test_vtst_u16(uint16x4_t v1, uint16x4_t v2) {
+ // CHECK: test_vtst_u16
+ return vtst_u16(v1, v2);
+ // CHECK: cmtst {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+uint32x2_t test_vtst_u32(uint32x2_t v1, uint32x2_t v2) {
+ // CHECK: test_vtst_u32
+ return vtst_u32(v1, v2);
+ // CHECK: cmtst {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+uint8x16_t test_vtstq_s8(int8x16_t v1, int8x16_t v2) {
+ // CHECK: test_vtstq_s8
+ return vtstq_s8(v1, v2);
+ // CHECK: cmtst {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint16x8_t test_vtstq_s16(int16x8_t v1, int16x8_t v2) {
+ // CHECK: test_vtstq_s16
+ return vtstq_s16(v1, v2);
+ // CHECK: cmtst {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+uint32x4_t test_vtstq_s32(int32x4_t v1, int32x4_t v2) {
+ // CHECK: test_vtstq_s32
+ return vtstq_s32(v1, v2);
+ // CHECK: cmtst {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+uint8x16_t test_vtstq_u8(uint8x16_t v1, uint8x16_t v2) {
+ // CHECK: test_vtstq_u8
+ return vtstq_u8(v1, v2);
+ // CHECK: cmtst {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint16x8_t test_vtstq_u16(uint16x8_t v1, uint16x8_t v2) {
+ // CHECK: test_vtstq_u16
+ return vtstq_u16(v1, v2);
+ // CHECK: cmtst {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+uint32x4_t test_vtstq_u32(uint32x4_t v1, uint32x4_t v2) {
+ // CHECK: test_vtstq_u32
+ return vtstq_u32(v1, v2);
+ // CHECK: cmtst {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+uint64x2_t test_vtstq_s64(int64x2_t v1, int64x2_t v2) {
+ // CHECK: test_vtstq_s64
+ return vtstq_s64(v1, v2);
+ // CHECK: cmtst {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+uint64x2_t test_vtstq_u64(uint64x2_t v1, uint64x2_t v2) {
+ // CHECK: test_vtstq_u64
+ return vtstq_u64(v1, v2);
+ // CHECK: cmtst {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+uint8x8_t test_vtst_p8(poly8x8_t v1, poly8x8_t v2) {
+ // CHECK: test_vtst_p8
+ return vtst_p8(v1, v2);
+ // CHECK: cmtst {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint8x16_t test_vtstq_p8(poly8x16_t v1, poly8x16_t v2) {
+ // CHECK: test_vtstq_p8
+ return vtstq_p8(v1, v2);
+ // CHECK: cmtst {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+
+uint8x8_t test_vceq_s8(int8x8_t v1, int8x8_t v2) {
+ // CHECK: test_vceq_s8
+ return vceq_s8(v1, v2);
+ // CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint16x4_t test_vceq_s16(int16x4_t v1, int16x4_t v2) {
+ // CHECK: test_vceq_s16
+ return vceq_s16(v1, v2);
+ // CHECK: cmeq {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+uint32x2_t test_vceq_s32(int32x2_t v1, int32x2_t v2) {
+ // CHECK: test_vceq_s32
+ return vceq_s32(v1, v2);
+ // CHECK: cmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+uint32x2_t test_vceq_f32(float32x2_t v1, float32x2_t v2) {
+ // CHECK: test_vceq_f32
+ return vceq_f32(v1, v2);
+ // CHECK: fcmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+uint8x8_t test_vceq_u8(uint8x8_t v1, uint8x8_t v2) {
+ // CHECK: test_vceq_u8
+ return vceq_u8(v1, v2);
+ // CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint16x4_t test_vceq_u16(uint16x4_t v1, uint16x4_t v2) {
+ // CHECK: test_vceq_u16
+ return vceq_u16(v1, v2);
+ // CHECK: cmeq {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+uint32x2_t test_vceq_u32(uint32x2_t v1, uint32x2_t v2) {
+ // CHECK: test_vceq_u32
+ return vceq_u32(v1, v2);
+ // CHECK: cmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+uint8x8_t test_vceq_p8(poly8x8_t v1, poly8x8_t v2) {
+ // CHECK: test_vceq_p8
+ return vceq_p8(v1, v2);
+ // CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint8x16_t test_vceqq_s8(int8x16_t v1, int8x16_t v2) {
+ // CHECK: test_vceqq_s8
+ return vceqq_s8(v1, v2);
+ // CHECK: cmeq {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint16x8_t test_vceqq_s16(int16x8_t v1, int16x8_t v2) {
+ // CHECK: test_vceqq_s16
+ return vceqq_s16(v1, v2);
+ // CHECK: cmeq {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+uint32x4_t test_vceqq_s32(int32x4_t v1, int32x4_t v2) {
+ // CHECK: test_vceqq_s32
+ return vceqq_s32(v1, v2);
+ // CHECK: cmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+uint32x4_t test_vceqq_f32(float32x4_t v1, float32x4_t v2) {
+ // CHECK: test_vceqq_f32
+ return vceqq_f32(v1, v2);
+ // CHECK: fcmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+uint8x16_t test_vceqq_u8(uint8x16_t v1, uint8x16_t v2) {
+ // CHECK: test_vceqq_u8
+ return vceqq_u8(v1, v2);
+ // CHECK: cmeq {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint16x8_t test_vceqq_u16(uint16x8_t v1, uint16x8_t v2) {
+ // CHECK: test_vceqq_u16
+ return vceqq_u16(v1, v2);
+ // CHECK: cmeq {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+uint32x4_t test_vceqq_u32(uint32x4_t v1, uint32x4_t v2) {
+ // CHECK: test_vceqq_u32
+ return vceqq_u32(v1, v2);
+ // CHECK: cmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+uint8x16_t test_vceqq_p8(poly8x16_t v1, poly8x16_t v2) {
+ // CHECK: test_vceqq_p8
+ return vceqq_p8(v1, v2);
+ // CHECK: cmeq {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+
+uint64x2_t test_vceqq_s64(int64x2_t v1, int64x2_t v2) {
+ // CHECK: test_vceqq_s64
+ return vceqq_s64(v1, v2);
+ // CHECK: cmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+uint64x2_t test_vceqq_u64(uint64x2_t v1, uint64x2_t v2) {
+ // CHECK: test_vceqq_u64
+ return vceqq_u64(v1, v2);
+ // CHECK: cmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+uint64x2_t test_vceqq_f64(float64x2_t v1, float64x2_t v2) {
+ // CHECK: test_vceqq_f64
+ return vceqq_f64(v1, v2);
+ // CHECK: fcmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+uint8x8_t test_vcge_s8(int8x8_t v1, int8x8_t v2) {
+// CHECK: test_vcge_s8
+ return vcge_s8(v1, v2);
+// CHECK: cmge {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint16x4_t test_vcge_s16(int16x4_t v1, int16x4_t v2) {
+// CHECK: test_vcge_s16
+ return vcge_s16(v1, v2);
+// CHECK: cmge {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+uint32x2_t test_vcge_s32(int32x2_t v1, int32x2_t v2) {
+// CHECK: test_vcge_s32
+ return vcge_s32(v1, v2);
+// CHECK: cmge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+uint32x2_t test_vcge_f32(float32x2_t v1, float32x2_t v2) {
+// CHECK: test_vcge_f32
+ return vcge_f32(v1, v2);
+// CHECK: fcmge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+uint8x8_t test_vcge_u8(uint8x8_t v1, uint8x8_t v2) {
+// CHECK: test_vcge_u8
+ return vcge_u8(v1, v2);
+// CHECK: cmhs {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint16x4_t test_vcge_u16(uint16x4_t v1, uint16x4_t v2) {
+// CHECK: test_vcge_u16
+ return vcge_u16(v1, v2);
+// CHECK: cmhs {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+uint32x2_t test_vcge_u32(uint32x2_t v1, uint32x2_t v2) {
+// CHECK: test_vcge_u32
+ return vcge_u32(v1, v2);
+// CHECK: cmhs {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+uint8x16_t test_vcgeq_s8(int8x16_t v1, int8x16_t v2) {
+// CHECK: test_vcgeq_s8
+ return vcgeq_s8(v1, v2);
+// CHECK: cmge {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint16x8_t test_vcgeq_s16(int16x8_t v1, int16x8_t v2) {
+// CHECK: test_vcgeq_s16
+ return vcgeq_s16(v1, v2);
+// CHECK: cmge {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+uint32x4_t test_vcgeq_s32(int32x4_t v1, int32x4_t v2) {
+// CHECK: test_vcgeq_s32
+ return vcgeq_s32(v1, v2);
+// CHECK: cmge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+uint32x4_t test_vcgeq_f32(float32x4_t v1, float32x4_t v2) {
+// CHECK: test_vcgeq_f32
+ return vcgeq_f32(v1, v2);
+// CHECK: fcmge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+uint8x16_t test_vcgeq_u8(uint8x16_t v1, uint8x16_t v2) {
+// CHECK: test_vcgeq_u8
+ return vcgeq_u8(v1, v2);
+// CHECK: cmhs {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint16x8_t test_vcgeq_u16(uint16x8_t v1, uint16x8_t v2) {
+// CHECK: test_vcgeq_u16
+ return vcgeq_u16(v1, v2);
+// CHECK: cmhs {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+uint32x4_t test_vcgeq_u32(uint32x4_t v1, uint32x4_t v2) {
+// CHECK: test_vcgeq_u32
+ return vcgeq_u32(v1, v2);
+// CHECK: cmhs {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+uint64x2_t test_vcgeq_s64(int64x2_t v1, int64x2_t v2) {
+// CHECK: test_vcgeq_s64
+ return vcgeq_s64(v1, v2);
+// CHECK: cmge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+uint64x2_t test_vcgeq_u64(uint64x2_t v1, uint64x2_t v2) {
+// CHECK: test_vcgeq_u64
+ return vcgeq_u64(v1, v2);
+// CHECK: cmhs {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+uint64x2_t test_vcgeq_f64(float64x2_t v1, float64x2_t v2) {
+// CHECK: test_vcgeq_f64
+ return vcgeq_f64(v1, v2);
+// CHECK: fcmge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+// Notes about vcle:
+// The LE condition predicate is implemented as GE with reversed operands,
+// so the checks below expect the source registers swapped.
+// Using registers other than v0 and v1 is possible, but would be odd.
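+// For example, vcle_s8(v1, v2) yields the same result as vcge_s8(v2, v1).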
+uint8x8_t test_vcle_s8(int8x8_t v1, int8x8_t v2) {
+ // CHECK: test_vcle_s8
+ return vcle_s8(v1, v2);
+ // CHECK: cmge {{v[0-9]+}}.8b, v1.8b, v0.8b
+}
+
+uint16x4_t test_vcle_s16(int16x4_t v1, int16x4_t v2) {
+ // CHECK: test_vcle_s16
+ return vcle_s16(v1, v2);
+ // CHECK: cmge {{v[0-9]+}}.4h, v1.4h, v0.4h
+}
+
+uint32x2_t test_vcle_s32(int32x2_t v1, int32x2_t v2) {
+ // CHECK: test_vcle_s32
+ return vcle_s32(v1, v2);
+ // CHECK: cmge {{v[0-9]+}}.2s, v1.2s, v0.2s
+}
+
+uint32x2_t test_vcle_f32(float32x2_t v1, float32x2_t v2) {
+ // CHECK: test_vcle_f32
+ return vcle_f32(v1, v2);
+ // CHECK: fcmge {{v[0-9]+}}.2s, v1.2s, v0.2s
+}
+
+uint8x8_t test_vcle_u8(uint8x8_t v1, uint8x8_t v2) {
+ // CHECK: test_vcle_u8
+ return vcle_u8(v1, v2);
+ // CHECK: cmhs {{v[0-9]+}}.8b, v1.8b, v0.8b
+}
+
+uint16x4_t test_vcle_u16(uint16x4_t v1, uint16x4_t v2) {
+ // CHECK: test_vcle_u16
+ return vcle_u16(v1, v2);
+ // CHECK: cmhs {{v[0-9]+}}.4h, v1.4h, v0.4h
+}
+
+uint32x2_t test_vcle_u32(uint32x2_t v1, uint32x2_t v2) {
+ // CHECK: test_vcle_u32
+ return vcle_u32(v1, v2);
+ // CHECK: cmhs {{v[0-9]+}}.2s, v1.2s, v0.2s
+}
+
+uint8x16_t test_vcleq_s8(int8x16_t v1, int8x16_t v2) {
+ // CHECK: test_vcleq_s8
+ return vcleq_s8(v1, v2);
+ // CHECK: cmge {{v[0-9]+}}.16b, v1.16b, v0.16b
+}
+
+uint16x8_t test_vcleq_s16(int16x8_t v1, int16x8_t v2) {
+ // CHECK: test_vcleq_s16
+ return vcleq_s16(v1, v2);
+ // CHECK: cmge {{v[0-9]+}}.8h, v1.8h, v0.8h
+}
+
+uint32x4_t test_vcleq_s32(int32x4_t v1, int32x4_t v2) {
+ // CHECK: test_vcleq_s32
+ return vcleq_s32(v1, v2);
+ // CHECK: cmge {{v[0-9]+}}.4s, v1.4s, v0.4s
+}
+
+uint32x4_t test_vcleq_f32(float32x4_t v1, float32x4_t v2) {
+ // CHECK: test_vcleq_f32
+ return vcleq_f32(v1, v2);
+ // CHECK: fcmge {{v[0-9]+}}.4s, v1.4s, v0.4s
+}
+
+uint8x16_t test_vcleq_u8(uint8x16_t v1, uint8x16_t v2) {
+ // CHECK: test_vcleq_u8
+ return vcleq_u8(v1, v2);
+ // CHECK: cmhs {{v[0-9]+}}.16b, v1.16b, v0.16b
+}
+
+uint16x8_t test_vcleq_u16(uint16x8_t v1, uint16x8_t v2) {
+ // CHECK: test_vcleq_u16
+ return vcleq_u16(v1, v2);
+ // CHECK: cmhs {{v[0-9]+}}.8h, v1.8h, v0.8h
+}
+
+uint32x4_t test_vcleq_u32(uint32x4_t v1, uint32x4_t v2) {
+ // CHECK: test_vcleq_u32
+ return vcleq_u32(v1, v2);
+ // CHECK: cmhs {{v[0-9]+}}.4s, v1.4s, v0.4s
+}
+
+uint64x2_t test_vcleq_s64(int64x2_t v1, int64x2_t v2) {
+ // CHECK: test_vcleq_s64
+ return vcleq_s64(v1, v2);
+ // CHECK: cmge {{v[0-9]+}}.2d, v1.2d, v0.2d
+}
+
+uint64x2_t test_vcleq_u64(uint64x2_t v1, uint64x2_t v2) {
+ // CHECK: test_vcleq_u64
+ return vcleq_u64(v1, v2);
+ // CHECK: cmhs {{v[0-9]+}}.2d, v1.2d, v0.2d
+}
+
+uint64x2_t test_vcleq_f64(float64x2_t v1, float64x2_t v2) {
+ // CHECK: test_vcleq_f64
+ return vcleq_f64(v1, v2);
+ // CHECK: fcmge {{v[0-9]+}}.2d, v1.2d, v0.2d
+}
+
+
+uint8x8_t test_vcgt_s8(int8x8_t v1, int8x8_t v2) {
+ // CHECK: test_vcgt_s8
+ return vcgt_s8(v1, v2);
+ // CHECK: cmgt {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint16x4_t test_vcgt_s16(int16x4_t v1, int16x4_t v2) {
+ // CHECK: test_vcgt_s16
+ return vcgt_s16(v1, v2);
+ // CHECK: cmgt {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+uint32x2_t test_vcgt_s32(int32x2_t v1, int32x2_t v2) {
+ // CHECK: test_vcgt_s32
+ return vcgt_s32(v1, v2);
+ // CHECK: cmgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+uint32x2_t test_vcgt_f32(float32x2_t v1, float32x2_t v2) {
+ // CHECK: test_vcgt_f32
+ return vcgt_f32(v1, v2);
+ // CHECK: fcmgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+uint8x8_t test_vcgt_u8(uint8x8_t v1, uint8x8_t v2) {
+ // CHECK: test_vcgt_u8
+ return vcgt_u8(v1, v2);
+ // CHECK: cmhi {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint16x4_t test_vcgt_u16(uint16x4_t v1, uint16x4_t v2) {
+ // CHECK: test_vcgt_u16
+ return vcgt_u16(v1, v2);
+ // CHECK: cmhi {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+uint32x2_t test_vcgt_u32(uint32x2_t v1, uint32x2_t v2) {
+ // CHECK: test_vcgt_u32
+ return vcgt_u32(v1, v2);
+ // CHECK: cmhi {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+uint8x16_t test_vcgtq_s8(int8x16_t v1, int8x16_t v2) {
+ // CHECK: test_vcgtq_s8
+ return vcgtq_s8(v1, v2);
+ // CHECK: cmgt {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint16x8_t test_vcgtq_s16(int16x8_t v1, int16x8_t v2) {
+ // CHECK: test_vcgtq_s16
+ return vcgtq_s16(v1, v2);
+ // CHECK: cmgt {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+uint32x4_t test_vcgtq_s32(int32x4_t v1, int32x4_t v2) {
+ // CHECK: test_vcgtq_s32
+ return vcgtq_s32(v1, v2);
+ // CHECK: cmgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+uint32x4_t test_vcgtq_f32(float32x4_t v1, float32x4_t v2) {
+ // CHECK: test_vcgtq_f32
+ return vcgtq_f32(v1, v2);
+ // CHECK: fcmgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+uint8x16_t test_vcgtq_u8(uint8x16_t v1, uint8x16_t v2) {
+ // CHECK: test_vcgtq_u8
+ return vcgtq_u8(v1, v2);
+ // CHECK: cmhi {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint16x8_t test_vcgtq_u16(uint16x8_t v1, uint16x8_t v2) {
+ // CHECK: test_vcgtq_u16
+ return vcgtq_u16(v1, v2);
+ // CHECK: cmhi {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+uint32x4_t test_vcgtq_u32(uint32x4_t v1, uint32x4_t v2) {
+ // CHECK: test_vcgtq_u32
+ return vcgtq_u32(v1, v2);
+ // CHECK: cmhi {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+uint64x2_t test_vcgtq_s64(int64x2_t v1, int64x2_t v2) {
+ // CHECK: test_vcgtq_s64
+ return vcgtq_s64(v1, v2);
+ // CHECK: cmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+uint64x2_t test_vcgtq_u64(uint64x2_t v1, uint64x2_t v2) {
+ // CHECK: test_vcgtq_u64
+ return vcgtq_u64(v1, v2);
+ // CHECK: cmhi {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+uint64x2_t test_vcgtq_f64(float64x2_t v1, float64x2_t v2) {
+ // CHECK: test_vcgtq_f64
+ return vcgtq_f64(v1, v2);
+ // CHECK: fcmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+
+// Notes about vclt:
+// The LT condition predicate is implemented as GT with reversed operands,
+// so the checks below expect the source registers swapped.
+// Using registers other than v0 and v1 is possible, but would be odd.
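+// For example, vclt_s8(v1, v2) yields the same result as vcgt_s8(v2, v1).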
+
+uint8x8_t test_vclt_s8(int8x8_t v1, int8x8_t v2) {
+ // CHECK: test_vclt_s8
+ return vclt_s8(v1, v2);
+ // CHECK: cmgt {{v[0-9]+}}.8b, v1.8b, v0.8b
+}
+
+uint16x4_t test_vclt_s16(int16x4_t v1, int16x4_t v2) {
+ // CHECK: test_vclt_s16
+ return vclt_s16(v1, v2);
+ // CHECK: cmgt {{v[0-9]+}}.4h, v1.4h, v0.4h
+}
+
+uint32x2_t test_vclt_s32(int32x2_t v1, int32x2_t v2) {
+ // CHECK: test_vclt_s32
+ return vclt_s32(v1, v2);
+ // CHECK: cmgt {{v[0-9]+}}.2s, v1.2s, v0.2s
+}
+
+uint32x2_t test_vclt_f32(float32x2_t v1, float32x2_t v2) {
+ // CHECK: test_vclt_f32
+ return vclt_f32(v1, v2);
+ // CHECK: fcmgt {{v[0-9]+}}.2s, v1.2s, v0.2s
+}
+
+uint8x8_t test_vclt_u8(uint8x8_t v1, uint8x8_t v2) {
+ // CHECK: test_vclt_u8
+ return vclt_u8(v1, v2);
+ // CHECK: cmhi {{v[0-9]+}}.8b, v1.8b, v0.8b
+}
+
+uint16x4_t test_vclt_u16(uint16x4_t v1, uint16x4_t v2) {
+ // CHECK: test_vclt_u16
+ return vclt_u16(v1, v2);
+ // CHECK: cmhi {{v[0-9]+}}.4h, v1.4h, v0.4h
+}
+
+uint32x2_t test_vclt_u32(uint32x2_t v1, uint32x2_t v2) {
+ // CHECK: test_vclt_u32
+ return vclt_u32(v1, v2);
+ // CHECK: cmhi {{v[0-9]+}}.2s, v1.2s, v0.2s
+}
+
+uint8x16_t test_vcltq_s8(int8x16_t v1, int8x16_t v2) {
+ // CHECK: test_vcltq_s8
+ return vcltq_s8(v1, v2);
+ // CHECK: cmgt {{v[0-9]+}}.16b, v1.16b, v0.16b
+}
+
+uint16x8_t test_vcltq_s16(int16x8_t v1, int16x8_t v2) {
+ // CHECK: test_vcltq_s16
+ return vcltq_s16(v1, v2);
+ // CHECK: cmgt {{v[0-9]+}}.8h, v1.8h, v0.8h
+}
+
+uint32x4_t test_vcltq_s32(int32x4_t v1, int32x4_t v2) {
+ // CHECK: test_vcltq_s32
+ return vcltq_s32(v1, v2);
+ // CHECK: cmgt {{v[0-9]+}}.4s, v1.4s, v0.4s
+}
+
+uint32x4_t test_vcltq_f32(float32x4_t v1, float32x4_t v2) {
+ // CHECK: test_vcltq_f32
+ return vcltq_f32(v1, v2);
+ // CHECK: fcmgt {{v[0-9]+}}.4s, v1.4s, v0.4s
+}
+
+uint8x16_t test_vcltq_u8(uint8x16_t v1, uint8x16_t v2) {
+ // CHECK: test_vcltq_u8
+ return vcltq_u8(v1, v2);
+ // CHECK: cmhi {{v[0-9]+}}.16b, v1.16b, v0.16b
+}
+
+uint16x8_t test_vcltq_u16(uint16x8_t v1, uint16x8_t v2) {
+ // CHECK: test_vcltq_u16
+ return vcltq_u16(v1, v2);
+ // CHECK: cmhi {{v[0-9]+}}.8h, v1.8h, v0.8h
+}
+
+uint32x4_t test_vcltq_u32(uint32x4_t v1, uint32x4_t v2) {
+ // CHECK: test_vcltq_u32
+ return vcltq_u32(v1, v2);
+ // CHECK: cmhi {{v[0-9]+}}.4s, v1.4s, v0.4s
+}
+
+uint64x2_t test_vcltq_s64(int64x2_t v1, int64x2_t v2) {
+ // CHECK: test_vcltq_s64
+ return vcltq_s64(v1, v2);
+ // CHECK: cmgt {{v[0-9]+}}.2d, v1.2d, v0.2d
+}
+
+uint64x2_t test_vcltq_u64(uint64x2_t v1, uint64x2_t v2) {
+ // CHECK: test_vcltq_u64
+ return vcltq_u64(v1, v2);
+ // CHECK: cmhi {{v[0-9]+}}.2d, v1.2d, v0.2d
+}
+
+uint64x2_t test_vcltq_f64(float64x2_t v1, float64x2_t v2) {
+ // CHECK: test_vcltq_f64
+ return vcltq_f64(v1, v2);
+ // CHECK: fcmgt {{v[0-9]+}}.2d, v1.2d, v0.2d
+}
+
+
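+// Notes about vhadd/vhsub:
+// The halving operations compute the sum or difference in a wider
+// intermediate and shift right by one, so the intermediate cannot overflow.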
+int8x8_t test_vhadd_s8(int8x8_t v1, int8x8_t v2) {
+// CHECK: test_vhadd_s8
+ return vhadd_s8(v1, v2);
+ // CHECK: shadd {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int16x4_t test_vhadd_s16(int16x4_t v1, int16x4_t v2) {
+// CHECK: test_vhadd_s16
+ return vhadd_s16(v1, v2);
+ // CHECK: shadd {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+int32x2_t test_vhadd_s32(int32x2_t v1, int32x2_t v2) {
+// CHECK: test_vhadd_s32
+ return vhadd_s32(v1, v2);
+ // CHECK: shadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+uint8x8_t test_vhadd_u8(uint8x8_t v1, uint8x8_t v2) {
+// CHECK: test_vhadd_u8
+ return vhadd_u8(v1, v2);
+ // CHECK: uhadd {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint16x4_t test_vhadd_u16(uint16x4_t v1, uint16x4_t v2) {
+// CHECK: test_vhadd_u16
+ return vhadd_u16(v1, v2);
+ // CHECK: uhadd {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+uint32x2_t test_vhadd_u32(uint32x2_t v1, uint32x2_t v2) {
+// CHECK: test_vhadd_u32
+ return vhadd_u32(v1, v2);
+ // CHECK: uhadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+int8x16_t test_vhaddq_s8(int8x16_t v1, int8x16_t v2) {
+// CHECK: test_vhaddq_s8
+ return vhaddq_s8(v1, v2);
+ // CHECK: shadd {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+int16x8_t test_vhaddq_s16(int16x8_t v1, int16x8_t v2) {
+// CHECK: test_vhaddq_s16
+ return vhaddq_s16(v1, v2);
+ // CHECK: shadd {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int32x4_t test_vhaddq_s32(int32x4_t v1, int32x4_t v2) {
+// CHECK: test_vhaddq_s32
+ return vhaddq_s32(v1, v2);
+ // CHECK: shadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+uint8x16_t test_vhaddq_u8(uint8x16_t v1, uint8x16_t v2) {
+// CHECK: test_vhaddq_u8
+ return vhaddq_u8(v1, v2);
+ // CHECK: uhadd {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint16x8_t test_vhaddq_u16(uint16x8_t v1, uint16x8_t v2) {
+// CHECK: test_vhaddq_u16
+ return vhaddq_u16(v1, v2);
+ // CHECK: uhadd {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+uint32x4_t test_vhaddq_u32(uint32x4_t v1, uint32x4_t v2) {
+// CHECK: test_vhaddq_u32
+ return vhaddq_u32(v1, v2);
+ // CHECK: uhadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+
+int8x8_t test_vhsub_s8(int8x8_t v1, int8x8_t v2) {
+// CHECK: test_vhsub_s8
+ return vhsub_s8(v1, v2);
+ // CHECK: shsub {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int16x4_t test_vhsub_s16(int16x4_t v1, int16x4_t v2) {
+// CHECK: test_vhsub_s16
+ return vhsub_s16(v1, v2);
+ // CHECK: shsub {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+int32x2_t test_vhsub_s32(int32x2_t v1, int32x2_t v2) {
+// CHECK: test_vhsub_s32
+ return vhsub_s32(v1, v2);
+ // CHECK: shsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+uint8x8_t test_vhsub_u8(uint8x8_t v1, uint8x8_t v2) {
+// CHECK: test_vhsub_u8
+ return vhsub_u8(v1, v2);
+ // CHECK: uhsub {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint16x4_t test_vhsub_u16(uint16x4_t v1, uint16x4_t v2) {
+// CHECK: test_vhsub_u16
+ return vhsub_u16(v1, v2);
+ // CHECK: uhsub {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+uint32x2_t test_vhsub_u32(uint32x2_t v1, uint32x2_t v2) {
+// CHECK: test_vhsub_u32
+ return vhsub_u32(v1, v2);
+ // CHECK: uhsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+int8x16_t test_vhsubq_s8(int8x16_t v1, int8x16_t v2) {
+// CHECK: test_vhsubq_s8
+ return vhsubq_s8(v1, v2);
+ // CHECK: shsub {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+int16x8_t test_vhsubq_s16(int16x8_t v1, int16x8_t v2) {
+// CHECK: test_vhsubq_s16
+ return vhsubq_s16(v1, v2);
+ // CHECK: shsub {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int32x4_t test_vhsubq_s32(int32x4_t v1, int32x4_t v2) {
+// CHECK: test_vhsubq_s32
+ return vhsubq_s32(v1, v2);
+ // CHECK: shsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+uint8x16_t test_vhsubq_u8(uint8x16_t v1, uint8x16_t v2) {
+// CHECK: test_vhsubq_u8
+ return vhsubq_u8(v1, v2);
+ // CHECK: uhsub {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint16x8_t test_vhsubq_u16(uint16x8_t v1, uint16x8_t v2) {
+// CHECK: test_vhsubq_u16
+ return vhsubq_u16(v1, v2);
+ // CHECK: uhsub {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+uint32x4_t test_vhsubq_u32(uint32x4_t v1, uint32x4_t v2) {
+// CHECK: test_vhsubq_u32
+ return vhsubq_u32(v1, v2);
+ // CHECK: uhsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+
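+// Notes about vrhadd:
+// The rounding variants add one to the intermediate sum before the halving
+// shift, rounding the result to nearest instead of truncating.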
+int8x8_t test_vrhadd_s8(int8x8_t v1, int8x8_t v2) {
+// CHECK: test_vrhadd_s8
+ return vrhadd_s8(v1, v2);
+// CHECK: srhadd {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int16x4_t test_vrhadd_s16(int16x4_t v1, int16x4_t v2) {
+// CHECK: test_vrhadd_s16
+ return vrhadd_s16(v1, v2);
+// CHECK: srhadd {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+int32x2_t test_vrhadd_s32(int32x2_t v1, int32x2_t v2) {
+// CHECK: test_vrhadd_s32
+ return vrhadd_s32(v1, v2);
+// CHECK: srhadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+uint8x8_t test_vrhadd_u8(uint8x8_t v1, uint8x8_t v2) {
+// CHECK: test_vrhadd_u8
+ return vrhadd_u8(v1, v2);
+// CHECK: urhadd {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint16x4_t test_vrhadd_u16(uint16x4_t v1, uint16x4_t v2) {
+// CHECK: test_vrhadd_u16
+ return vrhadd_u16(v1, v2);
+// CHECK: urhadd {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+uint32x2_t test_vrhadd_u32(uint32x2_t v1, uint32x2_t v2) {
+// CHECK: test_vrhadd_u32
+ return vrhadd_u32(v1, v2);
+// CHECK: urhadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+int8x16_t test_vrhaddq_s8(int8x16_t v1, int8x16_t v2) {
+// CHECK: test_vrhaddq_s8
+ return vrhaddq_s8(v1, v2);
+// CHECK: srhadd {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+int16x8_t test_vrhaddq_s16(int16x8_t v1, int16x8_t v2) {
+// CHECK: test_vrhaddq_s16
+ return vrhaddq_s16(v1, v2);
+// CHECK: srhadd {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int32x4_t test_vrhaddq_s32(int32x4_t v1, int32x4_t v2) {
+// CHECK: test_vrhaddq_s32
+ return vrhaddq_s32(v1, v2);
+// CHECK: srhadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+uint8x16_t test_vrhaddq_u8(uint8x16_t v1, uint8x16_t v2) {
+// CHECK: test_vrhaddq_u8
+ return vrhaddq_u8(v1, v2);
+// CHECK: urhadd {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint16x8_t test_vrhaddq_u16(uint16x8_t v1, uint16x8_t v2) {
+// CHECK: test_vrhaddq_u16
+ return vrhaddq_u16(v1, v2);
+// CHECK: urhadd {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+uint32x4_t test_vrhaddq_u32(uint32x4_t v1, uint32x4_t v2) {
+// CHECK: test_vrhaddq_u32
+ return vrhaddq_u32(v1, v2);
+// CHECK: urhadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+int8x8_t test_vqadd_s8(int8x8_t a, int8x8_t b) {
+// CHECK: test_vqadd_s8
+ return vqadd_s8(a, b);
+ // CHECK: sqadd {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int16x4_t test_vqadd_s16(int16x4_t a, int16x4_t b) {
+// CHECK: test_vqadd_s16
+ return vqadd_s16(a, b);
+ // CHECK: sqadd {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+int32x2_t test_vqadd_s32(int32x2_t a, int32x2_t b) {
+// CHECK: test_vqadd_s32
+ return vqadd_s32(a, b);
+ // CHECK: sqadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
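+// The 64x1 variants operate on a single 64-bit lane, so they are checked
+// against the scalar D-register form of the instruction.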
+int64x1_t test_vqadd_s64(int64x1_t a, int64x1_t b) {
+// CHECK: test_vqadd_s64
+ return vqadd_s64(a, b);
+// CHECK: sqadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+uint8x8_t test_vqadd_u8(uint8x8_t a, uint8x8_t b) {
+// CHECK: test_vqadd_u8
+ return vqadd_u8(a, b);
+ // CHECK: uqadd {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint16x4_t test_vqadd_u16(uint16x4_t a, uint16x4_t b) {
+// CHECK: test_vqadd_u16
+ return vqadd_u16(a, b);
+ // CHECK: uqadd {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+uint32x2_t test_vqadd_u32(uint32x2_t a, uint32x2_t b) {
+// CHECK: test_vqadd_u32
+ return vqadd_u32(a, b);
+ // CHECK: uqadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+uint64x1_t test_vqadd_u64(uint64x1_t a, uint64x1_t b) {
+// CHECK: test_vqadd_u64
+ return vqadd_u64(a, b);
+// CHECK: uqadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+int8x16_t test_vqaddq_s8(int8x16_t a, int8x16_t b) {
+// CHECK: test_vqaddq_s8
+ return vqaddq_s8(a, b);
+ // CHECK: sqadd {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+int16x8_t test_vqaddq_s16(int16x8_t a, int16x8_t b) {
+// CHECK: test_vqaddq_s16
+ return vqaddq_s16(a, b);
+ // CHECK: sqadd {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int32x4_t test_vqaddq_s32(int32x4_t a, int32x4_t b) {
+// CHECK: test_vqaddq_s32
+ return vqaddq_s32(a, b);
+ // CHECK: sqadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+int64x2_t test_vqaddq_s64(int64x2_t a, int64x2_t b) {
+// CHECK: test_vqaddq_s64
+ return vqaddq_s64(a, b);
+// CHECK: sqadd {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+uint8x16_t test_vqaddq_u8(uint8x16_t a, uint8x16_t b) {
+// CHECK: test_vqaddq_u8
+ return vqaddq_u8(a, b);
+ // CHECK: uqadd {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint16x8_t test_vqaddq_u16(uint16x8_t a, uint16x8_t b) {
+// CHECK: test_vqaddq_u16
+ return vqaddq_u16(a, b);
+ // CHECK: uqadd {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+uint32x4_t test_vqaddq_u32(uint32x4_t a, uint32x4_t b) {
+// CHECK: test_vqaddq_u32
+ return vqaddq_u32(a, b);
+ // CHECK: uqadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+uint64x2_t test_vqaddq_u64(uint64x2_t a, uint64x2_t b) {
+// CHECK: test_vqaddq_u64
+ return vqaddq_u64(a, b);
+// CHECK: uqadd {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+
+int8x8_t test_vqsub_s8(int8x8_t a, int8x8_t b) {
+// CHECK: test_vqsub_s8
+ return vqsub_s8(a, b);
+ // CHECK: sqsub {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int16x4_t test_vqsub_s16(int16x4_t a, int16x4_t b) {
+// CHECK: test_vqsub_s16
+ return vqsub_s16(a, b);
+ // CHECK: sqsub {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+int32x2_t test_vqsub_s32(int32x2_t a, int32x2_t b) {
+// CHECK: test_vqsub_s32
+ return vqsub_s32(a, b);
+ // CHECK: sqsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+int64x1_t test_vqsub_s64(int64x1_t a, int64x1_t b) {
+// CHECK: test_vqsub_s64
+ return vqsub_s64(a, b);
+// CHECK: sqsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+uint8x8_t test_vqsub_u8(uint8x8_t a, uint8x8_t b) {
+// CHECK: test_vqsub_u8
+ return vqsub_u8(a, b);
+ // CHECK: uqsub {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint16x4_t test_vqsub_u16(uint16x4_t a, uint16x4_t b) {
+// CHECK: test_vqsub_u16
+ return vqsub_u16(a, b);
+ // CHECK: uqsub {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+uint32x2_t test_vqsub_u32(uint32x2_t a, uint32x2_t b) {
+// CHECK: test_vqsub_u32
+ return vqsub_u32(a, b);
+ // CHECK: uqsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+uint64x1_t test_vqsub_u64(uint64x1_t a, uint64x1_t b) {
+// CHECK: test_vqsub_u64
+ return vqsub_u64(a, b);
+// CHECK: uqsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+int8x16_t test_vqsubq_s8(int8x16_t a, int8x16_t b) {
+// CHECK: test_vqsubq_s8
+ return vqsubq_s8(a, b);
+ // CHECK: sqsub {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+int16x8_t test_vqsubq_s16(int16x8_t a, int16x8_t b) {
+// CHECK: test_vqsubq_s16
+ return vqsubq_s16(a, b);
+ // CHECK: sqsub {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int32x4_t test_vqsubq_s32(int32x4_t a, int32x4_t b) {
+// CHECK: test_vqsubq_s32
+ return vqsubq_s32(a, b);
+ // CHECK: sqsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+int64x2_t test_vqsubq_s64(int64x2_t a, int64x2_t b) {
+// CHECK: test_vqsubq_s64
+ return vqsubq_s64(a, b);
+// CHECK: sqsub {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+uint8x16_t test_vqsubq_u8(uint8x16_t a, uint8x16_t b) {
+// CHECK: test_vqsubq_u8
+ return vqsubq_u8(a, b);
+ // CHECK: uqsub {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint16x8_t test_vqsubq_u16(uint16x8_t a, uint16x8_t b) {
+// CHECK: test_vqsubq_u16
+ return vqsubq_u16(a, b);
+ // CHECK: uqsub {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+uint32x4_t test_vqsubq_u32(uint32x4_t a, uint32x4_t b) {
+// CHECK: test_vqsubq_u32
+ return vqsubq_u32(a, b);
+ // CHECK: uqsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+uint64x2_t test_vqsubq_u64(uint64x2_t a, uint64x2_t b) {
+// CHECK: test_vqsubq_u64
+ return vqsubq_u64(a, b);
+ // CHECK: uqsub {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+
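+// Notes about vshl and the other variable shifts:
+// The per-element shift count is signed, so even the unsigned variants take
+// a signed vector as the second operand; negative elements shift right.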
+int8x8_t test_vshl_s8(int8x8_t a, int8x8_t b) {
+// CHECK: test_vshl_s8
+ return vshl_s8(a, b);
+// CHECK: sshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int16x4_t test_vshl_s16(int16x4_t a, int16x4_t b) {
+// CHECK: test_vshl_s16
+ return vshl_s16(a, b);
+// CHECK: sshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+int32x2_t test_vshl_s32(int32x2_t a, int32x2_t b) {
+// CHECK: test_vshl_s32
+ return vshl_s32(a, b);
+// CHECK: sshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+int64x1_t test_vshl_s64(int64x1_t a, int64x1_t b) {
+// CHECK: test_vshl_s64
+ return vshl_s64(a, b);
+// CHECK: sshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+uint8x8_t test_vshl_u8(uint8x8_t a, int8x8_t b) {
+// CHECK: test_vshl_u8
+ return vshl_u8(a, b);
+// CHECK: ushl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint16x4_t test_vshl_u16(uint16x4_t a, int16x4_t b) {
+// CHECK: test_vshl_u16
+ return vshl_u16(a, b);
+// CHECK: ushl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+uint32x2_t test_vshl_u32(uint32x2_t a, int32x2_t b) {
+// CHECK: test_vshl_u32
+ return vshl_u32(a, b);
+// CHECK: ushl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+uint64x1_t test_vshl_u64(uint64x1_t a, int64x1_t b) {
+// CHECK: test_vshl_u64
+ return vshl_u64(a, b);
+// CHECK: ushl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+int8x16_t test_vshlq_s8(int8x16_t a, int8x16_t b) {
+// CHECK: test_vshlq_s8
+ return vshlq_s8(a, b);
+// CHECK: sshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+int16x8_t test_vshlq_s16(int16x8_t a, int16x8_t b) {
+// CHECK: test_vshlq_s16
+ return vshlq_s16(a, b);
+// CHECK: sshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int32x4_t test_vshlq_s32(int32x4_t a, int32x4_t b) {
+// CHECK: test_vshlq_s32
+ return vshlq_s32(a, b);
+// CHECK: sshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+int64x2_t test_vshlq_s64(int64x2_t a, int64x2_t b) {
+// CHECK: test_vshlq_s64
+ return vshlq_s64(a, b);
+// CHECK: sshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+uint8x16_t test_vshlq_u8(uint8x16_t a, int8x16_t b) {
+// CHECK: test_vshlq_u8
+ return vshlq_u8(a, b);
+// CHECK: ushl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint16x8_t test_vshlq_u16(uint16x8_t a, int16x8_t b) {
+// CHECK: test_vshlq_u16
+ return vshlq_u16(a, b);
+// CHECK: ushl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+uint32x4_t test_vshlq_u32(uint32x4_t a, int32x4_t b) {
+// CHECK: test_vshlq_u32
+ return vshlq_u32(a, b);
+// CHECK: ushl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+uint64x2_t test_vshlq_u64(uint64x2_t a, int64x2_t b) {
+// CHECK: test_vshlq_u64
+ return vshlq_u64(a, b);
+// CHECK: ushl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+
+int8x8_t test_vqshl_s8(int8x8_t a, int8x8_t b) {
+// CHECK: test_vqshl_s8
+ return vqshl_s8(a, b);
+// CHECK: sqshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int16x4_t test_vqshl_s16(int16x4_t a, int16x4_t b) {
+// CHECK: test_vqshl_s16
+ return vqshl_s16(a, b);
+// CHECK: sqshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+int32x2_t test_vqshl_s32(int32x2_t a, int32x2_t b) {
+// CHECK: test_vqshl_s32
+ return vqshl_s32(a, b);
+// CHECK: sqshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+int64x1_t test_vqshl_s64(int64x1_t a, int64x1_t b) {
+// CHECK: test_vqshl_s64
+ return vqshl_s64(a, b);
+// CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+uint8x8_t test_vqshl_u8(uint8x8_t a, int8x8_t b) {
+// CHECK: test_vqshl_u8
+ return vqshl_u8(a, b);
+// CHECK: uqshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint16x4_t test_vqshl_u16(uint16x4_t a, int16x4_t b) {
+// CHECK: test_vqshl_u16
+ return vqshl_u16(a, b);
+// CHECK: uqshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+uint32x2_t test_vqshl_u32(uint32x2_t a, int32x2_t b) {
+// CHECK: test_vqshl_u32
+ return vqshl_u32(a, b);
+// CHECK: uqshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+uint64x1_t test_vqshl_u64(uint64x1_t a, int64x1_t b) {
+// CHECK: test_vqshl_u64
+ return vqshl_u64(a, b);
+// CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+int8x16_t test_vqshlq_s8(int8x16_t a, int8x16_t b) {
+// CHECK: test_vqshlq_s8
+ return vqshlq_s8(a, b);
+// CHECK: sqshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+int16x8_t test_vqshlq_s16(int16x8_t a, int16x8_t b) {
+// CHECK: test_vqshlq_s16
+ return vqshlq_s16(a, b);
+// CHECK: sqshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int32x4_t test_vqshlq_s32(int32x4_t a, int32x4_t b) {
+// CHECK: test_vqshlq_s32
+ return vqshlq_s32(a, b);
+// CHECK: sqshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+int64x2_t test_vqshlq_s64(int64x2_t a, int64x2_t b) {
+// CHECK: test_vqshlq_s64
+ return vqshlq_s64(a, b);
+// CHECK: sqshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+uint8x16_t test_vqshlq_u8(uint8x16_t a, int8x16_t b) {
+// CHECK: test_vqshlq_u8
+ return vqshlq_u8(a, b);
+// CHECK: uqshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint16x8_t test_vqshlq_u16(uint16x8_t a, int16x8_t b) {
+// CHECK: test_vqshlq_u16
+ return vqshlq_u16(a, b);
+// CHECK: uqshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+uint32x4_t test_vqshlq_u32(uint32x4_t a, int32x4_t b) {
+// CHECK: test_vqshlq_u32
+ return vqshlq_u32(a, b);
+// CHECK: uqshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+uint64x2_t test_vqshlq_u64(uint64x2_t a, int64x2_t b) {
+// CHECK: test_vqshlq_u64
+ return vqshlq_u64(a, b);
+// CHECK: uqshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+int8x8_t test_vrshl_s8(int8x8_t a, int8x8_t b) {
+// CHECK: test_vrshl_s8
+ return vrshl_s8(a, b);
+// CHECK: srshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int16x4_t test_vrshl_s16(int16x4_t a, int16x4_t b) {
+// CHECK: test_vrshl_s16
+ return vrshl_s16(a, b);
+// CHECK: srshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+int32x2_t test_vrshl_s32(int32x2_t a, int32x2_t b) {
+// CHECK: test_vrshl_s32
+ return vrshl_s32(a, b);
+// CHECK: srshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+int64x1_t test_vrshl_s64(int64x1_t a, int64x1_t b) {
+// CHECK: test_vrshl_s64
+ return vrshl_s64(a, b);
+// CHECK: srshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+uint8x8_t test_vrshl_u8(uint8x8_t a, int8x8_t b) {
+// CHECK: test_vrshl_u8
+ return vrshl_u8(a, b);
+// CHECK: urshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint16x4_t test_vrshl_u16(uint16x4_t a, int16x4_t b) {
+// CHECK: test_vrshl_u16
+ return vrshl_u16(a, b);
+// CHECK: urshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+uint32x2_t test_vrshl_u32(uint32x2_t a, int32x2_t b) {
+// CHECK: test_vrshl_u32
+ return vrshl_u32(a, b);
+// CHECK: urshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+uint64x1_t test_vrshl_u64(uint64x1_t a, int64x1_t b) {
+// CHECK: test_vrshl_u64
+ return vrshl_u64(a, b);
+// CHECK: urshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+int8x16_t test_vrshlq_s8(int8x16_t a, int8x16_t b) {
+// CHECK: test_vrshlq_s8
+ return vrshlq_s8(a, b);
+// CHECK: srshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+int16x8_t test_vrshlq_s16(int16x8_t a, int16x8_t b) {
+// CHECK: test_vrshlq_s16
+ return vrshlq_s16(a, b);
+// CHECK: srshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int32x4_t test_vrshlq_s32(int32x4_t a, int32x4_t b) {
+// CHECK: test_vrshlq_s32
+ return vrshlq_s32(a, b);
+// CHECK: srshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+int64x2_t test_vrshlq_s64(int64x2_t a, int64x2_t b) {
+// CHECK: test_vrshlq_s64
+ return vrshlq_s64(a, b);
+// CHECK: srshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+uint8x16_t test_vrshlq_u8(uint8x16_t a, int8x16_t b) {
+// CHECK: test_vrshlq_u8
+ return vrshlq_u8(a, b);
+// CHECK: urshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint16x8_t test_vrshlq_u16(uint16x8_t a, int16x8_t b) {
+// CHECK: test_vrshlq_u16
+ return vrshlq_u16(a, b);
+// CHECK: urshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+uint32x4_t test_vrshlq_u32(uint32x4_t a, int32x4_t b) {
+// CHECK: test_vrshlq_u32
+ return vrshlq_u32(a, b);
+// CHECK: urshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+uint64x2_t test_vrshlq_u64(uint64x2_t a, int64x2_t b) {
+// CHECK: test_vrshlq_u64
+ return vrshlq_u64(a, b);
+// CHECK: urshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+
+int8x8_t test_vqrshl_s8(int8x8_t a, int8x8_t b) {
+// CHECK: test_vqrshl_s8
+ return vqrshl_s8(a, b);
+// CHECK: sqrshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int16x4_t test_vqrshl_s16(int16x4_t a, int16x4_t b) {
+// CHECK: test_vqrshl_s16
+ return vqrshl_s16(a, b);
+// CHECK: sqrshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+int32x2_t test_vqrshl_s32(int32x2_t a, int32x2_t b) {
+// CHECK: test_vqrshl_s32
+ return vqrshl_s32(a, b);
+// CHECK: sqrshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+int64x1_t test_vqrshl_s64(int64x1_t a, int64x1_t b) {
+// CHECK: test_vqrshl_s64
+ return vqrshl_s64(a, b);
+// CHECK: sqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+uint8x8_t test_vqrshl_u8(uint8x8_t a, int8x8_t b) {
+// CHECK: test_vqrshl_u8
+ return vqrshl_u8(a, b);
+// CHECK: uqrshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint16x4_t test_vqrshl_u16(uint16x4_t a, int16x4_t b) {
+// CHECK: test_vqrshl_u16
+ return vqrshl_u16(a, b);
+// CHECK: uqrshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+uint32x2_t test_vqrshl_u32(uint32x2_t a, int32x2_t b) {
+// CHECK: test_vqrshl_u32
+ return vqrshl_u32(a, b);
+// CHECK: uqrshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+uint64x1_t test_vqrshl_u64(uint64x1_t a, int64x1_t b) {
+// CHECK: test_vqrshl_u64
+ return vqrshl_u64(a, b);
+// CHECK: uqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+}
+
+int8x16_t test_vqrshlq_s8(int8x16_t a, int8x16_t b) {
+// CHECK: test_vqrshlq_s8
+ return vqrshlq_s8(a, b);
+// CHECK: sqrshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+int16x8_t test_vqrshlq_s16(int16x8_t a, int16x8_t b) {
+// CHECK: test_vqrshlq_s16
+ return vqrshlq_s16(a, b);
+// CHECK: sqrshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int32x4_t test_vqrshlq_s32(int32x4_t a, int32x4_t b) {
+// CHECK: test_vqrshlq_s32
+ return vqrshlq_s32(a, b);
+// CHECK: sqrshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+int64x2_t test_vqrshlq_s64(int64x2_t a, int64x2_t b) {
+// CHECK: test_vqrshlq_s64
+ return vqrshlq_s64(a, b);
+// CHECK: sqrshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+uint8x16_t test_vqrshlq_u8(uint8x16_t a, int8x16_t b) {
+// CHECK: test_vqrshlq_u8
+  return vqrshlq_u8(a, b);
+// CHECK: uqrshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint16x8_t test_vqrshlq_u16(uint16x8_t a, int16x8_t b) {
+// CHECK: test_vqrshlq_u16
+ return vqrshlq_u16(a, b);
+// CHECK: uqrshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+uint32x4_t test_vqrshlq_u32(uint32x4_t a, int32x4_t b) {
+// CHECK: test_vqrshlq_u32
+ return vqrshlq_u32(a, b);
+// CHECK: uqrshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+uint64x2_t test_vqrshlq_u64(uint64x2_t a, int64x2_t b) {
+// CHECK: test_vqrshlq_u64
+ return vqrshlq_u64(a, b);
+// CHECK: uqrshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+int8x8_t test_vmax_s8(int8x8_t a, int8x8_t b) {
+// CHECK: test_vmax_s8
+ return vmax_s8(a, b);
+// CHECK: smax {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int16x4_t test_vmax_s16(int16x4_t a, int16x4_t b) {
+// CHECK: test_vmax_s16
+ return vmax_s16(a, b);
+// CHECK: smax {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+int32x2_t test_vmax_s32(int32x2_t a, int32x2_t b) {
+// CHECK: test_vmax_s32
+ return vmax_s32(a, b);
+// CHECK: smax {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+uint8x8_t test_vmax_u8(uint8x8_t a, uint8x8_t b) {
+// CHECK: test_vmax_u8
+ return vmax_u8(a, b);
+// CHECK: umax {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint16x4_t test_vmax_u16(uint16x4_t a, uint16x4_t b) {
+// CHECK: test_vmax_u16
+ return vmax_u16(a, b);
+// CHECK: umax {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+uint32x2_t test_vmax_u32(uint32x2_t a, uint32x2_t b) {
+// CHECK: test_vmax_u32
+ return vmax_u32(a, b);
+// CHECK: umax {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+float32x2_t test_vmax_f32(float32x2_t a, float32x2_t b) {
+// CHECK: test_vmax_f32
+ return vmax_f32(a, b);
+// CHECK: fmax {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+int8x16_t test_vmaxq_s8(int8x16_t a, int8x16_t b) {
+// CHECK: test_vmaxq_s8
+ return vmaxq_s8(a, b);
+// CHECK: smax {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+int16x8_t test_vmaxq_s16(int16x8_t a, int16x8_t b) {
+// CHECK: test_vmaxq_s16
+ return vmaxq_s16(a, b);
+// CHECK: smax {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int32x4_t test_vmaxq_s32(int32x4_t a, int32x4_t b) {
+// CHECK: test_vmaxq_s32
+ return vmaxq_s32(a, b);
+// CHECK: smax {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+uint8x16_t test_vmaxq_u8(uint8x16_t a, uint8x16_t b) {
+// CHECK: test_vmaxq_u8
+ return vmaxq_u8(a, b);
+// CHECK: umax {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint16x8_t test_vmaxq_u16(uint16x8_t a, uint16x8_t b) {
+// CHECK: test_vmaxq_u16
+ return vmaxq_u16(a, b);
+// CHECK: umax {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+uint32x4_t test_vmaxq_u32(uint32x4_t a, uint32x4_t b) {
+// CHECK: test_vmaxq_u32
+ return vmaxq_u32(a, b);
+// CHECK: umax {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+float32x4_t test_vmaxq_f32(float32x4_t a, float32x4_t b) {
+// CHECK: test_vmaxq_f32
+ return vmaxq_f32(a, b);
+// CHECK: fmax {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+float64x2_t test_vmaxq_f64(float64x2_t a, float64x2_t b) {
+// CHECK: test_vmaxq_f64
+ return vmaxq_f64(a, b);
+// CHECK: fmax {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+
+int8x8_t test_vmin_s8(int8x8_t a, int8x8_t b) {
+// CHECK: test_vmin_s8
+ return vmin_s8(a, b);
+// CHECK: smin {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int16x4_t test_vmin_s16(int16x4_t a, int16x4_t b) {
+// CHECK: test_vmin_s16
+ return vmin_s16(a, b);
+// CHECK: smin {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+int32x2_t test_vmin_s32(int32x2_t a, int32x2_t b) {
+// CHECK: test_vmin_s32
+ return vmin_s32(a, b);
+// CHECK: smin {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+uint8x8_t test_vmin_u8(uint8x8_t a, uint8x8_t b) {
+// CHECK: test_vmin_u8
+ return vmin_u8(a, b);
+// CHECK: umin {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint16x4_t test_vmin_u16(uint16x4_t a, uint16x4_t b) {
+// CHECK: test_vmin_u16
+ return vmin_u16(a, b);
+// CHECK: umin {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+uint32x2_t test_vmin_u32(uint32x2_t a, uint32x2_t b) {
+// CHECK: test_vmin_u32
+ return vmin_u32(a, b);
+// CHECK: umin {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+float32x2_t test_vmin_f32(float32x2_t a, float32x2_t b) {
+// CHECK: test_vmin_f32
+ return vmin_f32(a, b);
+// CHECK: fmin {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+int8x16_t test_vminq_s8(int8x16_t a, int8x16_t b) {
+// CHECK: test_vminq_s8
+ return vminq_s8(a, b);
+// CHECK: smin {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+int16x8_t test_vminq_s16(int16x8_t a, int16x8_t b) {
+// CHECK: test_vminq_s16
+ return vminq_s16(a, b);
+// CHECK: smin {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int32x4_t test_vminq_s32(int32x4_t a, int32x4_t b) {
+// CHECK: test_vminq_s32
+ return vminq_s32(a, b);
+// CHECK: smin {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+uint8x16_t test_vminq_u8(uint8x16_t a, uint8x16_t b) {
+// CHECK: test_vminq_u8
+ return vminq_u8(a, b);
+// CHECK: umin {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint16x8_t test_vminq_u16(uint16x8_t a, uint16x8_t b) {
+// CHECK: test_vminq_u16
+ return vminq_u16(a, b);
+// CHECK: umin {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+uint32x4_t test_vminq_u32(uint32x4_t a, uint32x4_t b) {
+// CHECK: test_vminq_u32
+ return vminq_u32(a, b);
+// CHECK: umin {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+float32x4_t test_vminq_f32(float32x4_t a, float32x4_t b) {
+// CHECK: test_vminq_f32
+ return vminq_f32(a, b);
+// CHECK: fmin {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+float64x2_t test_vminq_f64(float64x2_t a, float64x2_t b) {
+// CHECK: test_vminq_f64
+ return vminq_f64(a, b);
+// CHECK: fmin {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
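+// Notes about vmaxnm/vminnm:
+// fmaxnm and fminnm implement the IEEE 754-2008 maxNum/minNum semantics,
+// returning the numeric operand when exactly one input is a quiet NaN.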
+float32x2_t test_vmaxnm_f32(float32x2_t a, float32x2_t b) {
+// CHECK: test_vmaxnm_f32
+ return vmaxnm_f32(a, b);
+// CHECK: fmaxnm {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b) {
+// CHECK: test_vmaxnmq_f32
+ return vmaxnmq_f32(a, b);
+// CHECK: fmaxnm {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+float64x2_t test_vmaxnmq_f64(float64x2_t a, float64x2_t b) {
+// CHECK: test_vmaxnmq_f64
+ return vmaxnmq_f64(a, b);
+// CHECK: fmaxnm {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+float32x2_t test_vminnm_f32(float32x2_t a, float32x2_t b) {
+// CHECK: test_vminnm_f32
+ return vminnm_f32(a, b);
+// CHECK: fminnm {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+float32x4_t test_vminnmq_f32(float32x4_t a, float32x4_t b) {
+// CHECK: test_vminnmq_f32
+ return vminnmq_f32(a, b);
+// CHECK: fminnm {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+float64x2_t test_vminnmq_f64(float64x2_t a, float64x2_t b) {
+// CHECK: test_vminnmq_f64
+ return vminnmq_f64(a, b);
+// CHECK: fminnm {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
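+// Notes about the pairwise operations:
+// smaxp, uminp, addp, and friends concatenate the two source vectors and
+// reduce each adjacent pair of elements, rather than operating lane-by-lane.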
+int8x8_t test_vpmax_s8(int8x8_t a, int8x8_t b) {
+// CHECK: test_vpmax_s8
+ return vpmax_s8(a, b);
+// CHECK: smaxp {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int16x4_t test_vpmax_s16(int16x4_t a, int16x4_t b) {
+// CHECK: test_vpmax_s16
+ return vpmax_s16(a, b);
+// CHECK: smaxp {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+int32x2_t test_vpmax_s32(int32x2_t a, int32x2_t b) {
+// CHECK: test_vpmax_s32
+ return vpmax_s32(a, b);
+// CHECK: smaxp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+uint8x8_t test_vpmax_u8(uint8x8_t a, uint8x8_t b) {
+// CHECK: test_vpmax_u8
+ return vpmax_u8(a, b);
+// CHECK: umaxp {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint16x4_t test_vpmax_u16(uint16x4_t a, uint16x4_t b) {
+// CHECK: test_vpmax_u16
+ return vpmax_u16(a, b);
+// CHECK: umaxp {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+uint32x2_t test_vpmax_u32(uint32x2_t a, uint32x2_t b) {
+// CHECK: test_vpmax_u32
+ return vpmax_u32(a, b);
+// CHECK: umaxp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+float32x2_t test_vpmax_f32(float32x2_t a, float32x2_t b) {
+// CHECK: test_vpmax_f32
+ return vpmax_f32(a, b);
+// CHECK: fmaxp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+int8x16_t test_vpmaxq_s8(int8x16_t a, int8x16_t b) {
+// CHECK: test_vpmaxq_s8
+ return vpmaxq_s8(a, b);
+// CHECK: smaxp {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+int16x8_t test_vpmaxq_s16(int16x8_t a, int16x8_t b) {
+// CHECK: test_vpmaxq_s16
+ return vpmaxq_s16(a, b);
+// CHECK: smaxp {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int32x4_t test_vpmaxq_s32(int32x4_t a, int32x4_t b) {
+// CHECK: test_vpmaxq_s32
+ return vpmaxq_s32(a, b);
+// CHECK: smaxp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+uint8x16_t test_vpmaxq_u8(uint8x16_t a, uint8x16_t b) {
+// CHECK: test_vpmaxq_u8
+ return vpmaxq_u8(a, b);
+// CHECK: umaxp {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint16x8_t test_vpmaxq_u16(uint16x8_t a, uint16x8_t b) {
+// CHECK: test_vpmaxq_u16
+ return vpmaxq_u16(a, b);
+// CHECK: umaxp {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+uint32x4_t test_vpmaxq_u32(uint32x4_t a, uint32x4_t b) {
+// CHECK: test_vpmaxq_u32
+ return vpmaxq_u32(a, b);
+// CHECK: umaxp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+float32x4_t test_vpmaxq_f32(float32x4_t a, float32x4_t b) {
+// CHECK: test_vpmaxq_f32
+ return vpmaxq_f32(a, b);
+// CHECK: fmaxp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+float64x2_t test_vpmaxq_f64(float64x2_t a, float64x2_t b) {
+// CHECK: test_vpmaxq_f64
+ return vpmaxq_f64(a, b);
+// CHECK: fmaxp {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+int8x8_t test_vpmin_s8(int8x8_t a, int8x8_t b) {
+// CHECK: test_vpmin_s8
+ return vpmin_s8(a, b);
+// CHECK: sminp {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int16x4_t test_vpmin_s16(int16x4_t a, int16x4_t b) {
+// CHECK: test_vpmin_s16
+ return vpmin_s16(a, b);
+// CHECK: sminp {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+int32x2_t test_vpmin_s32(int32x2_t a, int32x2_t b) {
+// CHECK: test_vpmin_s32
+ return vpmin_s32(a, b);
+// CHECK: sminp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+uint8x8_t test_vpmin_u8(uint8x8_t a, uint8x8_t b) {
+// CHECK: test_vpmin_u8
+ return vpmin_u8(a, b);
+// CHECK: uminp {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint16x4_t test_vpmin_u16(uint16x4_t a, uint16x4_t b) {
+// CHECK: test_vpmin_u16
+ return vpmin_u16(a, b);
+// CHECK: uminp {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+uint32x2_t test_vpmin_u32(uint32x2_t a, uint32x2_t b) {
+// CHECK: test_vpmin_u32
+ return vpmin_u32(a, b);
+// CHECK: uminp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+float32x2_t test_vpmin_f32(float32x2_t a, float32x2_t b) {
+// CHECK: test_vpmin_f32
+ return vpmin_f32(a, b);
+// CHECK: fminp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+int8x16_t test_vpminq_s8(int8x16_t a, int8x16_t b) {
+// CHECK: test_vpminq_s8
+ return vpminq_s8(a, b);
+// CHECK: sminp {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+int16x8_t test_vpminq_s16(int16x8_t a, int16x8_t b) {
+// CHECK: test_vpminq_s16
+ return vpminq_s16(a, b);
+// CHECK: sminp {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int32x4_t test_vpminq_s32(int32x4_t a, int32x4_t b) {
+// CHECK: test_vpminq_s32
+ return vpminq_s32(a, b);
+// CHECK: sminp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+uint8x16_t test_vpminq_u8(uint8x16_t a, uint8x16_t b) {
+// CHECK: test_vpminq_u8
+ return vpminq_u8(a, b);
+// CHECK: uminp {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint16x8_t test_vpminq_u16(uint16x8_t a, uint16x8_t b) {
+// CHECK: test_vpminq_u16
+ return vpminq_u16(a, b);
+// CHECK: uminp {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+uint32x4_t test_vpminq_u32(uint32x4_t a, uint32x4_t b) {
+// CHECK: test_vpminq_u32
+ return vpminq_u32(a, b);
+// CHECK: uminp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+float32x4_t test_vpminq_f32(float32x4_t a, float32x4_t b) {
+// CHECK: test_vpminq_f32
+ return vpminq_f32(a, b);
+// CHECK: fminp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+float64x2_t test_vpminq_f64(float64x2_t a, float64x2_t b) {
+// CHECK: test_vpminq_f64
+ return vpminq_f64(a, b);
+// CHECK: fminp {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+float32x2_t test_vpmaxnm_f32(float32x2_t a, float32x2_t b) {
+// CHECK: test_vpmaxnm_f32
+ return vpmaxnm_f32(a, b);
+// CHECK: fmaxnmp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+float32x4_t test_vpmaxnmq_f32(float32x4_t a, float32x4_t b) {
+// CHECK: test_vpmaxnmq_f32
+ return vpmaxnmq_f32(a, b);
+// CHECK: fmaxnmp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+float64x2_t test_vpmaxnmq_f64(float64x2_t a, float64x2_t b) {
+// CHECK: test_vpmaxnmq_f64
+ return vpmaxnmq_f64(a, b);
+// CHECK: fmaxnmp {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+float32x2_t test_vpminnm_f32(float32x2_t a, float32x2_t b) {
+// CHECK: test_vpminnm_f32
+ return vpminnm_f32(a, b);
+// CHECK: fminnmp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+float32x4_t test_vpminnmq_f32(float32x4_t a, float32x4_t b) {
+// CHECK: test_vpminnmq_f32
+ return vpminnmq_f32(a, b);
+// CHECK: fminnmp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+float64x2_t test_vpminnmq_f64(float64x2_t a, float64x2_t b) {
+// CHECK: test_vpminnmq_f64
+ return vpminnmq_f64(a, b);
+// CHECK: fminnmp {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+int8x8_t test_vpadd_s8(int8x8_t a, int8x8_t b) {
+// CHECK: test_vpadd_s8
+ return vpadd_s8(a, b);
+// CHECK: addp {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+int16x4_t test_vpadd_s16(int16x4_t a, int16x4_t b) {
+// CHECK: test_vpadd_s16
+ return vpadd_s16(a, b);
+// CHECK: addp {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+int32x2_t test_vpadd_s32(int32x2_t a, int32x2_t b) {
+// CHECK: test_vpadd_s32
+ return vpadd_s32(a, b);
+// CHECK: addp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+uint8x8_t test_vpadd_u8(uint8x8_t a, uint8x8_t b) {
+// CHECK: test_vpadd_u8
+ return vpadd_u8(a, b);
+// CHECK: addp {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+}
+
+uint16x4_t test_vpadd_u16(uint16x4_t a, uint16x4_t b) {
+// CHECK: test_vpadd_u16
+ return vpadd_u16(a, b);
+// CHECK: addp {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+uint32x2_t test_vpadd_u32(uint32x2_t a, uint32x2_t b) {
+// CHECK: test_vpadd_u32
+ return vpadd_u32(a, b);
+// CHECK: addp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+float32x2_t test_vpadd_f32(float32x2_t a, float32x2_t b) {
+// CHECK: test_vpadd_f32
+ return vpadd_f32(a, b);
+// CHECK: faddp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+int8x16_t test_vpaddq_s8(int8x16_t a, int8x16_t b) {
+// CHECK: test_vpaddq_s8
+ return vpaddq_s8(a, b);
+// CHECK: addp {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+int16x8_t test_vpaddq_s16(int16x8_t a, int16x8_t b) {
+// CHECK: test_vpaddq_s16
+ return vpaddq_s16(a, b);
+// CHECK: addp {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int32x4_t test_vpaddq_s32(int32x4_t a, int32x4_t b) {
+// CHECK: test_vpaddq_s32
+ return vpaddq_s32(a, b);
+// CHECK: addp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+uint8x16_t test_vpaddq_u8(uint8x16_t a, uint8x16_t b) {
+// CHECK: test_vpaddq_u8
+ return vpaddq_u8(a, b);
+// CHECK: addp {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+}
+
+uint16x8_t test_vpaddq_u16(uint16x8_t a, uint16x8_t b) {
+// CHECK: test_vpaddq_u16
+ return vpaddq_u16(a, b);
+// CHECK: addp {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+uint32x4_t test_vpaddq_u32(uint32x4_t a, uint32x4_t b) {
+// CHECK: test_vpaddq_u32
+ return vpaddq_u32(a, b);
+// CHECK: addp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+float32x4_t test_vpaddq_f32(float32x4_t a, float32x4_t b) {
+// CHECK: test_vpaddq_f32
+ return vpaddq_f32(a, b);
+// CHECK: faddp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+float64x2_t test_vpaddq_f64(float64x2_t a, float64x2_t b) {
+// CHECK: test_vpaddq_f64
+ return vpaddq_f64(a, b);
+// CHECK: faddp {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
+int16x4_t test_vqdmulh_s16(int16x4_t a, int16x4_t b) {
+// CHECK: test_vqdmulh_s16
+ return vqdmulh_s16(a, b);
+// CHECK: sqdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+int32x2_t test_vqdmulh_s32(int32x2_t a, int32x2_t b) {
+// CHECK: test_vqdmulh_s32
+ return vqdmulh_s32(a, b);
+// CHECK: sqdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+int16x8_t test_vqdmulhq_s16(int16x8_t a, int16x8_t b) {
+// CHECK: test_vqdmulhq_s16
+ return vqdmulhq_s16(a, b);
+// CHECK: sqdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int32x4_t test_vqdmulhq_s32(int32x4_t a, int32x4_t b) {
+// CHECK: test_vqdmulhq_s32
+ return vqdmulhq_s32(a, b);
+// CHECK: sqdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+int16x4_t test_vqrdmulh_s16(int16x4_t a, int16x4_t b) {
+// CHECK: test_vqrdmulh_s16
+ return vqrdmulh_s16(a, b);
+// CHECK: sqrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+}
+
+int32x2_t test_vqrdmulh_s32(int32x2_t a, int32x2_t b) {
+// CHECK: test_vqrdmulh_s32
+ return vqrdmulh_s32(a, b);
+// CHECK: sqrdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+int16x8_t test_vqrdmulhq_s16(int16x8_t a, int16x8_t b) {
+// CHECK: test_vqrdmulhq_s16
+ return vqrdmulhq_s16(a, b);
+// CHECK: sqrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+}
+
+int32x4_t test_vqrdmulhq_s32(int32x4_t a, int32x4_t b) {
+// CHECK: test_vqrdmulhq_s32
+ return vqrdmulhq_s32(a, b);
+// CHECK: sqrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+
+float32x2_t test_vmulx_f32(float32x2_t a, float32x2_t b) {
+// CHECK: test_vmulx_f32
+ return vmulx_f32(a, b);
+// CHECK: fmulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+}
+
+float32x4_t test_vmulxq_f32(float32x4_t a, float32x4_t b) {
+// CHECK: test_vmulxq_f32
+ return vmulxq_f32(a, b);
+// CHECK: fmulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+}
+
+float64x2_t test_vmulxq_f64(float64x2_t a, float64x2_t b) {
+// CHECK: test_vmulxq_f64
+ return vmulxq_f64(a, b);
+// CHECK: fmulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+}
+
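All of the pairwise tests above share one data pattern: the AdvSIMD pairwise
instructions (addp, smaxp, sminp, and friends) reduce adjacent element pairs of
the first operand, then of the second, and concatenate the two halves. A
minimal scalar model of vpadd_s8, assuming an AArch64 target where <arm_neon.h>
provides these types (the reference function itself is hypothetical, not part
of this patch):

    #include <arm_neon.h>

    /* Hypothetical reference model: lanes 0..3 are pair sums from a,
       lanes 4..7 are pair sums from b, matching the addp instruction. */
    static int8x8_t ref_vpadd_s8(int8x8_t a, int8x8_t b) {
      int8_t r[8];
      for (int i = 0; i != 4; ++i) {
        r[i]     = (int8_t)(a[2 * i] + a[2 * i + 1]);
        r[i + 4] = (int8_t)(b[2 * i] + b[2 * i + 1]);
      }
      return vld1_s8(r);
    }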
diff --git a/test/CodeGenCXX/aarch64-mangle-neon-vectors.cpp b/test/CodeGenCXX/aarch64-mangle-neon-vectors.cpp
new file mode 100644
index 0000000000..86509a0739
--- /dev/null
+++ b/test/CodeGenCXX/aarch64-mangle-neon-vectors.cpp
@@ -0,0 +1,85 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu %s -emit-llvm -o - | FileCheck %s
+
+typedef unsigned char uint8_t;
+typedef unsigned short uint16_t;
+typedef signed char int8_t;
+typedef signed short int16_t;
+typedef signed long long int64_t;
+typedef unsigned long long uint64_t;
+typedef unsigned char poly8_t;
+typedef unsigned short poly16_t;
+typedef __fp16 float16_t;
+typedef float float32_t;
+typedef double float64_t;
+
+typedef __attribute__((neon_vector_type(8))) int8_t int8x8_t;
+typedef __attribute__((neon_vector_type(16))) int8_t int8x16_t;
+typedef __attribute__((neon_vector_type(4))) int16_t int16x4_t;
+typedef __attribute__((neon_vector_type(8))) int16_t int16x8_t;
+typedef __attribute__((neon_vector_type(2))) int int32x2_t;
+typedef __attribute__((neon_vector_type(4))) int int32x4_t;
+typedef __attribute__((neon_vector_type(2))) int64_t int64x2_t;
+typedef __attribute__((neon_vector_type(8))) uint8_t uint8x8_t;
+typedef __attribute__((neon_vector_type(16))) uint8_t uint8x16_t;
+typedef __attribute__((neon_vector_type(4))) uint16_t uint16x4_t;
+typedef __attribute__((neon_vector_type(8))) uint16_t uint16x8_t;
+typedef __attribute__((neon_vector_type(2))) unsigned int uint32x2_t;
+typedef __attribute__((neon_vector_type(4))) unsigned int uint32x4_t;
+typedef __attribute__((neon_vector_type(2))) uint64_t uint64x2_t;
+typedef __attribute__((neon_vector_type(4))) float16_t float16x4_t;
+typedef __attribute__((neon_vector_type(8))) float16_t float16x8_t;
+typedef __attribute__((neon_vector_type(2))) float32_t float32x2_t;
+typedef __attribute__((neon_vector_type(4))) float32_t float32x4_t;
+typedef __attribute__((neon_vector_type(2))) float64_t float64x2_t;
+typedef __attribute__((neon_polyvector_type(8))) poly8_t poly8x8_t;
+typedef __attribute__((neon_polyvector_type(16))) poly8_t poly8x16_t;
+typedef __attribute__((neon_polyvector_type(4))) poly16_t poly16x4_t;
+typedef __attribute__((neon_polyvector_type(8))) poly16_t poly16x8_t;
+
+// CHECK: 10__Int8x8_t
+void f1(int8x8_t) {}
+// CHECK: 11__Int16x4_t
+void f2(int16x4_t) {}
+// CHECK: 11__Int32x2_t
+void f3(int32x2_t) {}
+// CHECK: 11__Uint8x8_t
+void f4(uint8x8_t) {}
+// CHECK: 12__Uint16x4_t
+void f5(uint16x4_t) {}
+// CHECK: 13__Float16x4_t
+void f6(float16x4_t) {}
+// CHECK: 13__Float16x8_t
+void f7(float16x8_t) {}
+// CHECK: 12__Uint32x2_t
+void f8(uint32x2_t) {}
+// CHECK: 13__Float32x2_t
+void f9(float32x2_t) {}
+// CHECK: 13__Float32x4_t
+void f10(float32x4_t) {}
+// CHECK: 11__Poly8x8_t
+void f11(poly8x8_t v) {}
+// CHECK: 12__Poly16x4_t
+void f12(poly16x4_t v) {}
+// CHECK: 12__Poly8x16_t
+void f13(poly8x16_t v) {}
+// CHECK: 12__Poly16x8_t
+void f14(poly16x8_t v) {}
+// CHECK: 11__Int8x16_t
+void f15(int8x16_t) {}
+// CHECK: 11__Int16x8_t
+void f16(int16x8_t) {}
+// CHECK: 11__Int32x4_t
+void f17(int32x4_t) {}
+// CHECK: 12__Uint8x16_t
+void f18(uint8x16_t) {}
+// CHECK: 12__Uint16x8_t
+void f19(uint16x8_t) {}
+// CHECK: 12__Uint32x4_t
+void f20(uint32x4_t) {}
+// CHECK: 11__Int64x2_t
+void f21(int64x2_t) {}
+// CHECK: 12__Uint64x2_t
+void f22(uint64x2_t) {}
+// CHECK: 13__Float64x2_t
+void f23(float64x2_t) {}
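The CHECK fragments above are the distinctive substring of full Itanium
manglings: on AArch64, each NEON vector type is mangled as a vendor builtin
type via a length-prefixed source name. Two complete manglings derived from the
fragments above (shown in comments; a sketch, not additional test content):

    void f1(int8x8_t);      // mangles to _Z2f110__Int8x8_t
    void f23(float64x2_t);  // mangles to _Z3f2313__Float64x2_t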
diff --git a/test/CodeGenCXX/mangle-neon-vectors.cpp b/test/CodeGenCXX/mangle-neon-vectors.cpp
index 3723deb192..793c89803f 100644
--- a/test/CodeGenCXX/mangle-neon-vectors.cpp
+++ b/test/CodeGenCXX/mangle-neon-vectors.cpp
@@ -1,6 +1,7 @@
-// RUN: %clang_cc1 %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -triple arm-none-linux-gnueabi %s -emit-llvm -o - | FileCheck %s
typedef float float32_t;
+typedef __fp16 float16_t;
typedef signed char poly8_t;
typedef short poly16_t;
typedef unsigned long long uint64_t;
@@ -11,8 +12,10 @@ typedef __attribute__((neon_vector_type(1))) uint64_t uint64x1_t;
typedef __attribute__((neon_vector_type(2))) uint64_t uint64x2_t;
typedef __attribute__((neon_vector_type(2))) float32_t float32x2_t;
typedef __attribute__((neon_vector_type(4))) float32_t float32x4_t;
-typedef __attribute__((neon_polyvector_type(16))) poly8_t poly8x16_t;
-typedef __attribute__((neon_polyvector_type(8))) poly16_t poly16x8_t;
+typedef __attribute__((neon_vector_type(4))) float16_t float16x4_t;
+typedef __attribute__((neon_vector_type(8))) float16_t float16x8_t;
+typedef __attribute__((neon_polyvector_type(16))) poly8_t poly8x16_t;
+typedef __attribute__((neon_polyvector_type(8))) poly16_t poly16x8_t;
// CHECK: 16__simd64_int32_t
void f1(int32x2_t v) { }
@@ -26,7 +29,11 @@ void f4(uint64x2_t v) { }
void f5(float32x2_t v) { }
// CHECK: 19__simd128_float32_t
void f6(float32x4_t v) { }
+// CHECK: 18__simd64_float16_t
+void f7(float16x4_t v) {}
+// CHECK: 19__simd128_float16_t
+void f8(float16x8_t v) {}
// CHECK: 17__simd128_poly8_t
-void f7(poly8x16_t v) { }
+void f9(poly8x16_t v) {}
// CHECK: 18__simd128_poly16_t
-void f8(poly16x8_t v) { }
+void f10(poly16x8_t v) {}
diff --git a/test/Preprocessor/aarch64-target-features.c b/test/Preprocessor/aarch64-target-features.c
index 8bb8427c0d..25bdb71bc3 100644
--- a/test/Preprocessor/aarch64-target-features.c
+++ b/test/Preprocessor/aarch64-target-features.c
@@ -30,3 +30,6 @@
// RUN: %clang -target aarch64-none-linux-gnu -fshort-enums -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-SHORTENUMS %s
// CHECK-SHORTENUMS: __ARM_SIZEOF_MINIMAL_ENUM 1
+// RUN: %clang -target aarch64-none-linux-gnu -mfpu=neon -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-NEON %s
+// CHECK-NEON: __AARCH_ADVSIMD_FP
+// CHECK-NEON: __AARCH_FEATURE_ADVSIMD
diff --git a/test/Sema/aarch64-neon-vector-types.c b/test/Sema/aarch64-neon-vector-types.c
new file mode 100644
index 0000000000..f4d58ffd09
--- /dev/null
+++ b/test/Sema/aarch64-neon-vector-types.c
@@ -0,0 +1,34 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 %s -triple aarch64-none-linux-gnu -fsyntax-only -verify
+
+typedef float float32_t;
+typedef unsigned char poly8_t;
+typedef unsigned short poly16_t;
+typedef unsigned long long uint64_t;
+
+// Define some valid Neon types.
+typedef __attribute__((neon_vector_type(2))) int int32x2_t;
+typedef __attribute__((neon_vector_type(4))) int int32x4_t;
+typedef __attribute__((neon_vector_type(1))) uint64_t uint64x1_t;
+typedef __attribute__((neon_vector_type(2))) uint64_t uint64x2_t;
+typedef __attribute__((neon_vector_type(2))) float32_t float32x2_t;
+typedef __attribute__((neon_vector_type(4))) float32_t float32x4_t;
+typedef __attribute__((neon_polyvector_type(16))) poly8_t poly8x16_t;
+typedef __attribute__((neon_polyvector_type(8))) poly16_t poly16x8_t;
+
+// The attributes must have a single argument.
+typedef __attribute__((neon_vector_type(2, 4))) int only_one_arg; // expected-error{{attribute takes one argument}}
+
+// The number of elements must be an ICE.
+typedef __attribute__((neon_vector_type(2.0))) int non_int_width; // expected-error{{attribute requires an integer constant}}
+
+// Only certain element types are allowed.
+typedef __attribute__((neon_vector_type(2))) double double_elt;
+typedef __attribute__((neon_vector_type(4))) void* ptr_elt; // expected-error{{invalid vector element type}}
+typedef __attribute__((neon_polyvector_type(4))) float32_t bad_poly_elt; // expected-error{{invalid vector element type}}
+struct aggr { signed char c; };
+typedef __attribute__((neon_vector_type(8))) struct aggr aggregate_elt; // expected-error{{invalid vector element type}}
+
+// The total vector size must be 64 or 128 bits.
+typedef __attribute__((neon_vector_type(1))) int int32x1_t; // expected-error{{Neon vector size must be 64 or 128 bits}}
+typedef __attribute__((neon_vector_type(3))) int int32x3_t; // expected-error{{Neon vector size must be 64 or 128 bits}}
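The 64/128-bit rule is just element width times lane count. Two worked cases
(illustrative typedefs, not part of the test above):

    /* 4 lanes x 16 bits = 64 bits:  accepted */
    typedef __attribute__((neon_vector_type(4))) short good64;
    /* 16 lanes x 8 bits = 128 bits: accepted */
    typedef __attribute__((neon_vector_type(16))) char good128;
    /* 3 lanes x 32 bits = 96 bits would be rejected, as int32x3_t shows. */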
diff --git a/utils/TableGen/NeonEmitter.cpp b/utils/TableGen/NeonEmitter.cpp
index bb505de95d..411aa7e4ab 100644
--- a/utils/TableGen/NeonEmitter.cpp
+++ b/utils/TableGen/NeonEmitter.cpp
@@ -90,7 +90,8 @@ enum OpKind {
OpReinterpret,
OpAbdl,
OpAba,
- OpAbal
+ OpAbal,
+ OpDiv
};
enum ClassKind {
@@ -127,7 +128,8 @@ public:
Poly8,
Poly16,
Float16,
- Float32
+ Float32,
+ Float64
};
NeonTypeFlags(unsigned F) : Flags(F) {}
@@ -205,6 +207,7 @@ public:
OpMap["OP_ABDL"] = OpAbdl;
OpMap["OP_ABA"] = OpAba;
OpMap["OP_ABAL"] = OpAbal;
+ OpMap["OP_DIV"] = OpDiv;
Record *SI = R.getClass("SInst");
Record *II = R.getClass("IInst");
@@ -235,7 +238,18 @@ public:
void runTests(raw_ostream &o);
private:
- void emitIntrinsic(raw_ostream &OS, Record *R);
+ void emitIntrinsic(raw_ostream &OS, Record *R,
+ StringMap<ClassKind> &EmittedMap);
+ void genBuiltinsDef(raw_ostream &OS, StringMap<ClassKind> &A64IntrinsicMap,
+ bool isA64GenBuiltinDef);
+ void genOverloadTypeCheckCode(raw_ostream &OS,
+ StringMap<ClassKind> &A64IntrinsicMap,
+ bool isA64TypeCheck);
+ void genIntrinsicRangeCheckCode(raw_ostream &OS,
+ StringMap<ClassKind> &A64IntrinsicMap,
+ bool isA64RangeCheck);
+ void genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
+ bool isA64TestGen);
};
} // end anonymous namespace
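Each of the new private generators declared above takes the same trailing bool
selecting the target and, as runHeader() further down in this patch shows, is
simply invoked twice. A condensed sketch of the call pattern:

    // First pass emits the ARM section, second pass the AArch64 section.
    // A64IntrinsicMap lets the AArch64 pass skip ARM intrinsics that
    // AArch64 redefines with new types.
    genBuiltinsDef(OS, A64IntrinsicMap, /*isA64GenBuiltinDef=*/false);
    genBuiltinsDef(OS, A64IntrinsicMap, /*isA64GenBuiltinDef=*/true);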
@@ -259,6 +273,7 @@ static void ParseTypes(Record *r, std::string &s,
case 'l':
case 'h':
case 'f':
+ case 'd':
break;
default:
PrintFatalError(r->getLoc(),
@@ -347,6 +362,8 @@ static char ModType(const char mod, char type, bool &quad, bool &poly,
poly = false;
if (type == 'f')
type = 'i';
+ if (type == 'd')
+ type = 'l';
break;
case 'x':
usgn = false;
@@ -470,6 +487,13 @@ static std::string TypeString(const char mod, StringRef typestr) {
break;
s += quad ? "x4" : "x2";
break;
+ case 'd':
+ s += "float64";
+ if (scal)
+ break;
+ s += quad ? "x2" : "x1";
+ break;
+
default:
PrintFatalError("unhandled type!");
}
@@ -647,6 +671,18 @@ static void InstructionTypeCode(const StringRef &typeStr,
default: break;
}
break;
+ case 'd':
+ switch (ck) {
+ case ClassS:
+ case ClassI:
+ typeCode += "f64";
+ break;
+ case ClassW:
+ PrintFatalError("unhandled type!");
+ default:
+ break;
+ }
+ break;
default:
PrintFatalError("unhandled type!");
}
@@ -1252,6 +1288,9 @@ static unsigned GetNumElements(StringRef typestr, bool &quad) {
case 'l': nElts = 1; break;
case 'h': nElts = 4; break;
case 'f': nElts = 2; break;
+ case 'd':
+ nElts = 1;
+ break;
default:
PrintFatalError("unhandled type!");
}
@@ -1488,6 +1527,9 @@ static std::string GenOpString(OpKind op, const std::string &proto,
}
break;
}
+ case OpDiv:
+ s += "__a / __b;";
+ break;
default:
PrintFatalError("unknown OpKind!");
}
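OP_DIV therefore lowers to a plain division on the vector-extension types in
the generated header. For a 2 x f64 intrinsic built on it, the emitted
definition is roughly of this shape (a sketch; the intrinsic name is
illustrative, and the __ai attribute macro is defined elsewhere in the
generated arm_neon.h):

    __ai float64x2_t vdivq_f64(float64x2_t __a, float64x2_t __b) {
      return __a / __b; /* element-wise division via the vector extension */
    }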
@@ -1533,6 +1575,9 @@ static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) {
case 'f':
ET = NeonTypeFlags::Float32;
break;
+ case 'd':
+ ET = NeonTypeFlags::Float64;
+ break;
default:
PrintFatalError("unhandled type!");
}
@@ -1776,7 +1821,7 @@ void NeonEmitter::run(raw_ostream &OS) {
OS << "#ifndef __ARM_NEON_H\n";
OS << "#define __ARM_NEON_H\n\n";
- OS << "#ifndef __ARM_NEON__\n";
+ OS << "#if !defined(__ARM_NEON__) && !defined(__AARCH_FEATURE_ADVSIMD)\n";
OS << "#error \"NEON support not enabled\"\n";
OS << "#endif\n\n";
@@ -1784,19 +1829,39 @@ void NeonEmitter::run(raw_ostream &OS) {
// Emit NEON-specific scalar typedefs.
OS << "typedef float float32_t;\n";
+ OS << "typedef __fp16 float16_t;\n";
+
+ OS << "#ifdef __aarch64__\n";
+ OS << "typedef double float64_t;\n";
+ OS << "#endif\n\n";
+
+  // For now, the signedness of polynomial types depends on the target.
+ OS << "#ifdef __aarch64__\n";
+ OS << "typedef uint8_t poly8_t;\n";
+ OS << "typedef uint16_t poly16_t;\n";
+ OS << "#else\n";
OS << "typedef int8_t poly8_t;\n";
OS << "typedef int16_t poly16_t;\n";
- OS << "typedef uint16_t float16_t;\n";
+ OS << "#endif\n";
// Emit Neon vector typedefs.
- std::string TypedefTypes("cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfPcQPcPsQPs");
+ std::string TypedefTypes(
+ "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfQdPcQPcPsQPs");
SmallVector<StringRef, 24> TDTypeVec;
ParseTypes(0, TypedefTypes, TDTypeVec);
// Emit vector typedefs.
for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
bool dummy, quad = false, poly = false;
- (void) ClassifyType(TDTypeVec[i], quad, poly, dummy);
+ char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
+ bool isA64 = false;
+
+ if (type == 'd' && quad)
+ isA64 = true;
+
+ if (isA64)
+ OS << "#ifdef __aarch64__\n";
+
if (poly)
OS << "typedef __attribute__((neon_polyvector_type(";
else
@@ -1809,19 +1874,37 @@ void NeonEmitter::run(raw_ostream &OS) {
OS << TypeString('s', TDTypeVec[i]);
OS << " " << TypeString('d', TDTypeVec[i]) << ";\n";
+
+ if (isA64)
+ OS << "#endif\n";
}
OS << "\n";
// Emit struct typedefs.
for (unsigned vi = 2; vi != 5; ++vi) {
for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
+ bool dummy, quad = false, poly = false;
+ char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
+ bool isA64 = false;
+
+ if (type == 'd' && quad)
+ isA64 = true;
+
+ if (isA64)
+ OS << "#ifdef __aarch64__\n";
+
std::string ts = TypeString('d', TDTypeVec[i]);
std::string vs = TypeString('0' + vi, TDTypeVec[i]);
OS << "typedef struct " << vs << " {\n";
OS << " " << ts << " val";
OS << "[" << utostr(vi) << "]";
OS << ";\n} ";
- OS << vs << ";\n\n";
+ OS << vs << ";\n";
+
+ if (isA64)
+ OS << "#endif\n";
+
+ OS << "\n";
}
}
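For the struct-of-vectors types the loop now produces entries such as these
(a sketch; only the 2 x f64 variants get the guard, since type 'd' always
implies quad here):

    typedef struct int8x8x2_t {
      int8x8_t val[2];
    } int8x8x2_t;

    #ifdef __aarch64__
    typedef struct float64x2x2_t {
      float64x2_t val[2];
    } float64x2x2_t;
    #endif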
@@ -1829,30 +1912,58 @@ void NeonEmitter::run(raw_ostream &OS) {
std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
+ StringMap<ClassKind> EmittedMap;
+
// Emit vmovl, vmull and vabd intrinsics first so they can be used by other
// intrinsics. (Some of the saturating multiply instructions are also
// used to implement the corresponding "_lane" variants, but tablegen
// sorts the records into alphabetical order so that the "_lane" variants
// come after the intrinsics they use.)
- emitIntrinsic(OS, Records.getDef("VMOVL"));
- emitIntrinsic(OS, Records.getDef("VMULL"));
- emitIntrinsic(OS, Records.getDef("VABD"));
-
+ emitIntrinsic(OS, Records.getDef("VMOVL"), EmittedMap);
+ emitIntrinsic(OS, Records.getDef("VMULL"), EmittedMap);
+ emitIntrinsic(OS, Records.getDef("VABD"), EmittedMap);
+
+ // ARM intrinsics must be emitted before AArch64 intrinsics to ensure
+ // common intrinsics appear only once in the output stream.
+  // The check for uniqueness is done in emitIntrinsic.
+ // Emit ARM intrinsics.
for (unsigned i = 0, e = RV.size(); i != e; ++i) {
Record *R = RV[i];
- if (R->getName() != "VMOVL" &&
- R->getName() != "VMULL" &&
+
+ // Skip AArch64 intrinsics; they will be emitted at the end.
+ bool isA64 = R->getValueAsBit("isA64");
+ if (isA64)
+ continue;
+
+ if (R->getName() != "VMOVL" && R->getName() != "VMULL" &&
R->getName() != "VABD")
- emitIntrinsic(OS, R);
+ emitIntrinsic(OS, R, EmittedMap);
}
+ // Emit AArch64-specific intrinsics.
+ OS << "#ifdef __aarch64__\n";
+
+ for (unsigned i = 0, e = RV.size(); i != e; ++i) {
+ Record *R = RV[i];
+
+ // Skip ARM intrinsics already included above.
+ bool isA64 = R->getValueAsBit("isA64");
+ if (!isA64)
+ continue;
+
+ emitIntrinsic(OS, R, EmittedMap);
+ }
+
+ OS << "#endif\n\n";
+
OS << "#undef __ai\n\n";
OS << "#endif /* __ARM_NEON_H */\n";
}
/// emitIntrinsic - Write out the arm_neon.h header file definitions for the
-/// intrinsics specified by record R.
-void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R) {
+/// intrinsics specified by record R, checking for intrinsic uniqueness.
+void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R,
+ StringMap<ClassKind> &EmittedMap) {
std::string name = R->getValueAsString("Name");
std::string Proto = R->getValueAsString("Prototype");
std::string Types = R->getValueAsString("Types");
@@ -1879,12 +1990,20 @@ void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R) {
(void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
if (srcti == ti || inQuad != outQuad)
continue;
- OS << GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti],
- OpCast, ClassS);
+ std::string s = GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti],
+ OpCast, ClassS);
+ if (EmittedMap.count(s))
+ continue;
+ EmittedMap[s] = ClassS;
+ OS << s;
}
} else {
- OS << GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti],
- kind, classKind);
+ std::string s =
+ GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti], kind, classKind);
+ if (EmittedMap.count(s))
+ continue;
+ EmittedMap[s] = classKind;
+ OS << s;
}
}
OS << "\n";
@@ -1912,56 +2031,151 @@ static unsigned RangeFromType(const char mod, StringRef typestr) {
}
}
-/// runHeader - Emit a file with sections defining:
-/// 1. the NEON section of BuiltinsARM.def.
-/// 2. the SemaChecking code for the type overload checking.
-/// 3. the SemaChecking code for validation of intrinsic immediate arguments.
-void NeonEmitter::runHeader(raw_ostream &OS) {
- std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
-
+/// Generate the ARM and AArch64 intrinsic range checking code for
+/// shift/lane immediates, checking for unique declarations.
+void
+NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
+ StringMap<ClassKind> &A64IntrinsicMap,
+ bool isA64RangeCheck) {
+ std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
StringMap<OpKind> EmittedMap;
- // Generate BuiltinsARM.def for NEON
- OS << "#ifdef GET_NEON_BUILTINS\n";
+ // Generate the intrinsic range checking code for shift/lane immediates.
+ if (isA64RangeCheck)
+ OS << "#ifdef GET_NEON_AARCH64_IMMEDIATE_CHECK\n";
+ else
+ OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
+
for (unsigned i = 0, e = RV.size(); i != e; ++i) {
Record *R = RV[i];
+
OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
if (k != OpNone)
continue;
+ std::string name = R->getValueAsString("Name");
std::string Proto = R->getValueAsString("Prototype");
+ std::string Types = R->getValueAsString("Types");
// Functions with 'a' (the splat code) in the type prototype should not get
// their own builtin as they use the non-splat variant.
if (Proto.find('a') != std::string::npos)
continue;
- std::string Types = R->getValueAsString("Types");
+ // Functions which do not have an immediate do not need to have range
+ // checking code emitted.
+ size_t immPos = Proto.find('i');
+ if (immPos == std::string::npos)
+ continue;
+
SmallVector<StringRef, 16> TypeVec;
ParseTypes(R, Types, TypeVec);
if (R->getSuperClasses().size() < 2)
PrintFatalError(R->getLoc(), "Builtin has no class kind");
- std::string name = R->getValueAsString("Name");
ClassKind ck = ClassMap[R->getSuperClasses()[1]];
+ // Do not include AArch64 range checks if not generating code for AArch64.
+ bool isA64 = R->getValueAsBit("isA64");
+ if (!isA64RangeCheck && isA64)
+ continue;
+
+ // Include ARM range checks in AArch64 but only if ARM intrinsics are not
+ // redefined by AArch64 to handle new types.
+ if (isA64RangeCheck && !isA64 && A64IntrinsicMap.count(name)) {
+ ClassKind &A64CK = A64IntrinsicMap[name];
+ if (A64CK == ck && ck != ClassNone)
+ continue;
+ }
+
for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
- // Generate the BuiltinsARM.def declaration for this builtin, ensuring
- // that each unique BUILTIN() macro appears only once in the output
- // stream.
- std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck);
- if (EmittedMap.count(bd))
+ std::string namestr, shiftstr, rangestr;
+
+ if (R->getValueAsBit("isVCVT_N")) {
+ // VCVT between floating- and fixed-point values takes an immediate
+ // in the range 1 to 32.
+ ck = ClassB;
+ rangestr = "l = 1; u = 31"; // upper bound = l + u
+ } else if (Proto.find('s') == std::string::npos) {
+ // Builtins which are overloaded by type will need to have their upper
+ // bound computed at Sema time based on the type constant.
+ ck = ClassB;
+ if (R->getValueAsBit("isShift")) {
+ shiftstr = ", true";
+
+ // Right shifts have an 'r' in the name, left shifts do not.
+ if (name.find('r') != std::string::npos)
+ rangestr = "l = 1; ";
+ }
+ rangestr += "u = RFT(TV" + shiftstr + ")";
+ } else {
+ // The immediate generally refers to a lane in the preceding argument.
+ assert(immPos > 0 && "unexpected immediate operand");
+ rangestr =
+ "u = " + utostr(RangeFromType(Proto[immPos - 1], TypeVec[ti]));
+ }
+ // Make sure cases appear only once by uniquing them in a string map.
+ namestr = MangleName(name, TypeVec[ti], ck);
+ if (EmittedMap.count(namestr))
continue;
+ EmittedMap[namestr] = OpNone;
- EmittedMap[bd] = OpNone;
- OS << bd << "\n";
+ // Calculate the index of the immediate that should be range checked.
+ unsigned immidx = 0;
+
+ // Builtins that return a struct of multiple vectors have an extra
+ // leading arg for the struct return.
+ if (Proto[0] >= '2' && Proto[0] <= '4')
+ ++immidx;
+
+ // Add one to the index for each argument until we reach the immediate
+ // to be checked. Structs of vectors are passed as multiple arguments.
+ for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) {
+ switch (Proto[ii]) {
+ default:
+ immidx += 1;
+ break;
+ case '2':
+ immidx += 2;
+ break;
+ case '3':
+ immidx += 3;
+ break;
+ case '4':
+ immidx += 4;
+ break;
+ case 'i':
+ ie = ii + 1;
+ break;
+ }
+ }
+ if (isA64RangeCheck)
+ OS << "case AArch64::BI__builtin_neon_";
+ else
+ OS << "case ARM::BI__builtin_neon_";
+ OS << MangleName(name, TypeVec[ti], ck) << ": i = " << immidx << "; "
+ << rangestr << "; break;\n";
}
}
OS << "#endif\n\n";
+}
+
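The emitted cases feed Sema's immediate-range validation. For a type-overloaded
right shift such as vshr_n, the ARM pass would emit roughly the following
(illustrative; the exact case list depends on the .td definitions):

    // i = argument index of the immediate, l/u = allowed range;
    // RFT(TV, true) computes the upper bound from the type constant.
    case ARM::BI__builtin_neon_vshr_n_v: i = 1; l = 1; u = RFT(TV, true); break;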
+/// Generate the ARM and AArch64 overloaded type checking code for
+/// SemaChecking.cpp, checking for unique builtin declarations.
+void
+NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
+ StringMap<ClassKind> &A64IntrinsicMap,
+ bool isA64TypeCheck) {
+ std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
+ StringMap<OpKind> EmittedMap;
// Generate the overloaded type checking code for SemaChecking.cpp
- OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";
+ if (isA64TypeCheck)
+ OS << "#ifdef GET_NEON_AARCH64_OVERLOAD_CHECK\n";
+ else
+ OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";
+
for (unsigned i = 0, e = RV.size(); i != e; ++i) {
Record *R = RV[i];
OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
@@ -1988,6 +2202,21 @@ void NeonEmitter::runHeader(raw_ostream &OS) {
if (R->getSuperClasses().size() < 2)
PrintFatalError(R->getLoc(), "Builtin has no class kind");
+ // Do not include AArch64 type checks if not generating code for AArch64.
+ bool isA64 = R->getValueAsBit("isA64");
+ if (!isA64TypeCheck && isA64)
+ continue;
+
+  // Include ARM type checks in AArch64, but only if the ARM intrinsics are
+  // not redefined in AArch64 to handle new types; e.g. "vabd" is an SInst
+  // redefined in AArch64 to handle an additional 2 x f64 type.
+ ClassKind ck = ClassMap[R->getSuperClasses()[1]];
+ if (isA64TypeCheck && !isA64 && A64IntrinsicMap.count(name)) {
+ ClassKind &A64CK = A64IntrinsicMap[name];
+ if (A64CK == ck && ck != ClassNone)
+ continue;
+ }
+
int si = -1, qi = -1;
uint64_t mask = 0, qmask = 0;
for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
@@ -2035,9 +2264,12 @@ void NeonEmitter::runHeader(raw_ostream &OS) {
}
if (mask) {
- OS << "case ARM::BI__builtin_neon_"
- << MangleName(name, TypeVec[si], ClassB)
- << ": mask = " << "0x" << utohexstr(mask) << "ULL";
+ if (isA64TypeCheck)
+ OS << "case AArch64::BI__builtin_neon_";
+ else
+ OS << "case ARM::BI__builtin_neon_";
+ OS << MangleName(name, TypeVec[si], ClassB) << ": mask = "
+ << "0x" << utohexstr(mask) << "ULL";
if (PtrArgNum >= 0)
OS << "; PtrArgNum = " << PtrArgNum;
if (HasConstPtr)
@@ -2045,9 +2277,12 @@ void NeonEmitter::runHeader(raw_ostream &OS) {
OS << "; break;\n";
}
if (qmask) {
- OS << "case ARM::BI__builtin_neon_"
- << MangleName(name, TypeVec[qi], ClassB)
- << ": mask = " << "0x" << utohexstr(qmask) << "ULL";
+ if (isA64TypeCheck)
+ OS << "case AArch64::BI__builtin_neon_";
+ else
+ OS << "case ARM::BI__builtin_neon_";
+ OS << MangleName(name, TypeVec[qi], ClassB) << ": mask = "
+ << "0x" << utohexstr(qmask) << "ULL";
if (PtrArgNum >= 0)
OS << "; PtrArgNum = " << PtrArgNum;
if (HasConstPtr)
@@ -2056,31 +2291,37 @@ void NeonEmitter::runHeader(raw_ostream &OS) {
}
}
OS << "#endif\n\n";
+}
+
+/// genBuiltinsDef: Generate the BuiltinsARM.def and BuiltinsAArch64.def
+/// declarations of builtins, checking for unique builtin declarations.
+void NeonEmitter::genBuiltinsDef(raw_ostream &OS,
+ StringMap<ClassKind> &A64IntrinsicMap,
+ bool isA64GenBuiltinDef) {
+ std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
+ StringMap<OpKind> EmittedMap;
+
+ // Generate BuiltinsARM.def and BuiltinsAArch64.def
+ if (isA64GenBuiltinDef)
+ OS << "#ifdef GET_NEON_AARCH64_BUILTINS\n";
+ else
+ OS << "#ifdef GET_NEON_BUILTINS\n";
- // Generate the intrinsic range checking code for shift/lane immediates.
- OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
for (unsigned i = 0, e = RV.size(); i != e; ++i) {
Record *R = RV[i];
-
OpKind k = OpMap[R->getValueAsDef("Operand")->getName()];
if (k != OpNone)
continue;
- std::string name = R->getValueAsString("Name");
std::string Proto = R->getValueAsString("Prototype");
- std::string Types = R->getValueAsString("Types");
+ std::string name = R->getValueAsString("Name");
// Functions with 'a' (the splat code) in the type prototype should not get
// their own builtin as they use the non-splat variant.
if (Proto.find('a') != std::string::npos)
continue;
- // Functions which do not have an immediate do not need to have range
- // checking code emitted.
- size_t immPos = Proto.find('i');
- if (immPos == std::string::npos)
- continue;
-
+ std::string Types = R->getValueAsString("Types");
SmallVector<StringRef, 16> TypeVec;
ParseTypes(R, Types, TypeVec);
@@ -2089,70 +2330,90 @@ void NeonEmitter::runHeader(raw_ostream &OS) {
ClassKind ck = ClassMap[R->getSuperClasses()[1]];
- for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
- std::string namestr, shiftstr, rangestr;
-
- if (R->getValueAsBit("isVCVT_N")) {
- // VCVT between floating- and fixed-point values takes an immediate
- // in the range 1 to 32.
- ck = ClassB;
- rangestr = "l = 1; u = 31"; // upper bound = l + u
- } else if (Proto.find('s') == std::string::npos) {
- // Builtins which are overloaded by type will need to have their upper
- // bound computed at Sema time based on the type constant.
- ck = ClassB;
- if (R->getValueAsBit("isShift")) {
- shiftstr = ", true";
+    // Do not include AArch64 BUILTIN() macros if not generating
+    // code for AArch64.
+ bool isA64 = R->getValueAsBit("isA64");
+ if (!isA64GenBuiltinDef && isA64)
+ continue;
- // Right shifts have an 'r' in the name, left shifts do not.
- if (name.find('r') != std::string::npos)
- rangestr = "l = 1; ";
- }
- rangestr += "u = RFT(TV" + shiftstr + ")";
- } else {
- // The immediate generally refers to a lane in the preceding argument.
- assert(immPos > 0 && "unexpected immediate operand");
- rangestr = "u = " + utostr(RangeFromType(Proto[immPos-1], TypeVec[ti]));
- }
- // Make sure cases appear only once by uniquing them in a string map.
- namestr = MangleName(name, TypeVec[ti], ck);
- if (EmittedMap.count(namestr))
+    // Include ARM BUILTIN() macros in AArch64, but only if the ARM intrinsics
+    // are not redefined in AArch64 to handle new types; e.g. "vabd" is an
+    // SInst redefined in AArch64 to handle an additional 2 x f64 type.
+ if (isA64GenBuiltinDef && !isA64 && A64IntrinsicMap.count(name)) {
+ ClassKind &A64CK = A64IntrinsicMap[name];
+ if (A64CK == ck && ck != ClassNone)
continue;
- EmittedMap[namestr] = OpNone;
-
- // Calculate the index of the immediate that should be range checked.
- unsigned immidx = 0;
+ }
- // Builtins that return a struct of multiple vectors have an extra
- // leading arg for the struct return.
- if (Proto[0] >= '2' && Proto[0] <= '4')
- ++immidx;
+ for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) {
+ // Generate the declaration for this builtin, ensuring
+ // that each unique BUILTIN() macro appears only once in the output
+ // stream.
+ std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck);
+ if (EmittedMap.count(bd))
+ continue;
- // Add one to the index for each argument until we reach the immediate
- // to be checked. Structs of vectors are passed as multiple arguments.
- for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) {
- switch (Proto[ii]) {
- default: immidx += 1; break;
- case '2': immidx += 2; break;
- case '3': immidx += 3; break;
- case '4': immidx += 4; break;
- case 'i': ie = ii + 1; break;
- }
- }
- OS << "case ARM::BI__builtin_neon_" << MangleName(name, TypeVec[ti], ck)
- << ": i = " << immidx << "; " << rangestr << "; break;\n";
+ EmittedMap[bd] = OpNone;
+ OS << bd << "\n";
}
}
OS << "#endif\n\n";
}
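Each unique BUILTIN() line lands in the .def section selected by the pass;
schematically (builtin name illustrative; the middle argument, elided here as
"...", is the encoded prototype string produced by GenBuiltinDef):

    // Under GET_NEON_AARCH64_BUILTINS in BuiltinsAArch64.def:
    BUILTIN(__builtin_neon_vpaddq_v, "...", "n")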
+/// runHeader - Emit a file with sections defining:
+/// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def.
+/// 2. the SemaChecking code for the type overload checking.
+/// 3. the SemaChecking code for validation of intrinsic immediate arguments.
+void NeonEmitter::runHeader(raw_ostream &OS) {
+ std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
+
+  // Build a map of AArch64 intrinsics to be used in uniqueness checks.
+ StringMap<ClassKind> A64IntrinsicMap;
+ for (unsigned i = 0, e = RV.size(); i != e; ++i) {
+ Record *R = RV[i];
+
+ bool isA64 = R->getValueAsBit("isA64");
+ if (!isA64)
+ continue;
+
+ ClassKind CK = ClassNone;
+ if (R->getSuperClasses().size() >= 2)
+ CK = ClassMap[R->getSuperClasses()[1]];
+
+ std::string Name = R->getValueAsString("Name");
+ if (A64IntrinsicMap.count(Name))
+ continue;
+ A64IntrinsicMap[Name] = CK;
+ }
+
+ // Generate BuiltinsARM.def for ARM
+ genBuiltinsDef(OS, A64IntrinsicMap, false);
+
+ // Generate BuiltinsAArch64.def for AArch64
+ genBuiltinsDef(OS, A64IntrinsicMap, true);
+
+ // Generate ARM overloaded type checking code for SemaChecking.cpp
+ genOverloadTypeCheckCode(OS, A64IntrinsicMap, false);
+
+ // Generate AArch64 overloaded type checking code for SemaChecking.cpp
+ genOverloadTypeCheckCode(OS, A64IntrinsicMap, true);
+
+ // Generate ARM range checking code for shift/lane immediates.
+ genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, false);
+
+ // Generate the AArch64 range checking code for shift/lane immediates.
+ genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, true);
+}
+
/// GenTest - Write out a test for the intrinsic specified by the name and
/// type strings, including the embedded patterns for FileCheck to match.
static std::string GenTest(const std::string &name,
const std::string &proto,
StringRef outTypeStr, StringRef inTypeStr,
bool isShift, bool isHiddenLOp,
- ClassKind ck, const std::string &InstName) {
+ ClassKind ck, const std::string &InstName,
+ bool isA64,
+                           std::string &testFuncProto) {
assert(!proto.empty() && "");
std::string s;
@@ -2167,12 +2428,17 @@ static std::string GenTest(const std::string &name,
mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
}
+  // TODO: GenerateChecksForIntrinsic does not generate CHECK patterns
+  // for AArch64 instructions yet.
std::vector<std::string> FileCheckPatterns;
- GenerateChecksForIntrinsic(name, proto, outTypeStr, inTypeStr, ck, InstName,
- isHiddenLOp, FileCheckPatterns);
+ if (!isA64) {
+ GenerateChecksForIntrinsic(name, proto, outTypeStr, inTypeStr, ck, InstName,
+ isHiddenLOp, FileCheckPatterns);
+    s += "// CHECK_ARM: test_" + mangledName + "\n";
+ }
+ s += "// CHECK_AARCH64: test_" + mangledName + "\n";
// Emit the FileCheck patterns.
- s += "// CHECK: test_" + mangledName + "\n";
// If for any reason we do not want to emit a check, mangledInst
// will be the empty string.
if (FileCheckPatterns.size()) {
@@ -2180,23 +2446,27 @@ static std::string GenTest(const std::string &name,
e = FileCheckPatterns.end();
i != e;
++i) {
- s += "// CHECK: " + *i + "\n";
+ s += "// CHECK_ARM: " + *i + "\n";
}
}
// Emit the start of the test function.
- s += TypeString(proto[0], outTypeStr) + " test_" + mangledName + "(";
+
+ testFuncProto = TypeString(proto[0], outTypeStr) + " test_" + mangledName + "(";
char arg = 'a';
std::string comma;
for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
// Do not create arguments for values that must be immediate constants.
if (proto[i] == 'i')
continue;
- s += comma + TypeString(proto[i], inTypeStr) + " ";
- s.push_back(arg);
+ testFuncProto += comma + TypeString(proto[i], inTypeStr) + " ";
+ testFuncProto.push_back(arg);
comma = ", ";
}
- s += ") {\n ";
+ testFuncProto += ")";
+
+  s += testFuncProto;
+  s += " {\n  ";
if (proto[0] != 'v')
s += "return ";
@@ -2220,20 +2490,14 @@ static std::string GenTest(const std::string &name,
return s;
}
-/// runTests - Write out a complete set of tests for all of the Neon
-/// intrinsics.
-void NeonEmitter::runTests(raw_ostream &OS) {
- OS <<
- "// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi apcs-gnu\\\n"
- "// RUN: -target-cpu swift -ffreestanding -Os -S -o - %s\\\n"
- "// RUN: | FileCheck %s\n"
- "\n"
- "// REQUIRES: long_tests\n"
- "\n"
- "#include <arm_neon.h>\n"
- "\n";
+/// Write out all intrinsic tests for the specified target, checking
+/// for intrinsic test uniqueness.
+void NeonEmitter::genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
+ bool isA64GenTest) {
+ if (isA64GenTest)
+ OS << "#ifdef __aarch64__\n";
- std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
+ std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
for (unsigned i = 0, e = RV.size(); i != e; ++i) {
Record *R = RV[i];
std::string name = R->getValueAsString("Name");
@@ -2242,6 +2506,12 @@ void NeonEmitter::runTests(raw_ostream &OS) {
bool isShift = R->getValueAsBit("isShift");
std::string InstName = R->getValueAsString("InstName");
bool isHiddenLOp = R->getValueAsBit("isHiddenLInst");
+ bool isA64 = R->getValueAsBit("isA64");
+
+ // do not include AArch64 intrinsic test if not generating
+ // code for AArch64
+ if (!isA64GenTest && isA64)
+ continue;
SmallVector<StringRef, 16> TypeVec;
ParseTypes(R, Types, TypeVec);
@@ -2261,16 +2531,56 @@ void NeonEmitter::runTests(raw_ostream &OS) {
(void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
if (srcti == ti || inQuad != outQuad)
continue;
- OS << GenTest(name, Proto, TypeVec[ti], TypeVec[srcti],
- isShift, isHiddenLOp, ck, InstName);
+ std::string testFuncProto;
+ std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[srcti],
+ isShift, isHiddenLOp, ck, InstName, isA64,
+ testFuncProto);
+ if (EmittedMap.count(testFuncProto))
+ continue;
+ EmittedMap[testFuncProto] = kind;
+ OS << s << "\n";
}
} else {
- OS << GenTest(name, Proto, TypeVec[ti], TypeVec[ti],
- isShift, isHiddenLOp, ck, InstName);
+ std::string testFuncProto;
+ std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift,
+ isHiddenLOp, ck, InstName, isA64, testFuncProto);
+ if (EmittedMap.count(testFuncProto))
+ continue;
+ EmittedMap[testFuncProto] = kind;
+ OS << s << "\n";
}
}
- OS << "\n";
}
+
+ if (isA64GenTest)
+ OS << "#endif\n";
+}
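For an AArch64-only intrinsic, genTargetTest() therefore emits entries of this
shape into the guarded region (a sketch; CHECK_ARM lines and instruction
patterns are only produced for ARM intrinsics):

    #ifdef __aarch64__
    // CHECK_AARCH64: test_vpaddq_f64
    float64x2_t test_vpaddq_f64(float64x2_t a, float64x2_t b) {
      return vpaddq_f64(a, b);
    }
    #endif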
+/// runTests - Write out a complete set of tests for all of the Neon
+/// intrinsics.
+void NeonEmitter::runTests(raw_ostream &OS) {
+ OS << "// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi "
+ "apcs-gnu\\\n"
+ "// RUN: -target-cpu swift -ffreestanding -Os -S -o - %s\\\n"
+ "// RUN: | FileCheck %s -check-prefix=CHECK_ARM\n"
+ "\n"
+ "// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \\\n"
+        "// RUN: -target-feature +neon -ffreestanding -S -o - %s \\\n"
+ "// RUN: | FileCheck %s -check-prefix=CHECK_AARCH64\n"
+ "\n"
+ "// REQUIRES: long_tests\n"
+ "\n"
+ "#include <arm_neon.h>\n"
+ "\n";
+
+ // ARM tests must be emitted before AArch64 tests to ensure
+ // tests for intrinsics that are common to ARM and AArch64
+ // appear only once in the output stream.
+ // The check for uniqueness is done in genTargetTest.
+ StringMap<OpKind> EmittedMap;
+
+ genTargetTest(OS, EmittedMap, false);
+
+ genTargetTest(OS, EmittedMap, true);
}
namespace clang {