aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLogan Chien <tzuhsiang.chien@gmail.com>2014-12-18 00:57:38 +0800
committerLogan Chien <tzuhsiang.chien@gmail.com>2014-12-18 00:57:38 +0800
commit66249d3b1367ea4f08fcda6ce205aef0ae448a45 (patch)
treea68f2b6fb519c9a8fc7044b9e922e726a3a53bfa
parentc4bf5513716e10b8238357c0f6aaef705213231a (diff)
downloadllvm-66249d3b1367ea4f08fcda6ce205aef0ae448a45.tar.gz
[ndk][backport] Workaround llvm.cttz.v2i64() problem.
Under some situation, the LLVM auto-vectorization will generate llvm.cttz.v2i64() which does not correspond to any ARM instruction and result in unmatched selection DAG. This backported patch workaround the problem by refining the cost model to avoid this intrinsics in this case. Patch originally by James Molloy - james molloy arm com [ARM] Teach the cost model that cross-class copies are costly. Cross-class copies being expensive is actually a trait of the microarchitecture, but as I haven't yet seen an example of a microarchitecture where they're cheap it seems best to just enable this by default, covering the non-mcpu build case. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@217674 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/ARM/ARMTargetTransformInfo.cpp7
-rw-r--r--test/Analysis/CostModel/ARM/cast.ll112
2 files changed, 63 insertions, 56 deletions
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
index a2ace629baa..52d862c9ecd 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -389,6 +389,13 @@ unsigned ARMTTI::getVectorInstrCost(unsigned Opcode, Type *ValTy,
ValTy->getScalarSizeInBits() <= 32)
return 3;
+ // Cross-class copies are expensive on many microarchitectures,
+ // so assume they are expensive by default.
+ if ((Opcode == Instruction::InsertElement ||
+ Opcode == Instruction::ExtractElement) &&
+ ValTy->getVectorElementType()->isIntegerTy())
+ return 3;
+
return TargetTransformInfo::getVectorInstrCost(Opcode, ValTy, Index);
}
diff --git a/test/Analysis/CostModel/ARM/cast.ll b/test/Analysis/CostModel/ARM/cast.ll
index 662110f2720..18d6e841433 100644
--- a/test/Analysis/CostModel/ARM/cast.ll
+++ b/test/Analysis/CostModel/ARM/cast.ll
@@ -221,35 +221,35 @@ define i32 @casts() {
%r96 = fptoui <2 x float> undef to <2 x i32>
; CHECK: cost of 1 {{.*}} fptosi
%r97 = fptosi <2 x float> undef to <2 x i32>
- ; CHECK: cost of 28 {{.*}} fptoui
+ ; CHECK: cost of 32 {{.*}} fptoui
%r98 = fptoui <2 x float> undef to <2 x i64>
- ; CHECK: cost of 28 {{.*}} fptosi
+ ; CHECK: cost of 32 {{.*}} fptosi
%r99 = fptosi <2 x float> undef to <2 x i64>
- ; CHECK: cost of 8 {{.*}} fptoui
+ ; CHECK: cost of 16 {{.*}} fptoui
%r100 = fptoui <2 x double> undef to <2 x i1>
- ; CHECK: cost of 8 {{.*}} fptosi
+ ; CHECK: cost of 16 {{.*}} fptosi
%r101 = fptosi <2 x double> undef to <2 x i1>
- ; CHECK: cost of 8 {{.*}} fptoui
+ ; CHECK: cost of 16 {{.*}} fptoui
%r102 = fptoui <2 x double> undef to <2 x i8>
- ; CHECK: cost of 8 {{.*}} fptosi
+ ; CHECK: cost of 16 {{.*}} fptosi
%r103 = fptosi <2 x double> undef to <2 x i8>
- ; CHECK: cost of 8 {{.*}} fptoui
+ ; CHECK: cost of 16 {{.*}} fptoui
%r104 = fptoui <2 x double> undef to <2 x i16>
- ; CHECK: cost of 8 {{.*}} fptosi
+ ; CHECK: cost of 16 {{.*}} fptosi
%r105 = fptosi <2 x double> undef to <2 x i16>
; CHECK: cost of 2 {{.*}} fptoui
%r106 = fptoui <2 x double> undef to <2 x i32>
; CHECK: cost of 2 {{.*}} fptosi
%r107 = fptosi <2 x double> undef to <2 x i32>
- ; CHECK: cost of 28 {{.*}} fptoui
+ ; CHECK: cost of 32 {{.*}} fptoui
%r108 = fptoui <2 x double> undef to <2 x i64>
- ; CHECK: cost of 28 {{.*}} fptosi
+ ; CHECK: cost of 32 {{.*}} fptosi
%r109 = fptosi <2 x double> undef to <2 x i64>
- ; CHECK: cost of 16 {{.*}} fptoui
+ ; CHECK: cost of 32 {{.*}} fptoui
%r110 = fptoui <4 x float> undef to <4 x i1>
- ; CHECK: cost of 16 {{.*}} fptosi
+ ; CHECK: cost of 32 {{.*}} fptosi
%r111 = fptosi <4 x float> undef to <4 x i1>
; CHECK: cost of 3 {{.*}} fptoui
%r112 = fptoui <4 x float> undef to <4 x i8>
@@ -263,39 +263,39 @@ define i32 @casts() {
%r116 = fptoui <4 x float> undef to <4 x i32>
; CHECK: cost of 1 {{.*}} fptosi
%r117 = fptosi <4 x float> undef to <4 x i32>
- ; CHECK: cost of 56 {{.*}} fptoui
+ ; CHECK: cost of 64 {{.*}} fptoui
%r118 = fptoui <4 x float> undef to <4 x i64>
- ; CHECK: cost of 56 {{.*}} fptosi
+ ; CHECK: cost of 64 {{.*}} fptosi
%r119 = fptosi <4 x float> undef to <4 x i64>
- ; CHECK: cost of 16 {{.*}} fptoui
+ ; CHECK: cost of 32 {{.*}} fptoui
%r120 = fptoui <4 x double> undef to <4 x i1>
- ; CHECK: cost of 16 {{.*}} fptosi
+ ; CHECK: cost of 32 {{.*}} fptosi
%r121 = fptosi <4 x double> undef to <4 x i1>
- ; CHECK: cost of 16 {{.*}} fptoui
+ ; CHECK: cost of 32 {{.*}} fptoui
%r122 = fptoui <4 x double> undef to <4 x i8>
- ; CHECK: cost of 16 {{.*}} fptosi
+ ; CHECK: cost of 32 {{.*}} fptosi
%r123 = fptosi <4 x double> undef to <4 x i8>
- ; CHECK: cost of 16 {{.*}} fptoui
+ ; CHECK: cost of 32 {{.*}} fptoui
%r124 = fptoui <4 x double> undef to <4 x i16>
- ; CHECK: cost of 16 {{.*}} fptosi
+ ; CHECK: cost of 32 {{.*}} fptosi
%r125 = fptosi <4 x double> undef to <4 x i16>
- ; CHECK: cost of 16 {{.*}} fptoui
+ ; CHECK: cost of 32 {{.*}} fptoui
%r126 = fptoui <4 x double> undef to <4 x i32>
- ; CHECK: cost of 16 {{.*}} fptosi
+ ; CHECK: cost of 32 {{.*}} fptosi
%r127 = fptosi <4 x double> undef to <4 x i32>
- ; CHECK: cost of 56 {{.*}} fptoui
+ ; CHECK: cost of 64 {{.*}} fptoui
%r128 = fptoui <4 x double> undef to <4 x i64>
- ; CHECK: cost of 56 {{.*}} fptosi
+ ; CHECK: cost of 64 {{.*}} fptosi
%r129 = fptosi <4 x double> undef to <4 x i64>
- ; CHECK: cost of 32 {{.*}} fptoui
+ ; CHECK: cost of 64 {{.*}} fptoui
%r130 = fptoui <8 x float> undef to <8 x i1>
- ; CHECK: cost of 32 {{.*}} fptosi
+ ; CHECK: cost of 64 {{.*}} fptosi
%r131 = fptosi <8 x float> undef to <8 x i1>
- ; CHECK: cost of 32 {{.*}} fptoui
+ ; CHECK: cost of 64 {{.*}} fptoui
%r132 = fptoui <8 x float> undef to <8 x i8>
- ; CHECK: cost of 32 {{.*}} fptosi
+ ; CHECK: cost of 64 {{.*}} fptosi
%r133 = fptosi <8 x float> undef to <8 x i8>
; CHECK: cost of 4 {{.*}} fptoui
%r134 = fptoui <8 x float> undef to <8 x i16>
@@ -305,39 +305,39 @@ define i32 @casts() {
%r136 = fptoui <8 x float> undef to <8 x i32>
; CHECK: cost of 2 {{.*}} fptosi
%r137 = fptosi <8 x float> undef to <8 x i32>
- ; CHECK: cost of 112 {{.*}} fptoui
+ ; CHECK: cost of 128 {{.*}} fptoui
%r138 = fptoui <8 x float> undef to <8 x i64>
- ; CHECK: cost of 112 {{.*}} fptosi
+ ; CHECK: cost of 128 {{.*}} fptosi
%r139 = fptosi <8 x float> undef to <8 x i64>
- ; CHECK: cost of 32 {{.*}} fptoui
+ ; CHECK: cost of 64 {{.*}} fptoui
%r140 = fptoui <8 x double> undef to <8 x i1>
- ; CHECK: cost of 32 {{.*}} fptosi
+ ; CHECK: cost of 64 {{.*}} fptosi
%r141 = fptosi <8 x double> undef to <8 x i1>
- ; CHECK: cost of 32 {{.*}} fptoui
+ ; CHECK: cost of 64 {{.*}} fptoui
%r142 = fptoui <8 x double> undef to <8 x i8>
- ; CHECK: cost of 32 {{.*}} fptosi
+ ; CHECK: cost of 64 {{.*}} fptosi
%r143 = fptosi <8 x double> undef to <8 x i8>
- ; CHECK: cost of 32 {{.*}} fptoui
+ ; CHECK: cost of 64 {{.*}} fptoui
%r144 = fptoui <8 x double> undef to <8 x i16>
- ; CHECK: cost of 32 {{.*}} fptosi
+ ; CHECK: cost of 64 {{.*}} fptosi
%r145 = fptosi <8 x double> undef to <8 x i16>
- ; CHECK: cost of 32 {{.*}} fptoui
+ ; CHECK: cost of 64 {{.*}} fptoui
%r146 = fptoui <8 x double> undef to <8 x i32>
- ; CHECK: cost of 32 {{.*}} fptosi
+ ; CHECK: cost of 64 {{.*}} fptosi
%r147 = fptosi <8 x double> undef to <8 x i32>
- ; CHECK: cost of 112 {{.*}} fptoui
+ ; CHECK: cost of 128 {{.*}} fptoui
%r148 = fptoui <8 x double> undef to <8 x i64>
- ; CHECK: cost of 112 {{.*}} fptosi
+ ; CHECK: cost of 128 {{.*}} fptosi
%r149 = fptosi <8 x double> undef to <8 x i64>
- ; CHECK: cost of 64 {{.*}} fptoui
+ ; CHECK: cost of 128 {{.*}} fptoui
%r150 = fptoui <16 x float> undef to <16 x i1>
- ; CHECK: cost of 64 {{.*}} fptosi
+ ; CHECK: cost of 128 {{.*}} fptosi
%r151 = fptosi <16 x float> undef to <16 x i1>
- ; CHECK: cost of 64 {{.*}} fptoui
+ ; CHECK: cost of 128 {{.*}} fptoui
%r152 = fptoui <16 x float> undef to <16 x i8>
- ; CHECK: cost of 64 {{.*}} fptosi
+ ; CHECK: cost of 128 {{.*}} fptosi
%r153 = fptosi <16 x float> undef to <16 x i8>
; CHECK: cost of 8 {{.*}} fptoui
%r154 = fptoui <16 x float> undef to <16 x i16>
@@ -347,30 +347,30 @@ define i32 @casts() {
%r156 = fptoui <16 x float> undef to <16 x i32>
; CHECK: cost of 4 {{.*}} fptosi
%r157 = fptosi <16 x float> undef to <16 x i32>
- ; CHECK: cost of 224 {{.*}} fptoui
+ ; CHECK: cost of 256 {{.*}} fptoui
%r158 = fptoui <16 x float> undef to <16 x i64>
- ; CHECK: cost of 224 {{.*}} fptosi
+ ; CHECK: cost of 256 {{.*}} fptosi
%r159 = fptosi <16 x float> undef to <16 x i64>
- ; CHECK: cost of 64 {{.*}} fptoui
+ ; CHECK: cost of 128 {{.*}} fptoui
%r160 = fptoui <16 x double> undef to <16 x i1>
- ; CHECK: cost of 64 {{.*}} fptosi
+ ; CHECK: cost of 128 {{.*}} fptosi
%r161 = fptosi <16 x double> undef to <16 x i1>
- ; CHECK: cost of 64 {{.*}} fptoui
+ ; CHECK: cost of 128 {{.*}} fptoui
%r162 = fptoui <16 x double> undef to <16 x i8>
- ; CHECK: cost of 64 {{.*}} fptosi
+ ; CHECK: cost of 128 {{.*}} fptosi
%r163 = fptosi <16 x double> undef to <16 x i8>
- ; CHECK: cost of 64 {{.*}} fptoui
+ ; CHECK: cost of 128 {{.*}} fptoui
%r164 = fptoui <16 x double> undef to <16 x i16>
- ; CHECK: cost of 64 {{.*}} fptosi
+ ; CHECK: cost of 128 {{.*}} fptosi
%r165 = fptosi <16 x double> undef to <16 x i16>
- ; CHECK: cost of 64 {{.*}} fptoui
+ ; CHECK: cost of 128 {{.*}} fptoui
%r166 = fptoui <16 x double> undef to <16 x i32>
- ; CHECK: cost of 64 {{.*}} fptosi
+ ; CHECK: cost of 128 {{.*}} fptosi
%r167 = fptosi <16 x double> undef to <16 x i32>
- ; CHECK: cost of 224 {{.*}} fptoui
+ ; CHECK: cost of 256 {{.*}} fptoui
%r168 = fptoui <16 x double> undef to <16 x i64>
- ; CHECK: cost of 224 {{.*}} fptosi
+ ; CHECK: cost of 256 {{.*}} fptosi
%r169 = fptosi <16 x double> undef to <16 x i64>
; CHECK: cost of 8 {{.*}} uitofp