Merging r228303:

------------------------------------------------------------------------ r228303 | thomas.stellard | 2015-02-05 10:32:18 -0500 (Thu, 05 Feb 2015) | 11 lines R600/SI: Fix bug in TTI loop unrolling preferences We should be setting UnrollingPreferences::MaxCount to UINT_MAX instead of UnrollingPreferences::Count. Count is a 'forced unrolling factor', while MaxCount sets an upper limit to the unrolling factor. Setting Count to UINT_MAX was causing the loop in the testcase to be unrolled 15 times, when it only had a maximum of 4 iterations. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_36@228320 91177308-0d34-0410-b5e6-96231b3b80d8
author: Tom Stellard <thomas.stellard@amd.com> 2015-02-05 18:05:18 +0000
committer: Tom Stellard <thomas.stellard@amd.com> 2015-02-05 18:05:18 +0000
commit: f518f53f5e756eed9a6ef9f667bab5ad1cb12bc6 (patch)
tree: fe92998c60aa0974d05cf00087290bcde05646c6
parent: 4cbaf6f1ce58dfbff474c65691cd5206ee84f7cc (diff)
download: llvm-f518f53f5e756eed9a6ef9f667bab5ad1cb12bc6.tar.gz
2 files changed, 59 insertions, 1 deletions
diff --git a/lib/Target/R600/AMDGPUTargetTransformInfo.cpp b/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
index e7bc00635f7..0bc62d0ab04 100644
--- a/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
+++ b/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
@@ -100,7 +100,7 @@ bool AMDGPUTTI::hasBranchDivergence() const { return true; }
 void AMDGPUTTI::getUnrollingPreferences(const Function *, Loop *L,
                                         UnrollingPreferences &UP) const {
   UP.Threshold = 300; // Twice the default.
-  UP.Count = UINT_MAX;
+  UP.MaxCount = UINT_MAX;
   UP.Partial = true;
 
   // TODO: Do we want runtime unrolling?
diff --git a/test/CodeGen/R600/tti-unroll-prefs.ll b/test/CodeGen/R600/tti-unroll-prefs.ll
new file mode 100644
index 00000000000..0009c42f79b
--- /dev/null
+++ b/test/CodeGen/R600/tti-unroll-prefs.ll
@@ -0,0 +1,58 @@
+; RUN: opt -loop-unroll -S -mtriple=amdgcn-- -mcpu=SI %s | FileCheck %s
+
+; This IR comes from this OpenCL C code:
+;
+; if (b + 4 > a) {
+;   for (int i = 0; i < 4; i++, b++) {
+;     if (b + 1 <= a)
+;       *(dst + c + b) = 0;
+;     else
+;       break;
+;   }
+; }
+;
+; This test is meant to check that this loop isn't unrolled into more than
+; four iterations.  The loop unrolling preferences we currently use cause this
+; loop to not be unrolled at all, but that may change in the future.
+
+; CHECK-LABEL: @test
+; CHECK: store i8 0, i8 addrspace(1)*
+; CHECK-NOT: store i8 0, i8 addrspace(1)*
+; CHECK: ret void
+define void @test(i8 addrspace(1)* nocapture %dst, i32 %a, i32 %b, i32 %c) {
+entry:
+  %add = add nsw i32 %b, 4
+  %cmp = icmp sgt i32 %add, %a
+  br i1 %cmp, label %for.cond.preheader, label %if.end7
+
+for.cond.preheader:                               ; preds = %entry
+  %cmp313 = icmp slt i32 %b, %a
+  br i1 %cmp313, label %if.then4.lr.ph, label %if.end7.loopexit
+
+if.then4.lr.ph:                                   ; preds = %for.cond.preheader
+  %0 = sext i32 %c to i64
+  br label %if.then4
+
+if.then4:                                         ; preds = %if.then4.lr.ph, %if.then4
+  %i.015 = phi i32 [ 0, %if.then4.lr.ph ], [ %inc, %if.then4 ]
+  %b.addr.014 = phi i32 [ %b, %if.then4.lr.ph ], [ %add2, %if.then4 ]
+  %add2 = add nsw i32 %b.addr.014, 1
+  %1 = sext i32 %b.addr.014 to i64
+  %add.ptr.sum = add nsw i64 %1, %0
+  %add.ptr5 = getelementptr inbounds i8 addrspace(1)* %dst, i64 %add.ptr.sum
+  store i8 0, i8 addrspace(1)* %add.ptr5, align 1
+  %inc = add nsw i32 %i.015, 1
+  %cmp1 = icmp slt i32 %inc, 4
+  %cmp3 = icmp slt i32 %add2, %a
+  %or.cond = and i1 %cmp3, %cmp1
+  br i1 %or.cond, label %if.then4, label %for.cond.if.end7.loopexit_crit_edge
+
+for.cond.if.end7.loopexit_crit_edge:              ; preds = %if.then4
+  br label %if.end7.loopexit
+
+if.end7.loopexit:                                 ; preds = %for.cond.if.end7.loopexit_crit_edge, %for.cond.preheader
+  br label %if.end7
+
+if.end7:                                          ; preds = %if.end7.loopexit, %entry
+  ret void
+}
author	Tom Stellard <thomas.stellard@amd.com>	2015-02-05 18:05:18 +0000
committer	Tom Stellard <thomas.stellard@amd.com>	2015-02-05 18:05:18 +0000
commit	f518f53f5e756eed9a6ef9f667bab5ad1cb12bc6 (patch)
tree	fe92998c60aa0974d05cf00087290bcde05646c6
parent	4cbaf6f1ce58dfbff474c65691cd5206ee84f7cc (diff)
download	llvm-f518f53f5e756eed9a6ef9f667bab5ad1cb12bc6.tar.gz