[ARM] Optimize {s,u}{add,sub}.with.overflow.

The AArch64 backend contains code to optimize {s,u}{add,sub}.with.overflow during SelectionDAG. This commit ports that code to the ARM backend. Differential revision: https://reviews.llvm.org/D35635 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@321224 91177308-0d34-0410-b5e6-96231b3b80d8
author: Joel Galenson <jgalenson@google.com> 2017-12-20 22:25:39 +0000
committer: Yi Kong <yikong@google.com> 2017-12-20 16:30:15 -0800
commit: 550c8f7fa02ff5594d34cc0b2f70576744652c95 (patch)
tree: a815877490463f9099768cf0e3f5a4e5f2ee4129
parent: ddb4c9a54ec370419557e44aec9c907978dd8b3c (diff)
download: llvm-550c8f7fa02ff5594d34cc0b2f70576744652c95.tar.gz
3 files changed, 207 insertions, 2 deletions
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index ad865329ce3..b7446655874 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -1046,7 +1046,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
   if (!Subtarget->isThumb1Only())
     setOperationAction(ISD::SETCCE, MVT::i32, Custom);
 
-  setOperationAction(ISD::BRCOND,    MVT::Other, Expand);
+  setOperationAction(ISD::BRCOND,    MVT::Other, Custom);
   setOperationAction(ISD::BR_CC,     MVT::i32,   Custom);
   setOperationAction(ISD::BR_CC,     MVT::f32,   Custom);
   setOperationAction(ISD::BR_CC,     MVT::f64,   Custom);
@@ -3909,6 +3909,10 @@ ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
   return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
 }
 
+// This function returns three things: the arithmetic computation itself
+// (Value), a comparison (OverflowCmp), and a condition code (ARMcc).  The
+// comparison and the condition code define the case in which the arithmetic
+// computation *does not* overflow.
 std::pair<SDValue, SDValue>
 ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
                                  SDValue &ARMcc) const {
@@ -3934,7 +3938,11 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
     break;
   case ISD::UADDO:
     ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
-    Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
+    // We use ADDC here to correspond to its use in LowerUnsignedALUO.
+    // We do not use it in the USUBO case as Value may not be used.
+    Value = DAG.getNode(ARMISD::ADDC, dl,
+                        DAG.getVTList(Op.getValueType(), MVT::i32), LHS, RHS)
+                .getValue(0);
     OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
     break;
   case ISD::SSUBO:
@@ -4453,6 +4461,39 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
   return SDValue();
 }
 
+SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
+  SDValue Chain = Op.getOperand(0);
+  SDValue Cond = Op.getOperand(1);
+  SDValue Dest = Op.getOperand(2);
+  SDLoc dl(Op);
+
+  // Optimize {s|u}{add|sub}.with.overflow feeding into a branch instruction.
+  unsigned Opc = Cond.getOpcode();
+  if (Cond.getResNo() == 1 && (Opc == ISD::SADDO || Opc == ISD::UADDO ||
+                               Opc == ISD::SSUBO || Opc == ISD::USUBO)) {
+    // Only lower legal XALUO ops.
+    if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
+      return SDValue();
+
+    // The actual operation with overflow check.
+    SDValue Value, OverflowCmp;
+    SDValue ARMcc;
+    std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
+
+    // Reverse the condition code.
+    ARMCC::CondCodes CondCode =
+        (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
+    CondCode = ARMCC::getOppositeCondition(CondCode);
+    ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
+    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+
+    return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR,
+                       OverflowCmp);
+  }
+
+  return SDValue();
+}
+
 SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
   SDValue Chain = Op.getOperand(0);
   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
@@ -4473,6 +4514,33 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
     }
   }
 
+  // Optimize {s|u}{add|sub}.with.overflow feeding into a branch instruction.
+  unsigned Opc = LHS.getOpcode();
+  if (LHS.getResNo() == 1 && (isOneConstant(RHS) || isNullConstant(RHS)) &&
+      (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
+       Opc == ISD::USUBO) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+    // Only lower legal XALUO ops.
+    if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
+      return SDValue();
+
+    // The actual operation with overflow check.
+    SDValue Value, OverflowCmp;
+    SDValue ARMcc;
+    std::tie(Value, OverflowCmp) = getARMXALUOOp(LHS.getValue(0), DAG, ARMcc);
+
+    if ((CC == ISD::SETNE) != isOneConstant(RHS)) {
+      // Reverse the condition code.
+      ARMCC::CondCodes CondCode =
+          (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
+      CondCode = ARMCC::getOppositeCondition(CondCode);
+      ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
+    }
+    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+
+    return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR,
+                       OverflowCmp);
+  }
+
   if (LHS.getValueType() == MVT::i32) {
     SDValue ARMcc;
     SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
@@ -7682,6 +7750,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
   case ISD::SELECT:        return LowerSELECT(Op, DAG);
   case ISD::SELECT_CC:     return LowerSELECT_CC(Op, DAG);
+  case ISD::BRCOND:        return LowerBRCOND(Op, DAG);
   case ISD::BR_CC:         return LowerBR_CC(Op, DAG);
   case ISD::BR_JT:         return LowerBR_JT(Op, DAG);
   case ISD::VASTART:       return LowerVASTART(Op, DAG);
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index a9d5639b557..e444da42732 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -641,6 +641,7 @@ class VectorType;
     SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
diff --git a/test/CodeGen/ARM/su-addsub-overflow.ll b/test/CodeGen/ARM/su-addsub-overflow.ll
new file mode 100644
index 00000000000..eef53128203
--- /dev/null
+++ b/test/CodeGen/ARM/su-addsub-overflow.ll
@@ -0,0 +1,135 @@
+; RUN: llc < %s -mtriple=arm-eabi -mcpu=generic | FileCheck %s
+
+define i32 @sadd(i32 %a, i32 %b) local_unnamed_addr #0 {
+; CHECK-LABEL: sadd:
+; CHECK:    mov r[[R0:[0-9]+]], r0
+; CHECK-NEXT:    add r[[R1:[0-9]+]], r[[R0]], r1
+; CHECK-NEXT:    cmp r[[R1]], r[[R0]]
+; CHECK-NEXT:    movvc pc, lr
+entry:
+  %0 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
+  %1 = extractvalue { i32, i1 } %0, 1
+  br i1 %1, label %trap, label %cont
+
+trap:
+  tail call void @llvm.trap() #2
+  unreachable
+
+cont:
+  %2 = extractvalue { i32, i1 } %0, 0
+  ret i32 %2
+
+}
+
+define i32 @uadd(i32 %a, i32 %b) local_unnamed_addr #0 {
+; CHECK-LABEL: uadd:
+; CHECK:    mov r[[R0:[0-9]+]], r0
+; CHECK-NEXT:    adds r[[R1:[0-9]+]], r[[R0]], r1
+; CHECK-NEXT:    cmp r[[R1]], r[[R0]]
+; CHECK-NEXT:    movhs pc, lr
+entry:
+  %0 = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
+  %1 = extractvalue { i32, i1 } %0, 1
+  br i1 %1, label %trap, label %cont
+
+trap:
+  tail call void @llvm.trap() #2
+  unreachable
+
+cont:
+  %2 = extractvalue { i32, i1 } %0, 0
+  ret i32 %2
+
+}
+
+define i32 @ssub(i32 %a, i32 %b) local_unnamed_addr #0 {
+; CHECK-LABEL: ssub:
+; CHECK:    cmp r0, r1
+; CHECK-NEXT:    subvc r0, r0, r1
+; CHECK-NEXT:    movvc pc, lr
+entry:
+  %0 = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
+  %1 = extractvalue { i32, i1 } %0, 1
+  br i1 %1, label %trap, label %cont
+
+trap:
+  tail call void @llvm.trap() #2
+  unreachable
+
+cont:
+  %2 = extractvalue { i32, i1 } %0, 0
+  ret i32 %2
+
+}
+
+define i32 @usub(i32 %a, i32 %b) local_unnamed_addr #0 {
+; CHECK-LABEL: usub:
+; CHECK:    mov r[[R0:[0-9]+]], r0
+; CHECK-NEXT:    subs r[[R1:[0-9]+]], r[[R0]], r1
+; CHECK-NEXT:    cmp r[[R0]], r1
+; CHECK-NEXT:    movhs pc, lr
+entry:
+  %0 = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
+  %1 = extractvalue { i32, i1 } %0, 1
+  br i1 %1, label %trap, label %cont
+
+trap:
+  tail call void @llvm.trap() #2
+  unreachable
+
+cont:
+  %2 = extractvalue { i32, i1 } %0, 0
+  ret i32 %2
+
+}
+
+define void @sum(i32* %a, i32* %b, i32 %n) local_unnamed_addr #0 {
+; CHECK-LABEL: sum:
+; CHECK:    ldr [[R0:r[0-9]+]],
+; CHECK-NEXT:    ldr [[R1:r[0-9]+|lr]],
+; CHECK-NEXT:    add [[R2:r[0-9]+]], [[R1]], [[R0]]
+; CHECK-NEXT:    cmp [[R2]], [[R1]]
+; CHECK-NEXT:    strvc [[R2]],
+; CHECK-NEXT:    addvc
+; CHECK-NEXT:    cmpvc
+; CHECK-NEXT:    bvs
+entry:
+  %cmp7 = icmp eq i32 %n, 0
+  br i1 %cmp7, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  ret void
+
+for.body:
+  %i.08 = phi i32 [ %7, %cont2 ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.08
+  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, i32* %a, i32 %i.08
+  %1 = load i32, i32* %arrayidx1, align 4
+  %2 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %1, i32 %0)
+  %3 = extractvalue { i32, i1 } %2, 1
+  br i1 %3, label %trap, label %cont
+
+trap:
+  tail call void @llvm.trap() #2
+  unreachable
+
+cont:
+  %4 = extractvalue { i32, i1 } %2, 0
+  store i32 %4, i32* %arrayidx1, align 4
+  %5 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %i.08, i32 1)
+  %6 = extractvalue { i32, i1 } %5, 1
+  br i1 %6, label %trap, label %cont2
+
+cont2:
+  %7 = extractvalue { i32, i1 } %5, 0
+  %cmp = icmp eq i32 %7, %n
+  br i1 %cmp, label %for.cond.cleanup, label %for.body
+
+}
+
+declare void @llvm.trap() #2
+declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) #1
+declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) #1
+declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32) #1
+declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) #1
author	Joel Galenson <jgalenson@google.com>	2017-12-20 22:25:39 +0000
committer	Yi Kong <yikong@google.com>	2017-12-20 16:30:15 -0800
commit	550c8f7fa02ff5594d34cc0b2f70576744652c95 (patch)
tree	a815877490463f9099768cf0e3f5a4e5f2ee4129
parent	ddb4c9a54ec370419557e44aec9c907978dd8b3c (diff)
download	llvm-550c8f7fa02ff5594d34cc0b2f70576744652c95.tar.gz