author     Chandler Carruth <chandlerc@gmail.com>        2018-04-18 15:13:16 +0000
committer  Pirama Arumuga Nainar <pirama@google.com>     2018-04-27 13:59:49 -0700
commit     395a261979d5c097f7ae7fb19ef452839acf3901 (patch)
tree       324bb6502d6bbd3f47569de69663ff279d01a4e0
parent     4798aaa1b25a0f1b7b4e354c3d254c34dadfa5c5 (diff)
[x86] Fix PR37100 by teaching the EFLAGS copy lowering to rewrite uses
across basic blocks in the limited cases where it is very straightforward
to do so.

This will also be useful for other places where we do some limited EFLAGS
propagation across CFG edges and need to handle copy rewrites afterward.

I think this is rapidly approaching the maximum we can and should be doing
here. Everything else begins to require either heroic analysis to prove
how to do PHI insertion manually, or somehow managing arbitrary PHI-ing of
EFLAGS with general PHI insertion. Neither of these seems at all
promising, so if those cases come up, we'll almost certainly need to
rewrite the parts of LLVM that produce those patterns.

We do now require dominator trees in order to reliably diagnose patterns
that would require PHI nodes. This is a bit unfortunate, but it seems
better than the completely mysterious crash we would otherwise get.

Differential Revision: https://reviews.llvm.org/D45673

Change-Id: I17e834975f79674613cf6adbe1afa892a2325a45
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@330264 91177308-0d34-0410-b5e6-96231b3b80d8
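The core pattern the diff below introduces is a visited-set worklist over
basic blocks, with a dominance check guarding each block because the pass
performs no PHI insertion. The following is a minimal stand-alone sketch of
that pattern, not the pass itself; Block, Dominates, RewriteUses, and the
EFLAGSLiveIn flag are hypothetical stand-ins for MachineBasicBlock,
MDT->dominates(...), the per-block rewrite loop, and
MachineBasicBlock::isLiveIn(X86::EFLAGS).

    // Sketch only: simplified stand-ins, not the LLVM API.
    #include <cstdio>
    #include <cstdlib>
    #include <functional>
    #include <set>
    #include <vector>

    struct Block {
      int ID = 0;
      bool EFLAGSLiveIn = false;        // stand-in for isLiveIn(X86::EFLAGS)
      std::vector<Block *> Succs;
    };

    // TestMBB holds the flag-setting test; CopyMBB holds the EFLAGS copy.
    // RewriteUses rewrites the EFLAGS uses in one block and returns true if
    // the flags are killed there, so successors need not be queued.
    void scanCopyUses(Block &TestMBB, Block &CopyMBB,
                      const std::function<bool(Block &, Block &)> &Dominates,
                      const std::function<bool(Block &)> &RewriteUses) {
      std::vector<Block *> Worklist = {&CopyMBB};
      std::set<Block *> Visited = {&CopyMBB};
      while (!Worklist.empty()) {
        Block &UseMBB = *Worklist.back();
        Worklist.pop_back();

        // No PHIs are inserted, so every use block must be dominated by the
        // test block; otherwise flag state would merge along some CFG edge.
        if (&UseMBB != &TestMBB && !Dominates(TestMBB, UseMBB)) {
          std::fprintf(stderr, "use block %d not dominated by test block\n",
                       UseMBB.ID);
          std::abort();                 // the pass uses report_fatal_error
        }

        if (RewriteUses(UseMBB))
          continue;                     // flags died in this block

        // Flags live out: queue each successor where EFLAGS is live-in,
        // visiting every block at most once.
        for (Block *Succ : UseMBB.Succs)
          if (Succ->EFLAGSLiveIn && Visited.insert(Succ).second)
            Worklist.push_back(Succ);
      }
    }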
-rw-r--r--  lib/Target/X86/X86FlagsCopyLowering.cpp  |  207
1 file changed, 125 insertions(+), 82 deletions(-)
diff --git a/lib/Target/X86/X86FlagsCopyLowering.cpp b/lib/Target/X86/X86FlagsCopyLowering.cpp
index a101f6b7127..1fd1c704d79 100644
--- a/lib/Target/X86/X86FlagsCopyLowering.cpp
+++ b/lib/Target/X86/X86FlagsCopyLowering.cpp
@@ -36,6 +36,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -98,6 +99,7 @@ private:
const X86InstrInfo *TII;
const TargetRegisterInfo *TRI;
const TargetRegisterClass *PromoteRC;
+ MachineDominatorTree *MDT;
CondRegArray collectCondsInRegs(MachineBasicBlock &MBB,
MachineInstr &CopyDefI);
@@ -145,6 +147,7 @@ FunctionPass *llvm::createX86FlagsCopyLoweringPass() {
char X86FlagsCopyLoweringPass::ID = 0;
void X86FlagsCopyLoweringPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineDominatorTree>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -342,6 +345,7 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo();
TII = Subtarget.getInstrInfo();
TRI = Subtarget.getRegisterInfo();
+ MDT = &getAnalysis<MachineDominatorTree>();
PromoteRC = &X86::GR8RegClass;
if (MF.begin() == MF.end())
@@ -416,103 +420,142 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
// of these up front instead.
CondRegArray CondRegs = collectCondsInRegs(TestMBB, CopyDefI);
- for (auto MII = std::next(CopyI->getIterator()), MIE = MBB.instr_end();
- MII != MIE;) {
- MachineInstr &MI = *MII++;
- MachineOperand *FlagUse = MI.findRegisterUseOperand(X86::EFLAGS);
- if (!FlagUse) {
- if (MI.findRegisterDefOperand(X86::EFLAGS)) {
- // If EFLAGS are defined, it's as-if they were killed. We can stop
- // scanning here.
- //
- // NB!!! Many instructions only modify some flags. LLVM currently
- // models this as clobbering all flags, but if that ever changes this
- // will need to be carefully updated to handle that more complex
- // logic.
+ // Collect the basic blocks we need to scan. Typically this will just be
+ // a single basic block but we may have to scan multiple blocks if the
+ // EFLAGS copy lives into successors.
+ SmallVector<MachineBasicBlock *, 2> Blocks;
+ SmallPtrSet<MachineBasicBlock *, 2> VisitedBlocks;
+ Blocks.push_back(&MBB);
+ VisitedBlocks.insert(&MBB);
+
+ do {
+ MachineBasicBlock &UseMBB = *Blocks.pop_back_val();
+
+ // We currently don't do any PHI insertion and so we require that the
+ // test basic block dominates all of the use basic blocks.
+ //
+ // We could in theory do PHI insertion here if it becomes useful by just
+ // taking undef values in along every edge that we don't trace this
+ // EFLAGS copy along. This isn't as bad as fully general PHI insertion,
+ // but still seems like a great deal of complexity.
+ //
+ // Because it is theoretically possible that some earlier MI pass or
+ // other lowering transformation could induce this to happen, we do
+ // a hard check even in non-debug builds here.
+ if (&TestMBB != &UseMBB && !MDT->dominates(&TestMBB, &UseMBB)) {
+ DEBUG({
+ dbgs() << "ERROR: Encountered use that is not dominated by our test "
+ "basic block! Rewriting this would require inserting PHI "
+ "nodes to track the flag state across the CFG.\n\nTest "
+ "block:\n";
+ TestMBB.dump();
+ dbgs() << "Use block:\n";
+ UseMBB.dump();
+ });
+ report_fatal_error("Cannot lower EFLAGS copy when original copy def "
+ "does not dominate all uses.");
+ }
+
+ for (auto MII = &UseMBB == &MBB ? std::next(CopyI->getIterator())
+ : UseMBB.instr_begin(),
+ MIE = UseMBB.instr_end();
+ MII != MIE;) {
+ MachineInstr &MI = *MII++;
+ MachineOperand *FlagUse = MI.findRegisterUseOperand(X86::EFLAGS);
+ if (!FlagUse) {
+ if (MI.findRegisterDefOperand(X86::EFLAGS)) {
+ // If EFLAGS are defined, it's as-if they were killed. We can stop
+ // scanning here.
+ //
+ // NB!!! Many instructions only modify some flags. LLVM currently
+ // models this as clobbering all flags, but if that ever changes
+ // this will need to be carefully updated to handle that more
+ // complex logic.
+ FlagsKilled = true;
+ break;
+ }
+ continue;
+ }
+
+ DEBUG(dbgs() << " Rewriting use: "; MI.dump());
+
+ // Check the kill flag before we rewrite as that may change it.
+ if (FlagUse->isKill())
FlagsKilled = true;
+
+ // Once we encounter a branch, the rest of the instructions must also be
+ // branches. We can't rewrite in place here, so we handle them below.
+ //
+ // Note that we don't have to handle tail calls here, even conditional
+ // tail calls, as those are not introduced into the X86 MI until post-RA
+ // branch folding or block placement. As a consequence, we get to deal
+ // with the simpler formulation of conditional branches followed by tail
+ // calls.
+ if (X86::getCondFromBranchOpc(MI.getOpcode()) != X86::COND_INVALID) {
+ auto JmpIt = MI.getIterator();
+ do {
+ JmpIs.push_back(&*JmpIt);
+ ++JmpIt;
+ } while (JmpIt != UseMBB.instr_end() &&
+ X86::getCondFromBranchOpc(JmpIt->getOpcode()) !=
+ X86::COND_INVALID);
break;
}
- continue;
- }
- DEBUG(dbgs() << " Rewriting use: "; MI.dump());
-
- // Check the kill flag before we rewrite as that may change it.
- if (FlagUse->isKill())
- FlagsKilled = true;
+ // Otherwise we can just rewrite in-place.
+ if (X86::getCondFromCMovOpc(MI.getOpcode()) != X86::COND_INVALID) {
+ rewriteCMov(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
+ } else if (X86::getCondFromSETOpc(MI.getOpcode()) !=
+ X86::COND_INVALID) {
+ rewriteSetCC(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
+ } else if (MI.getOpcode() == TargetOpcode::COPY) {
+ rewriteCopy(MI, *FlagUse, CopyDefI);
+ } else {
+ // We assume that arithmetic instructions that use flags also def
+ // them.
+ assert(MI.findRegisterDefOperand(X86::EFLAGS) &&
+ "Expected a def of EFLAGS for this instruction!");
+
+ // NB!!! Several arithmetic instructions only *partially* update
+ // flags. Theoretically, we could generate MI code sequences that
+ // would rely on this fact and observe different flags independently.
+ // But currently LLVM models all of these instructions as clobbering
+ // all the flags in an undef way. We rely on that to simplify the
+ // logic.
+ FlagsKilled = true;
- // Once we encounter a branch, the rest of the instructions must also be
- // branches. We can't rewrite in place here, so we handle them below.
- //
- // Note that we don't have to handle tail calls here, even conditional
- // tail calls, as those are not introduced into the X86 MI until post-RA
- // branch folding or block placement. As a consequence, we get to deal
- // with the simpler formulation of conditional branches followed by tail
- // calls.
- if (X86::getCondFromBranchOpc(MI.getOpcode()) != X86::COND_INVALID) {
- auto JmpIt = MI.getIterator();
- do {
- JmpIs.push_back(&*JmpIt);
- ++JmpIt;
- } while (JmpIt != MBB.instr_end() &&
- X86::getCondFromBranchOpc(JmpIt->getOpcode()) !=
- X86::COND_INVALID);
- break;
- }
+ rewriteArithmetic(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
+ break;
+ }
- // Otherwise we can just rewrite in-place.
- if (X86::getCondFromCMovOpc(MI.getOpcode()) != X86::COND_INVALID) {
- rewriteCMov(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
- } else if (X86::getCondFromSETOpc(MI.getOpcode()) != X86::COND_INVALID) {
- rewriteSetCC(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
- } else if (MI.getOpcode() == TargetOpcode::COPY) {
- rewriteCopy(MI, *FlagUse, CopyDefI);
- } else {
- // We assume that arithmetic instructions that use flags also def them.
- assert(MI.findRegisterDefOperand(X86::EFLAGS) &&
- "Expected a def of EFLAGS for this instruction!");
-
- // NB!!! Several arithmetic instructions only *partially* update
- // flags. Theoretically, we could generate MI code sequences that
- // would rely on this fact and observe different flags independently.
- // But currently LLVM models all of these instructions as clobbering
- // all the flags in an undef way. We rely on that to simplify the
- // logic.
- FlagsKilled = true;
-
- rewriteArithmetic(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
- break;
+ // If this was the last use of the flags, we're done.
+ if (FlagsKilled)
+ break;
}
- // If this was the last use of the flags, we're done.
+ // If the flags were killed, we're done with this block.
if (FlagsKilled)
break;
- }
- // If we didn't find a kill (or equivalent) check that the flags don't
- // live-out of the basic block. Currently we don't support lowering copies
- // of flags that live out in this fashion.
- if (!FlagsKilled &&
- llvm::any_of(MBB.successors(), [](MachineBasicBlock *SuccMBB) {
- return SuccMBB->isLiveIn(X86::EFLAGS);
- })) {
- DEBUG({
- dbgs() << "ERROR: Found a copied EFLAGS live-out from basic block:\n"
- << "----\n";
- MBB.dump();
- dbgs() << "----\n"
- << "ERROR: Cannot lower this EFLAGS copy!\n";
- });
- report_fatal_error(
- "Cannot lower EFLAGS copy that lives out of a basic block!");
- }
+ // Otherwise we need to scan successors for ones where the flags are
+ // live-in and queue those up for processing.
+ for (MachineBasicBlock *SuccMBB : UseMBB.successors())
+ if (SuccMBB->isLiveIn(X86::EFLAGS) &&
+ VisitedBlocks.insert(SuccMBB).second)
+ Blocks.push_back(SuccMBB);
+ } while (!Blocks.empty());
// Now rewrite the jumps that use the flags. These we handle specially
- // because if there are multiple jumps we'll have to do surgery on the CFG.
+ // because if there are multiple jumps in a single basic block we'll have
+ // to do surgery on the CFG.
+ MachineBasicBlock *LastJmpMBB = nullptr;
for (MachineInstr *JmpI : JmpIs) {
- // Past the first jump we need to split the blocks apart.
- if (JmpI != JmpIs.front())
+ // Past the first jump within a basic block we need to split the blocks
+ // apart.
+ if (JmpI->getParent() == LastJmpMBB)
splitBlock(*JmpI->getParent(), *JmpI, *TII);
+ else
+ LastJmpMBB = JmpI->getParent();
rewriteCondJmp(TestMBB, TestPos, TestLoc, *JmpI, CondRegs);
}