-rw-r--r--  lib/Target/X86/X86FlagsCopyLowering.cpp  207
1 file changed, 125 insertions(+), 82 deletions(-)
diff --git a/lib/Target/X86/X86FlagsCopyLowering.cpp b/lib/Target/X86/X86FlagsCopyLowering.cpp
index a101f6b7127..1fd1c704d79 100644
--- a/lib/Target/X86/X86FlagsCopyLowering.cpp
+++ b/lib/Target/X86/X86FlagsCopyLowering.cpp
@@ -36,6 +36,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -98,6 +99,7 @@ private:
const X86InstrInfo *TII;
const TargetRegisterInfo *TRI;
const TargetRegisterClass *PromoteRC;
+ MachineDominatorTree *MDT;
CondRegArray collectCondsInRegs(MachineBasicBlock &MBB,
MachineInstr &CopyDefI);
@@ -145,6 +147,7 @@ FunctionPass *llvm::createX86FlagsCopyLoweringPass() {
char X86FlagsCopyLoweringPass::ID = 0;
void X86FlagsCopyLoweringPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineDominatorTree>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -342,6 +345,7 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo();
TII = Subtarget.getInstrInfo();
TRI = Subtarget.getRegisterInfo();
+ MDT = &getAnalysis<MachineDominatorTree>();
PromoteRC = &X86::GR8RegClass;
if (MF.begin() == MF.end())
@@ -416,103 +420,142 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
// of these up front instead.
CondRegArray CondRegs = collectCondsInRegs(TestMBB, CopyDefI);
- for (auto MII = std::next(CopyI->getIterator()), MIE = MBB.instr_end();
- MII != MIE;) {
- MachineInstr &MI = *MII++;
- MachineOperand *FlagUse = MI.findRegisterUseOperand(X86::EFLAGS);
- if (!FlagUse) {
- if (MI.findRegisterDefOperand(X86::EFLAGS)) {
- // If EFLAGS are defined, it's as-if they were killed. We can stop
- // scanning here.
- //
- // NB!!! Many instructions only modify some flags. LLVM currently
- // models this as clobbering all flags, but if that ever changes this
- // will need to be carefully updated to handle that more complex
- // logic.
+ // Collect the basic blocks we need to scan. Typically this will just be
+ // a single basic block, but we may have to scan multiple blocks if the
+ // EFLAGS copy is live into successors.
+ SmallVector<MachineBasicBlock *, 2> Blocks;
+ SmallPtrSet<MachineBasicBlock *, 2> VisitedBlocks;
+ Blocks.push_back(&MBB);
+ VisitedBlocks.insert(&MBB);
+
+ do {
+ MachineBasicBlock &UseMBB = *Blocks.pop_back_val();
+
+ // We currently don't do any PHI insertion and so we require that the
+ // test basic block dominates all of the use basic blocks.
+ //
+ // We could in theory do PHI insertion here if it becomes useful by just
+ // taking undef values in along every edge that we don't trace this
+ // EFLAGS copy along. This isn't as bad as fully general PHI insertion,
+ // but still seems like a great deal of complexity.
+ //
+ // Because it is theoretically possible that some earlier MI pass or
+ // other lowering transformation could induce this to happen, we do
+ // a hard check even in non-debug builds here.
+ if (&TestMBB != &UseMBB && !MDT->dominates(&TestMBB, &UseMBB)) {
+ DEBUG({
+ dbgs() << "ERROR: Encountered use that is not dominated by our test "
+ "basic block! Rewriting this would require inserting PHI "
+ "nodes to track the flag state across the CFG.\n\nTest "
+ "block:\n";
+ TestMBB.dump();
+ dbgs() << "Use block:\n";
+ UseMBB.dump();
+ });
+ report_fatal_error("Cannot lower EFLAGS copy when original copy def "
+ "does not dominate all uses.");
+ }
+
+ for (auto MII = &UseMBB == &MBB ? std::next(CopyI->getIterator())
+ : UseMBB.instr_begin(),
+ MIE = UseMBB.instr_end();
+ MII != MIE;) {
+ MachineInstr &MI = *MII++;
+ MachineOperand *FlagUse = MI.findRegisterUseOperand(X86::EFLAGS);
+ if (!FlagUse) {
+ if (MI.findRegisterDefOperand(X86::EFLAGS)) {
+ // If EFLAGS are defined, it's as-if they were killed. We can stop
+ // scanning here.
+ //
+ // NB!!! Many instructions only modify some flags. LLVM currently
+ // models this as clobbering all flags, but if that ever changes
+ // this will need to be carefully updated to handle that more
+ // complex logic.
+ FlagsKilled = true;
+ break;
+ }
+ continue;
+ }
+
+ DEBUG(dbgs() << " Rewriting use: "; MI.dump());
+
+ // Check the kill flag before we rewrite as that may change it.
+ if (FlagUse->isKill())
FlagsKilled = true;
+
+ // Once we encounter a branch, the rest of the instructions must also be
+ // branches. We can't rewrite in place here, so we handle them below.
+ //
+ // Note that we don't have to handle tail calls here, even conditional
+ // tail calls, as those are not introduced into the X86 MI until post-RA
+ // branch folding or block placement. As a consequence, we get to deal
+ // with the simpler formulation of conditional branches followed by tail
+ // calls.
+ if (X86::getCondFromBranchOpc(MI.getOpcode()) != X86::COND_INVALID) {
+ auto JmpIt = MI.getIterator();
+ do {
+ JmpIs.push_back(&*JmpIt);
+ ++JmpIt;
+ } while (JmpIt != UseMBB.instr_end() &&
+ X86::getCondFromBranchOpc(JmpIt->getOpcode()) !=
+ X86::COND_INVALID);
break;
}
- continue;
- }
- DEBUG(dbgs() << " Rewriting use: "; MI.dump());
-
- // Check the kill flag before we rewrite as that may change it.
- if (FlagUse->isKill())
- FlagsKilled = true;
+ // Otherwise we can just rewrite in-place.
+ if (X86::getCondFromCMovOpc(MI.getOpcode()) != X86::COND_INVALID) {
+ rewriteCMov(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
+ } else if (X86::getCondFromSETOpc(MI.getOpcode()) !=
+ X86::COND_INVALID) {
+ rewriteSetCC(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
+ } else if (MI.getOpcode() == TargetOpcode::COPY) {
+ rewriteCopy(MI, *FlagUse, CopyDefI);
+ } else {
+ // We assume that arithmetic instructions that use flags also def
+ // them.
+ assert(MI.findRegisterDefOperand(X86::EFLAGS) &&
+ "Expected a def of EFLAGS for this instruction!");
+
+ // NB!!! Several arithmetic instructions only *partially* update
+ // flags. Theoretically, we could generate MI code sequences that
+ // would rely on this fact and observe different flags independently.
+ // But currently LLVM models all of these instructions as clobbering
+ // all the flags in an undef way. We rely on that to simplify the
+ // logic.
+ FlagsKilled = true;
- // Once we encounter a branch, the rest of the instructions must also be
- // branches. We can't rewrite in place here, so we handle them below.
- //
- // Note that we don't have to handle tail calls here, even conditional
- // tail calls, as those are not introduced into the X86 MI until post-RA
- // branch folding or black placement. As a consequence, we get to deal
- // with the simpler formulation of conditional branches followed by tail
- // calls.
- if (X86::getCondFromBranchOpc(MI.getOpcode()) != X86::COND_INVALID) {
- auto JmpIt = MI.getIterator();
- do {
- JmpIs.push_back(&*JmpIt);
- ++JmpIt;
- } while (JmpIt != MBB.instr_end() &&
- X86::getCondFromBranchOpc(JmpIt->getOpcode()) !=
- X86::COND_INVALID);
- break;
- }
+ rewriteArithmetic(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
+ break;
+ }
- // Otherwise we can just rewrite in-place.
- if (X86::getCondFromCMovOpc(MI.getOpcode()) != X86::COND_INVALID) {
- rewriteCMov(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
- } else if (X86::getCondFromSETOpc(MI.getOpcode()) != X86::COND_INVALID) {
- rewriteSetCC(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
- } else if (MI.getOpcode() == TargetOpcode::COPY) {
- rewriteCopy(MI, *FlagUse, CopyDefI);
- } else {
- // We assume that arithmetic instructions that use flags also def them.
- assert(MI.findRegisterDefOperand(X86::EFLAGS) &&
- "Expected a def of EFLAGS for this instruction!");
-
- // NB!!! Several arithmetic instructions only *partially* update
- // flags. Theoretically, we could generate MI code sequences that
- // would rely on this fact and observe different flags independently.
- // But currently LLVM models all of these instructions as clobbering
- // all the flags in an undef way. We rely on that to simplify the
- // logic.
- FlagsKilled = true;
-
- rewriteArithmetic(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
- break;
+ // If this was the last use of the flags, we're done.
+ if (FlagsKilled)
+ break;
}
- // If this was the last use of the flags, we're done.
+ // If the flags were killed, we're done with this block.
if (FlagsKilled)
break;
- }
- // If we didn't find a kill (or equivalent) check that the flags don't
- // live-out of the basic block. Currently we don't support lowering copies
- // of flags that live out in this fashion.
- if (!FlagsKilled &&
- llvm::any_of(MBB.successors(), [](MachineBasicBlock *SuccMBB) {
- return SuccMBB->isLiveIn(X86::EFLAGS);
- })) {
- DEBUG({
- dbgs() << "ERROR: Found a copied EFLAGS live-out from basic block:\n"
- << "----\n";
- MBB.dump();
- dbgs() << "----\n"
- << "ERROR: Cannot lower this EFLAGS copy!\n";
- });
- report_fatal_error(
- "Cannot lower EFLAGS copy that lives out of a basic block!");
- }
+ // Otherwise we need to scan successors for ones where the flags are
+ // live-in and queue those up for processing.
+ for (MachineBasicBlock *SuccMBB : UseMBB.successors())
+ if (SuccMBB->isLiveIn(X86::EFLAGS) &&
+ VisitedBlocks.insert(SuccMBB).second)
+ Blocks.push_back(SuccMBB);
+ } while (!Blocks.empty());
// Now rewrite the jumps that use the flags. These we handle specially
- // because if there are multiple jumps we'll have to do surgery on the CFG.
+ // because if there are multiple jumps in a single basic block we'll have
+ // to do surgery on the CFG.
+ MachineBasicBlock *LastJmpMBB = nullptr;
for (MachineInstr *JmpI : JmpIs) {
- // Past the first jump we need to split the blocks apart.
- if (JmpI != JmpIs.front())
+ // Past the first jump within a basic block we need to split the blocks
+ // apart.
+ if (JmpI->getParent() == LastJmpMBB)
splitBlock(*JmpI->getParent(), *JmpI, *TII);
+ else
+ LastJmpMBB = JmpI->getParent();
rewriteCondJmp(TestMBB, TestPos, TestLoc, *JmpI, CondRegs);
}