LLVM API Documentation

TailDuplication.cpp
Go to the documentation of this file.
00001 //===-- TailDuplication.cpp - Duplicate blocks into predecessors' tails ---===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This pass duplicates basic blocks ending in unconditional branches into
00011 // the tails of their predecessors.
00012 //
00013 //===----------------------------------------------------------------------===//
00014 
00015 #include "llvm/CodeGen/Passes.h"
00016 #include "llvm/ADT/DenseSet.h"
00017 #include "llvm/ADT/SetVector.h"
00018 #include "llvm/ADT/SmallSet.h"
00019 #include "llvm/ADT/Statistic.h"
00020 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
00021 #include "llvm/CodeGen/MachineFunctionPass.h"
00022 #include "llvm/CodeGen/MachineInstrBuilder.h"
00023 #include "llvm/CodeGen/MachineModuleInfo.h"
00024 #include "llvm/CodeGen/MachineRegisterInfo.h"
00025 #include "llvm/CodeGen/MachineSSAUpdater.h"
00026 #include "llvm/CodeGen/RegisterScavenging.h"
00027 #include "llvm/IR/Function.h"
00028 #include "llvm/Support/CommandLine.h"
00029 #include "llvm/Support/Debug.h"
00030 #include "llvm/Support/ErrorHandling.h"
00031 #include "llvm/Support/raw_ostream.h"
00032 #include "llvm/Target/TargetInstrInfo.h"
00033 #include "llvm/Target/TargetRegisterInfo.h"
00034 #include "llvm/Target/TargetSubtargetInfo.h"
00035 using namespace llvm;
00036 
00037 #define DEBUG_TYPE "tailduplication"
00038 
00039 STATISTIC(NumTails     , "Number of tails duplicated");
00040 STATISTIC(NumTailDups  , "Number of tail duplicated blocks");
00041 STATISTIC(NumInstrDups , "Additional instructions due to tail duplication");
00042 STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
00043 STATISTIC(NumAddedPHIs , "Number of phis added");
00044 
00045 // Heuristic for tail duplication.
00046 static cl::opt<unsigned>
00047 TailDuplicateSize("tail-dup-size",
00048                   cl::desc("Maximum instructions to consider tail duplicating"),
00049                   cl::init(2), cl::Hidden);
00050 
00051 static cl::opt<bool>
00052 TailDupVerify("tail-dup-verify",
00053               cl::desc("Verify sanity of PHI instructions during taildup"),
00054               cl::init(false), cl::Hidden);
00055 
00056 static cl::opt<unsigned>
00057 TailDupLimit("tail-dup-limit", cl::init(~0U), cl::Hidden);
00058 
00059 typedef std::vector<std::pair<MachineBasicBlock*,unsigned> > AvailableValsTy;
00060 
00061 namespace {
00062   /// TailDuplicatePass - Perform tail duplication.
00063   class TailDuplicatePass : public MachineFunctionPass {
00064     const TargetInstrInfo *TII;
00065     const TargetRegisterInfo *TRI;
00066     const MachineBranchProbabilityInfo *MBPI;
00067     MachineModuleInfo *MMI;
00068     MachineRegisterInfo *MRI;
00069     std::unique_ptr<RegScavenger> RS;
00070     bool PreRegAlloc;
00071 
00072     // SSAUpdateVRs - A list of virtual registers for which to update SSA form.
00073     SmallVector<unsigned, 16> SSAUpdateVRs;
00074 
00075     // SSAUpdateVals - For each virtual register in SSAUpdateVals keep a list of
00076     // source virtual registers.
00077     DenseMap<unsigned, AvailableValsTy> SSAUpdateVals;
00078 
00079   public:
00080     static char ID;
00081     explicit TailDuplicatePass() :
00082       MachineFunctionPass(ID), PreRegAlloc(false) {}
00083 
00084     bool runOnMachineFunction(MachineFunction &MF) override;
00085 
00086     void getAnalysisUsage(AnalysisUsage &AU) const override;
00087 
00088   private:
00089     void AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg,
00090                            MachineBasicBlock *BB);
00091     void ProcessPHI(MachineInstr *MI, MachineBasicBlock *TailBB,
00092                     MachineBasicBlock *PredBB,
00093                     DenseMap<unsigned, unsigned> &LocalVRMap,
00094                     SmallVectorImpl<std::pair<unsigned,unsigned> > &Copies,
00095                     const DenseSet<unsigned> &UsedByPhi,
00096                     bool Remove);
00097     void DuplicateInstruction(MachineInstr *MI,
00098                               MachineBasicBlock *TailBB,
00099                               MachineBasicBlock *PredBB,
00100                               MachineFunction &MF,
00101                               DenseMap<unsigned, unsigned> &LocalVRMap,
00102                               const DenseSet<unsigned> &UsedByPhi);
00103     void UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
00104                               SmallVectorImpl<MachineBasicBlock *> &TDBBs,
00105                               SmallSetVector<MachineBasicBlock*, 8> &Succs);
00106     bool TailDuplicateBlocks(MachineFunction &MF);
00107     bool shouldTailDuplicate(const MachineFunction &MF,
00108                              bool IsSimple, MachineBasicBlock &TailBB);
00109     bool isSimpleBB(MachineBasicBlock *TailBB);
00110     bool canCompletelyDuplicateBB(MachineBasicBlock &BB);
00111     bool duplicateSimpleBB(MachineBasicBlock *TailBB,
00112                            SmallVectorImpl<MachineBasicBlock *> &TDBBs,
00113                            const DenseSet<unsigned> &RegsUsedByPhi,
00114                            SmallVectorImpl<MachineInstr *> &Copies);
00115     bool TailDuplicate(MachineBasicBlock *TailBB,
00116                        bool IsSimple,
00117                        MachineFunction &MF,
00118                        SmallVectorImpl<MachineBasicBlock *> &TDBBs,
00119                        SmallVectorImpl<MachineInstr *> &Copies);
00120     bool TailDuplicateAndUpdate(MachineBasicBlock *MBB,
00121                                 bool IsSimple,
00122                                 MachineFunction &MF);
00123 
00124     void RemoveDeadBlock(MachineBasicBlock *MBB);
00125   };
00126 
00127   char TailDuplicatePass::ID = 0;
00128 }
00129 
00130 char &llvm::TailDuplicateID = TailDuplicatePass::ID;
00131 
00132 INITIALIZE_PASS(TailDuplicatePass, "tailduplication", "Tail Duplication",
00133                 false, false)
00134 
00135 bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) {
00136   if (skipOptnoneFunction(*MF.getFunction()))
00137     return false;
00138 
00139   TII = MF.getSubtarget().getInstrInfo();
00140   TRI = MF.getSubtarget().getRegisterInfo();
00141   MRI = &MF.getRegInfo();
00142   MMI = getAnalysisIfAvailable<MachineModuleInfo>();
00143   MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
00144 
00145   PreRegAlloc = MRI->isSSA();
00146   RS.reset();
00147   if (MRI->tracksLiveness() && TRI->trackLivenessAfterRegAlloc(MF))
00148     RS.reset(new RegScavenger());
00149 
00150   bool MadeChange = false;
00151   while (TailDuplicateBlocks(MF))
00152     MadeChange = true;
00153 
00154   return MadeChange;
00155 }
00156 
00157 void TailDuplicatePass::getAnalysisUsage(AnalysisUsage &AU) const {
00158   AU.addRequired<MachineBranchProbabilityInfo>();
00159   MachineFunctionPass::getAnalysisUsage(AU);
00160 }
00161 
00162 static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
00163   for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ++I) {
00164     MachineBasicBlock *MBB = I;
00165     SmallSetVector<MachineBasicBlock*, 8> Preds(MBB->pred_begin(),
00166                                                 MBB->pred_end());
00167     MachineBasicBlock::iterator MI = MBB->begin();
00168     while (MI != MBB->end()) {
00169       if (!MI->isPHI())
00170         break;
00171       for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
00172              PE = Preds.end(); PI != PE; ++PI) {
00173         MachineBasicBlock *PredBB = *PI;
00174         bool Found = false;
00175         for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
00176           MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB();
00177           if (PHIBB == PredBB) {
00178             Found = true;
00179             break;
00180           }
00181         }
00182         if (!Found) {
00183           dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI;
00184           dbgs() << "  missing input from predecessor BB#"
00185                  << PredBB->getNumber() << '\n';
00186           llvm_unreachable(nullptr);
00187         }
00188       }
00189 
00190       for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
00191         MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB();
00192         if (CheckExtra && !Preds.count(PHIBB)) {
00193           dbgs() << "Warning: malformed PHI in BB#" << MBB->getNumber()
00194                  << ": " << *MI;
00195           dbgs() << "  extra input from predecessor BB#"
00196                  << PHIBB->getNumber() << '\n';
00197           llvm_unreachable(nullptr);
00198         }
00199         if (PHIBB->getNumber() < 0) {
00200           dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI;
00201           dbgs() << "  non-existing BB#" << PHIBB->getNumber() << '\n';
00202           llvm_unreachable(nullptr);
00203         }
00204       }
00205       ++MI;
00206     }
00207   }
00208 }
00209 
00210 /// TailDuplicateAndUpdate - Tail duplicate the block and cleanup.
00211 bool
00212 TailDuplicatePass::TailDuplicateAndUpdate(MachineBasicBlock *MBB,
00213                                           bool IsSimple,
00214                                           MachineFunction &MF) {
00215   // Save the successors list.
00216   SmallSetVector<MachineBasicBlock*, 8> Succs(MBB->succ_begin(),
00217                                               MBB->succ_end());
00218 
00219   SmallVector<MachineBasicBlock*, 8> TDBBs;
00220   SmallVector<MachineInstr*, 16> Copies;
00221   if (!TailDuplicate(MBB, IsSimple, MF, TDBBs, Copies))
00222     return false;
00223 
00224   ++NumTails;
00225 
00226   SmallVector<MachineInstr*, 8> NewPHIs;
00227   MachineSSAUpdater SSAUpdate(MF, &NewPHIs);
00228 
00229   // TailBB's immediate successors are now successors of those predecessors
00230   // which duplicated TailBB. Add the predecessors as sources to the PHI
00231   // instructions.
00232   bool isDead = MBB->pred_empty() && !MBB->hasAddressTaken();
00233   if (PreRegAlloc)
00234     UpdateSuccessorsPHIs(MBB, isDead, TDBBs, Succs);
00235 
00236   // If it is dead, remove it.
00237   if (isDead) {
00238     NumInstrDups -= MBB->size();
00239     RemoveDeadBlock(MBB);
00240     ++NumDeadBlocks;
00241   }
00242 
00243   // Update SSA form.
00244   if (!SSAUpdateVRs.empty()) {
00245     for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) {
00246       unsigned VReg = SSAUpdateVRs[i];
00247       SSAUpdate.Initialize(VReg);
00248 
00249       // If the original definition is still around, add it as an available
00250       // value.
00251       MachineInstr *DefMI = MRI->getVRegDef(VReg);
00252       MachineBasicBlock *DefBB = nullptr;
00253       if (DefMI) {
00254         DefBB = DefMI->getParent();
00255         SSAUpdate.AddAvailableValue(DefBB, VReg);
00256       }
00257 
00258       // Add the new vregs as available values.
00259       DenseMap<unsigned, AvailableValsTy>::iterator LI =
00260         SSAUpdateVals.find(VReg);
00261       for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
00262         MachineBasicBlock *SrcBB = LI->second[j].first;
00263         unsigned SrcReg = LI->second[j].second;
00264         SSAUpdate.AddAvailableValue(SrcBB, SrcReg);
00265       }
00266 
00267       // Rewrite uses that are outside of the original def's block.
00268       MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg);
00269       while (UI != MRI->use_end()) {
00270         MachineOperand &UseMO = *UI;
00271         MachineInstr *UseMI = UseMO.getParent();
00272         ++UI;
00273         if (UseMI->isDebugValue()) {
00274           // SSAUpdate can replace the use with an undef. That creates
00275           // a debug instruction that is a kill.
00276           // FIXME: Should it SSAUpdate job to delete debug instructions
00277           // instead of replacing the use with undef?
00278           UseMI->eraseFromParent();
00279           continue;
00280         }
00281         if (UseMI->getParent() == DefBB && !UseMI->isPHI())
00282           continue;
00283         SSAUpdate.RewriteUse(UseMO);
00284       }
00285     }
00286 
00287     SSAUpdateVRs.clear();
00288     SSAUpdateVals.clear();
00289   }
00290 
00291   // Eliminate some of the copies inserted by tail duplication to maintain
00292   // SSA form.
00293   for (unsigned i = 0, e = Copies.size(); i != e; ++i) {
00294     MachineInstr *Copy = Copies[i];
00295     if (!Copy->isCopy())
00296       continue;
00297     unsigned Dst = Copy->getOperand(0).getReg();
00298     unsigned Src = Copy->getOperand(1).getReg();
00299     if (MRI->hasOneNonDBGUse(Src) &&
00300         MRI->constrainRegClass(Src, MRI->getRegClass(Dst))) {
00301       // Copy is the only use. Do trivial copy propagation here.
00302       MRI->replaceRegWith(Dst, Src);
00303       Copy->eraseFromParent();
00304     }
00305   }
00306 
00307   if (NewPHIs.size())
00308     NumAddedPHIs += NewPHIs.size();
00309 
00310   return true;
00311 }
00312 
00313 /// TailDuplicateBlocks - Look for small blocks that are unconditionally
00314 /// branched to and do not fall through. Tail-duplicate their instructions
00315 /// into their predecessors to eliminate (dynamic) branches.
00316 bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) {
00317   bool MadeChange = false;
00318 
00319   if (PreRegAlloc && TailDupVerify) {
00320     DEBUG(dbgs() << "\n*** Before tail-duplicating\n");
00321     VerifyPHIs(MF, true);
00322   }
00323 
00324   for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) {
00325     MachineBasicBlock *MBB = I++;
00326 
00327     if (NumTails == TailDupLimit)
00328       break;
00329 
00330     bool IsSimple = isSimpleBB(MBB);
00331 
00332     if (!shouldTailDuplicate(MF, IsSimple, *MBB))
00333       continue;
00334 
00335     MadeChange |= TailDuplicateAndUpdate(MBB, IsSimple, MF);
00336   }
00337 
00338   if (PreRegAlloc && TailDupVerify)
00339     VerifyPHIs(MF, false);
00340 
00341   return MadeChange;
00342 }
00343 
00344 static bool isDefLiveOut(unsigned Reg, MachineBasicBlock *BB,
00345                          const MachineRegisterInfo *MRI) {
00346   for (MachineInstr &UseMI : MRI->use_instructions(Reg)) {
00347     if (UseMI.isDebugValue())
00348       continue;
00349     if (UseMI.getParent() != BB)
00350       return true;
00351   }
00352   return false;
00353 }
00354 
00355 static unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB) {
00356   for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2)
00357     if (MI->getOperand(i+1).getMBB() == SrcBB)
00358       return i;
00359   return 0;
00360 }
00361 
00362 
00363 // Remember which registers are used by phis in this block. This is
00364 // used to determine which registers are liveout while modifying the
00365 // block (which is why we need to copy the information).
00366 static void getRegsUsedByPHIs(const MachineBasicBlock &BB,
00367                               DenseSet<unsigned> *UsedByPhi) {
00368   for (const auto &MI : BB) {
00369     if (!MI.isPHI())
00370       break;
00371     for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
00372       unsigned SrcReg = MI.getOperand(i).getReg();
00373       UsedByPhi->insert(SrcReg);
00374     }
00375   }
00376 }
00377 
00378 /// AddSSAUpdateEntry - Add a definition and source virtual registers pair for
00379 /// SSA update.
00380 void TailDuplicatePass::AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg,
00381                                           MachineBasicBlock *BB) {
00382   DenseMap<unsigned, AvailableValsTy>::iterator LI= SSAUpdateVals.find(OrigReg);
00383   if (LI != SSAUpdateVals.end())
00384     LI->second.push_back(std::make_pair(BB, NewReg));
00385   else {
00386     AvailableValsTy Vals;
00387     Vals.push_back(std::make_pair(BB, NewReg));
00388     SSAUpdateVals.insert(std::make_pair(OrigReg, Vals));
00389     SSAUpdateVRs.push_back(OrigReg);
00390   }
00391 }
00392 
00393 /// ProcessPHI - Process PHI node in TailBB by turning it into a copy in PredBB.
00394 /// Remember the source register that's contributed by PredBB and update SSA
00395 /// update map.
00396 void TailDuplicatePass::ProcessPHI(
00397     MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB,
00398     DenseMap<unsigned, unsigned> &LocalVRMap,
00399     SmallVectorImpl<std::pair<unsigned, unsigned> > &Copies,
00400     const DenseSet<unsigned> &RegsUsedByPhi, bool Remove) {
00401   unsigned DefReg = MI->getOperand(0).getReg();
00402   unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB);
00403   assert(SrcOpIdx && "Unable to find matching PHI source?");
00404   unsigned SrcReg = MI->getOperand(SrcOpIdx).getReg();
00405   const TargetRegisterClass *RC = MRI->getRegClass(DefReg);
00406   LocalVRMap.insert(std::make_pair(DefReg, SrcReg));
00407 
00408   // Insert a copy from source to the end of the block. The def register is the
00409   // available value liveout of the block.
00410   unsigned NewDef = MRI->createVirtualRegister(RC);
00411   Copies.push_back(std::make_pair(NewDef, SrcReg));
00412   if (isDefLiveOut(DefReg, TailBB, MRI) || RegsUsedByPhi.count(DefReg))
00413     AddSSAUpdateEntry(DefReg, NewDef, PredBB);
00414 
00415   if (!Remove)
00416     return;
00417 
00418   // Remove PredBB from the PHI node.
00419   MI->RemoveOperand(SrcOpIdx+1);
00420   MI->RemoveOperand(SrcOpIdx);
00421   if (MI->getNumOperands() == 1)
00422     MI->eraseFromParent();
00423 }
00424 
00425 /// DuplicateInstruction - Duplicate a TailBB instruction to PredBB and update
00426 /// the source operands due to earlier PHI translation.
00427 void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI,
00428                                      MachineBasicBlock *TailBB,
00429                                      MachineBasicBlock *PredBB,
00430                                      MachineFunction &MF,
00431                                      DenseMap<unsigned, unsigned> &LocalVRMap,
00432                                      const DenseSet<unsigned> &UsedByPhi) {
00433   MachineInstr *NewMI = TII->duplicate(MI, MF);
00434   for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
00435     MachineOperand &MO = NewMI->getOperand(i);
00436     if (!MO.isReg())
00437       continue;
00438     unsigned Reg = MO.getReg();
00439     if (!TargetRegisterInfo::isVirtualRegister(Reg))
00440       continue;
00441     if (MO.isDef()) {
00442       const TargetRegisterClass *RC = MRI->getRegClass(Reg);
00443       unsigned NewReg = MRI->createVirtualRegister(RC);
00444       MO.setReg(NewReg);
00445       LocalVRMap.insert(std::make_pair(Reg, NewReg));
00446       if (isDefLiveOut(Reg, TailBB, MRI) || UsedByPhi.count(Reg))
00447         AddSSAUpdateEntry(Reg, NewReg, PredBB);
00448     } else {
00449       DenseMap<unsigned, unsigned>::iterator VI = LocalVRMap.find(Reg);
00450       if (VI != LocalVRMap.end()) {
00451         MO.setReg(VI->second);
00452         MRI->constrainRegClass(VI->second, MRI->getRegClass(Reg));
00453       }
00454     }
00455   }
00456   PredBB->insert(PredBB->instr_end(), NewMI);
00457 }
00458 
00459 /// UpdateSuccessorsPHIs - After FromBB is tail duplicated into its predecessor
00460 /// blocks, the successors have gained new predecessors. Update the PHI
00461 /// instructions in them accordingly.
00462 void
00463 TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
00464                                   SmallVectorImpl<MachineBasicBlock *> &TDBBs,
00465                                   SmallSetVector<MachineBasicBlock*,8> &Succs) {
00466   for (SmallSetVector<MachineBasicBlock*, 8>::iterator SI = Succs.begin(),
00467          SE = Succs.end(); SI != SE; ++SI) {
00468     MachineBasicBlock *SuccBB = *SI;
00469     for (MachineBasicBlock::iterator II = SuccBB->begin(), EE = SuccBB->end();
00470          II != EE; ++II) {
00471       if (!II->isPHI())
00472         break;
00473       MachineInstrBuilder MIB(*FromBB->getParent(), II);
00474       unsigned Idx = 0;
00475       for (unsigned i = 1, e = II->getNumOperands(); i != e; i += 2) {
00476         MachineOperand &MO = II->getOperand(i+1);
00477         if (MO.getMBB() == FromBB) {
00478           Idx = i;
00479           break;
00480         }
00481       }
00482 
00483       assert(Idx != 0);
00484       MachineOperand &MO0 = II->getOperand(Idx);
00485       unsigned Reg = MO0.getReg();
00486       if (isDead) {
00487         // Folded into the previous BB.
00488         // There could be duplicate phi source entries. FIXME: Should sdisel
00489         // or earlier pass fixed this?
00490         for (unsigned i = II->getNumOperands()-2; i != Idx; i -= 2) {
00491           MachineOperand &MO = II->getOperand(i+1);
00492           if (MO.getMBB() == FromBB) {
00493             II->RemoveOperand(i+1);
00494             II->RemoveOperand(i);
00495           }
00496         }
00497       } else
00498         Idx = 0;
00499 
00500       // If Idx is set, the operands at Idx and Idx+1 must be removed.
00501       // We reuse the location to avoid expensive RemoveOperand calls.
00502 
00503       DenseMap<unsigned,AvailableValsTy>::iterator LI=SSAUpdateVals.find(Reg);
00504       if (LI != SSAUpdateVals.end()) {
00505         // This register is defined in the tail block.
00506         for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
00507           MachineBasicBlock *SrcBB = LI->second[j].first;
00508           // If we didn't duplicate a bb into a particular predecessor, we
00509           // might still have added an entry to SSAUpdateVals to correcly
00510           // recompute SSA. If that case, avoid adding a dummy extra argument
00511           // this PHI.
00512           if (!SrcBB->isSuccessor(SuccBB))
00513             continue;
00514 
00515           unsigned SrcReg = LI->second[j].second;
00516           if (Idx != 0) {
00517             II->getOperand(Idx).setReg(SrcReg);
00518             II->getOperand(Idx+1).setMBB(SrcBB);
00519             Idx = 0;
00520           } else {
00521             MIB.addReg(SrcReg).addMBB(SrcBB);
00522           }
00523         }
00524       } else {
00525         // Live in tail block, must also be live in predecessors.
00526         for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) {
00527           MachineBasicBlock *SrcBB = TDBBs[j];
00528           if (Idx != 0) {
00529             II->getOperand(Idx).setReg(Reg);
00530             II->getOperand(Idx+1).setMBB(SrcBB);
00531             Idx = 0;
00532           } else {
00533             MIB.addReg(Reg).addMBB(SrcBB);
00534           }
00535         }
00536       }
00537       if (Idx != 0) {
00538         II->RemoveOperand(Idx+1);
00539         II->RemoveOperand(Idx);
00540       }
00541     }
00542   }
00543 }
00544 
00545 /// shouldTailDuplicate - Determine if it is profitable to duplicate this block.
00546 bool
00547 TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF,
00548                                        bool IsSimple,
00549                                        MachineBasicBlock &TailBB) {
00550   // Only duplicate blocks that end with unconditional branches.
00551   if (TailBB.canFallThrough())
00552     return false;
00553 
00554   // Don't try to tail-duplicate single-block loops.
00555   if (TailBB.isSuccessor(&TailBB))
00556     return false;
00557 
00558   // Set the limit on the cost to duplicate. When optimizing for size,
00559   // duplicate only one, because one branch instruction can be eliminated to
00560   // compensate for the duplication.
00561   unsigned MaxDuplicateCount;
00562   if (TailDuplicateSize.getNumOccurrences() == 0 &&
00563       MF.getFunction()->getAttributes().
00564         hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize))
00565     MaxDuplicateCount = 1;
00566   else
00567     MaxDuplicateCount = TailDuplicateSize;
00568 
00569   // If the target has hardware branch prediction that can handle indirect
00570   // branches, duplicating them can often make them predictable when there
00571   // are common paths through the code.  The limit needs to be high enough
00572   // to allow undoing the effects of tail merging and other optimizations
00573   // that rearrange the predecessors of the indirect branch.
00574 
00575   bool HasIndirectbr = false;
00576   if (!TailBB.empty())
00577     HasIndirectbr = TailBB.back().isIndirectBranch();
00578 
00579   if (HasIndirectbr && PreRegAlloc)
00580     MaxDuplicateCount = 20;
00581 
00582   // Check the instructions in the block to determine whether tail-duplication
00583   // is invalid or unlikely to be profitable.
00584   unsigned InstrCount = 0;
00585   for (MachineBasicBlock::iterator I = TailBB.begin(); I != TailBB.end(); ++I) {
00586     // Non-duplicable things shouldn't be tail-duplicated.
00587     if (I->isNotDuplicable())
00588       return false;
00589 
00590     // Do not duplicate 'return' instructions if this is a pre-regalloc run.
00591     // A return may expand into a lot more instructions (e.g. reload of callee
00592     // saved registers) after PEI.
00593     if (PreRegAlloc && I->isReturn())
00594       return false;
00595 
00596     // Avoid duplicating calls before register allocation. Calls presents a
00597     // barrier to register allocation so duplicating them may end up increasing
00598     // spills.
00599     if (PreRegAlloc && I->isCall())
00600       return false;
00601 
00602     if (!I->isPHI() && !I->isDebugValue())
00603       InstrCount += 1;
00604 
00605     if (InstrCount > MaxDuplicateCount)
00606       return false;
00607   }
00608 
00609   if (HasIndirectbr && PreRegAlloc)
00610     return true;
00611 
00612   if (IsSimple)
00613     return true;
00614 
00615   if (!PreRegAlloc)
00616     return true;
00617 
00618   return canCompletelyDuplicateBB(TailBB);
00619 }
00620 
00621 /// isSimpleBB - True if this BB has only one unconditional jump.
00622 bool
00623 TailDuplicatePass::isSimpleBB(MachineBasicBlock *TailBB) {
00624   if (TailBB->succ_size() != 1)
00625     return false;
00626   if (TailBB->pred_empty())
00627     return false;
00628   MachineBasicBlock::iterator I = TailBB->begin();
00629   MachineBasicBlock::iterator E = TailBB->end();
00630   while (I != E && I->isDebugValue())
00631     ++I;
00632   if (I == E)
00633     return true;
00634   return I->isUnconditionalBranch();
00635 }
00636 
00637 static bool
00638 bothUsedInPHI(const MachineBasicBlock &A,
00639               SmallPtrSet<MachineBasicBlock*, 8> SuccsB) {
00640   for (MachineBasicBlock::const_succ_iterator SI = A.succ_begin(),
00641          SE = A.succ_end(); SI != SE; ++SI) {
00642     MachineBasicBlock *BB = *SI;
00643     if (SuccsB.count(BB) && !BB->empty() && BB->begin()->isPHI())
00644       return true;
00645   }
00646 
00647   return false;
00648 }
00649 
00650 bool
00651 TailDuplicatePass::canCompletelyDuplicateBB(MachineBasicBlock &BB) {
00652   for (MachineBasicBlock::pred_iterator PI = BB.pred_begin(),
00653        PE = BB.pred_end(); PI != PE; ++PI) {
00654     MachineBasicBlock *PredBB = *PI;
00655 
00656     if (PredBB->succ_size() > 1)
00657       return false;
00658 
00659     MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
00660     SmallVector<MachineOperand, 4> PredCond;
00661     if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
00662       return false;
00663 
00664     if (!PredCond.empty())
00665       return false;
00666   }
00667   return true;
00668 }
00669 
00670 bool
00671 TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB,
00672                                     SmallVectorImpl<MachineBasicBlock *> &TDBBs,
00673                                     const DenseSet<unsigned> &UsedByPhi,
00674                                     SmallVectorImpl<MachineInstr *> &Copies) {
00675   SmallPtrSet<MachineBasicBlock*, 8> Succs(TailBB->succ_begin(),
00676                                            TailBB->succ_end());
00677   SmallVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(),
00678                                            TailBB->pred_end());
00679   bool Changed = false;
00680   for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
00681        PE = Preds.end(); PI != PE; ++PI) {
00682     MachineBasicBlock *PredBB = *PI;
00683 
00684     if (PredBB->getLandingPadSuccessor())
00685       continue;
00686 
00687     if (bothUsedInPHI(*PredBB, Succs))
00688       continue;
00689 
00690     MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
00691     SmallVector<MachineOperand, 4> PredCond;
00692     if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
00693       continue;
00694 
00695     Changed = true;
00696     DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
00697                  << "From simple Succ: " << *TailBB);
00698 
00699     MachineBasicBlock *NewTarget = *TailBB->succ_begin();
00700     MachineBasicBlock *NextBB = std::next(MachineFunction::iterator(PredBB));
00701 
00702     // Make PredFBB explicit.
00703     if (PredCond.empty())
00704       PredFBB = PredTBB;
00705 
00706     // Make fall through explicit.
00707     if (!PredTBB)
00708       PredTBB = NextBB;
00709     if (!PredFBB)
00710       PredFBB = NextBB;
00711 
00712     // Redirect
00713     if (PredFBB == TailBB)
00714       PredFBB = NewTarget;
00715     if (PredTBB == TailBB)
00716       PredTBB = NewTarget;
00717 
00718     // Make the branch unconditional if possible
00719     if (PredTBB == PredFBB) {
00720       PredCond.clear();
00721       PredFBB = nullptr;
00722     }
00723 
00724     // Avoid adding fall through branches.
00725     if (PredFBB == NextBB)
00726       PredFBB = nullptr;
00727     if (PredTBB == NextBB && PredFBB == nullptr)
00728       PredTBB = nullptr;
00729 
00730     TII->RemoveBranch(*PredBB);
00731 
00732     if (PredTBB)
00733       TII->InsertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc());
00734 
00735     uint32_t Weight = MBPI->getEdgeWeight(PredBB, TailBB);
00736     PredBB->removeSuccessor(TailBB);
00737     unsigned NumSuccessors = PredBB->succ_size();
00738     assert(NumSuccessors <= 1);
00739     if (NumSuccessors == 0 || *PredBB->succ_begin() != NewTarget)
00740       PredBB->addSuccessor(NewTarget, Weight);
00741 
00742     TDBBs.push_back(PredBB);
00743   }
00744   return Changed;
00745 }
00746 
00747 /// TailDuplicate - If it is profitable, duplicate TailBB's contents in each
00748 /// of its predecessors.
00749 bool
00750 TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
00751                                  bool IsSimple,
00752                                  MachineFunction &MF,
00753                                  SmallVectorImpl<MachineBasicBlock *> &TDBBs,
00754                                  SmallVectorImpl<MachineInstr *> &Copies) {
00755   DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n');
00756 
00757   DenseSet<unsigned> UsedByPhi;
00758   getRegsUsedByPHIs(*TailBB, &UsedByPhi);
00759 
00760   if (IsSimple)
00761     return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi, Copies);
00762 
00763   // Iterate through all the unique predecessors and tail-duplicate this
00764   // block into them, if possible. Copying the list ahead of time also
00765   // avoids trouble with the predecessor list reallocating.
00766   bool Changed = false;
00767   SmallSetVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(),
00768                                               TailBB->pred_end());
00769   for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
00770        PE = Preds.end(); PI != PE; ++PI) {
00771     MachineBasicBlock *PredBB = *PI;
00772 
00773     assert(TailBB != PredBB &&
00774            "Single-block loop should have been rejected earlier!");
00775     // EH edges are ignored by AnalyzeBranch.
00776     if (PredBB->succ_size() > 1)
00777       continue;
00778 
00779     MachineBasicBlock *PredTBB, *PredFBB;
00780     SmallVector<MachineOperand, 4> PredCond;
00781     if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
00782       continue;
00783     if (!PredCond.empty())
00784       continue;
00785     // Don't duplicate into a fall-through predecessor (at least for now).
00786     if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough())
00787       continue;
00788 
00789     DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
00790                  << "From Succ: " << *TailBB);
00791 
00792     TDBBs.push_back(PredBB);
00793 
00794     // Remove PredBB's unconditional branch.
00795     TII->RemoveBranch(*PredBB);
00796 
00797     if (RS && !TailBB->livein_empty()) {
00798       // Update PredBB livein.
00799       RS->enterBasicBlock(PredBB);
00800       if (!PredBB->empty())
00801         RS->forward(std::prev(PredBB->end()));
00802       for (MachineBasicBlock::livein_iterator I = TailBB->livein_begin(),
00803              E = TailBB->livein_end(); I != E; ++I) {
00804         if (!RS->isRegUsed(*I, false))
00805           // If a register is previously livein to the tail but it's not live
00806           // at the end of predecessor BB, then it should be added to its
00807           // livein list.
00808           PredBB->addLiveIn(*I);
00809       }
00810     }
00811 
00812     // Clone the contents of TailBB into PredBB.
00813     DenseMap<unsigned, unsigned> LocalVRMap;
00814     SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos;
00815     // Use instr_iterator here to properly handle bundles, e.g.
00816     // ARM Thumb2 IT block.
00817     MachineBasicBlock::instr_iterator I = TailBB->instr_begin();
00818     while (I != TailBB->instr_end()) {
00819       MachineInstr *MI = &*I;
00820       ++I;
00821       if (MI->isPHI()) {
00822         // Replace the uses of the def of the PHI with the register coming
00823         // from PredBB.
00824         ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true);
00825       } else {
00826         // Replace def of virtual registers with new registers, and update
00827         // uses with PHI source register or the new registers.
00828         DuplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap, UsedByPhi);
00829       }
00830     }
00831     MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator();
00832     for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
00833       Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(),
00834                                TII->get(TargetOpcode::COPY),
00835                                CopyInfos[i].first).addReg(CopyInfos[i].second));
00836     }
00837 
00838     // Simplify
00839     TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true);
00840 
00841     NumInstrDups += TailBB->size() - 1; // subtract one for removed branch
00842 
00843     // Update the CFG.
00844     PredBB->removeSuccessor(PredBB->succ_begin());
00845     assert(PredBB->succ_empty() &&
00846            "TailDuplicate called on block with multiple successors!");
00847     for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(),
00848            E = TailBB->succ_end(); I != E; ++I)
00849       PredBB->addSuccessor(*I, MBPI->getEdgeWeight(TailBB, I));
00850 
00851     Changed = true;
00852     ++NumTailDups;
00853   }
00854 
00855   // If TailBB was duplicated into all its predecessors except for the prior
00856   // block, which falls through unconditionally, move the contents of this
00857   // block into the prior block.
00858   MachineBasicBlock *PrevBB = std::prev(MachineFunction::iterator(TailBB));
00859   MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr;
00860   SmallVector<MachineOperand, 4> PriorCond;
00861   // This has to check PrevBB->succ_size() because EH edges are ignored by
00862   // AnalyzeBranch.
00863   if (PrevBB->succ_size() == 1 &&
00864       !TII->AnalyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) &&
00865       PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 &&
00866       !TailBB->hasAddressTaken()) {
00867     DEBUG(dbgs() << "\nMerging into block: " << *PrevBB
00868           << "From MBB: " << *TailBB);
00869     if (PreRegAlloc) {
00870       DenseMap<unsigned, unsigned> LocalVRMap;
00871       SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos;
00872       MachineBasicBlock::iterator I = TailBB->begin();
00873       // Process PHI instructions first.
00874       while (I != TailBB->end() && I->isPHI()) {
00875         // Replace the uses of the def of the PHI with the register coming
00876         // from PredBB.
00877         MachineInstr *MI = &*I++;
00878         ProcessPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi, true);
00879         if (MI->getParent())
00880           MI->eraseFromParent();
00881       }
00882 
00883       // Now copy the non-PHI instructions.
00884       while (I != TailBB->end()) {
00885         // Replace def of virtual registers with new registers, and update
00886         // uses with PHI source register or the new registers.
00887         MachineInstr *MI = &*I++;
00888         assert(!MI->isBundle() && "Not expecting bundles before regalloc!");
00889         DuplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap, UsedByPhi);
00890         MI->eraseFromParent();
00891       }
00892       MachineBasicBlock::iterator Loc = PrevBB->getFirstTerminator();
00893       for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
00894         Copies.push_back(BuildMI(*PrevBB, Loc, DebugLoc(),
00895                                  TII->get(TargetOpcode::COPY),
00896                                  CopyInfos[i].first)
00897                            .addReg(CopyInfos[i].second));
00898       }
00899     } else {
00900       // No PHIs to worry about, just splice the instructions over.
00901       PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end());
00902     }
00903     PrevBB->removeSuccessor(PrevBB->succ_begin());
00904     assert(PrevBB->succ_empty());
00905     PrevBB->transferSuccessors(TailBB);
00906     TDBBs.push_back(PrevBB);
00907     Changed = true;
00908   }
00909 
00910   // If this is after register allocation, there are no phis to fix.
00911   if (!PreRegAlloc)
00912     return Changed;
00913 
00914   // If we made no changes so far, we are safe.
00915   if (!Changed)
00916     return Changed;
00917 
00918 
00919   // Handle the nasty case in that we duplicated a block that is part of a loop
00920   // into some but not all of its predecessors. For example:
00921   //    1 -> 2 <-> 3                 |
00922   //          \                      |
00923   //           \---> rest            |
00924   // if we duplicate 2 into 1 but not into 3, we end up with
00925   // 12 -> 3 <-> 2 -> rest           |
00926   //   \             /               |
00927   //    \----->-----/                |
00928   // If there was a "var = phi(1, 3)" in 2, it has to be ultimately replaced
00929   // with a phi in 3 (which now dominates 2).
00930   // What we do here is introduce a copy in 3 of the register defined by the
00931   // phi, just like when we are duplicating 2 into 3, but we don't copy any
00932   // real instructions or remove the 3 -> 2 edge from the phi in 2.
00933   for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
00934        PE = Preds.end(); PI != PE; ++PI) {
00935     MachineBasicBlock *PredBB = *PI;
00936     if (std::find(TDBBs.begin(), TDBBs.end(), PredBB) != TDBBs.end())
00937       continue;
00938 
00939     // EH edges
00940     if (PredBB->succ_size() != 1)
00941       continue;
00942 
00943     DenseMap<unsigned, unsigned> LocalVRMap;
00944     SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos;
00945     MachineBasicBlock::iterator I = TailBB->begin();
00946     // Process PHI instructions first.
00947     while (I != TailBB->end() && I->isPHI()) {
00948       // Replace the uses of the def of the PHI with the register coming
00949       // from PredBB.
00950       MachineInstr *MI = &*I++;
00951       ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false);
00952     }
00953     MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator();
00954     for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
00955       Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(),
00956                                TII->get(TargetOpcode::COPY),
00957                                CopyInfos[i].first).addReg(CopyInfos[i].second));
00958     }
00959   }
00960 
00961   return Changed;
00962 }
00963 
00964 /// RemoveDeadBlock - Remove the specified dead machine basic block from the
00965 /// function, updating the CFG.
00966 void TailDuplicatePass::RemoveDeadBlock(MachineBasicBlock *MBB) {
00967   assert(MBB->pred_empty() && "MBB must be dead!");
00968   DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);
00969 
00970   // Remove all successors.
00971   while (!MBB->succ_empty())
00972     MBB->removeSuccessor(MBB->succ_end()-1);
00973 
00974   // Remove the block.
00975   MBB->eraseFromParent();
00976 }