LLVM API Documentation
00001 //===-- TailDuplication.cpp - Duplicate blocks into predecessors' tails ---===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This pass duplicates basic blocks ending in unconditional branches into 00011 // the tails of their predecessors. 00012 // 00013 //===----------------------------------------------------------------------===// 00014 00015 #include "llvm/CodeGen/Passes.h" 00016 #include "llvm/ADT/DenseSet.h" 00017 #include "llvm/ADT/SetVector.h" 00018 #include "llvm/ADT/SmallSet.h" 00019 #include "llvm/ADT/Statistic.h" 00020 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" 00021 #include "llvm/CodeGen/MachineFunctionPass.h" 00022 #include "llvm/CodeGen/MachineInstrBuilder.h" 00023 #include "llvm/CodeGen/MachineModuleInfo.h" 00024 #include "llvm/CodeGen/MachineRegisterInfo.h" 00025 #include "llvm/CodeGen/MachineSSAUpdater.h" 00026 #include "llvm/CodeGen/RegisterScavenging.h" 00027 #include "llvm/IR/Function.h" 00028 #include "llvm/Support/CommandLine.h" 00029 #include "llvm/Support/Debug.h" 00030 #include "llvm/Support/ErrorHandling.h" 00031 #include "llvm/Support/raw_ostream.h" 00032 #include "llvm/Target/TargetInstrInfo.h" 00033 #include "llvm/Target/TargetRegisterInfo.h" 00034 #include "llvm/Target/TargetSubtargetInfo.h" 00035 using namespace llvm; 00036 00037 #define DEBUG_TYPE "tailduplication" 00038 00039 STATISTIC(NumTails , "Number of tails duplicated"); 00040 STATISTIC(NumTailDups , "Number of tail duplicated blocks"); 00041 STATISTIC(NumInstrDups , "Additional instructions due to tail duplication"); 00042 STATISTIC(NumDeadBlocks, "Number of dead blocks removed"); 00043 STATISTIC(NumAddedPHIs , "Number of phis added"); 00044 00045 // Heuristic for tail duplication. 00046 static cl::opt<unsigned> 00047 TailDuplicateSize("tail-dup-size", 00048 cl::desc("Maximum instructions to consider tail duplicating"), 00049 cl::init(2), cl::Hidden); 00050 00051 static cl::opt<bool> 00052 TailDupVerify("tail-dup-verify", 00053 cl::desc("Verify sanity of PHI instructions during taildup"), 00054 cl::init(false), cl::Hidden); 00055 00056 static cl::opt<unsigned> 00057 TailDupLimit("tail-dup-limit", cl::init(~0U), cl::Hidden); 00058 00059 typedef std::vector<std::pair<MachineBasicBlock*,unsigned> > AvailableValsTy; 00060 00061 namespace { 00062 /// TailDuplicatePass - Perform tail duplication. 00063 class TailDuplicatePass : public MachineFunctionPass { 00064 const TargetInstrInfo *TII; 00065 const TargetRegisterInfo *TRI; 00066 const MachineBranchProbabilityInfo *MBPI; 00067 MachineModuleInfo *MMI; 00068 MachineRegisterInfo *MRI; 00069 std::unique_ptr<RegScavenger> RS; 00070 bool PreRegAlloc; 00071 00072 // SSAUpdateVRs - A list of virtual registers for which to update SSA form. 00073 SmallVector<unsigned, 16> SSAUpdateVRs; 00074 00075 // SSAUpdateVals - For each virtual register in SSAUpdateVals keep a list of 00076 // source virtual registers. 00077 DenseMap<unsigned, AvailableValsTy> SSAUpdateVals; 00078 00079 public: 00080 static char ID; 00081 explicit TailDuplicatePass() : 00082 MachineFunctionPass(ID), PreRegAlloc(false) {} 00083 00084 bool runOnMachineFunction(MachineFunction &MF) override; 00085 00086 void getAnalysisUsage(AnalysisUsage &AU) const override; 00087 00088 private: 00089 void AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, 00090 MachineBasicBlock *BB); 00091 void ProcessPHI(MachineInstr *MI, MachineBasicBlock *TailBB, 00092 MachineBasicBlock *PredBB, 00093 DenseMap<unsigned, unsigned> &LocalVRMap, 00094 SmallVectorImpl<std::pair<unsigned,unsigned> > &Copies, 00095 const DenseSet<unsigned> &UsedByPhi, 00096 bool Remove); 00097 void DuplicateInstruction(MachineInstr *MI, 00098 MachineBasicBlock *TailBB, 00099 MachineBasicBlock *PredBB, 00100 MachineFunction &MF, 00101 DenseMap<unsigned, unsigned> &LocalVRMap, 00102 const DenseSet<unsigned> &UsedByPhi); 00103 void UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, 00104 SmallVectorImpl<MachineBasicBlock *> &TDBBs, 00105 SmallSetVector<MachineBasicBlock*, 8> &Succs); 00106 bool TailDuplicateBlocks(MachineFunction &MF); 00107 bool shouldTailDuplicate(const MachineFunction &MF, 00108 bool IsSimple, MachineBasicBlock &TailBB); 00109 bool isSimpleBB(MachineBasicBlock *TailBB); 00110 bool canCompletelyDuplicateBB(MachineBasicBlock &BB); 00111 bool duplicateSimpleBB(MachineBasicBlock *TailBB, 00112 SmallVectorImpl<MachineBasicBlock *> &TDBBs, 00113 const DenseSet<unsigned> &RegsUsedByPhi, 00114 SmallVectorImpl<MachineInstr *> &Copies); 00115 bool TailDuplicate(MachineBasicBlock *TailBB, 00116 bool IsSimple, 00117 MachineFunction &MF, 00118 SmallVectorImpl<MachineBasicBlock *> &TDBBs, 00119 SmallVectorImpl<MachineInstr *> &Copies); 00120 bool TailDuplicateAndUpdate(MachineBasicBlock *MBB, 00121 bool IsSimple, 00122 MachineFunction &MF); 00123 00124 void RemoveDeadBlock(MachineBasicBlock *MBB); 00125 }; 00126 00127 char TailDuplicatePass::ID = 0; 00128 } 00129 00130 char &llvm::TailDuplicateID = TailDuplicatePass::ID; 00131 00132 INITIALIZE_PASS(TailDuplicatePass, "tailduplication", "Tail Duplication", 00133 false, false) 00134 00135 bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) { 00136 if (skipOptnoneFunction(*MF.getFunction())) 00137 return false; 00138 00139 TII = MF.getSubtarget().getInstrInfo(); 00140 TRI = MF.getSubtarget().getRegisterInfo(); 00141 MRI = &MF.getRegInfo(); 00142 MMI = getAnalysisIfAvailable<MachineModuleInfo>(); 00143 MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); 00144 00145 PreRegAlloc = MRI->isSSA(); 00146 RS.reset(); 00147 if (MRI->tracksLiveness() && TRI->trackLivenessAfterRegAlloc(MF)) 00148 RS.reset(new RegScavenger()); 00149 00150 bool MadeChange = false; 00151 while (TailDuplicateBlocks(MF)) 00152 MadeChange = true; 00153 00154 return MadeChange; 00155 } 00156 00157 void TailDuplicatePass::getAnalysisUsage(AnalysisUsage &AU) const { 00158 AU.addRequired<MachineBranchProbabilityInfo>(); 00159 MachineFunctionPass::getAnalysisUsage(AU); 00160 } 00161 00162 static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { 00163 for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ++I) { 00164 MachineBasicBlock *MBB = I; 00165 SmallSetVector<MachineBasicBlock*, 8> Preds(MBB->pred_begin(), 00166 MBB->pred_end()); 00167 MachineBasicBlock::iterator MI = MBB->begin(); 00168 while (MI != MBB->end()) { 00169 if (!MI->isPHI()) 00170 break; 00171 for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), 00172 PE = Preds.end(); PI != PE; ++PI) { 00173 MachineBasicBlock *PredBB = *PI; 00174 bool Found = false; 00175 for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) { 00176 MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB(); 00177 if (PHIBB == PredBB) { 00178 Found = true; 00179 break; 00180 } 00181 } 00182 if (!Found) { 00183 dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; 00184 dbgs() << " missing input from predecessor BB#" 00185 << PredBB->getNumber() << '\n'; 00186 llvm_unreachable(nullptr); 00187 } 00188 } 00189 00190 for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) { 00191 MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB(); 00192 if (CheckExtra && !Preds.count(PHIBB)) { 00193 dbgs() << "Warning: malformed PHI in BB#" << MBB->getNumber() 00194 << ": " << *MI; 00195 dbgs() << " extra input from predecessor BB#" 00196 << PHIBB->getNumber() << '\n'; 00197 llvm_unreachable(nullptr); 00198 } 00199 if (PHIBB->getNumber() < 0) { 00200 dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; 00201 dbgs() << " non-existing BB#" << PHIBB->getNumber() << '\n'; 00202 llvm_unreachable(nullptr); 00203 } 00204 } 00205 ++MI; 00206 } 00207 } 00208 } 00209 00210 /// TailDuplicateAndUpdate - Tail duplicate the block and cleanup. 00211 bool 00212 TailDuplicatePass::TailDuplicateAndUpdate(MachineBasicBlock *MBB, 00213 bool IsSimple, 00214 MachineFunction &MF) { 00215 // Save the successors list. 00216 SmallSetVector<MachineBasicBlock*, 8> Succs(MBB->succ_begin(), 00217 MBB->succ_end()); 00218 00219 SmallVector<MachineBasicBlock*, 8> TDBBs; 00220 SmallVector<MachineInstr*, 16> Copies; 00221 if (!TailDuplicate(MBB, IsSimple, MF, TDBBs, Copies)) 00222 return false; 00223 00224 ++NumTails; 00225 00226 SmallVector<MachineInstr*, 8> NewPHIs; 00227 MachineSSAUpdater SSAUpdate(MF, &NewPHIs); 00228 00229 // TailBB's immediate successors are now successors of those predecessors 00230 // which duplicated TailBB. Add the predecessors as sources to the PHI 00231 // instructions. 00232 bool isDead = MBB->pred_empty() && !MBB->hasAddressTaken(); 00233 if (PreRegAlloc) 00234 UpdateSuccessorsPHIs(MBB, isDead, TDBBs, Succs); 00235 00236 // If it is dead, remove it. 00237 if (isDead) { 00238 NumInstrDups -= MBB->size(); 00239 RemoveDeadBlock(MBB); 00240 ++NumDeadBlocks; 00241 } 00242 00243 // Update SSA form. 00244 if (!SSAUpdateVRs.empty()) { 00245 for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) { 00246 unsigned VReg = SSAUpdateVRs[i]; 00247 SSAUpdate.Initialize(VReg); 00248 00249 // If the original definition is still around, add it as an available 00250 // value. 00251 MachineInstr *DefMI = MRI->getVRegDef(VReg); 00252 MachineBasicBlock *DefBB = nullptr; 00253 if (DefMI) { 00254 DefBB = DefMI->getParent(); 00255 SSAUpdate.AddAvailableValue(DefBB, VReg); 00256 } 00257 00258 // Add the new vregs as available values. 00259 DenseMap<unsigned, AvailableValsTy>::iterator LI = 00260 SSAUpdateVals.find(VReg); 00261 for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) { 00262 MachineBasicBlock *SrcBB = LI->second[j].first; 00263 unsigned SrcReg = LI->second[j].second; 00264 SSAUpdate.AddAvailableValue(SrcBB, SrcReg); 00265 } 00266 00267 // Rewrite uses that are outside of the original def's block. 00268 MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg); 00269 while (UI != MRI->use_end()) { 00270 MachineOperand &UseMO = *UI; 00271 MachineInstr *UseMI = UseMO.getParent(); 00272 ++UI; 00273 if (UseMI->isDebugValue()) { 00274 // SSAUpdate can replace the use with an undef. That creates 00275 // a debug instruction that is a kill. 00276 // FIXME: Should it SSAUpdate job to delete debug instructions 00277 // instead of replacing the use with undef? 00278 UseMI->eraseFromParent(); 00279 continue; 00280 } 00281 if (UseMI->getParent() == DefBB && !UseMI->isPHI()) 00282 continue; 00283 SSAUpdate.RewriteUse(UseMO); 00284 } 00285 } 00286 00287 SSAUpdateVRs.clear(); 00288 SSAUpdateVals.clear(); 00289 } 00290 00291 // Eliminate some of the copies inserted by tail duplication to maintain 00292 // SSA form. 00293 for (unsigned i = 0, e = Copies.size(); i != e; ++i) { 00294 MachineInstr *Copy = Copies[i]; 00295 if (!Copy->isCopy()) 00296 continue; 00297 unsigned Dst = Copy->getOperand(0).getReg(); 00298 unsigned Src = Copy->getOperand(1).getReg(); 00299 if (MRI->hasOneNonDBGUse(Src) && 00300 MRI->constrainRegClass(Src, MRI->getRegClass(Dst))) { 00301 // Copy is the only use. Do trivial copy propagation here. 00302 MRI->replaceRegWith(Dst, Src); 00303 Copy->eraseFromParent(); 00304 } 00305 } 00306 00307 if (NewPHIs.size()) 00308 NumAddedPHIs += NewPHIs.size(); 00309 00310 return true; 00311 } 00312 00313 /// TailDuplicateBlocks - Look for small blocks that are unconditionally 00314 /// branched to and do not fall through. Tail-duplicate their instructions 00315 /// into their predecessors to eliminate (dynamic) branches. 00316 bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) { 00317 bool MadeChange = false; 00318 00319 if (PreRegAlloc && TailDupVerify) { 00320 DEBUG(dbgs() << "\n*** Before tail-duplicating\n"); 00321 VerifyPHIs(MF, true); 00322 } 00323 00324 for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) { 00325 MachineBasicBlock *MBB = I++; 00326 00327 if (NumTails == TailDupLimit) 00328 break; 00329 00330 bool IsSimple = isSimpleBB(MBB); 00331 00332 if (!shouldTailDuplicate(MF, IsSimple, *MBB)) 00333 continue; 00334 00335 MadeChange |= TailDuplicateAndUpdate(MBB, IsSimple, MF); 00336 } 00337 00338 if (PreRegAlloc && TailDupVerify) 00339 VerifyPHIs(MF, false); 00340 00341 return MadeChange; 00342 } 00343 00344 static bool isDefLiveOut(unsigned Reg, MachineBasicBlock *BB, 00345 const MachineRegisterInfo *MRI) { 00346 for (MachineInstr &UseMI : MRI->use_instructions(Reg)) { 00347 if (UseMI.isDebugValue()) 00348 continue; 00349 if (UseMI.getParent() != BB) 00350 return true; 00351 } 00352 return false; 00353 } 00354 00355 static unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB) { 00356 for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) 00357 if (MI->getOperand(i+1).getMBB() == SrcBB) 00358 return i; 00359 return 0; 00360 } 00361 00362 00363 // Remember which registers are used by phis in this block. This is 00364 // used to determine which registers are liveout while modifying the 00365 // block (which is why we need to copy the information). 00366 static void getRegsUsedByPHIs(const MachineBasicBlock &BB, 00367 DenseSet<unsigned> *UsedByPhi) { 00368 for (const auto &MI : BB) { 00369 if (!MI.isPHI()) 00370 break; 00371 for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) { 00372 unsigned SrcReg = MI.getOperand(i).getReg(); 00373 UsedByPhi->insert(SrcReg); 00374 } 00375 } 00376 } 00377 00378 /// AddSSAUpdateEntry - Add a definition and source virtual registers pair for 00379 /// SSA update. 00380 void TailDuplicatePass::AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, 00381 MachineBasicBlock *BB) { 00382 DenseMap<unsigned, AvailableValsTy>::iterator LI= SSAUpdateVals.find(OrigReg); 00383 if (LI != SSAUpdateVals.end()) 00384 LI->second.push_back(std::make_pair(BB, NewReg)); 00385 else { 00386 AvailableValsTy Vals; 00387 Vals.push_back(std::make_pair(BB, NewReg)); 00388 SSAUpdateVals.insert(std::make_pair(OrigReg, Vals)); 00389 SSAUpdateVRs.push_back(OrigReg); 00390 } 00391 } 00392 00393 /// ProcessPHI - Process PHI node in TailBB by turning it into a copy in PredBB. 00394 /// Remember the source register that's contributed by PredBB and update SSA 00395 /// update map. 00396 void TailDuplicatePass::ProcessPHI( 00397 MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB, 00398 DenseMap<unsigned, unsigned> &LocalVRMap, 00399 SmallVectorImpl<std::pair<unsigned, unsigned> > &Copies, 00400 const DenseSet<unsigned> &RegsUsedByPhi, bool Remove) { 00401 unsigned DefReg = MI->getOperand(0).getReg(); 00402 unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB); 00403 assert(SrcOpIdx && "Unable to find matching PHI source?"); 00404 unsigned SrcReg = MI->getOperand(SrcOpIdx).getReg(); 00405 const TargetRegisterClass *RC = MRI->getRegClass(DefReg); 00406 LocalVRMap.insert(std::make_pair(DefReg, SrcReg)); 00407 00408 // Insert a copy from source to the end of the block. The def register is the 00409 // available value liveout of the block. 00410 unsigned NewDef = MRI->createVirtualRegister(RC); 00411 Copies.push_back(std::make_pair(NewDef, SrcReg)); 00412 if (isDefLiveOut(DefReg, TailBB, MRI) || RegsUsedByPhi.count(DefReg)) 00413 AddSSAUpdateEntry(DefReg, NewDef, PredBB); 00414 00415 if (!Remove) 00416 return; 00417 00418 // Remove PredBB from the PHI node. 00419 MI->RemoveOperand(SrcOpIdx+1); 00420 MI->RemoveOperand(SrcOpIdx); 00421 if (MI->getNumOperands() == 1) 00422 MI->eraseFromParent(); 00423 } 00424 00425 /// DuplicateInstruction - Duplicate a TailBB instruction to PredBB and update 00426 /// the source operands due to earlier PHI translation. 00427 void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI, 00428 MachineBasicBlock *TailBB, 00429 MachineBasicBlock *PredBB, 00430 MachineFunction &MF, 00431 DenseMap<unsigned, unsigned> &LocalVRMap, 00432 const DenseSet<unsigned> &UsedByPhi) { 00433 MachineInstr *NewMI = TII->duplicate(MI, MF); 00434 for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) { 00435 MachineOperand &MO = NewMI->getOperand(i); 00436 if (!MO.isReg()) 00437 continue; 00438 unsigned Reg = MO.getReg(); 00439 if (!TargetRegisterInfo::isVirtualRegister(Reg)) 00440 continue; 00441 if (MO.isDef()) { 00442 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 00443 unsigned NewReg = MRI->createVirtualRegister(RC); 00444 MO.setReg(NewReg); 00445 LocalVRMap.insert(std::make_pair(Reg, NewReg)); 00446 if (isDefLiveOut(Reg, TailBB, MRI) || UsedByPhi.count(Reg)) 00447 AddSSAUpdateEntry(Reg, NewReg, PredBB); 00448 } else { 00449 DenseMap<unsigned, unsigned>::iterator VI = LocalVRMap.find(Reg); 00450 if (VI != LocalVRMap.end()) { 00451 MO.setReg(VI->second); 00452 MRI->constrainRegClass(VI->second, MRI->getRegClass(Reg)); 00453 } 00454 } 00455 } 00456 PredBB->insert(PredBB->instr_end(), NewMI); 00457 } 00458 00459 /// UpdateSuccessorsPHIs - After FromBB is tail duplicated into its predecessor 00460 /// blocks, the successors have gained new predecessors. Update the PHI 00461 /// instructions in them accordingly. 00462 void 00463 TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, 00464 SmallVectorImpl<MachineBasicBlock *> &TDBBs, 00465 SmallSetVector<MachineBasicBlock*,8> &Succs) { 00466 for (SmallSetVector<MachineBasicBlock*, 8>::iterator SI = Succs.begin(), 00467 SE = Succs.end(); SI != SE; ++SI) { 00468 MachineBasicBlock *SuccBB = *SI; 00469 for (MachineBasicBlock::iterator II = SuccBB->begin(), EE = SuccBB->end(); 00470 II != EE; ++II) { 00471 if (!II->isPHI()) 00472 break; 00473 MachineInstrBuilder MIB(*FromBB->getParent(), II); 00474 unsigned Idx = 0; 00475 for (unsigned i = 1, e = II->getNumOperands(); i != e; i += 2) { 00476 MachineOperand &MO = II->getOperand(i+1); 00477 if (MO.getMBB() == FromBB) { 00478 Idx = i; 00479 break; 00480 } 00481 } 00482 00483 assert(Idx != 0); 00484 MachineOperand &MO0 = II->getOperand(Idx); 00485 unsigned Reg = MO0.getReg(); 00486 if (isDead) { 00487 // Folded into the previous BB. 00488 // There could be duplicate phi source entries. FIXME: Should sdisel 00489 // or earlier pass fixed this? 00490 for (unsigned i = II->getNumOperands()-2; i != Idx; i -= 2) { 00491 MachineOperand &MO = II->getOperand(i+1); 00492 if (MO.getMBB() == FromBB) { 00493 II->RemoveOperand(i+1); 00494 II->RemoveOperand(i); 00495 } 00496 } 00497 } else 00498 Idx = 0; 00499 00500 // If Idx is set, the operands at Idx and Idx+1 must be removed. 00501 // We reuse the location to avoid expensive RemoveOperand calls. 00502 00503 DenseMap<unsigned,AvailableValsTy>::iterator LI=SSAUpdateVals.find(Reg); 00504 if (LI != SSAUpdateVals.end()) { 00505 // This register is defined in the tail block. 00506 for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) { 00507 MachineBasicBlock *SrcBB = LI->second[j].first; 00508 // If we didn't duplicate a bb into a particular predecessor, we 00509 // might still have added an entry to SSAUpdateVals to correcly 00510 // recompute SSA. If that case, avoid adding a dummy extra argument 00511 // this PHI. 00512 if (!SrcBB->isSuccessor(SuccBB)) 00513 continue; 00514 00515 unsigned SrcReg = LI->second[j].second; 00516 if (Idx != 0) { 00517 II->getOperand(Idx).setReg(SrcReg); 00518 II->getOperand(Idx+1).setMBB(SrcBB); 00519 Idx = 0; 00520 } else { 00521 MIB.addReg(SrcReg).addMBB(SrcBB); 00522 } 00523 } 00524 } else { 00525 // Live in tail block, must also be live in predecessors. 00526 for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) { 00527 MachineBasicBlock *SrcBB = TDBBs[j]; 00528 if (Idx != 0) { 00529 II->getOperand(Idx).setReg(Reg); 00530 II->getOperand(Idx+1).setMBB(SrcBB); 00531 Idx = 0; 00532 } else { 00533 MIB.addReg(Reg).addMBB(SrcBB); 00534 } 00535 } 00536 } 00537 if (Idx != 0) { 00538 II->RemoveOperand(Idx+1); 00539 II->RemoveOperand(Idx); 00540 } 00541 } 00542 } 00543 } 00544 00545 /// shouldTailDuplicate - Determine if it is profitable to duplicate this block. 00546 bool 00547 TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF, 00548 bool IsSimple, 00549 MachineBasicBlock &TailBB) { 00550 // Only duplicate blocks that end with unconditional branches. 00551 if (TailBB.canFallThrough()) 00552 return false; 00553 00554 // Don't try to tail-duplicate single-block loops. 00555 if (TailBB.isSuccessor(&TailBB)) 00556 return false; 00557 00558 // Set the limit on the cost to duplicate. When optimizing for size, 00559 // duplicate only one, because one branch instruction can be eliminated to 00560 // compensate for the duplication. 00561 unsigned MaxDuplicateCount; 00562 if (TailDuplicateSize.getNumOccurrences() == 0 && 00563 MF.getFunction()->getAttributes(). 00564 hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize)) 00565 MaxDuplicateCount = 1; 00566 else 00567 MaxDuplicateCount = TailDuplicateSize; 00568 00569 // If the target has hardware branch prediction that can handle indirect 00570 // branches, duplicating them can often make them predictable when there 00571 // are common paths through the code. The limit needs to be high enough 00572 // to allow undoing the effects of tail merging and other optimizations 00573 // that rearrange the predecessors of the indirect branch. 00574 00575 bool HasIndirectbr = false; 00576 if (!TailBB.empty()) 00577 HasIndirectbr = TailBB.back().isIndirectBranch(); 00578 00579 if (HasIndirectbr && PreRegAlloc) 00580 MaxDuplicateCount = 20; 00581 00582 // Check the instructions in the block to determine whether tail-duplication 00583 // is invalid or unlikely to be profitable. 00584 unsigned InstrCount = 0; 00585 for (MachineBasicBlock::iterator I = TailBB.begin(); I != TailBB.end(); ++I) { 00586 // Non-duplicable things shouldn't be tail-duplicated. 00587 if (I->isNotDuplicable()) 00588 return false; 00589 00590 // Do not duplicate 'return' instructions if this is a pre-regalloc run. 00591 // A return may expand into a lot more instructions (e.g. reload of callee 00592 // saved registers) after PEI. 00593 if (PreRegAlloc && I->isReturn()) 00594 return false; 00595 00596 // Avoid duplicating calls before register allocation. Calls presents a 00597 // barrier to register allocation so duplicating them may end up increasing 00598 // spills. 00599 if (PreRegAlloc && I->isCall()) 00600 return false; 00601 00602 if (!I->isPHI() && !I->isDebugValue()) 00603 InstrCount += 1; 00604 00605 if (InstrCount > MaxDuplicateCount) 00606 return false; 00607 } 00608 00609 if (HasIndirectbr && PreRegAlloc) 00610 return true; 00611 00612 if (IsSimple) 00613 return true; 00614 00615 if (!PreRegAlloc) 00616 return true; 00617 00618 return canCompletelyDuplicateBB(TailBB); 00619 } 00620 00621 /// isSimpleBB - True if this BB has only one unconditional jump. 00622 bool 00623 TailDuplicatePass::isSimpleBB(MachineBasicBlock *TailBB) { 00624 if (TailBB->succ_size() != 1) 00625 return false; 00626 if (TailBB->pred_empty()) 00627 return false; 00628 MachineBasicBlock::iterator I = TailBB->begin(); 00629 MachineBasicBlock::iterator E = TailBB->end(); 00630 while (I != E && I->isDebugValue()) 00631 ++I; 00632 if (I == E) 00633 return true; 00634 return I->isUnconditionalBranch(); 00635 } 00636 00637 static bool 00638 bothUsedInPHI(const MachineBasicBlock &A, 00639 SmallPtrSet<MachineBasicBlock*, 8> SuccsB) { 00640 for (MachineBasicBlock::const_succ_iterator SI = A.succ_begin(), 00641 SE = A.succ_end(); SI != SE; ++SI) { 00642 MachineBasicBlock *BB = *SI; 00643 if (SuccsB.count(BB) && !BB->empty() && BB->begin()->isPHI()) 00644 return true; 00645 } 00646 00647 return false; 00648 } 00649 00650 bool 00651 TailDuplicatePass::canCompletelyDuplicateBB(MachineBasicBlock &BB) { 00652 for (MachineBasicBlock::pred_iterator PI = BB.pred_begin(), 00653 PE = BB.pred_end(); PI != PE; ++PI) { 00654 MachineBasicBlock *PredBB = *PI; 00655 00656 if (PredBB->succ_size() > 1) 00657 return false; 00658 00659 MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; 00660 SmallVector<MachineOperand, 4> PredCond; 00661 if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) 00662 return false; 00663 00664 if (!PredCond.empty()) 00665 return false; 00666 } 00667 return true; 00668 } 00669 00670 bool 00671 TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB, 00672 SmallVectorImpl<MachineBasicBlock *> &TDBBs, 00673 const DenseSet<unsigned> &UsedByPhi, 00674 SmallVectorImpl<MachineInstr *> &Copies) { 00675 SmallPtrSet<MachineBasicBlock*, 8> Succs(TailBB->succ_begin(), 00676 TailBB->succ_end()); 00677 SmallVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(), 00678 TailBB->pred_end()); 00679 bool Changed = false; 00680 for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), 00681 PE = Preds.end(); PI != PE; ++PI) { 00682 MachineBasicBlock *PredBB = *PI; 00683 00684 if (PredBB->getLandingPadSuccessor()) 00685 continue; 00686 00687 if (bothUsedInPHI(*PredBB, Succs)) 00688 continue; 00689 00690 MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; 00691 SmallVector<MachineOperand, 4> PredCond; 00692 if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) 00693 continue; 00694 00695 Changed = true; 00696 DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB 00697 << "From simple Succ: " << *TailBB); 00698 00699 MachineBasicBlock *NewTarget = *TailBB->succ_begin(); 00700 MachineBasicBlock *NextBB = std::next(MachineFunction::iterator(PredBB)); 00701 00702 // Make PredFBB explicit. 00703 if (PredCond.empty()) 00704 PredFBB = PredTBB; 00705 00706 // Make fall through explicit. 00707 if (!PredTBB) 00708 PredTBB = NextBB; 00709 if (!PredFBB) 00710 PredFBB = NextBB; 00711 00712 // Redirect 00713 if (PredFBB == TailBB) 00714 PredFBB = NewTarget; 00715 if (PredTBB == TailBB) 00716 PredTBB = NewTarget; 00717 00718 // Make the branch unconditional if possible 00719 if (PredTBB == PredFBB) { 00720 PredCond.clear(); 00721 PredFBB = nullptr; 00722 } 00723 00724 // Avoid adding fall through branches. 00725 if (PredFBB == NextBB) 00726 PredFBB = nullptr; 00727 if (PredTBB == NextBB && PredFBB == nullptr) 00728 PredTBB = nullptr; 00729 00730 TII->RemoveBranch(*PredBB); 00731 00732 if (PredTBB) 00733 TII->InsertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc()); 00734 00735 uint32_t Weight = MBPI->getEdgeWeight(PredBB, TailBB); 00736 PredBB->removeSuccessor(TailBB); 00737 unsigned NumSuccessors = PredBB->succ_size(); 00738 assert(NumSuccessors <= 1); 00739 if (NumSuccessors == 0 || *PredBB->succ_begin() != NewTarget) 00740 PredBB->addSuccessor(NewTarget, Weight); 00741 00742 TDBBs.push_back(PredBB); 00743 } 00744 return Changed; 00745 } 00746 00747 /// TailDuplicate - If it is profitable, duplicate TailBB's contents in each 00748 /// of its predecessors. 00749 bool 00750 TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, 00751 bool IsSimple, 00752 MachineFunction &MF, 00753 SmallVectorImpl<MachineBasicBlock *> &TDBBs, 00754 SmallVectorImpl<MachineInstr *> &Copies) { 00755 DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n'); 00756 00757 DenseSet<unsigned> UsedByPhi; 00758 getRegsUsedByPHIs(*TailBB, &UsedByPhi); 00759 00760 if (IsSimple) 00761 return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi, Copies); 00762 00763 // Iterate through all the unique predecessors and tail-duplicate this 00764 // block into them, if possible. Copying the list ahead of time also 00765 // avoids trouble with the predecessor list reallocating. 00766 bool Changed = false; 00767 SmallSetVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(), 00768 TailBB->pred_end()); 00769 for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), 00770 PE = Preds.end(); PI != PE; ++PI) { 00771 MachineBasicBlock *PredBB = *PI; 00772 00773 assert(TailBB != PredBB && 00774 "Single-block loop should have been rejected earlier!"); 00775 // EH edges are ignored by AnalyzeBranch. 00776 if (PredBB->succ_size() > 1) 00777 continue; 00778 00779 MachineBasicBlock *PredTBB, *PredFBB; 00780 SmallVector<MachineOperand, 4> PredCond; 00781 if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) 00782 continue; 00783 if (!PredCond.empty()) 00784 continue; 00785 // Don't duplicate into a fall-through predecessor (at least for now). 00786 if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough()) 00787 continue; 00788 00789 DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB 00790 << "From Succ: " << *TailBB); 00791 00792 TDBBs.push_back(PredBB); 00793 00794 // Remove PredBB's unconditional branch. 00795 TII->RemoveBranch(*PredBB); 00796 00797 if (RS && !TailBB->livein_empty()) { 00798 // Update PredBB livein. 00799 RS->enterBasicBlock(PredBB); 00800 if (!PredBB->empty()) 00801 RS->forward(std::prev(PredBB->end())); 00802 for (MachineBasicBlock::livein_iterator I = TailBB->livein_begin(), 00803 E = TailBB->livein_end(); I != E; ++I) { 00804 if (!RS->isRegUsed(*I, false)) 00805 // If a register is previously livein to the tail but it's not live 00806 // at the end of predecessor BB, then it should be added to its 00807 // livein list. 00808 PredBB->addLiveIn(*I); 00809 } 00810 } 00811 00812 // Clone the contents of TailBB into PredBB. 00813 DenseMap<unsigned, unsigned> LocalVRMap; 00814 SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; 00815 // Use instr_iterator here to properly handle bundles, e.g. 00816 // ARM Thumb2 IT block. 00817 MachineBasicBlock::instr_iterator I = TailBB->instr_begin(); 00818 while (I != TailBB->instr_end()) { 00819 MachineInstr *MI = &*I; 00820 ++I; 00821 if (MI->isPHI()) { 00822 // Replace the uses of the def of the PHI with the register coming 00823 // from PredBB. 00824 ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true); 00825 } else { 00826 // Replace def of virtual registers with new registers, and update 00827 // uses with PHI source register or the new registers. 00828 DuplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap, UsedByPhi); 00829 } 00830 } 00831 MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator(); 00832 for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { 00833 Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(), 00834 TII->get(TargetOpcode::COPY), 00835 CopyInfos[i].first).addReg(CopyInfos[i].second)); 00836 } 00837 00838 // Simplify 00839 TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true); 00840 00841 NumInstrDups += TailBB->size() - 1; // subtract one for removed branch 00842 00843 // Update the CFG. 00844 PredBB->removeSuccessor(PredBB->succ_begin()); 00845 assert(PredBB->succ_empty() && 00846 "TailDuplicate called on block with multiple successors!"); 00847 for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(), 00848 E = TailBB->succ_end(); I != E; ++I) 00849 PredBB->addSuccessor(*I, MBPI->getEdgeWeight(TailBB, I)); 00850 00851 Changed = true; 00852 ++NumTailDups; 00853 } 00854 00855 // If TailBB was duplicated into all its predecessors except for the prior 00856 // block, which falls through unconditionally, move the contents of this 00857 // block into the prior block. 00858 MachineBasicBlock *PrevBB = std::prev(MachineFunction::iterator(TailBB)); 00859 MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr; 00860 SmallVector<MachineOperand, 4> PriorCond; 00861 // This has to check PrevBB->succ_size() because EH edges are ignored by 00862 // AnalyzeBranch. 00863 if (PrevBB->succ_size() == 1 && 00864 !TII->AnalyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) && 00865 PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 && 00866 !TailBB->hasAddressTaken()) { 00867 DEBUG(dbgs() << "\nMerging into block: " << *PrevBB 00868 << "From MBB: " << *TailBB); 00869 if (PreRegAlloc) { 00870 DenseMap<unsigned, unsigned> LocalVRMap; 00871 SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; 00872 MachineBasicBlock::iterator I = TailBB->begin(); 00873 // Process PHI instructions first. 00874 while (I != TailBB->end() && I->isPHI()) { 00875 // Replace the uses of the def of the PHI with the register coming 00876 // from PredBB. 00877 MachineInstr *MI = &*I++; 00878 ProcessPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi, true); 00879 if (MI->getParent()) 00880 MI->eraseFromParent(); 00881 } 00882 00883 // Now copy the non-PHI instructions. 00884 while (I != TailBB->end()) { 00885 // Replace def of virtual registers with new registers, and update 00886 // uses with PHI source register or the new registers. 00887 MachineInstr *MI = &*I++; 00888 assert(!MI->isBundle() && "Not expecting bundles before regalloc!"); 00889 DuplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap, UsedByPhi); 00890 MI->eraseFromParent(); 00891 } 00892 MachineBasicBlock::iterator Loc = PrevBB->getFirstTerminator(); 00893 for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { 00894 Copies.push_back(BuildMI(*PrevBB, Loc, DebugLoc(), 00895 TII->get(TargetOpcode::COPY), 00896 CopyInfos[i].first) 00897 .addReg(CopyInfos[i].second)); 00898 } 00899 } else { 00900 // No PHIs to worry about, just splice the instructions over. 00901 PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end()); 00902 } 00903 PrevBB->removeSuccessor(PrevBB->succ_begin()); 00904 assert(PrevBB->succ_empty()); 00905 PrevBB->transferSuccessors(TailBB); 00906 TDBBs.push_back(PrevBB); 00907 Changed = true; 00908 } 00909 00910 // If this is after register allocation, there are no phis to fix. 00911 if (!PreRegAlloc) 00912 return Changed; 00913 00914 // If we made no changes so far, we are safe. 00915 if (!Changed) 00916 return Changed; 00917 00918 00919 // Handle the nasty case in that we duplicated a block that is part of a loop 00920 // into some but not all of its predecessors. For example: 00921 // 1 -> 2 <-> 3 | 00922 // \ | 00923 // \---> rest | 00924 // if we duplicate 2 into 1 but not into 3, we end up with 00925 // 12 -> 3 <-> 2 -> rest | 00926 // \ / | 00927 // \----->-----/ | 00928 // If there was a "var = phi(1, 3)" in 2, it has to be ultimately replaced 00929 // with a phi in 3 (which now dominates 2). 00930 // What we do here is introduce a copy in 3 of the register defined by the 00931 // phi, just like when we are duplicating 2 into 3, but we don't copy any 00932 // real instructions or remove the 3 -> 2 edge from the phi in 2. 00933 for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), 00934 PE = Preds.end(); PI != PE; ++PI) { 00935 MachineBasicBlock *PredBB = *PI; 00936 if (std::find(TDBBs.begin(), TDBBs.end(), PredBB) != TDBBs.end()) 00937 continue; 00938 00939 // EH edges 00940 if (PredBB->succ_size() != 1) 00941 continue; 00942 00943 DenseMap<unsigned, unsigned> LocalVRMap; 00944 SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; 00945 MachineBasicBlock::iterator I = TailBB->begin(); 00946 // Process PHI instructions first. 00947 while (I != TailBB->end() && I->isPHI()) { 00948 // Replace the uses of the def of the PHI with the register coming 00949 // from PredBB. 00950 MachineInstr *MI = &*I++; 00951 ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false); 00952 } 00953 MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator(); 00954 for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { 00955 Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(), 00956 TII->get(TargetOpcode::COPY), 00957 CopyInfos[i].first).addReg(CopyInfos[i].second)); 00958 } 00959 } 00960 00961 return Changed; 00962 } 00963 00964 /// RemoveDeadBlock - Remove the specified dead machine basic block from the 00965 /// function, updating the CFG. 00966 void TailDuplicatePass::RemoveDeadBlock(MachineBasicBlock *MBB) { 00967 assert(MBB->pred_empty() && "MBB must be dead!"); 00968 DEBUG(dbgs() << "\nRemoving MBB: " << *MBB); 00969 00970 // Remove all successors. 00971 while (!MBB->succ_empty()) 00972 MBB->removeSuccessor(MBB->succ_end()-1); 00973 00974 // Remove the block. 00975 MBB->eraseFromParent(); 00976 }