LLVM API Documentation
00001 //===-- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ---------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This pass performs loop invariant code motion on machine instructions. We 00011 // attempt to remove as much code from the body of a loop as possible. 00012 // 00013 // This pass does not attempt to throttle itself to limit register pressure. 00014 // The register allocation phases are expected to perform rematerialization 00015 // to recover when register pressure is high. 00016 // 00017 // This pass is not intended to be a replacement or a complete alternative 00018 // for the LLVM-IR-level LICM pass. It is only designed to hoist simple 00019 // constructs that are not exposed before lowering and instruction selection. 00020 // 00021 //===----------------------------------------------------------------------===// 00022 00023 #include "llvm/CodeGen/Passes.h" 00024 #include "llvm/ADT/DenseMap.h" 00025 #include "llvm/ADT/SmallSet.h" 00026 #include "llvm/ADT/Statistic.h" 00027 #include "llvm/Analysis/AliasAnalysis.h" 00028 #include "llvm/CodeGen/MachineDominators.h" 00029 #include "llvm/CodeGen/MachineFrameInfo.h" 00030 #include "llvm/CodeGen/MachineLoopInfo.h" 00031 #include "llvm/CodeGen/MachineMemOperand.h" 00032 #include "llvm/CodeGen/MachineRegisterInfo.h" 00033 #include "llvm/CodeGen/PseudoSourceValue.h" 00034 #include "llvm/MC/MCInstrItineraries.h" 00035 #include "llvm/Support/CommandLine.h" 00036 #include "llvm/Support/Debug.h" 00037 #include "llvm/Support/raw_ostream.h" 00038 #include "llvm/Target/TargetInstrInfo.h" 00039 #include "llvm/Target/TargetLowering.h" 00040 #include "llvm/Target/TargetMachine.h" 00041 #include "llvm/Target/TargetRegisterInfo.h" 00042 #include "llvm/Target/TargetSubtargetInfo.h" 00043 using namespace llvm; 00044 00045 #define DEBUG_TYPE "machine-licm" 00046 00047 static cl::opt<bool> 00048 AvoidSpeculation("avoid-speculation", 00049 cl::desc("MachineLICM should avoid speculation"), 00050 cl::init(true), cl::Hidden); 00051 00052 STATISTIC(NumHoisted, 00053 "Number of machine instructions hoisted out of loops"); 00054 STATISTIC(NumLowRP, 00055 "Number of instructions hoisted in low reg pressure situation"); 00056 STATISTIC(NumHighLatency, 00057 "Number of high latency instructions hoisted"); 00058 STATISTIC(NumCSEed, 00059 "Number of hoisted machine instructions CSEed"); 00060 STATISTIC(NumPostRAHoisted, 00061 "Number of machine instructions hoisted out of loops post regalloc"); 00062 00063 namespace { 00064 class MachineLICM : public MachineFunctionPass { 00065 const TargetMachine *TM; 00066 const TargetInstrInfo *TII; 00067 const TargetLoweringBase *TLI; 00068 const TargetRegisterInfo *TRI; 00069 const MachineFrameInfo *MFI; 00070 MachineRegisterInfo *MRI; 00071 const InstrItineraryData *InstrItins; 00072 bool PreRegAlloc; 00073 00074 // Various analyses that we use... 00075 AliasAnalysis *AA; // Alias analysis info. 00076 MachineLoopInfo *MLI; // Current MachineLoopInfo 00077 MachineDominatorTree *DT; // Machine dominator tree for the cur loop 00078 00079 // State that is updated as we process loops 00080 bool Changed; // True if a loop is changed. 00081 bool FirstInLoop; // True if it's the first LICM in the loop. 00082 MachineLoop *CurLoop; // The current loop we are working on. 00083 MachineBasicBlock *CurPreheader; // The preheader for CurLoop. 00084 00085 // Exit blocks for CurLoop. 00086 SmallVector<MachineBasicBlock*, 8> ExitBlocks; 00087 00088 bool isExitBlock(const MachineBasicBlock *MBB) const { 00089 return std::find(ExitBlocks.begin(), ExitBlocks.end(), MBB) != 00090 ExitBlocks.end(); 00091 } 00092 00093 // Track 'estimated' register pressure. 00094 SmallSet<unsigned, 32> RegSeen; 00095 SmallVector<unsigned, 8> RegPressure; 00096 00097 // Register pressure "limit" per register class. If the pressure 00098 // is higher than the limit, then it's considered high. 00099 SmallVector<unsigned, 8> RegLimit; 00100 00101 // Register pressure on path leading from loop preheader to current BB. 00102 SmallVector<SmallVector<unsigned, 8>, 16> BackTrace; 00103 00104 // For each opcode, keep a list of potential CSE instructions. 00105 DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap; 00106 00107 enum { 00108 SpeculateFalse = 0, 00109 SpeculateTrue = 1, 00110 SpeculateUnknown = 2 00111 }; 00112 00113 // If a MBB does not dominate loop exiting blocks then it may not safe 00114 // to hoist loads from this block. 00115 // Tri-state: 0 - false, 1 - true, 2 - unknown 00116 unsigned SpeculationState; 00117 00118 public: 00119 static char ID; // Pass identification, replacement for typeid 00120 MachineLICM() : 00121 MachineFunctionPass(ID), PreRegAlloc(true) { 00122 initializeMachineLICMPass(*PassRegistry::getPassRegistry()); 00123 } 00124 00125 explicit MachineLICM(bool PreRA) : 00126 MachineFunctionPass(ID), PreRegAlloc(PreRA) { 00127 initializeMachineLICMPass(*PassRegistry::getPassRegistry()); 00128 } 00129 00130 bool runOnMachineFunction(MachineFunction &MF) override; 00131 00132 void getAnalysisUsage(AnalysisUsage &AU) const override { 00133 AU.addRequired<MachineLoopInfo>(); 00134 AU.addRequired<MachineDominatorTree>(); 00135 AU.addRequired<AliasAnalysis>(); 00136 AU.addPreserved<MachineLoopInfo>(); 00137 AU.addPreserved<MachineDominatorTree>(); 00138 MachineFunctionPass::getAnalysisUsage(AU); 00139 } 00140 00141 void releaseMemory() override { 00142 RegSeen.clear(); 00143 RegPressure.clear(); 00144 RegLimit.clear(); 00145 BackTrace.clear(); 00146 for (DenseMap<unsigned,std::vector<const MachineInstr*> >::iterator 00147 CI = CSEMap.begin(), CE = CSEMap.end(); CI != CE; ++CI) 00148 CI->second.clear(); 00149 CSEMap.clear(); 00150 } 00151 00152 private: 00153 /// CandidateInfo - Keep track of information about hoisting candidates. 00154 struct CandidateInfo { 00155 MachineInstr *MI; 00156 unsigned Def; 00157 int FI; 00158 CandidateInfo(MachineInstr *mi, unsigned def, int fi) 00159 : MI(mi), Def(def), FI(fi) {} 00160 }; 00161 00162 /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop 00163 /// invariants out to the preheader. 00164 void HoistRegionPostRA(); 00165 00166 /// HoistPostRA - When an instruction is found to only use loop invariant 00167 /// operands that is safe to hoist, this instruction is called to do the 00168 /// dirty work. 00169 void HoistPostRA(MachineInstr *MI, unsigned Def); 00170 00171 /// ProcessMI - Examine the instruction for potentai LICM candidate. Also 00172 /// gather register def and frame object update information. 00173 void ProcessMI(MachineInstr *MI, 00174 BitVector &PhysRegDefs, 00175 BitVector &PhysRegClobbers, 00176 SmallSet<int, 32> &StoredFIs, 00177 SmallVectorImpl<CandidateInfo> &Candidates); 00178 00179 /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the 00180 /// current loop. 00181 void AddToLiveIns(unsigned Reg); 00182 00183 /// IsLICMCandidate - Returns true if the instruction may be a suitable 00184 /// candidate for LICM. e.g. If the instruction is a call, then it's 00185 /// obviously not safe to hoist it. 00186 bool IsLICMCandidate(MachineInstr &I); 00187 00188 /// IsLoopInvariantInst - Returns true if the instruction is loop 00189 /// invariant. I.e., all virtual register operands are defined outside of 00190 /// the loop, physical registers aren't accessed (explicitly or implicitly), 00191 /// and the instruction is hoistable. 00192 /// 00193 bool IsLoopInvariantInst(MachineInstr &I); 00194 00195 /// HasLoopPHIUse - Return true if the specified instruction is used by any 00196 /// phi node in the current loop. 00197 bool HasLoopPHIUse(const MachineInstr *MI) const; 00198 00199 /// HasHighOperandLatency - Compute operand latency between a def of 'Reg' 00200 /// and an use in the current loop, return true if the target considered 00201 /// it 'high'. 00202 bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx, 00203 unsigned Reg) const; 00204 00205 bool IsCheapInstruction(MachineInstr &MI) const; 00206 00207 /// CanCauseHighRegPressure - Visit BBs from header to current BB, 00208 /// check if hoisting an instruction of the given cost matrix can cause high 00209 /// register pressure. 00210 bool CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost, bool Cheap); 00211 00212 /// UpdateBackTraceRegPressure - Traverse the back trace from header to 00213 /// the current block and update their register pressures to reflect the 00214 /// effect of hoisting MI from the current block to the preheader. 00215 void UpdateBackTraceRegPressure(const MachineInstr *MI); 00216 00217 /// IsProfitableToHoist - Return true if it is potentially profitable to 00218 /// hoist the given loop invariant. 00219 bool IsProfitableToHoist(MachineInstr &MI); 00220 00221 /// IsGuaranteedToExecute - Check if this mbb is guaranteed to execute. 00222 /// If not then a load from this mbb may not be safe to hoist. 00223 bool IsGuaranteedToExecute(MachineBasicBlock *BB); 00224 00225 void EnterScope(MachineBasicBlock *MBB); 00226 00227 void ExitScope(MachineBasicBlock *MBB); 00228 00229 /// ExitScopeIfDone - Destroy scope for the MBB that corresponds to given 00230 /// dominator tree node if its a leaf or all of its children are done. Walk 00231 /// up the dominator tree to destroy ancestors which are now done. 00232 void ExitScopeIfDone(MachineDomTreeNode *Node, 00233 DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren, 00234 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap); 00235 00236 /// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all 00237 /// blocks dominated by the specified header block, and that are in the 00238 /// current loop) in depth first order w.r.t the DominatorTree. This allows 00239 /// us to visit definitions before uses, allowing us to hoist a loop body in 00240 /// one pass without iteration. 00241 /// 00242 void HoistOutOfLoop(MachineDomTreeNode *LoopHeaderNode); 00243 void HoistRegion(MachineDomTreeNode *N, bool IsHeader); 00244 00245 /// getRegisterClassIDAndCost - For a given MI, register, and the operand 00246 /// index, return the ID and cost of its representative register class by 00247 /// reference. 00248 void getRegisterClassIDAndCost(const MachineInstr *MI, 00249 unsigned Reg, unsigned OpIdx, 00250 unsigned &RCId, unsigned &RCCost) const; 00251 00252 /// InitRegPressure - Find all virtual register references that are liveout 00253 /// of the preheader to initialize the starting "register pressure". Note 00254 /// this does not count live through (livein but not used) registers. 00255 void InitRegPressure(MachineBasicBlock *BB); 00256 00257 /// UpdateRegPressure - Update estimate of register pressure after the 00258 /// specified instruction. 00259 void UpdateRegPressure(const MachineInstr *MI); 00260 00261 /// ExtractHoistableLoad - Unfold a load from the given machineinstr if 00262 /// the load itself could be hoisted. Return the unfolded and hoistable 00263 /// load, or null if the load couldn't be unfolded or if it wouldn't 00264 /// be hoistable. 00265 MachineInstr *ExtractHoistableLoad(MachineInstr *MI); 00266 00267 /// LookForDuplicate - Find an instruction amount PrevMIs that is a 00268 /// duplicate of MI. Return this instruction if it's found. 00269 const MachineInstr *LookForDuplicate(const MachineInstr *MI, 00270 std::vector<const MachineInstr*> &PrevMIs); 00271 00272 /// EliminateCSE - Given a LICM'ed instruction, look for an instruction on 00273 /// the preheader that compute the same value. If it's found, do a RAU on 00274 /// with the definition of the existing instruction rather than hoisting 00275 /// the instruction to the preheader. 00276 bool EliminateCSE(MachineInstr *MI, 00277 DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI); 00278 00279 /// MayCSE - Return true if the given instruction will be CSE'd if it's 00280 /// hoisted out of the loop. 00281 bool MayCSE(MachineInstr *MI); 00282 00283 /// Hoist - When an instruction is found to only use loop invariant operands 00284 /// that is safe to hoist, this instruction is called to do the dirty work. 00285 /// It returns true if the instruction is hoisted. 00286 bool Hoist(MachineInstr *MI, MachineBasicBlock *Preheader); 00287 00288 /// InitCSEMap - Initialize the CSE map with instructions that are in the 00289 /// current loop preheader that may become duplicates of instructions that 00290 /// are hoisted out of the loop. 00291 void InitCSEMap(MachineBasicBlock *BB); 00292 00293 /// getCurPreheader - Get the preheader for the current loop, splitting 00294 /// a critical edge if needed. 00295 MachineBasicBlock *getCurPreheader(); 00296 }; 00297 } // end anonymous namespace 00298 00299 char MachineLICM::ID = 0; 00300 char &llvm::MachineLICMID = MachineLICM::ID; 00301 INITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm", 00302 "Machine Loop Invariant Code Motion", false, false) 00303 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) 00304 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 00305 INITIALIZE_AG_DEPENDENCY(AliasAnalysis) 00306 INITIALIZE_PASS_END(MachineLICM, "machinelicm", 00307 "Machine Loop Invariant Code Motion", false, false) 00308 00309 /// LoopIsOuterMostWithPredecessor - Test if the given loop is the outer-most 00310 /// loop that has a unique predecessor. 00311 static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) { 00312 // Check whether this loop even has a unique predecessor. 00313 if (!CurLoop->getLoopPredecessor()) 00314 return false; 00315 // Ok, now check to see if any of its outer loops do. 00316 for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop()) 00317 if (L->getLoopPredecessor()) 00318 return false; 00319 // None of them did, so this is the outermost with a unique predecessor. 00320 return true; 00321 } 00322 00323 bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { 00324 if (skipOptnoneFunction(*MF.getFunction())) 00325 return false; 00326 00327 Changed = FirstInLoop = false; 00328 TM = &MF.getTarget(); 00329 TII = TM->getSubtargetImpl()->getInstrInfo(); 00330 TLI = TM->getSubtargetImpl()->getTargetLowering(); 00331 TRI = TM->getSubtargetImpl()->getRegisterInfo(); 00332 MFI = MF.getFrameInfo(); 00333 MRI = &MF.getRegInfo(); 00334 InstrItins = TM->getSubtargetImpl()->getInstrItineraryData(); 00335 00336 PreRegAlloc = MRI->isSSA(); 00337 00338 if (PreRegAlloc) 00339 DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: "); 00340 else 00341 DEBUG(dbgs() << "******** Post-regalloc Machine LICM: "); 00342 DEBUG(dbgs() << MF.getName() << " ********\n"); 00343 00344 if (PreRegAlloc) { 00345 // Estimate register pressure during pre-regalloc pass. 00346 unsigned NumRC = TRI->getNumRegClasses(); 00347 RegPressure.resize(NumRC); 00348 std::fill(RegPressure.begin(), RegPressure.end(), 0); 00349 RegLimit.resize(NumRC); 00350 for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), 00351 E = TRI->regclass_end(); I != E; ++I) 00352 RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, MF); 00353 } 00354 00355 // Get our Loop information... 00356 MLI = &getAnalysis<MachineLoopInfo>(); 00357 DT = &getAnalysis<MachineDominatorTree>(); 00358 AA = &getAnalysis<AliasAnalysis>(); 00359 00360 SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end()); 00361 while (!Worklist.empty()) { 00362 CurLoop = Worklist.pop_back_val(); 00363 CurPreheader = nullptr; 00364 ExitBlocks.clear(); 00365 00366 // If this is done before regalloc, only visit outer-most preheader-sporting 00367 // loops. 00368 if (PreRegAlloc && !LoopIsOuterMostWithPredecessor(CurLoop)) { 00369 Worklist.append(CurLoop->begin(), CurLoop->end()); 00370 continue; 00371 } 00372 00373 CurLoop->getExitBlocks(ExitBlocks); 00374 00375 if (!PreRegAlloc) 00376 HoistRegionPostRA(); 00377 else { 00378 // CSEMap is initialized for loop header when the first instruction is 00379 // being hoisted. 00380 MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader()); 00381 FirstInLoop = true; 00382 HoistOutOfLoop(N); 00383 CSEMap.clear(); 00384 } 00385 } 00386 00387 return Changed; 00388 } 00389 00390 /// InstructionStoresToFI - Return true if instruction stores to the 00391 /// specified frame. 00392 static bool InstructionStoresToFI(const MachineInstr *MI, int FI) { 00393 for (MachineInstr::mmo_iterator o = MI->memoperands_begin(), 00394 oe = MI->memoperands_end(); o != oe; ++o) { 00395 if (!(*o)->isStore() || !(*o)->getPseudoValue()) 00396 continue; 00397 if (const FixedStackPseudoSourceValue *Value = 00398 dyn_cast<FixedStackPseudoSourceValue>((*o)->getPseudoValue())) { 00399 if (Value->getFrameIndex() == FI) 00400 return true; 00401 } 00402 } 00403 return false; 00404 } 00405 00406 /// ProcessMI - Examine the instruction for potentai LICM candidate. Also 00407 /// gather register def and frame object update information. 00408 void MachineLICM::ProcessMI(MachineInstr *MI, 00409 BitVector &PhysRegDefs, 00410 BitVector &PhysRegClobbers, 00411 SmallSet<int, 32> &StoredFIs, 00412 SmallVectorImpl<CandidateInfo> &Candidates) { 00413 bool RuledOut = false; 00414 bool HasNonInvariantUse = false; 00415 unsigned Def = 0; 00416 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 00417 const MachineOperand &MO = MI->getOperand(i); 00418 if (MO.isFI()) { 00419 // Remember if the instruction stores to the frame index. 00420 int FI = MO.getIndex(); 00421 if (!StoredFIs.count(FI) && 00422 MFI->isSpillSlotObjectIndex(FI) && 00423 InstructionStoresToFI(MI, FI)) 00424 StoredFIs.insert(FI); 00425 HasNonInvariantUse = true; 00426 continue; 00427 } 00428 00429 // We can't hoist an instruction defining a physreg that is clobbered in 00430 // the loop. 00431 if (MO.isRegMask()) { 00432 PhysRegClobbers.setBitsNotInMask(MO.getRegMask()); 00433 continue; 00434 } 00435 00436 if (!MO.isReg()) 00437 continue; 00438 unsigned Reg = MO.getReg(); 00439 if (!Reg) 00440 continue; 00441 assert(TargetRegisterInfo::isPhysicalRegister(Reg) && 00442 "Not expecting virtual register!"); 00443 00444 if (!MO.isDef()) { 00445 if (Reg && (PhysRegDefs.test(Reg) || PhysRegClobbers.test(Reg))) 00446 // If it's using a non-loop-invariant register, then it's obviously not 00447 // safe to hoist. 00448 HasNonInvariantUse = true; 00449 continue; 00450 } 00451 00452 if (MO.isImplicit()) { 00453 for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) 00454 PhysRegClobbers.set(*AI); 00455 if (!MO.isDead()) 00456 // Non-dead implicit def? This cannot be hoisted. 00457 RuledOut = true; 00458 // No need to check if a dead implicit def is also defined by 00459 // another instruction. 00460 continue; 00461 } 00462 00463 // FIXME: For now, avoid instructions with multiple defs, unless 00464 // it's a dead implicit def. 00465 if (Def) 00466 RuledOut = true; 00467 else 00468 Def = Reg; 00469 00470 // If we have already seen another instruction that defines the same 00471 // register, then this is not safe. Two defs is indicated by setting a 00472 // PhysRegClobbers bit. 00473 for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) { 00474 if (PhysRegDefs.test(*AS)) 00475 PhysRegClobbers.set(*AS); 00476 PhysRegDefs.set(*AS); 00477 } 00478 if (PhysRegClobbers.test(Reg)) 00479 // MI defined register is seen defined by another instruction in 00480 // the loop, it cannot be a LICM candidate. 00481 RuledOut = true; 00482 } 00483 00484 // Only consider reloads for now and remats which do not have register 00485 // operands. FIXME: Consider unfold load folding instructions. 00486 if (Def && !RuledOut) { 00487 int FI = INT_MIN; 00488 if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) || 00489 (TII->isLoadFromStackSlot(MI, FI) && MFI->isSpillSlotObjectIndex(FI))) 00490 Candidates.push_back(CandidateInfo(MI, Def, FI)); 00491 } 00492 } 00493 00494 /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop 00495 /// invariants out to the preheader. 00496 void MachineLICM::HoistRegionPostRA() { 00497 MachineBasicBlock *Preheader = getCurPreheader(); 00498 if (!Preheader) 00499 return; 00500 00501 unsigned NumRegs = TRI->getNumRegs(); 00502 BitVector PhysRegDefs(NumRegs); // Regs defined once in the loop. 00503 BitVector PhysRegClobbers(NumRegs); // Regs defined more than once. 00504 00505 SmallVector<CandidateInfo, 32> Candidates; 00506 SmallSet<int, 32> StoredFIs; 00507 00508 // Walk the entire region, count number of defs for each register, and 00509 // collect potential LICM candidates. 00510 const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks(); 00511 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { 00512 MachineBasicBlock *BB = Blocks[i]; 00513 00514 // If the header of the loop containing this basic block is a landing pad, 00515 // then don't try to hoist instructions out of this loop. 00516 const MachineLoop *ML = MLI->getLoopFor(BB); 00517 if (ML && ML->getHeader()->isLandingPad()) continue; 00518 00519 // Conservatively treat live-in's as an external def. 00520 // FIXME: That means a reload that're reused in successor block(s) will not 00521 // be LICM'ed. 00522 for (MachineBasicBlock::livein_iterator I = BB->livein_begin(), 00523 E = BB->livein_end(); I != E; ++I) { 00524 unsigned Reg = *I; 00525 for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) 00526 PhysRegDefs.set(*AI); 00527 } 00528 00529 SpeculationState = SpeculateUnknown; 00530 for (MachineBasicBlock::iterator 00531 MII = BB->begin(), E = BB->end(); MII != E; ++MII) { 00532 MachineInstr *MI = &*MII; 00533 ProcessMI(MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates); 00534 } 00535 } 00536 00537 // Gather the registers read / clobbered by the terminator. 00538 BitVector TermRegs(NumRegs); 00539 MachineBasicBlock::iterator TI = Preheader->getFirstTerminator(); 00540 if (TI != Preheader->end()) { 00541 for (unsigned i = 0, e = TI->getNumOperands(); i != e; ++i) { 00542 const MachineOperand &MO = TI->getOperand(i); 00543 if (!MO.isReg()) 00544 continue; 00545 unsigned Reg = MO.getReg(); 00546 if (!Reg) 00547 continue; 00548 for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) 00549 TermRegs.set(*AI); 00550 } 00551 } 00552 00553 // Now evaluate whether the potential candidates qualify. 00554 // 1. Check if the candidate defined register is defined by another 00555 // instruction in the loop. 00556 // 2. If the candidate is a load from stack slot (always true for now), 00557 // check if the slot is stored anywhere in the loop. 00558 // 3. Make sure candidate def should not clobber 00559 // registers read by the terminator. Similarly its def should not be 00560 // clobbered by the terminator. 00561 for (unsigned i = 0, e = Candidates.size(); i != e; ++i) { 00562 if (Candidates[i].FI != INT_MIN && 00563 StoredFIs.count(Candidates[i].FI)) 00564 continue; 00565 00566 unsigned Def = Candidates[i].Def; 00567 if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) { 00568 bool Safe = true; 00569 MachineInstr *MI = Candidates[i].MI; 00570 for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) { 00571 const MachineOperand &MO = MI->getOperand(j); 00572 if (!MO.isReg() || MO.isDef() || !MO.getReg()) 00573 continue; 00574 unsigned Reg = MO.getReg(); 00575 if (PhysRegDefs.test(Reg) || 00576 PhysRegClobbers.test(Reg)) { 00577 // If it's using a non-loop-invariant register, then it's obviously 00578 // not safe to hoist. 00579 Safe = false; 00580 break; 00581 } 00582 } 00583 if (Safe) 00584 HoistPostRA(MI, Candidates[i].Def); 00585 } 00586 } 00587 } 00588 00589 /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the current 00590 /// loop, and make sure it is not killed by any instructions in the loop. 00591 void MachineLICM::AddToLiveIns(unsigned Reg) { 00592 const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks(); 00593 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { 00594 MachineBasicBlock *BB = Blocks[i]; 00595 if (!BB->isLiveIn(Reg)) 00596 BB->addLiveIn(Reg); 00597 for (MachineBasicBlock::iterator 00598 MII = BB->begin(), E = BB->end(); MII != E; ++MII) { 00599 MachineInstr *MI = &*MII; 00600 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 00601 MachineOperand &MO = MI->getOperand(i); 00602 if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue; 00603 if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg())) 00604 MO.setIsKill(false); 00605 } 00606 } 00607 } 00608 } 00609 00610 /// HoistPostRA - When an instruction is found to only use loop invariant 00611 /// operands that is safe to hoist, this instruction is called to do the 00612 /// dirty work. 00613 void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) { 00614 MachineBasicBlock *Preheader = getCurPreheader(); 00615 00616 // Now move the instructions to the predecessor, inserting it before any 00617 // terminator instructions. 00618 DEBUG(dbgs() << "Hoisting to BB#" << Preheader->getNumber() << " from BB#" 00619 << MI->getParent()->getNumber() << ": " << *MI); 00620 00621 // Splice the instruction to the preheader. 00622 MachineBasicBlock *MBB = MI->getParent(); 00623 Preheader->splice(Preheader->getFirstTerminator(), MBB, MI); 00624 00625 // Add register to livein list to all the BBs in the current loop since a 00626 // loop invariant must be kept live throughout the whole loop. This is 00627 // important to ensure later passes do not scavenge the def register. 00628 AddToLiveIns(Def); 00629 00630 ++NumPostRAHoisted; 00631 Changed = true; 00632 } 00633 00634 // IsGuaranteedToExecute - Check if this mbb is guaranteed to execute. 00635 // If not then a load from this mbb may not be safe to hoist. 00636 bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) { 00637 if (SpeculationState != SpeculateUnknown) 00638 return SpeculationState == SpeculateFalse; 00639 00640 if (BB != CurLoop->getHeader()) { 00641 // Check loop exiting blocks. 00642 SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks; 00643 CurLoop->getExitingBlocks(CurrentLoopExitingBlocks); 00644 for (unsigned i = 0, e = CurrentLoopExitingBlocks.size(); i != e; ++i) 00645 if (!DT->dominates(BB, CurrentLoopExitingBlocks[i])) { 00646 SpeculationState = SpeculateTrue; 00647 return false; 00648 } 00649 } 00650 00651 SpeculationState = SpeculateFalse; 00652 return true; 00653 } 00654 00655 void MachineLICM::EnterScope(MachineBasicBlock *MBB) { 00656 DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n'); 00657 00658 // Remember livein register pressure. 00659 BackTrace.push_back(RegPressure); 00660 } 00661 00662 void MachineLICM::ExitScope(MachineBasicBlock *MBB) { 00663 DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n'); 00664 BackTrace.pop_back(); 00665 } 00666 00667 /// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given 00668 /// dominator tree node if its a leaf or all of its children are done. Walk 00669 /// up the dominator tree to destroy ancestors which are now done. 00670 void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node, 00671 DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren, 00672 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) { 00673 if (OpenChildren[Node]) 00674 return; 00675 00676 // Pop scope. 00677 ExitScope(Node->getBlock()); 00678 00679 // Now traverse upwards to pop ancestors whose offsprings are all done. 00680 while (MachineDomTreeNode *Parent = ParentMap[Node]) { 00681 unsigned Left = --OpenChildren[Parent]; 00682 if (Left != 0) 00683 break; 00684 ExitScope(Parent->getBlock()); 00685 Node = Parent; 00686 } 00687 } 00688 00689 /// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all 00690 /// blocks dominated by the specified header block, and that are in the 00691 /// current loop) in depth first order w.r.t the DominatorTree. This allows 00692 /// us to visit definitions before uses, allowing us to hoist a loop body in 00693 /// one pass without iteration. 00694 /// 00695 void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { 00696 SmallVector<MachineDomTreeNode*, 32> Scopes; 00697 SmallVector<MachineDomTreeNode*, 8> WorkList; 00698 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap; 00699 DenseMap<MachineDomTreeNode*, unsigned> OpenChildren; 00700 00701 // Perform a DFS walk to determine the order of visit. 00702 WorkList.push_back(HeaderN); 00703 do { 00704 MachineDomTreeNode *Node = WorkList.pop_back_val(); 00705 assert(Node && "Null dominator tree node?"); 00706 MachineBasicBlock *BB = Node->getBlock(); 00707 00708 // If the header of the loop containing this basic block is a landing pad, 00709 // then don't try to hoist instructions out of this loop. 00710 const MachineLoop *ML = MLI->getLoopFor(BB); 00711 if (ML && ML->getHeader()->isLandingPad()) 00712 continue; 00713 00714 // If this subregion is not in the top level loop at all, exit. 00715 if (!CurLoop->contains(BB)) 00716 continue; 00717 00718 Scopes.push_back(Node); 00719 const std::vector<MachineDomTreeNode*> &Children = Node->getChildren(); 00720 unsigned NumChildren = Children.size(); 00721 00722 // Don't hoist things out of a large switch statement. This often causes 00723 // code to be hoisted that wasn't going to be executed, and increases 00724 // register pressure in a situation where it's likely to matter. 00725 if (BB->succ_size() >= 25) 00726 NumChildren = 0; 00727 00728 OpenChildren[Node] = NumChildren; 00729 // Add children in reverse order as then the next popped worklist node is 00730 // the first child of this node. This means we ultimately traverse the 00731 // DOM tree in exactly the same order as if we'd recursed. 00732 for (int i = (int)NumChildren-1; i >= 0; --i) { 00733 MachineDomTreeNode *Child = Children[i]; 00734 ParentMap[Child] = Node; 00735 WorkList.push_back(Child); 00736 } 00737 } while (!WorkList.empty()); 00738 00739 if (Scopes.size() != 0) { 00740 MachineBasicBlock *Preheader = getCurPreheader(); 00741 if (!Preheader) 00742 return; 00743 00744 // Compute registers which are livein into the loop headers. 00745 RegSeen.clear(); 00746 BackTrace.clear(); 00747 InitRegPressure(Preheader); 00748 } 00749 00750 // Now perform LICM. 00751 for (unsigned i = 0, e = Scopes.size(); i != e; ++i) { 00752 MachineDomTreeNode *Node = Scopes[i]; 00753 MachineBasicBlock *MBB = Node->getBlock(); 00754 00755 MachineBasicBlock *Preheader = getCurPreheader(); 00756 if (!Preheader) 00757 continue; 00758 00759 EnterScope(MBB); 00760 00761 // Process the block 00762 SpeculationState = SpeculateUnknown; 00763 for (MachineBasicBlock::iterator 00764 MII = MBB->begin(), E = MBB->end(); MII != E; ) { 00765 MachineBasicBlock::iterator NextMII = MII; ++NextMII; 00766 MachineInstr *MI = &*MII; 00767 if (!Hoist(MI, Preheader)) 00768 UpdateRegPressure(MI); 00769 MII = NextMII; 00770 } 00771 00772 // If it's a leaf node, it's done. Traverse upwards to pop ancestors. 00773 ExitScopeIfDone(Node, OpenChildren, ParentMap); 00774 } 00775 } 00776 00777 static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) { 00778 return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg()); 00779 } 00780 00781 /// getRegisterClassIDAndCost - For a given MI, register, and the operand 00782 /// index, return the ID and cost of its representative register class. 00783 void 00784 MachineLICM::getRegisterClassIDAndCost(const MachineInstr *MI, 00785 unsigned Reg, unsigned OpIdx, 00786 unsigned &RCId, unsigned &RCCost) const { 00787 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 00788 MVT VT = *RC->vt_begin(); 00789 if (VT == MVT::Untyped) { 00790 RCId = RC->getID(); 00791 RCCost = 1; 00792 } else { 00793 RCId = TLI->getRepRegClassFor(VT)->getID(); 00794 RCCost = TLI->getRepRegClassCostFor(VT); 00795 } 00796 } 00797 00798 /// InitRegPressure - Find all virtual register references that are liveout of 00799 /// the preheader to initialize the starting "register pressure". Note this 00800 /// does not count live through (livein but not used) registers. 00801 void MachineLICM::InitRegPressure(MachineBasicBlock *BB) { 00802 std::fill(RegPressure.begin(), RegPressure.end(), 0); 00803 00804 // If the preheader has only a single predecessor and it ends with a 00805 // fallthrough or an unconditional branch, then scan its predecessor for live 00806 // defs as well. This happens whenever the preheader is created by splitting 00807 // the critical edge from the loop predecessor to the loop header. 00808 if (BB->pred_size() == 1) { 00809 MachineBasicBlock *TBB = nullptr, *FBB = nullptr; 00810 SmallVector<MachineOperand, 4> Cond; 00811 if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond, false) && Cond.empty()) 00812 InitRegPressure(*BB->pred_begin()); 00813 } 00814 00815 for (MachineBasicBlock::iterator MII = BB->begin(), E = BB->end(); 00816 MII != E; ++MII) { 00817 MachineInstr *MI = &*MII; 00818 for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { 00819 const MachineOperand &MO = MI->getOperand(i); 00820 if (!MO.isReg() || MO.isImplicit()) 00821 continue; 00822 unsigned Reg = MO.getReg(); 00823 if (!TargetRegisterInfo::isVirtualRegister(Reg)) 00824 continue; 00825 00826 bool isNew = RegSeen.insert(Reg); 00827 unsigned RCId, RCCost; 00828 getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost); 00829 if (MO.isDef()) 00830 RegPressure[RCId] += RCCost; 00831 else { 00832 bool isKill = isOperandKill(MO, MRI); 00833 if (isNew && !isKill) 00834 // Haven't seen this, it must be a livein. 00835 RegPressure[RCId] += RCCost; 00836 else if (!isNew && isKill) 00837 RegPressure[RCId] -= RCCost; 00838 } 00839 } 00840 } 00841 } 00842 00843 /// UpdateRegPressure - Update estimate of register pressure after the 00844 /// specified instruction. 00845 void MachineLICM::UpdateRegPressure(const MachineInstr *MI) { 00846 if (MI->isImplicitDef()) 00847 return; 00848 00849 SmallVector<unsigned, 4> Defs; 00850 for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { 00851 const MachineOperand &MO = MI->getOperand(i); 00852 if (!MO.isReg() || MO.isImplicit()) 00853 continue; 00854 unsigned Reg = MO.getReg(); 00855 if (!TargetRegisterInfo::isVirtualRegister(Reg)) 00856 continue; 00857 00858 bool isNew = RegSeen.insert(Reg); 00859 if (MO.isDef()) 00860 Defs.push_back(Reg); 00861 else if (!isNew && isOperandKill(MO, MRI)) { 00862 unsigned RCId, RCCost; 00863 getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost); 00864 if (RCCost > RegPressure[RCId]) 00865 RegPressure[RCId] = 0; 00866 else 00867 RegPressure[RCId] -= RCCost; 00868 } 00869 } 00870 00871 unsigned Idx = 0; 00872 while (!Defs.empty()) { 00873 unsigned Reg = Defs.pop_back_val(); 00874 unsigned RCId, RCCost; 00875 getRegisterClassIDAndCost(MI, Reg, Idx, RCId, RCCost); 00876 RegPressure[RCId] += RCCost; 00877 ++Idx; 00878 } 00879 } 00880 00881 /// isLoadFromGOTOrConstantPool - Return true if this machine instruction 00882 /// loads from global offset table or constant pool. 00883 static bool isLoadFromGOTOrConstantPool(MachineInstr &MI) { 00884 assert (MI.mayLoad() && "Expected MI that loads!"); 00885 for (MachineInstr::mmo_iterator I = MI.memoperands_begin(), 00886 E = MI.memoperands_end(); I != E; ++I) { 00887 if (const PseudoSourceValue *PSV = (*I)->getPseudoValue()) { 00888 if (PSV == PSV->getGOT() || PSV == PSV->getConstantPool()) 00889 return true; 00890 } 00891 } 00892 return false; 00893 } 00894 00895 /// IsLICMCandidate - Returns true if the instruction may be a suitable 00896 /// candidate for LICM. e.g. If the instruction is a call, then it's obviously 00897 /// not safe to hoist it. 00898 bool MachineLICM::IsLICMCandidate(MachineInstr &I) { 00899 // Check if it's safe to move the instruction. 00900 bool DontMoveAcrossStore = true; 00901 if (!I.isSafeToMove(TII, AA, DontMoveAcrossStore)) 00902 return false; 00903 00904 // If it is load then check if it is guaranteed to execute by making sure that 00905 // it dominates all exiting blocks. If it doesn't, then there is a path out of 00906 // the loop which does not execute this load, so we can't hoist it. Loads 00907 // from constant memory are not safe to speculate all the time, for example 00908 // indexed load from a jump table. 00909 // Stores and side effects are already checked by isSafeToMove. 00910 if (I.mayLoad() && !isLoadFromGOTOrConstantPool(I) && 00911 !IsGuaranteedToExecute(I.getParent())) 00912 return false; 00913 00914 return true; 00915 } 00916 00917 /// IsLoopInvariantInst - Returns true if the instruction is loop 00918 /// invariant. I.e., all virtual register operands are defined outside of the 00919 /// loop, physical registers aren't accessed explicitly, and there are no side 00920 /// effects that aren't captured by the operands or other flags. 00921 /// 00922 bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { 00923 if (!IsLICMCandidate(I)) 00924 return false; 00925 00926 // The instruction is loop invariant if all of its operands are. 00927 for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { 00928 const MachineOperand &MO = I.getOperand(i); 00929 00930 if (!MO.isReg()) 00931 continue; 00932 00933 unsigned Reg = MO.getReg(); 00934 if (Reg == 0) continue; 00935 00936 // Don't hoist an instruction that uses or defines a physical register. 00937 if (TargetRegisterInfo::isPhysicalRegister(Reg)) { 00938 if (MO.isUse()) { 00939 // If the physreg has no defs anywhere, it's just an ambient register 00940 // and we can freely move its uses. Alternatively, if it's allocatable, 00941 // it could get allocated to something with a def during allocation. 00942 if (!MRI->isConstantPhysReg(Reg, *I.getParent()->getParent())) 00943 return false; 00944 // Otherwise it's safe to move. 00945 continue; 00946 } else if (!MO.isDead()) { 00947 // A def that isn't dead. We can't move it. 00948 return false; 00949 } else if (CurLoop->getHeader()->isLiveIn(Reg)) { 00950 // If the reg is live into the loop, we can't hoist an instruction 00951 // which would clobber it. 00952 return false; 00953 } 00954 } 00955 00956 if (!MO.isUse()) 00957 continue; 00958 00959 assert(MRI->getVRegDef(Reg) && 00960 "Machine instr not mapped for this vreg?!"); 00961 00962 // If the loop contains the definition of an operand, then the instruction 00963 // isn't loop invariant. 00964 if (CurLoop->contains(MRI->getVRegDef(Reg))) 00965 return false; 00966 } 00967 00968 // If we got this far, the instruction is loop invariant! 00969 return true; 00970 } 00971 00972 00973 /// HasLoopPHIUse - Return true if the specified instruction is used by a 00974 /// phi node and hoisting it could cause a copy to be inserted. 00975 bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const { 00976 SmallVector<const MachineInstr*, 8> Work(1, MI); 00977 do { 00978 MI = Work.pop_back_val(); 00979 for (ConstMIOperands MO(MI); MO.isValid(); ++MO) { 00980 if (!MO->isReg() || !MO->isDef()) 00981 continue; 00982 unsigned Reg = MO->getReg(); 00983 if (!TargetRegisterInfo::isVirtualRegister(Reg)) 00984 continue; 00985 for (MachineInstr &UseMI : MRI->use_instructions(Reg)) { 00986 // A PHI may cause a copy to be inserted. 00987 if (UseMI.isPHI()) { 00988 // A PHI inside the loop causes a copy because the live range of Reg is 00989 // extended across the PHI. 00990 if (CurLoop->contains(&UseMI)) 00991 return true; 00992 // A PHI in an exit block can cause a copy to be inserted if the PHI 00993 // has multiple predecessors in the loop with different values. 00994 // For now, approximate by rejecting all exit blocks. 00995 if (isExitBlock(UseMI.getParent())) 00996 return true; 00997 continue; 00998 } 00999 // Look past copies as well. 01000 if (UseMI.isCopy() && CurLoop->contains(&UseMI)) 01001 Work.push_back(&UseMI); 01002 } 01003 } 01004 } while (!Work.empty()); 01005 return false; 01006 } 01007 01008 /// HasHighOperandLatency - Compute operand latency between a def of 'Reg' 01009 /// and an use in the current loop, return true if the target considered 01010 /// it 'high'. 01011 bool MachineLICM::HasHighOperandLatency(MachineInstr &MI, 01012 unsigned DefIdx, unsigned Reg) const { 01013 if (!InstrItins || InstrItins->isEmpty() || MRI->use_nodbg_empty(Reg)) 01014 return false; 01015 01016 for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg)) { 01017 if (UseMI.isCopyLike()) 01018 continue; 01019 if (!CurLoop->contains(UseMI.getParent())) 01020 continue; 01021 for (unsigned i = 0, e = UseMI.getNumOperands(); i != e; ++i) { 01022 const MachineOperand &MO = UseMI.getOperand(i); 01023 if (!MO.isReg() || !MO.isUse()) 01024 continue; 01025 unsigned MOReg = MO.getReg(); 01026 if (MOReg != Reg) 01027 continue; 01028 01029 if (TII->hasHighOperandLatency(InstrItins, MRI, &MI, DefIdx, &UseMI, i)) 01030 return true; 01031 } 01032 01033 // Only look at the first in loop use. 01034 break; 01035 } 01036 01037 return false; 01038 } 01039 01040 /// IsCheapInstruction - Return true if the instruction is marked "cheap" or 01041 /// the operand latency between its def and a use is one or less. 01042 bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const { 01043 if (TII->isAsCheapAsAMove(&MI) || MI.isCopyLike()) 01044 return true; 01045 if (!InstrItins || InstrItins->isEmpty()) 01046 return false; 01047 01048 bool isCheap = false; 01049 unsigned NumDefs = MI.getDesc().getNumDefs(); 01050 for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) { 01051 MachineOperand &DefMO = MI.getOperand(i); 01052 if (!DefMO.isReg() || !DefMO.isDef()) 01053 continue; 01054 --NumDefs; 01055 unsigned Reg = DefMO.getReg(); 01056 if (TargetRegisterInfo::isPhysicalRegister(Reg)) 01057 continue; 01058 01059 if (!TII->hasLowDefLatency(InstrItins, &MI, i)) 01060 return false; 01061 isCheap = true; 01062 } 01063 01064 return isCheap; 01065 } 01066 01067 /// CanCauseHighRegPressure - Visit BBs from header to current BB, check 01068 /// if hoisting an instruction of the given cost matrix can cause high 01069 /// register pressure. 01070 bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost, 01071 bool CheapInstr) { 01072 for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end(); 01073 CI != CE; ++CI) { 01074 if (CI->second <= 0) 01075 continue; 01076 01077 unsigned RCId = CI->first; 01078 unsigned Limit = RegLimit[RCId]; 01079 int Cost = CI->second; 01080 01081 // Don't hoist cheap instructions if they would increase register pressure, 01082 // even if we're under the limit. 01083 if (CheapInstr) 01084 return true; 01085 01086 for (unsigned i = BackTrace.size(); i != 0; --i) { 01087 SmallVectorImpl<unsigned> &RP = BackTrace[i-1]; 01088 if (RP[RCId] + Cost >= Limit) 01089 return true; 01090 } 01091 } 01092 01093 return false; 01094 } 01095 01096 /// UpdateBackTraceRegPressure - Traverse the back trace from header to the 01097 /// current block and update their register pressures to reflect the effect 01098 /// of hoisting MI from the current block to the preheader. 01099 void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) { 01100 if (MI->isImplicitDef()) 01101 return; 01102 01103 // First compute the 'cost' of the instruction, i.e. its contribution 01104 // to register pressure. 01105 DenseMap<unsigned, int> Cost; 01106 for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { 01107 const MachineOperand &MO = MI->getOperand(i); 01108 if (!MO.isReg() || MO.isImplicit()) 01109 continue; 01110 unsigned Reg = MO.getReg(); 01111 if (!TargetRegisterInfo::isVirtualRegister(Reg)) 01112 continue; 01113 01114 unsigned RCId, RCCost; 01115 getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost); 01116 if (MO.isDef()) { 01117 DenseMap<unsigned, int>::iterator CI = Cost.find(RCId); 01118 if (CI != Cost.end()) 01119 CI->second += RCCost; 01120 else 01121 Cost.insert(std::make_pair(RCId, RCCost)); 01122 } else if (isOperandKill(MO, MRI)) { 01123 DenseMap<unsigned, int>::iterator CI = Cost.find(RCId); 01124 if (CI != Cost.end()) 01125 CI->second -= RCCost; 01126 else 01127 Cost.insert(std::make_pair(RCId, -RCCost)); 01128 } 01129 } 01130 01131 // Update register pressure of blocks from loop header to current block. 01132 for (unsigned i = 0, e = BackTrace.size(); i != e; ++i) { 01133 SmallVectorImpl<unsigned> &RP = BackTrace[i]; 01134 for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end(); 01135 CI != CE; ++CI) { 01136 unsigned RCId = CI->first; 01137 RP[RCId] += CI->second; 01138 } 01139 } 01140 } 01141 01142 /// IsProfitableToHoist - Return true if it is potentially profitable to hoist 01143 /// the given loop invariant. 01144 bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { 01145 if (MI.isImplicitDef()) 01146 return true; 01147 01148 // Besides removing computation from the loop, hoisting an instruction has 01149 // these effects: 01150 // 01151 // - The value defined by the instruction becomes live across the entire 01152 // loop. This increases register pressure in the loop. 01153 // 01154 // - If the value is used by a PHI in the loop, a copy will be required for 01155 // lowering the PHI after extending the live range. 01156 // 01157 // - When hoisting the last use of a value in the loop, that value no longer 01158 // needs to be live in the loop. This lowers register pressure in the loop. 01159 01160 bool CheapInstr = IsCheapInstruction(MI); 01161 bool CreatesCopy = HasLoopPHIUse(&MI); 01162 01163 // Don't hoist a cheap instruction if it would create a copy in the loop. 01164 if (CheapInstr && CreatesCopy) { 01165 DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI); 01166 return false; 01167 } 01168 01169 // Rematerializable instructions should always be hoisted since the register 01170 // allocator can just pull them down again when needed. 01171 if (TII->isTriviallyReMaterializable(&MI, AA)) 01172 return true; 01173 01174 // Estimate register pressure to determine whether to LICM the instruction. 01175 // In low register pressure situation, we can be more aggressive about 01176 // hoisting. Also, favors hoisting long latency instructions even in 01177 // moderately high pressure situation. 01178 // Cheap instructions will only be hoisted if they don't increase register 01179 // pressure at all. 01180 // FIXME: If there are long latency loop-invariant instructions inside the 01181 // loop at this point, why didn't the optimizer's LICM hoist them? 01182 DenseMap<unsigned, int> Cost; 01183 for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) { 01184 const MachineOperand &MO = MI.getOperand(i); 01185 if (!MO.isReg() || MO.isImplicit()) 01186 continue; 01187 unsigned Reg = MO.getReg(); 01188 if (!TargetRegisterInfo::isVirtualRegister(Reg)) 01189 continue; 01190 01191 unsigned RCId, RCCost; 01192 getRegisterClassIDAndCost(&MI, Reg, i, RCId, RCCost); 01193 if (MO.isDef()) { 01194 if (HasHighOperandLatency(MI, i, Reg)) { 01195 DEBUG(dbgs() << "Hoist High Latency: " << MI); 01196 ++NumHighLatency; 01197 return true; 01198 } 01199 Cost[RCId] += RCCost; 01200 } else if (isOperandKill(MO, MRI)) { 01201 // Is a virtual register use is a kill, hoisting it out of the loop 01202 // may actually reduce register pressure or be register pressure 01203 // neutral. 01204 Cost[RCId] -= RCCost; 01205 } 01206 } 01207 01208 // Visit BBs from header to current BB, if hoisting this doesn't cause 01209 // high register pressure, then it's safe to proceed. 01210 if (!CanCauseHighRegPressure(Cost, CheapInstr)) { 01211 DEBUG(dbgs() << "Hoist non-reg-pressure: " << MI); 01212 ++NumLowRP; 01213 return true; 01214 } 01215 01216 // Don't risk increasing register pressure if it would create copies. 01217 if (CreatesCopy) { 01218 DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI); 01219 return false; 01220 } 01221 01222 // Do not "speculate" in high register pressure situation. If an 01223 // instruction is not guaranteed to be executed in the loop, it's best to be 01224 // conservative. 01225 if (AvoidSpeculation && 01226 (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI))) { 01227 DEBUG(dbgs() << "Won't speculate: " << MI); 01228 return false; 01229 } 01230 01231 // High register pressure situation, only hoist if the instruction is going 01232 // to be remat'ed. 01233 if (!TII->isTriviallyReMaterializable(&MI, AA) && 01234 !MI.isInvariantLoad(AA)) { 01235 DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI); 01236 return false; 01237 } 01238 01239 return true; 01240 } 01241 01242 MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { 01243 // Don't unfold simple loads. 01244 if (MI->canFoldAsLoad()) 01245 return nullptr; 01246 01247 // If not, we may be able to unfold a load and hoist that. 01248 // First test whether the instruction is loading from an amenable 01249 // memory location. 01250 if (!MI->isInvariantLoad(AA)) 01251 return nullptr; 01252 01253 // Next determine the register class for a temporary register. 01254 unsigned LoadRegIndex; 01255 unsigned NewOpc = 01256 TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(), 01257 /*UnfoldLoad=*/true, 01258 /*UnfoldStore=*/false, 01259 &LoadRegIndex); 01260 if (NewOpc == 0) return nullptr; 01261 const MCInstrDesc &MID = TII->get(NewOpc); 01262 if (MID.getNumDefs() != 1) return nullptr; 01263 MachineFunction &MF = *MI->getParent()->getParent(); 01264 const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF); 01265 // Ok, we're unfolding. Create a temporary register and do the unfold. 01266 unsigned Reg = MRI->createVirtualRegister(RC); 01267 01268 SmallVector<MachineInstr *, 2> NewMIs; 01269 bool Success = 01270 TII->unfoldMemoryOperand(MF, MI, Reg, 01271 /*UnfoldLoad=*/true, /*UnfoldStore=*/false, 01272 NewMIs); 01273 (void)Success; 01274 assert(Success && 01275 "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold " 01276 "succeeded!"); 01277 assert(NewMIs.size() == 2 && 01278 "Unfolded a load into multiple instructions!"); 01279 MachineBasicBlock *MBB = MI->getParent(); 01280 MachineBasicBlock::iterator Pos = MI; 01281 MBB->insert(Pos, NewMIs[0]); 01282 MBB->insert(Pos, NewMIs[1]); 01283 // If unfolding produced a load that wasn't loop-invariant or profitable to 01284 // hoist, discard the new instructions and bail. 01285 if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) { 01286 NewMIs[0]->eraseFromParent(); 01287 NewMIs[1]->eraseFromParent(); 01288 return nullptr; 01289 } 01290 01291 // Update register pressure for the unfolded instruction. 01292 UpdateRegPressure(NewMIs[1]); 01293 01294 // Otherwise we successfully unfolded a load that we can hoist. 01295 MI->eraseFromParent(); 01296 return NewMIs[0]; 01297 } 01298 01299 void MachineLICM::InitCSEMap(MachineBasicBlock *BB) { 01300 for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) { 01301 const MachineInstr *MI = &*I; 01302 unsigned Opcode = MI->getOpcode(); 01303 DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator 01304 CI = CSEMap.find(Opcode); 01305 if (CI != CSEMap.end()) 01306 CI->second.push_back(MI); 01307 else { 01308 std::vector<const MachineInstr*> CSEMIs; 01309 CSEMIs.push_back(MI); 01310 CSEMap.insert(std::make_pair(Opcode, CSEMIs)); 01311 } 01312 } 01313 } 01314 01315 const MachineInstr* 01316 MachineLICM::LookForDuplicate(const MachineInstr *MI, 01317 std::vector<const MachineInstr*> &PrevMIs) { 01318 for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) { 01319 const MachineInstr *PrevMI = PrevMIs[i]; 01320 if (TII->produceSameValue(MI, PrevMI, (PreRegAlloc ? MRI : nullptr))) 01321 return PrevMI; 01322 } 01323 return nullptr; 01324 } 01325 01326 bool MachineLICM::EliminateCSE(MachineInstr *MI, 01327 DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) { 01328 // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate 01329 // the undef property onto uses. 01330 if (CI == CSEMap.end() || MI->isImplicitDef()) 01331 return false; 01332 01333 if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) { 01334 DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup); 01335 01336 // Replace virtual registers defined by MI by their counterparts defined 01337 // by Dup. 01338 SmallVector<unsigned, 2> Defs; 01339 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 01340 const MachineOperand &MO = MI->getOperand(i); 01341 01342 // Physical registers may not differ here. 01343 assert((!MO.isReg() || MO.getReg() == 0 || 01344 !TargetRegisterInfo::isPhysicalRegister(MO.getReg()) || 01345 MO.getReg() == Dup->getOperand(i).getReg()) && 01346 "Instructions with different phys regs are not identical!"); 01347 01348 if (MO.isReg() && MO.isDef() && 01349 !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) 01350 Defs.push_back(i); 01351 } 01352 01353 SmallVector<const TargetRegisterClass*, 2> OrigRCs; 01354 for (unsigned i = 0, e = Defs.size(); i != e; ++i) { 01355 unsigned Idx = Defs[i]; 01356 unsigned Reg = MI->getOperand(Idx).getReg(); 01357 unsigned DupReg = Dup->getOperand(Idx).getReg(); 01358 OrigRCs.push_back(MRI->getRegClass(DupReg)); 01359 01360 if (!MRI->constrainRegClass(DupReg, MRI->getRegClass(Reg))) { 01361 // Restore old RCs if more than one defs. 01362 for (unsigned j = 0; j != i; ++j) 01363 MRI->setRegClass(Dup->getOperand(Defs[j]).getReg(), OrigRCs[j]); 01364 return false; 01365 } 01366 } 01367 01368 for (unsigned i = 0, e = Defs.size(); i != e; ++i) { 01369 unsigned Idx = Defs[i]; 01370 unsigned Reg = MI->getOperand(Idx).getReg(); 01371 unsigned DupReg = Dup->getOperand(Idx).getReg(); 01372 MRI->replaceRegWith(Reg, DupReg); 01373 MRI->clearKillFlags(DupReg); 01374 } 01375 01376 MI->eraseFromParent(); 01377 ++NumCSEed; 01378 return true; 01379 } 01380 return false; 01381 } 01382 01383 /// MayCSE - Return true if the given instruction will be CSE'd if it's 01384 /// hoisted out of the loop. 01385 bool MachineLICM::MayCSE(MachineInstr *MI) { 01386 unsigned Opcode = MI->getOpcode(); 01387 DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator 01388 CI = CSEMap.find(Opcode); 01389 // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate 01390 // the undef property onto uses. 01391 if (CI == CSEMap.end() || MI->isImplicitDef()) 01392 return false; 01393 01394 return LookForDuplicate(MI, CI->second) != nullptr; 01395 } 01396 01397 /// Hoist - When an instruction is found to use only loop invariant operands 01398 /// that are safe to hoist, this instruction is called to do the dirty work. 01399 /// 01400 bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) { 01401 // First check whether we should hoist this instruction. 01402 if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) { 01403 // If not, try unfolding a hoistable load. 01404 MI = ExtractHoistableLoad(MI); 01405 if (!MI) return false; 01406 } 01407 01408 // Now move the instructions to the predecessor, inserting it before any 01409 // terminator instructions. 01410 DEBUG({ 01411 dbgs() << "Hoisting " << *MI; 01412 if (Preheader->getBasicBlock()) 01413 dbgs() << " to MachineBasicBlock " 01414 << Preheader->getName(); 01415 if (MI->getParent()->getBasicBlock()) 01416 dbgs() << " from MachineBasicBlock " 01417 << MI->getParent()->getName(); 01418 dbgs() << "\n"; 01419 }); 01420 01421 // If this is the first instruction being hoisted to the preheader, 01422 // initialize the CSE map with potential common expressions. 01423 if (FirstInLoop) { 01424 InitCSEMap(Preheader); 01425 FirstInLoop = false; 01426 } 01427 01428 // Look for opportunity to CSE the hoisted instruction. 01429 unsigned Opcode = MI->getOpcode(); 01430 DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator 01431 CI = CSEMap.find(Opcode); 01432 if (!EliminateCSE(MI, CI)) { 01433 // Otherwise, splice the instruction to the preheader. 01434 Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI); 01435 01436 // Update register pressure for BBs from header to this block. 01437 UpdateBackTraceRegPressure(MI); 01438 01439 // Clear the kill flags of any register this instruction defines, 01440 // since they may need to be live throughout the entire loop 01441 // rather than just live for part of it. 01442 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 01443 MachineOperand &MO = MI->getOperand(i); 01444 if (MO.isReg() && MO.isDef() && !MO.isDead()) 01445 MRI->clearKillFlags(MO.getReg()); 01446 } 01447 01448 // Add to the CSE map. 01449 if (CI != CSEMap.end()) 01450 CI->second.push_back(MI); 01451 else { 01452 std::vector<const MachineInstr*> CSEMIs; 01453 CSEMIs.push_back(MI); 01454 CSEMap.insert(std::make_pair(Opcode, CSEMIs)); 01455 } 01456 } 01457 01458 ++NumHoisted; 01459 Changed = true; 01460 01461 return true; 01462 } 01463 01464 MachineBasicBlock *MachineLICM::getCurPreheader() { 01465 // Determine the block to which to hoist instructions. If we can't find a 01466 // suitable loop predecessor, we can't do any hoisting. 01467 01468 // If we've tried to get a preheader and failed, don't try again. 01469 if (CurPreheader == reinterpret_cast<MachineBasicBlock *>(-1)) 01470 return nullptr; 01471 01472 if (!CurPreheader) { 01473 CurPreheader = CurLoop->getLoopPreheader(); 01474 if (!CurPreheader) { 01475 MachineBasicBlock *Pred = CurLoop->getLoopPredecessor(); 01476 if (!Pred) { 01477 CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1); 01478 return nullptr; 01479 } 01480 01481 CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), this); 01482 if (!CurPreheader) { 01483 CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1); 01484 return nullptr; 01485 } 01486 } 01487 } 01488 return CurPreheader; 01489 }