LLVM API Documentation

LICM.cpp
Go to the documentation of this file.
00001 //===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This pass performs loop invariant code motion, attempting to remove as much
00011 // code from the body of a loop as possible.  It does this by either hoisting
00012 // code into the preheader block, or by sinking code to the exit blocks if it is
00013 // safe.  This pass also promotes must-aliased memory locations in the loop to
00014 // live in registers, thus hoisting and sinking "invariant" loads and stores.
00015 //
00016 // This pass uses alias analysis for two purposes:
00017 //
00018 //  1. Moving loop invariant loads and calls out of loops.  If we can determine
00019 //     that a load or call inside of a loop never aliases anything stored to,
00020 //     we can hoist it or sink it like any other instruction.
00021 //  2. Scalar Promotion of Memory - If there is a store instruction inside of
00022 //     the loop, we try to move the store to happen AFTER the loop instead of
00023 //     inside of the loop.  This can only happen if a few conditions are true:
00024 //       A. The pointer stored through is loop invariant
00025 //       B. There are no stores or loads in the loop which _may_ alias the
00026 //          pointer.  There are no calls in the loop which mod/ref the pointer.
00027 //     If these conditions are true, we can promote the loads and stores in the
00028 //     loop of the pointer to use a temporary alloca'd variable.  We then use
00029 //     the SSAUpdater to construct the appropriate SSA form for the value.
00030 //
00031 //===----------------------------------------------------------------------===//
00032 
00033 #include "llvm/Transforms/Scalar.h"
00034 #include "llvm/ADT/Statistic.h"
00035 #include "llvm/Analysis/AliasAnalysis.h"
00036 #include "llvm/Analysis/AliasSetTracker.h"
00037 #include "llvm/Analysis/ConstantFolding.h"
00038 #include "llvm/Analysis/LoopInfo.h"
00039 #include "llvm/Analysis/LoopPass.h"
00040 #include "llvm/Analysis/ScalarEvolution.h"
00041 #include "llvm/Analysis/ValueTracking.h"
00042 #include "llvm/IR/CFG.h"
00043 #include "llvm/IR/Constants.h"
00044 #include "llvm/IR/DataLayout.h"
00045 #include "llvm/IR/DerivedTypes.h"
00046 #include "llvm/IR/Dominators.h"
00047 #include "llvm/IR/Instructions.h"
00048 #include "llvm/IR/IntrinsicInst.h"
00049 #include "llvm/IR/LLVMContext.h"
00050 #include "llvm/IR/Metadata.h"
00051 #include "llvm/IR/PredIteratorCache.h"
00052 #include "llvm/Support/CommandLine.h"
00053 #include "llvm/Support/Debug.h"
00054 #include "llvm/Support/raw_ostream.h"
00055 #include "llvm/Target/TargetLibraryInfo.h"
00056 #include "llvm/Transforms/Utils/Local.h"
00057 #include "llvm/Transforms/Utils/LoopUtils.h"
00058 #include "llvm/Transforms/Utils/SSAUpdater.h"
00059 #include <algorithm>
00060 using namespace llvm;
00061 
00062 #define DEBUG_TYPE "licm"
00063 
00064 STATISTIC(NumSunk      , "Number of instructions sunk out of loop");
00065 STATISTIC(NumHoisted   , "Number of instructions hoisted out of loop");
00066 STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
00067 STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk");
00068 STATISTIC(NumPromoted  , "Number of memory locations promoted to registers");
00069 
00070 static cl::opt<bool>
00071 DisablePromotion("disable-licm-promotion", cl::Hidden,
00072                  cl::desc("Disable memory promotion in LICM pass"));
00073 
00074 namespace {
00075   struct LICM : public LoopPass {
00076     static char ID; // Pass identification, replacement for typeid
00077     LICM() : LoopPass(ID) {
00078       initializeLICMPass(*PassRegistry::getPassRegistry());
00079     }
00080 
00081     bool runOnLoop(Loop *L, LPPassManager &LPM) override;
00082 
00083     /// This transformation requires natural loop information & requires that
00084     /// loop preheaders be inserted into the CFG...
00085     ///
00086     void getAnalysisUsage(AnalysisUsage &AU) const override {
00087       AU.setPreservesCFG();
00088       AU.addRequired<DominatorTreeWrapperPass>();
00089       AU.addRequired<LoopInfo>();
00090       AU.addRequiredID(LoopSimplifyID);
00091       AU.addPreservedID(LoopSimplifyID);
00092       AU.addRequiredID(LCSSAID);
00093       AU.addPreservedID(LCSSAID);
00094       AU.addRequired<AliasAnalysis>();
00095       AU.addPreserved<AliasAnalysis>();
00096       AU.addPreserved<ScalarEvolution>();
00097       AU.addRequired<TargetLibraryInfo>();
00098     }
00099 
00100     using llvm::Pass::doFinalization;
00101 
00102     bool doFinalization() override {
00103       assert(LoopToAliasSetMap.empty() && "Didn't free loop alias sets");
00104       return false;
00105     }
00106 
00107   private:
00108     AliasAnalysis *AA;       // Current AliasAnalysis information
00109     LoopInfo      *LI;       // Current LoopInfo
00110     DominatorTree *DT;       // Dominator Tree for the current Loop.
00111 
00112     const DataLayout *DL;    // DataLayout for constant folding.
00113     TargetLibraryInfo *TLI;  // TargetLibraryInfo for constant folding.
00114 
00115     // State that is updated as we process loops.
00116     bool Changed;            // Set to true when we change anything.
00117     BasicBlock *Preheader;   // The preheader block of the current loop...
00118     Loop *CurLoop;           // The current loop we are working on...
00119     AliasSetTracker *CurAST; // AliasSet information for the current loop...
00120     bool MayThrow;           // The current loop contains an instruction which
00121                              // may throw, thus preventing code motion of
00122                              // instructions with side effects.
00123     DenseMap<Loop*, AliasSetTracker*> LoopToAliasSetMap;
00124 
00125     /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
00126     void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To,
00127                                  Loop *L) override;
00128 
00129     /// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias
00130     /// set.
00131     void deleteAnalysisValue(Value *V, Loop *L) override;
00132 
00133     /// SinkRegion - Walk the specified region of the CFG (defined by all blocks
00134     /// dominated by the specified block, and that are in the current loop) in
00135     /// reverse depth first order w.r.t the DominatorTree.  This allows us to
00136     /// visit uses before definitions, allowing us to sink a loop body in one
00137     /// pass without iteration.
00138     ///
00139     void SinkRegion(DomTreeNode *N);
00140 
00141     /// HoistRegion - Walk the specified region of the CFG (defined by all
00142     /// blocks dominated by the specified block, and that are in the current
00143     /// loop) in depth first order w.r.t the DominatorTree.  This allows us to
00144     /// visit definitions before uses, allowing us to hoist a loop body in one
00145     /// pass without iteration.
00146     ///
00147     void HoistRegion(DomTreeNode *N);
00148 
00149     /// inSubLoop - Little predicate that returns true if the specified basic
00150     /// block is in a subloop of the current one, not the current one itself.
00151     ///
00152     bool inSubLoop(BasicBlock *BB) {
00153       assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop");
00154       return LI->getLoopFor(BB) != CurLoop;
00155     }
00156 
00157     /// sink - When an instruction is found to only be used outside of the loop,
00158     /// this function moves it to the exit blocks and patches up SSA form as
00159     /// needed.
00160     ///
00161     void sink(Instruction &I);
00162 
00163     /// hoist - When an instruction is found to only use loop invariant operands
00164     /// that is safe to hoist, this instruction is called to do the dirty work.
00165     ///
00166     void hoist(Instruction &I);
00167 
00168     /// isSafeToExecuteUnconditionally - Only sink or hoist an instruction if it
00169     /// is not a trapping instruction or if it is a trapping instruction and is
00170     /// guaranteed to execute.
00171     ///
00172     bool isSafeToExecuteUnconditionally(Instruction &I);
00173 
00174     /// isGuaranteedToExecute - Check that the instruction is guaranteed to
00175     /// execute.
00176     ///
00177     bool isGuaranteedToExecute(Instruction &I);
00178 
00179     /// pointerInvalidatedByLoop - Return true if the body of this loop may
00180     /// store into the memory location pointed to by V.
00181     ///
00182     bool pointerInvalidatedByLoop(Value *V, uint64_t Size,
00183                                   const AAMDNodes &AAInfo) {
00184       // Check to see if any of the basic blocks in CurLoop invalidate *V.
00185       return CurAST->getAliasSetForPointer(V, Size, AAInfo).isMod();
00186     }
00187 
00188     bool canSinkOrHoistInst(Instruction &I);
00189     bool isNotUsedInLoop(Instruction &I);
00190 
00191     void PromoteAliasSet(AliasSet &AS,
00192                          SmallVectorImpl<BasicBlock*> &ExitBlocks,
00193                          SmallVectorImpl<Instruction*> &InsertPts,
00194                          PredIteratorCache &PIC);
00195 
00196     /// \brief Create a copy of the instruction in the exit block and patch up
00197     /// SSA.
00198     /// PN is a user of I in ExitBlock that can be used to get the number and
00199     /// list of predecessors fast.
00200     Instruction *CloneInstructionInExitBlock(Instruction &I,
00201                                              BasicBlock &ExitBlock,
00202                                              PHINode &PN);
00203   };
00204 }
00205 
00206 char LICM::ID = 0;
00207 INITIALIZE_PASS_BEGIN(LICM, "licm", "Loop Invariant Code Motion", false, false)
00208 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
00209 INITIALIZE_PASS_DEPENDENCY(LoopInfo)
00210 INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
00211 INITIALIZE_PASS_DEPENDENCY(LCSSA)
00212 INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
00213 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
00214 INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
00215 INITIALIZE_PASS_END(LICM, "licm", "Loop Invariant Code Motion", false, false)
00216 
00217 Pass *llvm::createLICMPass() { return new LICM(); }
00218 
00219 /// Hoist expressions out of the specified loop. Note, alias info for inner
00220 /// loop is not preserved so it is not a good idea to run LICM multiple
00221 /// times on one loop.
00222 ///
00223 bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
00224   if (skipOptnoneFunction(L))
00225     return false;
00226 
00227   Changed = false;
00228 
00229   // Get our Loop and Alias Analysis information...
00230   LI = &getAnalysis<LoopInfo>();
00231   AA = &getAnalysis<AliasAnalysis>();
00232   DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
00233 
00234   DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
00235   DL = DLP ? &DLP->getDataLayout() : nullptr;
00236   TLI = &getAnalysis<TargetLibraryInfo>();
00237 
00238   assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
00239 
00240   CurAST = new AliasSetTracker(*AA);
00241   // Collect Alias info from subloops.
00242   for (Loop::iterator LoopItr = L->begin(), LoopItrE = L->end();
00243        LoopItr != LoopItrE; ++LoopItr) {
00244     Loop *InnerL = *LoopItr;
00245     AliasSetTracker *InnerAST = LoopToAliasSetMap[InnerL];
00246     assert(InnerAST && "Where is my AST?");
00247 
00248     // What if InnerLoop was modified by other passes ?
00249     CurAST->add(*InnerAST);
00250 
00251     // Once we've incorporated the inner loop's AST into ours, we don't need the
00252     // subloop's anymore.
00253     delete InnerAST;
00254     LoopToAliasSetMap.erase(InnerL);
00255   }
00256 
00257   CurLoop = L;
00258 
00259   // Get the preheader block to move instructions into...
00260   Preheader = L->getLoopPreheader();
00261 
00262   // Loop over the body of this loop, looking for calls, invokes, and stores.
00263   // Because subloops have already been incorporated into AST, we skip blocks in
00264   // subloops.
00265   //
00266   for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
00267        I != E; ++I) {
00268     BasicBlock *BB = *I;
00269     if (LI->getLoopFor(BB) == L)        // Ignore blocks in subloops.
00270       CurAST->add(*BB);                 // Incorporate the specified basic block
00271   }
00272 
00273   MayThrow = false;
00274   // TODO: We've already searched for instructions which may throw in subloops.
00275   // We may want to reuse this information.
00276   for (Loop::block_iterator BB = L->block_begin(), BBE = L->block_end();
00277        (BB != BBE) && !MayThrow ; ++BB)
00278     for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end();
00279          (I != E) && !MayThrow; ++I)
00280       MayThrow |= I->mayThrow();
00281 
00282   // We want to visit all of the instructions in this loop... that are not parts
00283   // of our subloops (they have already had their invariants hoisted out of
00284   // their loop, into this loop, so there is no need to process the BODIES of
00285   // the subloops).
00286   //
00287   // Traverse the body of the loop in depth first order on the dominator tree so
00288   // that we are guaranteed to see definitions before we see uses.  This allows
00289   // us to sink instructions in one pass, without iteration.  After sinking
00290   // instructions, we perform another pass to hoist them out of the loop.
00291   //
00292   if (L->hasDedicatedExits())
00293     SinkRegion(DT->getNode(L->getHeader()));
00294   if (Preheader)
00295     HoistRegion(DT->getNode(L->getHeader()));
00296 
00297   // Now that all loop invariants have been removed from the loop, promote any
00298   // memory references to scalars that we can.
00299   if (!DisablePromotion && (Preheader || L->hasDedicatedExits())) {
00300     SmallVector<BasicBlock *, 8> ExitBlocks;
00301     SmallVector<Instruction *, 8> InsertPts;
00302     PredIteratorCache PIC;
00303 
00304     // Loop over all of the alias sets in the tracker object.
00305     for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end();
00306          I != E; ++I)
00307       PromoteAliasSet(*I, ExitBlocks, InsertPts, PIC);
00308 
00309     // Once we have promoted values across the loop body we have to recursively
00310     // reform LCSSA as any nested loop may now have values defined within the
00311     // loop used in the outer loop.
00312     // FIXME: This is really heavy handed. It would be a bit better to use an
00313     // SSAUpdater strategy during promotion that was LCSSA aware and reformed
00314     // it as it went.
00315     if (Changed)
00316       formLCSSARecursively(*L, *DT, getAnalysisIfAvailable<ScalarEvolution>());
00317   }
00318 
00319   // Check that neither this loop nor its parent have had LCSSA broken. LICM is
00320   // specifically moving instructions across the loop boundary and so it is
00321   // especially in need of sanity checking here.
00322   assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!");
00323   assert((!L->getParentLoop() || L->getParentLoop()->isLCSSAForm(*DT)) &&
00324          "Parent loop not left in LCSSA form after LICM!");
00325 
00326   // Clear out loops state information for the next iteration
00327   CurLoop = nullptr;
00328   Preheader = nullptr;
00329 
00330   // If this loop is nested inside of another one, save the alias information
00331   // for when we process the outer loop.
00332   if (L->getParentLoop())
00333     LoopToAliasSetMap[L] = CurAST;
00334   else
00335     delete CurAST;
00336   return Changed;
00337 }
00338 
00339 /// SinkRegion - Walk the specified region of the CFG (defined by all blocks
00340 /// dominated by the specified block, and that are in the current loop) in
00341 /// reverse depth first order w.r.t the DominatorTree.  This allows us to visit
00342 /// uses before definitions, allowing us to sink a loop body in one pass without
00343 /// iteration.
00344 ///
00345 void LICM::SinkRegion(DomTreeNode *N) {
00346   assert(N != nullptr && "Null dominator tree node?");
00347   BasicBlock *BB = N->getBlock();
00348 
00349   // If this subregion is not in the top level loop at all, exit.
00350   if (!CurLoop->contains(BB)) return;
00351 
00352   // We are processing blocks in reverse dfo, so process children first.
00353   const std::vector<DomTreeNode*> &Children = N->getChildren();
00354   for (unsigned i = 0, e = Children.size(); i != e; ++i)
00355     SinkRegion(Children[i]);
00356 
00357   // Only need to process the contents of this block if it is not part of a
00358   // subloop (which would already have been processed).
00359   if (inSubLoop(BB)) return;
00360 
00361   for (BasicBlock::iterator II = BB->end(); II != BB->begin(); ) {
00362     Instruction &I = *--II;
00363 
00364     // If the instruction is dead, we would try to sink it because it isn't used
00365     // in the loop, instead, just delete it.
00366     if (isInstructionTriviallyDead(&I, TLI)) {
00367       DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n');
00368       ++II;
00369       CurAST->deleteValue(&I);
00370       I.eraseFromParent();
00371       Changed = true;
00372       continue;
00373     }
00374 
00375     // Check to see if we can sink this instruction to the exit blocks
00376     // of the loop.  We can do this if the all users of the instruction are
00377     // outside of the loop.  In this case, it doesn't even matter if the
00378     // operands of the instruction are loop invariant.
00379     //
00380     if (isNotUsedInLoop(I) && canSinkOrHoistInst(I)) {
00381       ++II;
00382       sink(I);
00383     }
00384   }
00385 }
00386 
00387 /// HoistRegion - Walk the specified region of the CFG (defined by all blocks
00388 /// dominated by the specified block, and that are in the current loop) in depth
00389 /// first order w.r.t the DominatorTree.  This allows us to visit definitions
00390 /// before uses, allowing us to hoist a loop body in one pass without iteration.
00391 ///
00392 void LICM::HoistRegion(DomTreeNode *N) {
00393   assert(N != nullptr && "Null dominator tree node?");
00394   BasicBlock *BB = N->getBlock();
00395 
00396   // If this subregion is not in the top level loop at all, exit.
00397   if (!CurLoop->contains(BB)) return;
00398 
00399   // Only need to process the contents of this block if it is not part of a
00400   // subloop (which would already have been processed).
00401   if (!inSubLoop(BB))
00402     for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ) {
00403       Instruction &I = *II++;
00404 
00405       // Try constant folding this instruction.  If all the operands are
00406       // constants, it is technically hoistable, but it would be better to just
00407       // fold it.
00408       if (Constant *C = ConstantFoldInstruction(&I, DL, TLI)) {
00409         DEBUG(dbgs() << "LICM folding inst: " << I << "  --> " << *C << '\n');
00410         CurAST->copyValue(&I, C);
00411         CurAST->deleteValue(&I);
00412         I.replaceAllUsesWith(C);
00413         I.eraseFromParent();
00414         continue;
00415       }
00416 
00417       // Try hoisting the instruction out to the preheader.  We can only do this
00418       // if all of the operands of the instruction are loop invariant and if it
00419       // is safe to hoist the instruction.
00420       //
00421       if (CurLoop->hasLoopInvariantOperands(&I) && canSinkOrHoistInst(I) &&
00422           isSafeToExecuteUnconditionally(I))
00423         hoist(I);
00424     }
00425 
00426   const std::vector<DomTreeNode*> &Children = N->getChildren();
00427   for (unsigned i = 0, e = Children.size(); i != e; ++i)
00428     HoistRegion(Children[i]);
00429 }
00430 
00431 /// canSinkOrHoistInst - Return true if the hoister and sinker can handle this
00432 /// instruction.
00433 ///
00434 bool LICM::canSinkOrHoistInst(Instruction &I) {
00435   // Loads have extra constraints we have to verify before we can hoist them.
00436   if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
00437     if (!LI->isUnordered())
00438       return false;        // Don't hoist volatile/atomic loads!
00439 
00440     // Loads from constant memory are always safe to move, even if they end up
00441     // in the same alias set as something that ends up being modified.
00442     if (AA->pointsToConstantMemory(LI->getOperand(0)))
00443       return true;
00444     if (LI->getMetadata("invariant.load"))
00445       return true;
00446 
00447     // Don't hoist loads which have may-aliased stores in loop.
00448     uint64_t Size = 0;
00449     if (LI->getType()->isSized())
00450       Size = AA->getTypeStoreSize(LI->getType());
00451 
00452     AAMDNodes AAInfo;
00453     LI->getAAMetadata(AAInfo);
00454 
00455     return !pointerInvalidatedByLoop(LI->getOperand(0), Size, AAInfo);
00456   } else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
00457     // Don't sink or hoist dbg info; it's legal, but not useful.
00458     if (isa<DbgInfoIntrinsic>(I))
00459       return false;
00460 
00461     // Handle simple cases by querying alias analysis.
00462     AliasAnalysis::ModRefBehavior Behavior = AA->getModRefBehavior(CI);
00463     if (Behavior == AliasAnalysis::DoesNotAccessMemory)
00464       return true;
00465     if (AliasAnalysis::onlyReadsMemory(Behavior)) {
00466       // If this call only reads from memory and there are no writes to memory
00467       // in the loop, we can hoist or sink the call as appropriate.
00468       bool FoundMod = false;
00469       for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end();
00470            I != E; ++I) {
00471         AliasSet &AS = *I;
00472         if (!AS.isForwardingAliasSet() && AS.isMod()) {
00473           FoundMod = true;
00474           break;
00475         }
00476       }
00477       if (!FoundMod) return true;
00478     }
00479 
00480     // FIXME: This should use mod/ref information to see if we can hoist or
00481     // sink the call.
00482 
00483     return false;
00484   }
00485 
00486   // Only these instructions are hoistable/sinkable.
00487   if (!isa<BinaryOperator>(I) && !isa<CastInst>(I) && !isa<SelectInst>(I) &&
00488       !isa<GetElementPtrInst>(I) && !isa<CmpInst>(I) &&
00489       !isa<InsertElementInst>(I) && !isa<ExtractElementInst>(I) &&
00490       !isa<ShuffleVectorInst>(I) && !isa<ExtractValueInst>(I) &&
00491       !isa<InsertValueInst>(I))
00492     return false;
00493 
00494   return isSafeToExecuteUnconditionally(I);
00495 }
00496 
00497 /// \brief Returns true if a PHINode is a trivially replaceable with an
00498 /// Instruction.
00499 ///
00500 /// This is true when all incoming values are that instruction. This pattern
00501 /// occurs most often with LCSSA PHI nodes.
00502 static bool isTriviallyReplacablePHI(PHINode &PN, Instruction &I) {
00503   for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
00504     if (PN.getIncomingValue(i) != &I)
00505       return false;
00506 
00507   return true;
00508 }
00509 
00510 /// isNotUsedInLoop - Return true if the only users of this instruction are
00511 /// outside of the loop.  If this is true, we can sink the instruction to the
00512 /// exit blocks of the loop.
00513 ///
00514 bool LICM::isNotUsedInLoop(Instruction &I) {
00515   for (User *U : I.users()) {
00516     Instruction *UI = cast<Instruction>(U);
00517     if (PHINode *PN = dyn_cast<PHINode>(UI)) {
00518       // A PHI node where all of the incoming values are this instruction are
00519       // special -- they can just be RAUW'ed with the instruction and thus
00520       // don't require a use in the predecessor. This is a particular important
00521       // special case because it is the pattern found in LCSSA form.
00522       if (isTriviallyReplacablePHI(*PN, I)) {
00523         if (CurLoop->contains(PN))
00524           return false;
00525         else
00526           continue;
00527       }
00528 
00529       // Otherwise, PHI node uses occur in predecessor blocks if the incoming
00530       // values. Check for such a use being inside the loop.
00531       for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
00532         if (PN->getIncomingValue(i) == &I)
00533           if (CurLoop->contains(PN->getIncomingBlock(i)))
00534             return false;
00535 
00536       continue;
00537     }
00538 
00539     if (CurLoop->contains(UI))
00540       return false;
00541   }
00542   return true;
00543 }
00544 
00545 Instruction *LICM::CloneInstructionInExitBlock(Instruction &I,
00546                                                BasicBlock &ExitBlock,
00547                                                PHINode &PN) {
00548   Instruction *New = I.clone();
00549   ExitBlock.getInstList().insert(ExitBlock.getFirstInsertionPt(), New);
00550   if (!I.getName().empty()) New->setName(I.getName() + ".le");
00551 
00552   // Build LCSSA PHI nodes for any in-loop operands. Note that this is
00553   // particularly cheap because we can rip off the PHI node that we're
00554   // replacing for the number and blocks of the predecessors.
00555   // OPT: If this shows up in a profile, we can instead finish sinking all
00556   // invariant instructions, and then walk their operands to re-establish
00557   // LCSSA. That will eliminate creating PHI nodes just to nuke them when
00558   // sinking bottom-up.
00559   for (User::op_iterator OI = New->op_begin(), OE = New->op_end(); OI != OE;
00560        ++OI)
00561     if (Instruction *OInst = dyn_cast<Instruction>(*OI))
00562       if (Loop *OLoop = LI->getLoopFor(OInst->getParent()))
00563         if (!OLoop->contains(&PN)) {
00564           PHINode *OpPN =
00565               PHINode::Create(OInst->getType(), PN.getNumIncomingValues(),
00566                               OInst->getName() + ".lcssa", ExitBlock.begin());
00567           for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
00568             OpPN->addIncoming(OInst, PN.getIncomingBlock(i));
00569           *OI = OpPN;
00570         }
00571   return New;
00572 }
00573 
00574 /// sink - When an instruction is found to only be used outside of the loop,
00575 /// this function moves it to the exit blocks and patches up SSA form as needed.
00576 /// This method is guaranteed to remove the original instruction from its
00577 /// position, and may either delete it or move it to outside of the loop.
00578 ///
00579 void LICM::sink(Instruction &I) {
00580   DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
00581 
00582   if (isa<LoadInst>(I)) ++NumMovedLoads;
00583   else if (isa<CallInst>(I)) ++NumMovedCalls;
00584   ++NumSunk;
00585   Changed = true;
00586 
00587 #ifndef NDEBUG
00588   SmallVector<BasicBlock *, 32> ExitBlocks;
00589   CurLoop->getUniqueExitBlocks(ExitBlocks);
00590   SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(), ExitBlocks.end());
00591 #endif
00592 
00593   // Clones of this instruction. Don't create more than one per exit block!
00594   SmallDenseMap<BasicBlock *, Instruction *, 32> SunkCopies;
00595 
00596   // If this instruction is only used outside of the loop, then all users are
00597   // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of
00598   // the instruction.
00599   while (!I.use_empty()) {
00600     Instruction *User = I.user_back();
00601     if (!DT->isReachableFromEntry(User->getParent())) {
00602       User->replaceUsesOfWith(&I, UndefValue::get(I.getType()));
00603       continue;
00604     }
00605     // The user must be a PHI node.
00606     PHINode *PN = cast<PHINode>(User);
00607 
00608     BasicBlock *ExitBlock = PN->getParent();
00609     assert(ExitBlockSet.count(ExitBlock) &&
00610            "The LCSSA PHI is not in an exit block!");
00611 
00612     Instruction *New;
00613     auto It = SunkCopies.find(ExitBlock);
00614     if (It != SunkCopies.end())
00615       New = It->second;
00616     else
00617       New = SunkCopies[ExitBlock] =
00618           CloneInstructionInExitBlock(I, *ExitBlock, *PN);
00619 
00620     PN->replaceAllUsesWith(New);
00621     PN->eraseFromParent();
00622   }
00623 
00624   CurAST->deleteValue(&I);
00625   I.eraseFromParent();
00626 }
00627 
00628 /// hoist - When an instruction is found to only use loop invariant operands
00629 /// that is safe to hoist, this instruction is called to do the dirty work.
00630 ///
00631 void LICM::hoist(Instruction &I) {
00632   DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": "
00633         << I << "\n");
00634 
00635   // Move the new node to the Preheader, before its terminator.
00636   I.moveBefore(Preheader->getTerminator());
00637 
00638   if (isa<LoadInst>(I)) ++NumMovedLoads;
00639   else if (isa<CallInst>(I)) ++NumMovedCalls;
00640   ++NumHoisted;
00641   Changed = true;
00642 }
00643 
00644 /// isSafeToExecuteUnconditionally - Only sink or hoist an instruction if it is
00645 /// not a trapping instruction or if it is a trapping instruction and is
00646 /// guaranteed to execute.
00647 ///
00648 bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) {
00649   // If it is not a trapping instruction, it is always safe to hoist.
00650   if (isSafeToSpeculativelyExecute(&Inst, DL))
00651     return true;
00652 
00653   return isGuaranteedToExecute(Inst);
00654 }
00655 
00656 bool LICM::isGuaranteedToExecute(Instruction &Inst) {
00657 
00658   // Somewhere in this loop there is an instruction which may throw and make us
00659   // exit the loop.
00660   if (MayThrow)
00661     return false;
00662 
00663   // Otherwise we have to check to make sure that the instruction dominates all
00664   // of the exit blocks.  If it doesn't, then there is a path out of the loop
00665   // which does not execute this instruction, so we can't hoist it.
00666 
00667   // If the instruction is in the header block for the loop (which is very
00668   // common), it is always guaranteed to dominate the exit blocks.  Since this
00669   // is a common case, and can save some work, check it now.
00670   if (Inst.getParent() == CurLoop->getHeader())
00671     return true;
00672 
00673   // Get the exit blocks for the current loop.
00674   SmallVector<BasicBlock*, 8> ExitBlocks;
00675   CurLoop->getExitBlocks(ExitBlocks);
00676 
00677   // Verify that the block dominates each of the exit blocks of the loop.
00678   for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
00679     if (!DT->dominates(Inst.getParent(), ExitBlocks[i]))
00680       return false;
00681 
00682   // As a degenerate case, if the loop is statically infinite then we haven't
00683   // proven anything since there are no exit blocks.
00684   if (ExitBlocks.empty())
00685     return false;
00686 
00687   return true;
00688 }
00689 
00690 namespace {
00691   class LoopPromoter : public LoadAndStorePromoter {
00692     Value *SomePtr;  // Designated pointer to store to.
00693     SmallPtrSetImpl<Value*> &PointerMustAliases;
00694     SmallVectorImpl<BasicBlock*> &LoopExitBlocks;
00695     SmallVectorImpl<Instruction*> &LoopInsertPts;
00696     PredIteratorCache &PredCache;
00697     AliasSetTracker &AST;
00698     LoopInfo &LI;
00699     DebugLoc DL;
00700     int Alignment;
00701     AAMDNodes AATags;
00702 
00703     Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const {
00704       if (Instruction *I = dyn_cast<Instruction>(V))
00705         if (Loop *L = LI.getLoopFor(I->getParent()))
00706           if (!L->contains(BB)) {
00707             // We need to create an LCSSA PHI node for the incoming value and
00708             // store that.
00709             PHINode *PN = PHINode::Create(
00710                 I->getType(), PredCache.GetNumPreds(BB),
00711                 I->getName() + ".lcssa", BB->begin());
00712             for (BasicBlock **PI = PredCache.GetPreds(BB); *PI; ++PI)
00713               PN->addIncoming(I, *PI);
00714             return PN;
00715           }
00716       return V;
00717     }
00718 
00719   public:
00720     LoopPromoter(Value *SP, const SmallVectorImpl<Instruction *> &Insts,
00721                  SSAUpdater &S, SmallPtrSetImpl<Value *> &PMA,
00722                  SmallVectorImpl<BasicBlock *> &LEB,
00723                  SmallVectorImpl<Instruction *> &LIP, PredIteratorCache &PIC,
00724                  AliasSetTracker &ast, LoopInfo &li, DebugLoc dl, int alignment,
00725                  const AAMDNodes &AATags)
00726         : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
00727           LoopExitBlocks(LEB), LoopInsertPts(LIP), PredCache(PIC), AST(ast),
00728           LI(li), DL(dl), Alignment(alignment), AATags(AATags) {}
00729 
00730     bool isInstInList(Instruction *I,
00731                       const SmallVectorImpl<Instruction*> &) const override {
00732       Value *Ptr;
00733       if (LoadInst *LI = dyn_cast<LoadInst>(I))
00734         Ptr = LI->getOperand(0);
00735       else
00736         Ptr = cast<StoreInst>(I)->getPointerOperand();
00737       return PointerMustAliases.count(Ptr);
00738     }
00739 
00740     void doExtraRewritesBeforeFinalDeletion() const override {
00741       // Insert stores after in the loop exit blocks.  Each exit block gets a
00742       // store of the live-out values that feed them.  Since we've already told
00743       // the SSA updater about the defs in the loop and the preheader
00744       // definition, it is all set and we can start using it.
00745       for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
00746         BasicBlock *ExitBlock = LoopExitBlocks[i];
00747         Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
00748         LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock);
00749         Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock);
00750         Instruction *InsertPos = LoopInsertPts[i];
00751         StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
00752         NewSI->setAlignment(Alignment);
00753         NewSI->setDebugLoc(DL);
00754         if (AATags) NewSI->setAAMetadata(AATags);
00755       }
00756     }
00757 
00758     void replaceLoadWithValue(LoadInst *LI, Value *V) const override {
00759       // Update alias analysis.
00760       AST.copyValue(LI, V);
00761     }
00762     void instructionDeleted(Instruction *I) const override {
00763       AST.deleteValue(I);
00764     }
00765   };
00766 } // end anon namespace
00767 
00768 /// PromoteAliasSet - Try to promote memory values to scalars by sinking
00769 /// stores out of the loop and moving loads to before the loop.  We do this by
00770 /// looping over the stores in the loop, looking for stores to Must pointers
00771 /// which are loop invariant.
00772 ///
00773 void LICM::PromoteAliasSet(AliasSet &AS,
00774                            SmallVectorImpl<BasicBlock*> &ExitBlocks,
00775                            SmallVectorImpl<Instruction*> &InsertPts,
00776                            PredIteratorCache &PIC) {
00777   // We can promote this alias set if it has a store, if it is a "Must" alias
00778   // set, if the pointer is loop invariant, and if we are not eliminating any
00779   // volatile loads or stores.
00780   if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
00781       AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue()))
00782     return;
00783 
00784   assert(!AS.empty() &&
00785          "Must alias set should have at least one pointer element in it!");
00786   Value *SomePtr = AS.begin()->getValue();
00787 
00788   // It isn't safe to promote a load/store from the loop if the load/store is
00789   // conditional.  For example, turning:
00790   //
00791   //    for () { if (c) *P += 1; }
00792   //
00793   // into:
00794   //
00795   //    tmp = *P;  for () { if (c) tmp +=1; } *P = tmp;
00796   //
00797   // is not safe, because *P may only be valid to access if 'c' is true.
00798   //
00799   // It is safe to promote P if all uses are direct load/stores and if at
00800   // least one is guaranteed to be executed.
00801   bool GuaranteedToExecute = false;
00802 
00803   SmallVector<Instruction*, 64> LoopUses;
00804   SmallPtrSet<Value*, 4> PointerMustAliases;
00805 
00806   // We start with an alignment of one and try to find instructions that allow
00807   // us to prove better alignment.
00808   unsigned Alignment = 1;
00809   AAMDNodes AATags;
00810 
00811   // Check that all of the pointers in the alias set have the same type.  We
00812   // cannot (yet) promote a memory location that is loaded and stored in
00813   // different sizes.  While we are at it, collect alignment and AA info.
00814   for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) {
00815     Value *ASIV = ASI->getValue();
00816     PointerMustAliases.insert(ASIV);
00817 
00818     // Check that all of the pointers in the alias set have the same type.  We
00819     // cannot (yet) promote a memory location that is loaded and stored in
00820     // different sizes.
00821     if (SomePtr->getType() != ASIV->getType())
00822       return;
00823 
00824     for (User *U : ASIV->users()) {
00825       // Ignore instructions that are outside the loop.
00826       Instruction *UI = dyn_cast<Instruction>(U);
00827       if (!UI || !CurLoop->contains(UI))
00828         continue;
00829 
00830       // If there is an non-load/store instruction in the loop, we can't promote
00831       // it.
00832       if (LoadInst *load = dyn_cast<LoadInst>(UI)) {
00833         assert(!load->isVolatile() && "AST broken");
00834         if (!load->isSimple())
00835           return;
00836       } else if (StoreInst *store = dyn_cast<StoreInst>(UI)) {
00837         // Stores *of* the pointer are not interesting, only stores *to* the
00838         // pointer.
00839         if (UI->getOperand(1) != ASIV)
00840           continue;
00841         assert(!store->isVolatile() && "AST broken");
00842         if (!store->isSimple())
00843           return;
00844 
00845         // Note that we only check GuaranteedToExecute inside the store case
00846         // so that we do not introduce stores where they did not exist before
00847         // (which would break the LLVM concurrency model).
00848 
00849         // If the alignment of this instruction allows us to specify a more
00850         // restrictive (and performant) alignment and if we are sure this
00851         // instruction will be executed, update the alignment.
00852         // Larger is better, with the exception of 0 being the best alignment.
00853         unsigned InstAlignment = store->getAlignment();
00854         if ((InstAlignment > Alignment || InstAlignment == 0) && Alignment != 0)
00855           if (isGuaranteedToExecute(*UI)) {
00856             GuaranteedToExecute = true;
00857             Alignment = InstAlignment;
00858           }
00859 
00860         if (!GuaranteedToExecute)
00861           GuaranteedToExecute = isGuaranteedToExecute(*UI);
00862 
00863       } else
00864         return; // Not a load or store.
00865 
00866       // Merge the AA tags.
00867       if (LoopUses.empty()) {
00868         // On the first load/store, just take its AA tags.
00869         UI->getAAMetadata(AATags);
00870       } else if (AATags) {
00871         UI->getAAMetadata(AATags, /* Merge = */ true);
00872       }
00873 
00874       LoopUses.push_back(UI);
00875     }
00876   }
00877 
00878   // If there isn't a guaranteed-to-execute instruction, we can't promote.
00879   if (!GuaranteedToExecute)
00880     return;
00881 
00882   // Otherwise, this is safe to promote, lets do it!
00883   DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " <<*SomePtr<<'\n');
00884   Changed = true;
00885   ++NumPromoted;
00886 
00887   // Grab a debug location for the inserted loads/stores; given that the
00888   // inserted loads/stores have little relation to the original loads/stores,
00889   // this code just arbitrarily picks a location from one, since any debug
00890   // location is better than none.
00891   DebugLoc DL = LoopUses[0]->getDebugLoc();
00892 
00893   // Figure out the loop exits and their insertion points, if this is the
00894   // first promotion.
00895   if (ExitBlocks.empty()) {
00896     CurLoop->getUniqueExitBlocks(ExitBlocks);
00897     InsertPts.resize(ExitBlocks.size());
00898     for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
00899       InsertPts[i] = ExitBlocks[i]->getFirstInsertionPt();
00900   }
00901 
00902   // We use the SSAUpdater interface to insert phi nodes as required.
00903   SmallVector<PHINode*, 16> NewPHIs;
00904   SSAUpdater SSA(&NewPHIs);
00905   LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
00906                         InsertPts, PIC, *CurAST, *LI, DL, Alignment, AATags);
00907 
00908   // Set up the preheader to have a definition of the value.  It is the live-out
00909   // value from the preheader that uses in the loop will use.
00910   LoadInst *PreheaderLoad =
00911     new LoadInst(SomePtr, SomePtr->getName()+".promoted",
00912                  Preheader->getTerminator());
00913   PreheaderLoad->setAlignment(Alignment);
00914   PreheaderLoad->setDebugLoc(DL);
00915   if (AATags) PreheaderLoad->setAAMetadata(AATags);
00916   SSA.AddAvailableValue(Preheader, PreheaderLoad);
00917 
00918   // Rewrite all the loads in the loop and remember all the definitions from
00919   // stores in the loop.
00920   Promoter.run(LoopUses);
00921 
00922   // If the SSAUpdater didn't use the load in the preheader, just zap it now.
00923   if (PreheaderLoad->use_empty())
00924     PreheaderLoad->eraseFromParent();
00925 }
00926 
00927 
00928 /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
00929 void LICM::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L) {
00930   AliasSetTracker *AST = LoopToAliasSetMap.lookup(L);
00931   if (!AST)
00932     return;
00933 
00934   AST->copyValue(From, To);
00935 }
00936 
00937 /// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias
00938 /// set.
00939 void LICM::deleteAnalysisValue(Value *V, Loop *L) {
00940   AliasSetTracker *AST = LoopToAliasSetMap.lookup(L);
00941   if (!AST)
00942     return;
00943 
00944   AST->deleteValue(V);
00945 }