LLVM API Documentation
00001 //===-- UnrollLoopRuntime.cpp - Runtime Loop unrolling utilities ----------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This file implements some loop unrolling utilities for loops with run-time 00011 // trip counts. See LoopUnroll.cpp for unrolling loops with compile-time 00012 // trip counts. 00013 // 00014 // The functions in this file are used to generate extra code when the 00015 // run-time trip count modulo the unroll factor is not 0. When this is the 00016 // case, we need to generate code to execute these 'left over' iterations. 00017 // 00018 // The current strategy generates an if-then-else sequence prior to the 00019 // unrolled loop to execute the 'left over' iterations. Other strategies 00020 // include generate a loop before or after the unrolled loop. 00021 // 00022 //===----------------------------------------------------------------------===// 00023 00024 #include "llvm/Transforms/Utils/UnrollLoop.h" 00025 #include "llvm/ADT/Statistic.h" 00026 #include "llvm/Analysis/LoopIterator.h" 00027 #include "llvm/Analysis/LoopPass.h" 00028 #include "llvm/Analysis/ScalarEvolution.h" 00029 #include "llvm/Analysis/ScalarEvolutionExpander.h" 00030 #include "llvm/IR/BasicBlock.h" 00031 #include "llvm/Support/Debug.h" 00032 #include "llvm/Support/raw_ostream.h" 00033 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 00034 #include "llvm/Transforms/Utils/Cloning.h" 00035 #include <algorithm> 00036 00037 using namespace llvm; 00038 00039 #define DEBUG_TYPE "loop-unroll" 00040 00041 STATISTIC(NumRuntimeUnrolled, 00042 "Number of loops unrolled with run-time trip counts"); 00043 00044 /// Connect the unrolling prolog code to the original loop. 00045 /// The unrolling prolog code contains code to execute the 00046 /// 'extra' iterations if the run-time trip count modulo the 00047 /// unroll count is non-zero. 00048 /// 00049 /// This function performs the following: 00050 /// - Create PHI nodes at prolog end block to combine values 00051 /// that exit the prolog code and jump around the prolog. 00052 /// - Add a PHI operand to a PHI node at the loop exit block 00053 /// for values that exit the prolog and go around the loop. 00054 /// - Branch around the original loop if the trip count is less 00055 /// than the unroll factor. 00056 /// 00057 static void ConnectProlog(Loop *L, Value *TripCount, unsigned Count, 00058 BasicBlock *LastPrologBB, BasicBlock *PrologEnd, 00059 BasicBlock *OrigPH, BasicBlock *NewPH, 00060 ValueToValueMapTy &LVMap, Pass *P) { 00061 BasicBlock *Latch = L->getLoopLatch(); 00062 assert(Latch && "Loop must have a latch"); 00063 00064 // Create a PHI node for each outgoing value from the original loop 00065 // (which means it is an outgoing value from the prolog code too). 00066 // The new PHI node is inserted in the prolog end basic block. 00067 // The new PHI name is added as an operand of a PHI node in either 00068 // the loop header or the loop exit block. 00069 for (succ_iterator SBI = succ_begin(Latch), SBE = succ_end(Latch); 00070 SBI != SBE; ++SBI) { 00071 for (BasicBlock::iterator BBI = (*SBI)->begin(); 00072 PHINode *PN = dyn_cast<PHINode>(BBI); ++BBI) { 00073 00074 // Add a new PHI node to the prolog end block and add the 00075 // appropriate incoming values. 00076 PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName()+".unr", 00077 PrologEnd->getTerminator()); 00078 // Adding a value to the new PHI node from the original loop preheader. 00079 // This is the value that skips all the prolog code. 00080 if (L->contains(PN)) { 00081 NewPN->addIncoming(PN->getIncomingValueForBlock(NewPH), OrigPH); 00082 } else { 00083 NewPN->addIncoming(Constant::getNullValue(PN->getType()), OrigPH); 00084 } 00085 00086 Value *V = PN->getIncomingValueForBlock(Latch); 00087 if (Instruction *I = dyn_cast<Instruction>(V)) { 00088 if (L->contains(I)) { 00089 V = LVMap[I]; 00090 } 00091 } 00092 // Adding a value to the new PHI node from the last prolog block 00093 // that was created. 00094 NewPN->addIncoming(V, LastPrologBB); 00095 00096 // Update the existing PHI node operand with the value from the 00097 // new PHI node. How this is done depends on if the existing 00098 // PHI node is in the original loop block, or the exit block. 00099 if (L->contains(PN)) { 00100 PN->setIncomingValue(PN->getBasicBlockIndex(NewPH), NewPN); 00101 } else { 00102 PN->addIncoming(NewPN, PrologEnd); 00103 } 00104 } 00105 } 00106 00107 // Create a branch around the orignal loop, which is taken if the 00108 // trip count is less than the unroll factor. 00109 Instruction *InsertPt = PrologEnd->getTerminator(); 00110 Instruction *BrLoopExit = 00111 new ICmpInst(InsertPt, ICmpInst::ICMP_ULT, TripCount, 00112 ConstantInt::get(TripCount->getType(), Count)); 00113 BasicBlock *Exit = L->getUniqueExitBlock(); 00114 assert(Exit && "Loop must have a single exit block only"); 00115 // Split the exit to maintain loop canonicalization guarantees 00116 SmallVector<BasicBlock*, 4> Preds(pred_begin(Exit), pred_end(Exit)); 00117 if (!Exit->isLandingPad()) { 00118 SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", P); 00119 } else { 00120 SmallVector<BasicBlock*, 2> NewBBs; 00121 SplitLandingPadPredecessors(Exit, Preds, ".unr1-lcssa", ".unr2-lcssa", 00122 P, NewBBs); 00123 } 00124 // Add the branch to the exit block (around the unrolled loop) 00125 BranchInst::Create(Exit, NewPH, BrLoopExit, InsertPt); 00126 InsertPt->eraseFromParent(); 00127 } 00128 00129 /// Create a clone of the blocks in a loop and connect them together. 00130 /// This function doesn't create a clone of the loop structure. 00131 /// 00132 /// There are two value maps that are defined and used. VMap is 00133 /// for the values in the current loop instance. LVMap contains 00134 /// the values from the last loop instance. We need the LVMap values 00135 /// to update the initial values for the current loop instance. 00136 /// 00137 static void CloneLoopBlocks(Loop *L, 00138 bool FirstCopy, 00139 BasicBlock *InsertTop, 00140 BasicBlock *InsertBot, 00141 std::vector<BasicBlock *> &NewBlocks, 00142 LoopBlocksDFS &LoopBlocks, 00143 ValueToValueMapTy &VMap, 00144 ValueToValueMapTy &LVMap, 00145 LoopInfo *LI) { 00146 00147 BasicBlock *Preheader = L->getLoopPreheader(); 00148 BasicBlock *Header = L->getHeader(); 00149 BasicBlock *Latch = L->getLoopLatch(); 00150 Function *F = Header->getParent(); 00151 LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO(); 00152 LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO(); 00153 // For each block in the original loop, create a new copy, 00154 // and update the value map with the newly created values. 00155 for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { 00156 BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".unr", F); 00157 NewBlocks.push_back(NewBB); 00158 00159 if (Loop *ParentLoop = L->getParentLoop()) 00160 ParentLoop->addBasicBlockToLoop(NewBB, LI->getBase()); 00161 00162 VMap[*BB] = NewBB; 00163 if (Header == *BB) { 00164 // For the first block, add a CFG connection to this newly 00165 // created block 00166 InsertTop->getTerminator()->setSuccessor(0, NewBB); 00167 00168 // Change the incoming values to the ones defined in the 00169 // previously cloned loop. 00170 for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { 00171 PHINode *NewPHI = cast<PHINode>(VMap[I]); 00172 if (FirstCopy) { 00173 // We replace the first phi node with the value from the preheader 00174 VMap[I] = NewPHI->getIncomingValueForBlock(Preheader); 00175 NewBB->getInstList().erase(NewPHI); 00176 } else { 00177 // Update VMap with values from the previous block 00178 unsigned idx = NewPHI->getBasicBlockIndex(Latch); 00179 Value *InVal = NewPHI->getIncomingValue(idx); 00180 if (Instruction *I = dyn_cast<Instruction>(InVal)) 00181 if (L->contains(I)) 00182 InVal = LVMap[InVal]; 00183 NewPHI->setIncomingValue(idx, InVal); 00184 NewPHI->setIncomingBlock(idx, InsertTop); 00185 } 00186 } 00187 } 00188 00189 if (Latch == *BB) { 00190 VMap.erase((*BB)->getTerminator()); 00191 NewBB->getTerminator()->eraseFromParent(); 00192 BranchInst::Create(InsertBot, NewBB); 00193 } 00194 } 00195 // LastValueMap is updated with the values for the current loop 00196 // which are used the next time this function is called. 00197 for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end(); 00198 VI != VE; ++VI) { 00199 LVMap[VI->first] = VI->second; 00200 } 00201 } 00202 00203 /// Insert code in the prolog code when unrolling a loop with a 00204 /// run-time trip-count. 00205 /// 00206 /// This method assumes that the loop unroll factor is total number 00207 /// of loop bodes in the loop after unrolling. (Some folks refer 00208 /// to the unroll factor as the number of *extra* copies added). 00209 /// We assume also that the loop unroll factor is a power-of-two. So, after 00210 /// unrolling the loop, the number of loop bodies executed is 2, 00211 /// 4, 8, etc. Note - LLVM converts the if-then-sequence to a switch 00212 /// instruction in SimplifyCFG.cpp. Then, the backend decides how code for 00213 /// the switch instruction is generated. 00214 /// 00215 /// extraiters = tripcount % loopfactor 00216 /// if (extraiters == 0) jump Loop: 00217 /// if (extraiters == loopfactor) jump L1 00218 /// if (extraiters == loopfactor-1) jump L2 00219 /// ... 00220 /// L1: LoopBody; 00221 /// L2: LoopBody; 00222 /// ... 00223 /// if tripcount < loopfactor jump End 00224 /// Loop: 00225 /// ... 00226 /// End: 00227 /// 00228 bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI, 00229 LPPassManager *LPM) { 00230 // for now, only unroll loops that contain a single exit 00231 if (!L->getExitingBlock()) 00232 return false; 00233 00234 // Make sure the loop is in canonical form, and there is a single 00235 // exit block only. 00236 if (!L->isLoopSimplifyForm() || !L->getUniqueExitBlock()) 00237 return false; 00238 00239 // Use Scalar Evolution to compute the trip count. This allows more 00240 // loops to be unrolled than relying on induction var simplification 00241 if (!LPM) 00242 return false; 00243 ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>(); 00244 if (!SE) 00245 return false; 00246 00247 // Only unroll loops with a computable trip count and the trip count needs 00248 // to be an int value (allowing a pointer type is a TODO item) 00249 const SCEV *BECount = SE->getBackedgeTakenCount(L); 00250 if (isa<SCEVCouldNotCompute>(BECount) || !BECount->getType()->isIntegerTy()) 00251 return false; 00252 00253 // Add 1 since the backedge count doesn't include the first loop iteration 00254 const SCEV *TripCountSC = 00255 SE->getAddExpr(BECount, SE->getConstant(BECount->getType(), 1)); 00256 if (isa<SCEVCouldNotCompute>(TripCountSC)) 00257 return false; 00258 00259 // We only handle cases when the unroll factor is a power of 2. 00260 // Count is the loop unroll factor, the number of extra copies added + 1. 00261 if ((Count & (Count-1)) != 0) 00262 return false; 00263 00264 // If this loop is nested, then the loop unroller changes the code in 00265 // parent loop, so the Scalar Evolution pass needs to be run again 00266 if (Loop *ParentLoop = L->getParentLoop()) 00267 SE->forgetLoop(ParentLoop); 00268 00269 BasicBlock *PH = L->getLoopPreheader(); 00270 BasicBlock *Header = L->getHeader(); 00271 BasicBlock *Latch = L->getLoopLatch(); 00272 // It helps to splits the original preheader twice, one for the end of the 00273 // prolog code and one for a new loop preheader 00274 BasicBlock *PEnd = SplitEdge(PH, Header, LPM->getAsPass()); 00275 BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), LPM->getAsPass()); 00276 BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator()); 00277 00278 // Compute the number of extra iterations required, which is: 00279 // extra iterations = run-time trip count % (loop unroll factor + 1) 00280 SCEVExpander Expander(*SE, "loop-unroll"); 00281 Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(), 00282 PreHeaderBR); 00283 00284 IRBuilder<> B(PreHeaderBR); 00285 Value *ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter"); 00286 00287 // Check if for no extra iterations, then jump to unrolled loop. We have to 00288 // check that the trip count computation didn't overflow when adding one to 00289 // the backedge taken count. 00290 Value *LCmp = B.CreateIsNotNull(ModVal, "lcmp.mod"); 00291 Value *OverflowCheck = B.CreateIsNull(TripCount, "lcmp.overflow"); 00292 Value *BranchVal = B.CreateOr(OverflowCheck, LCmp, "lcmp.or"); 00293 00294 // Branch to either the extra iterations or the unrolled loop 00295 // We will fix up the true branch label when adding loop body copies 00296 BranchInst::Create(PEnd, PEnd, BranchVal, PreHeaderBR); 00297 assert(PreHeaderBR->isUnconditional() && 00298 PreHeaderBR->getSuccessor(0) == PEnd && 00299 "CFG edges in Preheader are not correct"); 00300 PreHeaderBR->eraseFromParent(); 00301 00302 ValueToValueMapTy LVMap; 00303 Function *F = Header->getParent(); 00304 // These variables are used to update the CFG links in each iteration 00305 BasicBlock *CompareBB = nullptr; 00306 BasicBlock *LastLoopBB = PH; 00307 // Get an ordered list of blocks in the loop to help with the ordering of the 00308 // cloned blocks in the prolog code 00309 LoopBlocksDFS LoopBlocks(L); 00310 LoopBlocks.perform(LI); 00311 00312 // 00313 // For each extra loop iteration, create a copy of the loop's basic blocks 00314 // and generate a condition that branches to the copy depending on the 00315 // number of 'left over' iterations. 00316 // 00317 for (unsigned leftOverIters = Count-1; leftOverIters > 0; --leftOverIters) { 00318 std::vector<BasicBlock*> NewBlocks; 00319 ValueToValueMapTy VMap; 00320 00321 // Clone all the basic blocks in the loop, but we don't clone the loop 00322 // This function adds the appropriate CFG connections. 00323 CloneLoopBlocks(L, (leftOverIters == Count-1), LastLoopBB, PEnd, NewBlocks, 00324 LoopBlocks, VMap, LVMap, LI); 00325 LastLoopBB = cast<BasicBlock>(VMap[Latch]); 00326 00327 // Insert the cloned blocks into function just before the original loop 00328 F->getBasicBlockList().splice(PEnd, F->getBasicBlockList(), 00329 NewBlocks[0], F->end()); 00330 00331 // Generate the code for the comparison which determines if the loop 00332 // prolog code needs to be executed. 00333 if (leftOverIters == Count-1) { 00334 // There is no compare block for the fall-thru case when for the last 00335 // left over iteration 00336 CompareBB = NewBlocks[0]; 00337 } else { 00338 // Create a new block for the comparison 00339 BasicBlock *NewBB = BasicBlock::Create(CompareBB->getContext(), "unr.cmp", 00340 F, CompareBB); 00341 if (Loop *ParentLoop = L->getParentLoop()) { 00342 // Add the new block to the parent loop, if needed 00343 ParentLoop->addBasicBlockToLoop(NewBB, LI->getBase()); 00344 } 00345 00346 // The comparison w/ the extra iteration value and branch 00347 Type *CountTy = TripCount->getType(); 00348 Value *BranchVal = new ICmpInst(*NewBB, ICmpInst::ICMP_EQ, ModVal, 00349 ConstantInt::get(CountTy, leftOverIters), 00350 "un.tmp"); 00351 // Branch to either the extra iterations or the unrolled loop 00352 BranchInst::Create(NewBlocks[0], CompareBB, 00353 BranchVal, NewBB); 00354 CompareBB = NewBB; 00355 PH->getTerminator()->setSuccessor(0, NewBB); 00356 VMap[NewPH] = CompareBB; 00357 } 00358 00359 // Rewrite the cloned instruction operands to use the values 00360 // created when the clone is created. 00361 for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) { 00362 for (BasicBlock::iterator I = NewBlocks[i]->begin(), 00363 E = NewBlocks[i]->end(); I != E; ++I) { 00364 RemapInstruction(I, VMap, 00365 RF_NoModuleLevelChanges|RF_IgnoreMissingEntries); 00366 } 00367 } 00368 } 00369 00370 // Connect the prolog code to the original loop and update the 00371 // PHI functions. 00372 ConnectProlog(L, TripCount, Count, LastLoopBB, PEnd, PH, NewPH, LVMap, 00373 LPM->getAsPass()); 00374 NumRuntimeUnrolled++; 00375 return true; 00376 }