LLVM API Documentation
00001 //===-- LoopReroll.cpp - Loop rerolling pass ------------------------------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This pass implements a simple loop reroller. 00011 // 00012 //===----------------------------------------------------------------------===// 00013 00014 #include "llvm/Transforms/Scalar.h" 00015 #include "llvm/ADT/STLExtras.h" 00016 #include "llvm/ADT/SmallSet.h" 00017 #include "llvm/ADT/Statistic.h" 00018 #include "llvm/Analysis/AliasAnalysis.h" 00019 #include "llvm/Analysis/AliasSetTracker.h" 00020 #include "llvm/Analysis/LoopPass.h" 00021 #include "llvm/Analysis/ScalarEvolution.h" 00022 #include "llvm/Analysis/ScalarEvolutionExpander.h" 00023 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 00024 #include "llvm/Analysis/ValueTracking.h" 00025 #include "llvm/IR/DataLayout.h" 00026 #include "llvm/IR/Dominators.h" 00027 #include "llvm/IR/IntrinsicInst.h" 00028 #include "llvm/Support/CommandLine.h" 00029 #include "llvm/Support/Debug.h" 00030 #include "llvm/Support/raw_ostream.h" 00031 #include "llvm/Target/TargetLibraryInfo.h" 00032 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 00033 #include "llvm/Transforms/Utils/Local.h" 00034 #include "llvm/Transforms/Utils/LoopUtils.h" 00035 00036 using namespace llvm; 00037 00038 #define DEBUG_TYPE "loop-reroll" 00039 00040 STATISTIC(NumRerolledLoops, "Number of rerolled loops"); 00041 00042 static cl::opt<unsigned> 00043 MaxInc("max-reroll-increment", cl::init(2048), cl::Hidden, 00044 cl::desc("The maximum increment for loop rerolling")); 00045 00046 // This loop re-rolling transformation aims to transform loops like this: 00047 // 00048 // int foo(int a); 00049 // void bar(int *x) { 00050 // for (int i = 0; i < 500; i += 3) { 00051 // foo(i); 00052 // foo(i+1); 00053 // foo(i+2); 00054 // } 00055 // } 00056 // 00057 // into a loop like this: 00058 // 00059 // void bar(int *x) { 00060 // for (int i = 0; i < 500; ++i) 00061 // foo(i); 00062 // } 00063 // 00064 // It does this by looking for loops that, besides the latch code, are composed 00065 // of isomorphic DAGs of instructions, with each DAG rooted at some increment 00066 // to the induction variable, and where each DAG is isomorphic to the DAG 00067 // rooted at the induction variable (excepting the sub-DAGs which root the 00068 // other induction-variable increments). In other words, we're looking for loop 00069 // bodies of the form: 00070 // 00071 // %iv = phi [ (preheader, ...), (body, %iv.next) ] 00072 // f(%iv) 00073 // %iv.1 = add %iv, 1 <-- a root increment 00074 // f(%iv.1) 00075 // %iv.2 = add %iv, 2 <-- a root increment 00076 // f(%iv.2) 00077 // %iv.scale_m_1 = add %iv, scale-1 <-- a root increment 00078 // f(%iv.scale_m_1) 00079 // ... 00080 // %iv.next = add %iv, scale 00081 // %cmp = icmp(%iv, ...) 00082 // br %cmp, header, exit 00083 // 00084 // where each f(i) is a set of instructions that, collectively, are a function 00085 // only of i (and other loop-invariant values). 00086 // 00087 // As a special case, we can also reroll loops like this: 00088 // 00089 // int foo(int); 00090 // void bar(int *x) { 00091 // for (int i = 0; i < 500; ++i) { 00092 // x[3*i] = foo(0); 00093 // x[3*i+1] = foo(0); 00094 // x[3*i+2] = foo(0); 00095 // } 00096 // } 00097 // 00098 // into this: 00099 // 00100 // void bar(int *x) { 00101 // for (int i = 0; i < 1500; ++i) 00102 // x[i] = foo(0); 00103 // } 00104 // 00105 // in which case, we're looking for inputs like this: 00106 // 00107 // %iv = phi [ (preheader, ...), (body, %iv.next) ] 00108 // %scaled.iv = mul %iv, scale 00109 // f(%scaled.iv) 00110 // %scaled.iv.1 = add %scaled.iv, 1 00111 // f(%scaled.iv.1) 00112 // %scaled.iv.2 = add %scaled.iv, 2 00113 // f(%scaled.iv.2) 00114 // %scaled.iv.scale_m_1 = add %scaled.iv, scale-1 00115 // f(%scaled.iv.scale_m_1) 00116 // ... 00117 // %iv.next = add %iv, 1 00118 // %cmp = icmp(%iv, ...) 00119 // br %cmp, header, exit 00120 00121 namespace { 00122 class LoopReroll : public LoopPass { 00123 public: 00124 static char ID; // Pass ID, replacement for typeid 00125 LoopReroll() : LoopPass(ID) { 00126 initializeLoopRerollPass(*PassRegistry::getPassRegistry()); 00127 } 00128 00129 bool runOnLoop(Loop *L, LPPassManager &LPM) override; 00130 00131 void getAnalysisUsage(AnalysisUsage &AU) const override { 00132 AU.addRequired<AliasAnalysis>(); 00133 AU.addRequired<LoopInfo>(); 00134 AU.addPreserved<LoopInfo>(); 00135 AU.addRequired<DominatorTreeWrapperPass>(); 00136 AU.addPreserved<DominatorTreeWrapperPass>(); 00137 AU.addRequired<ScalarEvolution>(); 00138 AU.addRequired<TargetLibraryInfo>(); 00139 } 00140 00141 protected: 00142 AliasAnalysis *AA; 00143 LoopInfo *LI; 00144 ScalarEvolution *SE; 00145 const DataLayout *DL; 00146 TargetLibraryInfo *TLI; 00147 DominatorTree *DT; 00148 00149 typedef SmallVector<Instruction *, 16> SmallInstructionVector; 00150 typedef SmallSet<Instruction *, 16> SmallInstructionSet; 00151 00152 // A chain of isomorphic instructions, indentified by a single-use PHI, 00153 // representing a reduction. Only the last value may be used outside the 00154 // loop. 00155 struct SimpleLoopReduction { 00156 SimpleLoopReduction(Instruction *P, Loop *L) 00157 : Valid(false), Instructions(1, P) { 00158 assert(isa<PHINode>(P) && "First reduction instruction must be a PHI"); 00159 add(L); 00160 } 00161 00162 bool valid() const { 00163 return Valid; 00164 } 00165 00166 Instruction *getPHI() const { 00167 assert(Valid && "Using invalid reduction"); 00168 return Instructions.front(); 00169 } 00170 00171 Instruction *getReducedValue() const { 00172 assert(Valid && "Using invalid reduction"); 00173 return Instructions.back(); 00174 } 00175 00176 Instruction *get(size_t i) const { 00177 assert(Valid && "Using invalid reduction"); 00178 return Instructions[i+1]; 00179 } 00180 00181 Instruction *operator [] (size_t i) const { return get(i); } 00182 00183 // The size, ignoring the initial PHI. 00184 size_t size() const { 00185 assert(Valid && "Using invalid reduction"); 00186 return Instructions.size()-1; 00187 } 00188 00189 typedef SmallInstructionVector::iterator iterator; 00190 typedef SmallInstructionVector::const_iterator const_iterator; 00191 00192 iterator begin() { 00193 assert(Valid && "Using invalid reduction"); 00194 return std::next(Instructions.begin()); 00195 } 00196 00197 const_iterator begin() const { 00198 assert(Valid && "Using invalid reduction"); 00199 return std::next(Instructions.begin()); 00200 } 00201 00202 iterator end() { return Instructions.end(); } 00203 const_iterator end() const { return Instructions.end(); } 00204 00205 protected: 00206 bool Valid; 00207 SmallInstructionVector Instructions; 00208 00209 void add(Loop *L); 00210 }; 00211 00212 // The set of all reductions, and state tracking of possible reductions 00213 // during loop instruction processing. 00214 struct ReductionTracker { 00215 typedef SmallVector<SimpleLoopReduction, 16> SmallReductionVector; 00216 00217 // Add a new possible reduction. 00218 void addSLR(SimpleLoopReduction &SLR) { 00219 PossibleReds.push_back(SLR); 00220 } 00221 00222 // Setup to track possible reductions corresponding to the provided 00223 // rerolling scale. Only reductions with a number of non-PHI instructions 00224 // that is divisible by the scale are considered. Three instructions sets 00225 // are filled in: 00226 // - A set of all possible instructions in eligible reductions. 00227 // - A set of all PHIs in eligible reductions 00228 // - A set of all reduced values (last instructions) in eligible reductions. 00229 void restrictToScale(uint64_t Scale, 00230 SmallInstructionSet &PossibleRedSet, 00231 SmallInstructionSet &PossibleRedPHISet, 00232 SmallInstructionSet &PossibleRedLastSet) { 00233 PossibleRedIdx.clear(); 00234 PossibleRedIter.clear(); 00235 Reds.clear(); 00236 00237 for (unsigned i = 0, e = PossibleReds.size(); i != e; ++i) 00238 if (PossibleReds[i].size() % Scale == 0) { 00239 PossibleRedLastSet.insert(PossibleReds[i].getReducedValue()); 00240 PossibleRedPHISet.insert(PossibleReds[i].getPHI()); 00241 00242 PossibleRedSet.insert(PossibleReds[i].getPHI()); 00243 PossibleRedIdx[PossibleReds[i].getPHI()] = i; 00244 for (SimpleLoopReduction::iterator J = PossibleReds[i].begin(), 00245 JE = PossibleReds[i].end(); J != JE; ++J) { 00246 PossibleRedSet.insert(*J); 00247 PossibleRedIdx[*J] = i; 00248 } 00249 } 00250 } 00251 00252 // The functions below are used while processing the loop instructions. 00253 00254 // Are the two instructions both from reductions, and furthermore, from 00255 // the same reduction? 00256 bool isPairInSame(Instruction *J1, Instruction *J2) { 00257 DenseMap<Instruction *, int>::iterator J1I = PossibleRedIdx.find(J1); 00258 if (J1I != PossibleRedIdx.end()) { 00259 DenseMap<Instruction *, int>::iterator J2I = PossibleRedIdx.find(J2); 00260 if (J2I != PossibleRedIdx.end() && J1I->second == J2I->second) 00261 return true; 00262 } 00263 00264 return false; 00265 } 00266 00267 // The two provided instructions, the first from the base iteration, and 00268 // the second from iteration i, form a matched pair. If these are part of 00269 // a reduction, record that fact. 00270 void recordPair(Instruction *J1, Instruction *J2, unsigned i) { 00271 if (PossibleRedIdx.count(J1)) { 00272 assert(PossibleRedIdx.count(J2) && 00273 "Recording reduction vs. non-reduction instruction?"); 00274 00275 PossibleRedIter[J1] = 0; 00276 PossibleRedIter[J2] = i; 00277 00278 int Idx = PossibleRedIdx[J1]; 00279 assert(Idx == PossibleRedIdx[J2] && 00280 "Recording pair from different reductions?"); 00281 Reds.insert(Idx); 00282 } 00283 } 00284 00285 // The functions below can be called after we've finished processing all 00286 // instructions in the loop, and we know which reductions were selected. 00287 00288 // Is the provided instruction the PHI of a reduction selected for 00289 // rerolling? 00290 bool isSelectedPHI(Instruction *J) { 00291 if (!isa<PHINode>(J)) 00292 return false; 00293 00294 for (DenseSet<int>::iterator RI = Reds.begin(), RIE = Reds.end(); 00295 RI != RIE; ++RI) { 00296 int i = *RI; 00297 if (cast<Instruction>(J) == PossibleReds[i].getPHI()) 00298 return true; 00299 } 00300 00301 return false; 00302 } 00303 00304 bool validateSelected(); 00305 void replaceSelected(); 00306 00307 protected: 00308 // The vector of all possible reductions (for any scale). 00309 SmallReductionVector PossibleReds; 00310 00311 DenseMap<Instruction *, int> PossibleRedIdx; 00312 DenseMap<Instruction *, int> PossibleRedIter; 00313 DenseSet<int> Reds; 00314 }; 00315 00316 void collectPossibleIVs(Loop *L, SmallInstructionVector &PossibleIVs); 00317 void collectPossibleReductions(Loop *L, 00318 ReductionTracker &Reductions); 00319 void collectInLoopUserSet(Loop *L, 00320 const SmallInstructionVector &Roots, 00321 const SmallInstructionSet &Exclude, 00322 const SmallInstructionSet &Final, 00323 DenseSet<Instruction *> &Users); 00324 void collectInLoopUserSet(Loop *L, 00325 Instruction * Root, 00326 const SmallInstructionSet &Exclude, 00327 const SmallInstructionSet &Final, 00328 DenseSet<Instruction *> &Users); 00329 bool findScaleFromMul(Instruction *RealIV, uint64_t &Scale, 00330 Instruction *&IV, 00331 SmallInstructionVector &LoopIncs); 00332 bool collectAllRoots(Loop *L, uint64_t Inc, uint64_t Scale, Instruction *IV, 00333 SmallVector<SmallInstructionVector, 32> &Roots, 00334 SmallInstructionSet &AllRoots, 00335 SmallInstructionVector &LoopIncs); 00336 bool reroll(Instruction *IV, Loop *L, BasicBlock *Header, const SCEV *IterCount, 00337 ReductionTracker &Reductions); 00338 }; 00339 } 00340 00341 char LoopReroll::ID = 0; 00342 INITIALIZE_PASS_BEGIN(LoopReroll, "loop-reroll", "Reroll loops", false, false) 00343 INITIALIZE_AG_DEPENDENCY(AliasAnalysis) 00344 INITIALIZE_PASS_DEPENDENCY(LoopInfo) 00345 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 00346 INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) 00347 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) 00348 INITIALIZE_PASS_END(LoopReroll, "loop-reroll", "Reroll loops", false, false) 00349 00350 Pass *llvm::createLoopRerollPass() { 00351 return new LoopReroll; 00352 } 00353 00354 // Returns true if the provided instruction is used outside the given loop. 00355 // This operates like Instruction::isUsedOutsideOfBlock, but considers PHIs in 00356 // non-loop blocks to be outside the loop. 00357 static bool hasUsesOutsideLoop(Instruction *I, Loop *L) { 00358 for (User *U : I->users()) 00359 if (!L->contains(cast<Instruction>(U))) 00360 return true; 00361 00362 return false; 00363 } 00364 00365 // Collect the list of loop induction variables with respect to which it might 00366 // be possible to reroll the loop. 00367 void LoopReroll::collectPossibleIVs(Loop *L, 00368 SmallInstructionVector &PossibleIVs) { 00369 BasicBlock *Header = L->getHeader(); 00370 for (BasicBlock::iterator I = Header->begin(), 00371 IE = Header->getFirstInsertionPt(); I != IE; ++I) { 00372 if (!isa<PHINode>(I)) 00373 continue; 00374 if (!I->getType()->isIntegerTy()) 00375 continue; 00376 00377 if (const SCEVAddRecExpr *PHISCEV = 00378 dyn_cast<SCEVAddRecExpr>(SE->getSCEV(I))) { 00379 if (PHISCEV->getLoop() != L) 00380 continue; 00381 if (!PHISCEV->isAffine()) 00382 continue; 00383 if (const SCEVConstant *IncSCEV = 00384 dyn_cast<SCEVConstant>(PHISCEV->getStepRecurrence(*SE))) { 00385 if (!IncSCEV->getValue()->getValue().isStrictlyPositive()) 00386 continue; 00387 if (IncSCEV->getValue()->uge(MaxInc)) 00388 continue; 00389 00390 DEBUG(dbgs() << "LRR: Possible IV: " << *I << " = " << 00391 *PHISCEV << "\n"); 00392 PossibleIVs.push_back(I); 00393 } 00394 } 00395 } 00396 } 00397 00398 // Add the remainder of the reduction-variable chain to the instruction vector 00399 // (the initial PHINode has already been added). If successful, the object is 00400 // marked as valid. 00401 void LoopReroll::SimpleLoopReduction::add(Loop *L) { 00402 assert(!Valid && "Cannot add to an already-valid chain"); 00403 00404 // The reduction variable must be a chain of single-use instructions 00405 // (including the PHI), except for the last value (which is used by the PHI 00406 // and also outside the loop). 00407 Instruction *C = Instructions.front(); 00408 00409 do { 00410 C = cast<Instruction>(*C->user_begin()); 00411 if (C->hasOneUse()) { 00412 if (!C->isBinaryOp()) 00413 return; 00414 00415 if (!(isa<PHINode>(Instructions.back()) || 00416 C->isSameOperationAs(Instructions.back()))) 00417 return; 00418 00419 Instructions.push_back(C); 00420 } 00421 } while (C->hasOneUse()); 00422 00423 if (Instructions.size() < 2 || 00424 !C->isSameOperationAs(Instructions.back()) || 00425 C->use_empty()) 00426 return; 00427 00428 // C is now the (potential) last instruction in the reduction chain. 00429 for (User *U : C->users()) 00430 // The only in-loop user can be the initial PHI. 00431 if (L->contains(cast<Instruction>(U))) 00432 if (cast<Instruction>(U) != Instructions.front()) 00433 return; 00434 00435 Instructions.push_back(C); 00436 Valid = true; 00437 } 00438 00439 // Collect the vector of possible reduction variables. 00440 void LoopReroll::collectPossibleReductions(Loop *L, 00441 ReductionTracker &Reductions) { 00442 BasicBlock *Header = L->getHeader(); 00443 for (BasicBlock::iterator I = Header->begin(), 00444 IE = Header->getFirstInsertionPt(); I != IE; ++I) { 00445 if (!isa<PHINode>(I)) 00446 continue; 00447 if (!I->getType()->isSingleValueType()) 00448 continue; 00449 00450 SimpleLoopReduction SLR(I, L); 00451 if (!SLR.valid()) 00452 continue; 00453 00454 DEBUG(dbgs() << "LRR: Possible reduction: " << *I << " (with " << 00455 SLR.size() << " chained instructions)\n"); 00456 Reductions.addSLR(SLR); 00457 } 00458 } 00459 00460 // Collect the set of all users of the provided root instruction. This set of 00461 // users contains not only the direct users of the root instruction, but also 00462 // all users of those users, and so on. There are two exceptions: 00463 // 00464 // 1. Instructions in the set of excluded instructions are never added to the 00465 // use set (even if they are users). This is used, for example, to exclude 00466 // including root increments in the use set of the primary IV. 00467 // 00468 // 2. Instructions in the set of final instructions are added to the use set 00469 // if they are users, but their users are not added. This is used, for 00470 // example, to prevent a reduction update from forcing all later reduction 00471 // updates into the use set. 00472 void LoopReroll::collectInLoopUserSet(Loop *L, 00473 Instruction *Root, const SmallInstructionSet &Exclude, 00474 const SmallInstructionSet &Final, 00475 DenseSet<Instruction *> &Users) { 00476 SmallInstructionVector Queue(1, Root); 00477 while (!Queue.empty()) { 00478 Instruction *I = Queue.pop_back_val(); 00479 if (!Users.insert(I).second) 00480 continue; 00481 00482 if (!Final.count(I)) 00483 for (Use &U : I->uses()) { 00484 Instruction *User = cast<Instruction>(U.getUser()); 00485 if (PHINode *PN = dyn_cast<PHINode>(User)) { 00486 // Ignore "wrap-around" uses to PHIs of this loop's header. 00487 if (PN->getIncomingBlock(U) == L->getHeader()) 00488 continue; 00489 } 00490 00491 if (L->contains(User) && !Exclude.count(User)) { 00492 Queue.push_back(User); 00493 } 00494 } 00495 00496 // We also want to collect single-user "feeder" values. 00497 for (User::op_iterator OI = I->op_begin(), 00498 OIE = I->op_end(); OI != OIE; ++OI) { 00499 if (Instruction *Op = dyn_cast<Instruction>(*OI)) 00500 if (Op->hasOneUse() && L->contains(Op) && !Exclude.count(Op) && 00501 !Final.count(Op)) 00502 Queue.push_back(Op); 00503 } 00504 } 00505 } 00506 00507 // Collect all of the users of all of the provided root instructions (combined 00508 // into a single set). 00509 void LoopReroll::collectInLoopUserSet(Loop *L, 00510 const SmallInstructionVector &Roots, 00511 const SmallInstructionSet &Exclude, 00512 const SmallInstructionSet &Final, 00513 DenseSet<Instruction *> &Users) { 00514 for (SmallInstructionVector::const_iterator I = Roots.begin(), 00515 IE = Roots.end(); I != IE; ++I) 00516 collectInLoopUserSet(L, *I, Exclude, Final, Users); 00517 } 00518 00519 static bool isSimpleLoadStore(Instruction *I) { 00520 if (LoadInst *LI = dyn_cast<LoadInst>(I)) 00521 return LI->isSimple(); 00522 if (StoreInst *SI = dyn_cast<StoreInst>(I)) 00523 return SI->isSimple(); 00524 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) 00525 return !MI->isVolatile(); 00526 return false; 00527 } 00528 00529 // Recognize loops that are setup like this: 00530 // 00531 // %iv = phi [ (preheader, ...), (body, %iv.next) ] 00532 // %scaled.iv = mul %iv, scale 00533 // f(%scaled.iv) 00534 // %scaled.iv.1 = add %scaled.iv, 1 00535 // f(%scaled.iv.1) 00536 // %scaled.iv.2 = add %scaled.iv, 2 00537 // f(%scaled.iv.2) 00538 // %scaled.iv.scale_m_1 = add %scaled.iv, scale-1 00539 // f(%scaled.iv.scale_m_1) 00540 // ... 00541 // %iv.next = add %iv, 1 00542 // %cmp = icmp(%iv, ...) 00543 // br %cmp, header, exit 00544 // 00545 // and, if found, set IV = %scaled.iv, and add %iv.next to LoopIncs. 00546 bool LoopReroll::findScaleFromMul(Instruction *RealIV, uint64_t &Scale, 00547 Instruction *&IV, 00548 SmallInstructionVector &LoopIncs) { 00549 // This is a special case: here we're looking for all uses (except for 00550 // the increment) to be multiplied by a common factor. The increment must 00551 // be by one. This is to capture loops like: 00552 // for (int i = 0; i < 500; ++i) { 00553 // foo(3*i); foo(3*i+1); foo(3*i+2); 00554 // } 00555 if (RealIV->getNumUses() != 2) 00556 return false; 00557 const SCEVAddRecExpr *RealIVSCEV = cast<SCEVAddRecExpr>(SE->getSCEV(RealIV)); 00558 Instruction *User1 = cast<Instruction>(*RealIV->user_begin()), 00559 *User2 = cast<Instruction>(*std::next(RealIV->user_begin())); 00560 if (!SE->isSCEVable(User1->getType()) || !SE->isSCEVable(User2->getType())) 00561 return false; 00562 const SCEVAddRecExpr *User1SCEV = 00563 dyn_cast<SCEVAddRecExpr>(SE->getSCEV(User1)), 00564 *User2SCEV = 00565 dyn_cast<SCEVAddRecExpr>(SE->getSCEV(User2)); 00566 if (!User1SCEV || !User1SCEV->isAffine() || 00567 !User2SCEV || !User2SCEV->isAffine()) 00568 return false; 00569 00570 // We assume below that User1 is the scale multiply and User2 is the 00571 // increment. If this can't be true, then swap them. 00572 if (User1SCEV == RealIVSCEV->getPostIncExpr(*SE)) { 00573 std::swap(User1, User2); 00574 std::swap(User1SCEV, User2SCEV); 00575 } 00576 00577 if (User2SCEV != RealIVSCEV->getPostIncExpr(*SE)) 00578 return false; 00579 assert(User2SCEV->getStepRecurrence(*SE)->isOne() && 00580 "Invalid non-unit step for multiplicative scaling"); 00581 LoopIncs.push_back(User2); 00582 00583 if (const SCEVConstant *MulScale = 00584 dyn_cast<SCEVConstant>(User1SCEV->getStepRecurrence(*SE))) { 00585 // Make sure that both the start and step have the same multiplier. 00586 if (RealIVSCEV->getStart()->getType() != MulScale->getType()) 00587 return false; 00588 if (SE->getMulExpr(RealIVSCEV->getStart(), MulScale) != 00589 User1SCEV->getStart()) 00590 return false; 00591 00592 ConstantInt *MulScaleCI = MulScale->getValue(); 00593 if (!MulScaleCI->uge(2) || MulScaleCI->uge(MaxInc)) 00594 return false; 00595 Scale = MulScaleCI->getZExtValue(); 00596 IV = User1; 00597 } else 00598 return false; 00599 00600 DEBUG(dbgs() << "LRR: Found possible scaling " << *User1 << "\n"); 00601 return true; 00602 } 00603 00604 // Collect all root increments with respect to the provided induction variable 00605 // (normally the PHI, but sometimes a multiply). A root increment is an 00606 // instruction, normally an add, with a positive constant less than Scale. In a 00607 // rerollable loop, each of these increments is the root of an instruction 00608 // graph isomorphic to the others. Also, we collect the final induction 00609 // increment (the increment equal to the Scale), and its users in LoopIncs. 00610 bool LoopReroll::collectAllRoots(Loop *L, uint64_t Inc, uint64_t Scale, 00611 Instruction *IV, 00612 SmallVector<SmallInstructionVector, 32> &Roots, 00613 SmallInstructionSet &AllRoots, 00614 SmallInstructionVector &LoopIncs) { 00615 for (User *U : IV->users()) { 00616 Instruction *UI = cast<Instruction>(U); 00617 if (!SE->isSCEVable(UI->getType())) 00618 continue; 00619 if (UI->getType() != IV->getType()) 00620 continue; 00621 if (!L->contains(UI)) 00622 continue; 00623 if (hasUsesOutsideLoop(UI, L)) 00624 continue; 00625 00626 if (const SCEVConstant *Diff = dyn_cast<SCEVConstant>(SE->getMinusSCEV( 00627 SE->getSCEV(UI), SE->getSCEV(IV)))) { 00628 uint64_t Idx = Diff->getValue()->getValue().getZExtValue(); 00629 if (Idx > 0 && Idx < Scale) { 00630 Roots[Idx-1].push_back(UI); 00631 AllRoots.insert(UI); 00632 } else if (Idx == Scale && Inc > 1) { 00633 LoopIncs.push_back(UI); 00634 } 00635 } 00636 } 00637 00638 if (Roots[0].empty()) 00639 return false; 00640 bool AllSame = true; 00641 for (unsigned i = 1; i < Scale-1; ++i) 00642 if (Roots[i].size() != Roots[0].size()) { 00643 AllSame = false; 00644 break; 00645 } 00646 00647 if (!AllSame) 00648 return false; 00649 00650 return true; 00651 } 00652 00653 // Validate the selected reductions. All iterations must have an isomorphic 00654 // part of the reduction chain and, for non-associative reductions, the chain 00655 // entries must appear in order. 00656 bool LoopReroll::ReductionTracker::validateSelected() { 00657 // For a non-associative reduction, the chain entries must appear in order. 00658 for (DenseSet<int>::iterator RI = Reds.begin(), RIE = Reds.end(); 00659 RI != RIE; ++RI) { 00660 int i = *RI; 00661 int PrevIter = 0, BaseCount = 0, Count = 0; 00662 for (SimpleLoopReduction::iterator J = PossibleReds[i].begin(), 00663 JE = PossibleReds[i].end(); J != JE; ++J) { 00664 // Note that all instructions in the chain must have been found because 00665 // all instructions in the function must have been assigned to some 00666 // iteration. 00667 int Iter = PossibleRedIter[*J]; 00668 if (Iter != PrevIter && Iter != PrevIter + 1 && 00669 !PossibleReds[i].getReducedValue()->isAssociative()) { 00670 DEBUG(dbgs() << "LRR: Out-of-order non-associative reduction: " << 00671 *J << "\n"); 00672 return false; 00673 } 00674 00675 if (Iter != PrevIter) { 00676 if (Count != BaseCount) { 00677 DEBUG(dbgs() << "LRR: Iteration " << PrevIter << 00678 " reduction use count " << Count << 00679 " is not equal to the base use count " << 00680 BaseCount << "\n"); 00681 return false; 00682 } 00683 00684 Count = 0; 00685 } 00686 00687 ++Count; 00688 if (Iter == 0) 00689 ++BaseCount; 00690 00691 PrevIter = Iter; 00692 } 00693 } 00694 00695 return true; 00696 } 00697 00698 // For all selected reductions, remove all parts except those in the first 00699 // iteration (and the PHI). Replace outside uses of the reduced value with uses 00700 // of the first-iteration reduced value (in other words, reroll the selected 00701 // reductions). 00702 void LoopReroll::ReductionTracker::replaceSelected() { 00703 // Fixup reductions to refer to the last instruction associated with the 00704 // first iteration (not the last). 00705 for (DenseSet<int>::iterator RI = Reds.begin(), RIE = Reds.end(); 00706 RI != RIE; ++RI) { 00707 int i = *RI; 00708 int j = 0; 00709 for (int e = PossibleReds[i].size(); j != e; ++j) 00710 if (PossibleRedIter[PossibleReds[i][j]] != 0) { 00711 --j; 00712 break; 00713 } 00714 00715 // Replace users with the new end-of-chain value. 00716 SmallInstructionVector Users; 00717 for (User *U : PossibleReds[i].getReducedValue()->users()) 00718 Users.push_back(cast<Instruction>(U)); 00719 00720 for (SmallInstructionVector::iterator J = Users.begin(), 00721 JE = Users.end(); J != JE; ++J) 00722 (*J)->replaceUsesOfWith(PossibleReds[i].getReducedValue(), 00723 PossibleReds[i][j]); 00724 } 00725 } 00726 00727 // Reroll the provided loop with respect to the provided induction variable. 00728 // Generally, we're looking for a loop like this: 00729 // 00730 // %iv = phi [ (preheader, ...), (body, %iv.next) ] 00731 // f(%iv) 00732 // %iv.1 = add %iv, 1 <-- a root increment 00733 // f(%iv.1) 00734 // %iv.2 = add %iv, 2 <-- a root increment 00735 // f(%iv.2) 00736 // %iv.scale_m_1 = add %iv, scale-1 <-- a root increment 00737 // f(%iv.scale_m_1) 00738 // ... 00739 // %iv.next = add %iv, scale 00740 // %cmp = icmp(%iv, ...) 00741 // br %cmp, header, exit 00742 // 00743 // Notably, we do not require that f(%iv), f(%iv.1), etc. be isolated groups of 00744 // instructions. In other words, the instructions in f(%iv), f(%iv.1), etc. can 00745 // be intermixed with eachother. The restriction imposed by this algorithm is 00746 // that the relative order of the isomorphic instructions in f(%iv), f(%iv.1), 00747 // etc. be the same. 00748 // 00749 // First, we collect the use set of %iv, excluding the other increment roots. 00750 // This gives us f(%iv). Then we iterate over the loop instructions (scale-1) 00751 // times, having collected the use set of f(%iv.(i+1)), during which we: 00752 // - Ensure that the next unmatched instruction in f(%iv) is isomorphic to 00753 // the next unmatched instruction in f(%iv.(i+1)). 00754 // - Ensure that both matched instructions don't have any external users 00755 // (with the exception of last-in-chain reduction instructions). 00756 // - Track the (aliasing) write set, and other side effects, of all 00757 // instructions that belong to future iterations that come before the matched 00758 // instructions. If the matched instructions read from that write set, then 00759 // f(%iv) or f(%iv.(i+1)) has some dependency on instructions in 00760 // f(%iv.(j+1)) for some j > i, and we cannot reroll the loop. Similarly, 00761 // if any of these future instructions had side effects (could not be 00762 // speculatively executed), and so do the matched instructions, when we 00763 // cannot reorder those side-effect-producing instructions, and rerolling 00764 // fails. 00765 // 00766 // Finally, we make sure that all loop instructions are either loop increment 00767 // roots, belong to simple latch code, parts of validated reductions, part of 00768 // f(%iv) or part of some f(%iv.i). If all of that is true (and all reductions 00769 // have been validated), then we reroll the loop. 00770 bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header, 00771 const SCEV *IterCount, 00772 ReductionTracker &Reductions) { 00773 const SCEVAddRecExpr *RealIVSCEV = cast<SCEVAddRecExpr>(SE->getSCEV(IV)); 00774 uint64_t Inc = cast<SCEVConstant>(RealIVSCEV->getOperand(1))-> 00775 getValue()->getZExtValue(); 00776 // The collection of loop increment instructions. 00777 SmallInstructionVector LoopIncs; 00778 uint64_t Scale = Inc; 00779 00780 // The effective induction variable, IV, is normally also the real induction 00781 // variable. When we're dealing with a loop like: 00782 // for (int i = 0; i < 500; ++i) 00783 // x[3*i] = ...; 00784 // x[3*i+1] = ...; 00785 // x[3*i+2] = ...; 00786 // then the real IV is still i, but the effective IV is (3*i). 00787 Instruction *RealIV = IV; 00788 if (Inc == 1 && !findScaleFromMul(RealIV, Scale, IV, LoopIncs)) 00789 return false; 00790 00791 assert(Scale <= MaxInc && "Scale is too large"); 00792 assert(Scale > 1 && "Scale must be at least 2"); 00793 00794 // The set of increment instructions for each increment value. 00795 SmallVector<SmallInstructionVector, 32> Roots(Scale-1); 00796 SmallInstructionSet AllRoots; 00797 if (!collectAllRoots(L, Inc, Scale, IV, Roots, AllRoots, LoopIncs)) 00798 return false; 00799 00800 DEBUG(dbgs() << "LRR: Found all root induction increments for: " << 00801 *RealIV << "\n"); 00802 00803 // An array of just the possible reductions for this scale factor. When we 00804 // collect the set of all users of some root instructions, these reduction 00805 // instructions are treated as 'final' (their uses are not considered). 00806 // This is important because we don't want the root use set to search down 00807 // the reduction chain. 00808 SmallInstructionSet PossibleRedSet; 00809 SmallInstructionSet PossibleRedLastSet, PossibleRedPHISet; 00810 Reductions.restrictToScale(Scale, PossibleRedSet, PossibleRedPHISet, 00811 PossibleRedLastSet); 00812 00813 // We now need to check for equivalence of the use graph of each root with 00814 // that of the primary induction variable (excluding the roots). Our goal 00815 // here is not to solve the full graph isomorphism problem, but rather to 00816 // catch common cases without a lot of work. As a result, we will assume 00817 // that the relative order of the instructions in each unrolled iteration 00818 // is the same (although we will not make an assumption about how the 00819 // different iterations are intermixed). Note that while the order must be 00820 // the same, the instructions may not be in the same basic block. 00821 SmallInstructionSet Exclude(AllRoots); 00822 Exclude.insert(LoopIncs.begin(), LoopIncs.end()); 00823 00824 DenseSet<Instruction *> BaseUseSet; 00825 collectInLoopUserSet(L, IV, Exclude, PossibleRedSet, BaseUseSet); 00826 00827 DenseSet<Instruction *> AllRootUses; 00828 std::vector<DenseSet<Instruction *> > RootUseSets(Scale-1); 00829 00830 bool MatchFailed = false; 00831 for (unsigned i = 0; i < Scale-1 && !MatchFailed; ++i) { 00832 DenseSet<Instruction *> &RootUseSet = RootUseSets[i]; 00833 collectInLoopUserSet(L, Roots[i], SmallInstructionSet(), 00834 PossibleRedSet, RootUseSet); 00835 00836 DEBUG(dbgs() << "LRR: base use set size: " << BaseUseSet.size() << 00837 " vs. iteration increment " << (i+1) << 00838 " use set size: " << RootUseSet.size() << "\n"); 00839 00840 if (BaseUseSet.size() != RootUseSet.size()) { 00841 MatchFailed = true; 00842 break; 00843 } 00844 00845 // In addition to regular aliasing information, we need to look for 00846 // instructions from later (future) iterations that have side effects 00847 // preventing us from reordering them past other instructions with side 00848 // effects. 00849 bool FutureSideEffects = false; 00850 AliasSetTracker AST(*AA); 00851 00852 // The map between instructions in f(%iv.(i+1)) and f(%iv). 00853 DenseMap<Value *, Value *> BaseMap; 00854 00855 assert(L->getNumBlocks() == 1 && "Cannot handle multi-block loops"); 00856 for (BasicBlock::iterator J1 = Header->begin(), J2 = Header->begin(), 00857 JE = Header->end(); J1 != JE && !MatchFailed; ++J1) { 00858 if (cast<Instruction>(J1) == RealIV) 00859 continue; 00860 if (cast<Instruction>(J1) == IV) 00861 continue; 00862 if (!BaseUseSet.count(J1)) 00863 continue; 00864 if (PossibleRedPHISet.count(J1)) // Skip reduction PHIs. 00865 continue; 00866 00867 while (J2 != JE && (!RootUseSet.count(J2) || 00868 std::find(Roots[i].begin(), Roots[i].end(), J2) != 00869 Roots[i].end())) { 00870 // As we iterate through the instructions, instructions that don't 00871 // belong to previous iterations (or the base case), must belong to 00872 // future iterations. We want to track the alias set of writes from 00873 // previous iterations. 00874 if (!isa<PHINode>(J2) && !BaseUseSet.count(J2) && 00875 !AllRootUses.count(J2)) { 00876 if (J2->mayWriteToMemory()) 00877 AST.add(J2); 00878 00879 // Note: This is specifically guarded by a check on isa<PHINode>, 00880 // which while a valid (somewhat arbitrary) micro-optimization, is 00881 // needed because otherwise isSafeToSpeculativelyExecute returns 00882 // false on PHI nodes. 00883 if (!isSimpleLoadStore(J2) && !isSafeToSpeculativelyExecute(J2, DL)) 00884 FutureSideEffects = true; 00885 } 00886 00887 ++J2; 00888 } 00889 00890 if (!J1->isSameOperationAs(J2)) { 00891 DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 << 00892 " vs. " << *J2 << "\n"); 00893 MatchFailed = true; 00894 break; 00895 } 00896 00897 // Make sure that this instruction, which is in the use set of this 00898 // root instruction, does not also belong to the base set or the set of 00899 // some previous root instruction. 00900 if (BaseUseSet.count(J2) || AllRootUses.count(J2)) { 00901 DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 << 00902 " vs. " << *J2 << " (prev. case overlap)\n"); 00903 MatchFailed = true; 00904 break; 00905 } 00906 00907 // Make sure that we don't alias with any instruction in the alias set 00908 // tracker. If we do, then we depend on a future iteration, and we 00909 // can't reroll. 00910 if (J2->mayReadFromMemory()) { 00911 for (AliasSetTracker::iterator K = AST.begin(), KE = AST.end(); 00912 K != KE && !MatchFailed; ++K) { 00913 if (K->aliasesUnknownInst(J2, *AA)) { 00914 DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 << 00915 " vs. " << *J2 << " (depends on future store)\n"); 00916 MatchFailed = true; 00917 break; 00918 } 00919 } 00920 } 00921 00922 // If we've past an instruction from a future iteration that may have 00923 // side effects, and this instruction might also, then we can't reorder 00924 // them, and this matching fails. As an exception, we allow the alias 00925 // set tracker to handle regular (simple) load/store dependencies. 00926 if (FutureSideEffects && 00927 ((!isSimpleLoadStore(J1) && 00928 !isSafeToSpeculativelyExecute(J1, DL)) || 00929 (!isSimpleLoadStore(J2) && 00930 !isSafeToSpeculativelyExecute(J2, DL)))) { 00931 DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 << 00932 " vs. " << *J2 << 00933 " (side effects prevent reordering)\n"); 00934 MatchFailed = true; 00935 break; 00936 } 00937 00938 // For instructions that are part of a reduction, if the operation is 00939 // associative, then don't bother matching the operands (because we 00940 // already know that the instructions are isomorphic, and the order 00941 // within the iteration does not matter). For non-associative reductions, 00942 // we do need to match the operands, because we need to reject 00943 // out-of-order instructions within an iteration! 00944 // For example (assume floating-point addition), we need to reject this: 00945 // x += a[i]; x += b[i]; 00946 // x += a[i+1]; x += b[i+1]; 00947 // x += b[i+2]; x += a[i+2]; 00948 bool InReduction = Reductions.isPairInSame(J1, J2); 00949 00950 if (!(InReduction && J1->isAssociative())) { 00951 bool Swapped = false, SomeOpMatched = false; 00952 for (unsigned j = 0; j < J1->getNumOperands() && !MatchFailed; ++j) { 00953 Value *Op2 = J2->getOperand(j); 00954 00955 // If this is part of a reduction (and the operation is not 00956 // associatve), then we match all operands, but not those that are 00957 // part of the reduction. 00958 if (InReduction) 00959 if (Instruction *Op2I = dyn_cast<Instruction>(Op2)) 00960 if (Reductions.isPairInSame(J2, Op2I)) 00961 continue; 00962 00963 DenseMap<Value *, Value *>::iterator BMI = BaseMap.find(Op2); 00964 if (BMI != BaseMap.end()) 00965 Op2 = BMI->second; 00966 else if (std::find(Roots[i].begin(), Roots[i].end(), 00967 (Instruction*) Op2) != Roots[i].end()) 00968 Op2 = IV; 00969 00970 if (J1->getOperand(Swapped ? unsigned(!j) : j) != Op2) { 00971 // If we've not already decided to swap the matched operands, and 00972 // we've not already matched our first operand (note that we could 00973 // have skipped matching the first operand because it is part of a 00974 // reduction above), and the instruction is commutative, then try 00975 // the swapped match. 00976 if (!Swapped && J1->isCommutative() && !SomeOpMatched && 00977 J1->getOperand(!j) == Op2) { 00978 Swapped = true; 00979 } else { 00980 DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 << 00981 " vs. " << *J2 << " (operand " << j << ")\n"); 00982 MatchFailed = true; 00983 break; 00984 } 00985 } 00986 00987 SomeOpMatched = true; 00988 } 00989 } 00990 00991 if ((!PossibleRedLastSet.count(J1) && hasUsesOutsideLoop(J1, L)) || 00992 (!PossibleRedLastSet.count(J2) && hasUsesOutsideLoop(J2, L))) { 00993 DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 << 00994 " vs. " << *J2 << " (uses outside loop)\n"); 00995 MatchFailed = true; 00996 break; 00997 } 00998 00999 if (!MatchFailed) 01000 BaseMap.insert(std::pair<Value *, Value *>(J2, J1)); 01001 01002 AllRootUses.insert(J2); 01003 Reductions.recordPair(J1, J2, i+1); 01004 01005 ++J2; 01006 } 01007 } 01008 01009 if (MatchFailed) 01010 return false; 01011 01012 DEBUG(dbgs() << "LRR: Matched all iteration increments for " << 01013 *RealIV << "\n"); 01014 01015 DenseSet<Instruction *> LoopIncUseSet; 01016 collectInLoopUserSet(L, LoopIncs, SmallInstructionSet(), 01017 SmallInstructionSet(), LoopIncUseSet); 01018 DEBUG(dbgs() << "LRR: Loop increment set size: " << 01019 LoopIncUseSet.size() << "\n"); 01020 01021 // Make sure that all instructions in the loop have been included in some 01022 // use set. 01023 for (BasicBlock::iterator J = Header->begin(), JE = Header->end(); 01024 J != JE; ++J) { 01025 if (isa<DbgInfoIntrinsic>(J)) 01026 continue; 01027 if (cast<Instruction>(J) == RealIV) 01028 continue; 01029 if (cast<Instruction>(J) == IV) 01030 continue; 01031 if (BaseUseSet.count(J) || AllRootUses.count(J) || 01032 (LoopIncUseSet.count(J) && (J->isTerminator() || 01033 isSafeToSpeculativelyExecute(J, DL)))) 01034 continue; 01035 01036 if (AllRoots.count(J)) 01037 continue; 01038 01039 if (Reductions.isSelectedPHI(J)) 01040 continue; 01041 01042 DEBUG(dbgs() << "LRR: aborting reroll based on " << *RealIV << 01043 " unprocessed instruction found: " << *J << "\n"); 01044 MatchFailed = true; 01045 break; 01046 } 01047 01048 if (MatchFailed) 01049 return false; 01050 01051 DEBUG(dbgs() << "LRR: all instructions processed from " << 01052 *RealIV << "\n"); 01053 01054 if (!Reductions.validateSelected()) 01055 return false; 01056 01057 // At this point, we've validated the rerolling, and we're committed to 01058 // making changes! 01059 01060 Reductions.replaceSelected(); 01061 01062 // Remove instructions associated with non-base iterations. 01063 for (BasicBlock::reverse_iterator J = Header->rbegin(); 01064 J != Header->rend();) { 01065 if (AllRootUses.count(&*J)) { 01066 Instruction *D = &*J; 01067 DEBUG(dbgs() << "LRR: removing: " << *D << "\n"); 01068 D->eraseFromParent(); 01069 continue; 01070 } 01071 01072 ++J; 01073 } 01074 01075 // Insert the new induction variable. 01076 const SCEV *Start = RealIVSCEV->getStart(); 01077 if (Inc == 1) 01078 Start = SE->getMulExpr(Start, 01079 SE->getConstant(Start->getType(), Scale)); 01080 const SCEVAddRecExpr *H = 01081 cast<SCEVAddRecExpr>(SE->getAddRecExpr(Start, 01082 SE->getConstant(RealIVSCEV->getType(), 1), 01083 L, SCEV::FlagAnyWrap)); 01084 { // Limit the lifetime of SCEVExpander. 01085 SCEVExpander Expander(*SE, "reroll"); 01086 Value *NewIV = Expander.expandCodeFor(H, IV->getType(), Header->begin()); 01087 01088 for (DenseSet<Instruction *>::iterator J = BaseUseSet.begin(), 01089 JE = BaseUseSet.end(); J != JE; ++J) 01090 (*J)->replaceUsesOfWith(IV, NewIV); 01091 01092 if (BranchInst *BI = dyn_cast<BranchInst>(Header->getTerminator())) { 01093 if (LoopIncUseSet.count(BI)) { 01094 const SCEV *ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE); 01095 if (Inc == 1) 01096 ICSCEV = 01097 SE->getMulExpr(ICSCEV, SE->getConstant(ICSCEV->getType(), Scale)); 01098 // Iteration count SCEV minus 1 01099 const SCEV *ICMinus1SCEV = 01100 SE->getMinusSCEV(ICSCEV, SE->getConstant(ICSCEV->getType(), 1)); 01101 01102 Value *ICMinus1; // Iteration count minus 1 01103 if (isa<SCEVConstant>(ICMinus1SCEV)) { 01104 ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(), BI); 01105 } else { 01106 BasicBlock *Preheader = L->getLoopPreheader(); 01107 if (!Preheader) 01108 Preheader = InsertPreheaderForLoop(L, this); 01109 01110 ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(), 01111 Preheader->getTerminator()); 01112 } 01113 01114 Value *Cond = new ICmpInst(BI, CmpInst::ICMP_EQ, NewIV, ICMinus1, 01115 "exitcond"); 01116 BI->setCondition(Cond); 01117 01118 if (BI->getSuccessor(1) != Header) 01119 BI->swapSuccessors(); 01120 } 01121 } 01122 } 01123 01124 SimplifyInstructionsInBlock(Header, DL, TLI); 01125 DeleteDeadPHIs(Header, TLI); 01126 ++NumRerolledLoops; 01127 return true; 01128 } 01129 01130 bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) { 01131 if (skipOptnoneFunction(L)) 01132 return false; 01133 01134 AA = &getAnalysis<AliasAnalysis>(); 01135 LI = &getAnalysis<LoopInfo>(); 01136 SE = &getAnalysis<ScalarEvolution>(); 01137 TLI = &getAnalysis<TargetLibraryInfo>(); 01138 DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); 01139 DL = DLP ? &DLP->getDataLayout() : nullptr; 01140 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 01141 01142 BasicBlock *Header = L->getHeader(); 01143 DEBUG(dbgs() << "LRR: F[" << Header->getParent()->getName() << 01144 "] Loop %" << Header->getName() << " (" << 01145 L->getNumBlocks() << " block(s))\n"); 01146 01147 bool Changed = false; 01148 01149 // For now, we'll handle only single BB loops. 01150 if (L->getNumBlocks() > 1) 01151 return Changed; 01152 01153 if (!SE->hasLoopInvariantBackedgeTakenCount(L)) 01154 return Changed; 01155 01156 const SCEV *LIBETC = SE->getBackedgeTakenCount(L); 01157 const SCEV *IterCount = 01158 SE->getAddExpr(LIBETC, SE->getConstant(LIBETC->getType(), 1)); 01159 DEBUG(dbgs() << "LRR: iteration count = " << *IterCount << "\n"); 01160 01161 // First, we need to find the induction variable with respect to which we can 01162 // reroll (there may be several possible options). 01163 SmallInstructionVector PossibleIVs; 01164 collectPossibleIVs(L, PossibleIVs); 01165 01166 if (PossibleIVs.empty()) { 01167 DEBUG(dbgs() << "LRR: No possible IVs found\n"); 01168 return Changed; 01169 } 01170 01171 ReductionTracker Reductions; 01172 collectPossibleReductions(L, Reductions); 01173 01174 // For each possible IV, collect the associated possible set of 'root' nodes 01175 // (i+1, i+2, etc.). 01176 for (SmallInstructionVector::iterator I = PossibleIVs.begin(), 01177 IE = PossibleIVs.end(); I != IE; ++I) 01178 if (reroll(*I, L, Header, IterCount, Reductions)) { 01179 Changed = true; 01180 break; 01181 } 01182 01183 return Changed; 01184 } 01185