LLVM API Documentation
//===- JumpThreading.cpp - Thread control through conditional blocks ------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the Jump Threading pass.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;

#define DEBUG_TYPE "jump-threading"

STATISTIC(NumThreads, "Number of jumps threaded");
STATISTIC(NumFolds,   "Number of terminators folded");
STATISTIC(NumDupes,   "Number of branch blocks duplicated to eliminate phi");

// Maximum "cost" (roughly instruction count, see
// getJumpThreadDuplicationCost) of a block that may be duplicated in order to
// thread a jump across it.
static cl::opt<unsigned>
Threshold("jump-threading-threshold",
          cl::desc("Max block size to duplicate for jump threading"),
          cl::init(6), cl::Hidden);

namespace {
  // These are at global scope so static functions can use them too.
  // A list of (known constant, predecessor block) pairs: for each listed
  // predecessor, the value under analysis is known to be the paired constant.
  typedef SmallVectorImpl<std::pair<Constant*, BasicBlock*> > PredValueInfo;
  typedef SmallVector<std::pair<Constant*, BasicBlock*>, 8> PredValueInfoTy;

  // This is used to keep track of what kind of constant we're currently hoping
  // to find.
  enum ConstantPreference {
    WantInteger,        // Condition of a conditional branch or switch.
    WantBlockAddress    // Target of an indirect branch.
  };

  /// This pass performs 'jump threading', which looks at blocks that have
  /// multiple predecessors and multiple successors.  If one or more of the
  /// predecessors of the block can be proven to always jump to one of the
  /// successors, we forward the edge from the predecessor to the successor by
  /// duplicating the contents of this block.
  ///
  /// An example of when this can occur is code like this:
  ///
  ///   if () { ...
  ///     X = 4;
  ///   }
  ///   if (X < 3) {
  ///
  /// In this case, the unconditional branch at the end of the first if can be
  /// revectored to the false side of the second if.
  ///
  class JumpThreading : public FunctionPass {
    const DataLayout *DL;    // Target data layout, null if unavailable.
    TargetLibraryInfo *TLI;  // Required analysis; used for constant folding.
    LazyValueInfo *LVI;      // Required analysis; proves values on CFG edges.
    // Blocks believed to be loop headers (targets of backedges); threading
    // across these is suppressed to avoid creating irreducible loops.  In
    // asserts builds, AssertingVH catches stale pointers to deleted blocks.
#ifdef NDEBUG
    SmallPtrSet<BasicBlock*, 16> LoopHeaders;
#else
    SmallSet<AssertingVH<BasicBlock>, 16> LoopHeaders;
#endif
    // (Value, block) pairs currently on the ComputeValueKnownInPredecessors
    // recursion stack; used to terminate cyclic use-def walks.
    DenseSet<std::pair<Value*, BasicBlock*> > RecursionSet;

    // RAII helper for updating the recursion stack: erases its pair from
    // TheSet when the enclosing scope exits.
    struct RecursionSetRemover {
      DenseSet<std::pair<Value*, BasicBlock*> > &TheSet;
      std::pair<Value*, BasicBlock*> ThePair;

      RecursionSetRemover(DenseSet<std::pair<Value*, BasicBlock*> > &S,
                          std::pair<Value*, BasicBlock*> P)
        : TheSet(S), ThePair(P) { }

      ~RecursionSetRemover() {
        TheSet.erase(ThePair);
      }
    };
  public:
    static char ID; // Pass identification
    JumpThreading() : FunctionPass(ID) {
      initializeJumpThreadingPass(*PassRegistry::getPassRegistry());
    }

    bool runOnFunction(Function &F) override;

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.addRequired<LazyValueInfo>();
      AU.addPreserved<LazyValueInfo>();
      AU.addRequired<TargetLibraryInfo>();
    }

    void FindLoopHeaders(Function &F);
    bool ProcessBlock(BasicBlock *BB);
    bool ThreadEdge(BasicBlock *BB, const SmallVectorImpl<BasicBlock*> &PredBBs,
                    BasicBlock *SuccBB);
    bool DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
                                  const SmallVectorImpl<BasicBlock *> &PredBBs);

    bool ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,
                                         PredValueInfo &Result,
                                         ConstantPreference Preference,
                                         Instruction *CxtI = nullptr);
    bool ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
                                ConstantPreference Preference,
                                Instruction *CxtI = nullptr);

    bool ProcessBranchOnPHI(PHINode *PN);
    bool ProcessBranchOnXOR(BinaryOperator *BO);

    bool SimplifyPartiallyRedundantLoad(LoadInst *LI);
    bool TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB);
  };
}

char JumpThreading::ID = 0;
INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading",
                "Jump Threading", false, false)
INITIALIZE_PASS_DEPENDENCY(LazyValueInfo)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(JumpThreading, "jump-threading",
                "Jump Threading",
                false, false)

// Public interface to the Jump Threading pass
FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); }

/// runOnFunction - Top level algorithm.
///
/// Iterates over every block of F, threading whatever jumps it can, until a
/// full sweep makes no further changes.  Returns true if the function was
/// modified at all.
bool JumpThreading::runOnFunction(Function &F) {
  if (skipOptnoneFunction(F))
    return false;

  DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
  DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
  DL = DLP ? &DLP->getDataLayout() : nullptr;
  TLI = &getAnalysis<TargetLibraryInfo>();
  LVI = &getAnalysis<LazyValueInfo>();

  // Remove unreachable blocks from function as they may result in infinite
  // loop. We do threading if we found something profitable. Jump threading a
  // branch can create other opportunities. If these opportunities form a cycle
  // i.e. if any jump threading is undoing previous threading in the path, then
  // we will loop forever. We take care of this issue by not jump threading for
  // back edges. This works for normal cases but not for unreachable blocks as
  // they may have cycle with no back edge.
  removeUnreachableBlocks(F);

  FindLoopHeaders(F);

  // Note: Changed is (re)assigned at the top of every do-while iteration
  // before it is read; only EverChanged needs an initializer here.
  bool Changed, EverChanged = false;
  do {
    Changed = false;
    for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
      BasicBlock *BB = I;
      // Thread all of the branches we can over this block.
      while (ProcessBlock(BB))
        Changed = true;

      // Advance the iterator now: BB may be deleted below.
      ++I;

      // If the block is trivially dead, zap it. This eliminates the successor
      // edges which simplifies the CFG.
      if (pred_begin(BB) == pred_end(BB) &&
          BB != &BB->getParent()->getEntryBlock()) {
        DEBUG(dbgs() << " JT: Deleting dead block '" << BB->getName()
              << "' with terminator: " << *BB->getTerminator() << '\n');
        // Purge all side tables that may hold a pointer to BB before
        // deleting it, so nothing dangles.
        LoopHeaders.erase(BB);
        LVI->eraseBlock(BB);
        DeleteDeadBlock(BB);
        Changed = true;
        continue;
      }

      BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());

      // Can't thread an unconditional jump, but if the block is "almost
      // empty", we can replace uses of it with uses of the successor and make
      // this dead.
      if (BI && BI->isUnconditional() &&
          BB != &BB->getParent()->getEntryBlock() &&
          // If the terminator is the only non-phi instruction, try to nuke it.
          BB->getFirstNonPHIOrDbg()->isTerminator()) {
        // Since TryToSimplifyUncondBranchFromEmptyBlock may delete the
        // block, we have to make sure it isn't in the LoopHeaders set. We
        // reinsert afterward if needed.
        bool ErasedFromLoopHeaders = LoopHeaders.erase(BB);
        BasicBlock *Succ = BI->getSuccessor(0);

        // FIXME: It is always conservatively correct to drop the info
        // for a block even if it doesn't get erased. This isn't totally
        // awesome, but it allows us to use AssertingVH to prevent nasty
        // dangling pointer issues within LazyValueInfo.
        LVI->eraseBlock(BB);
        if (TryToSimplifyUncondBranchFromEmptyBlock(BB)) {
          Changed = true;
          // If we deleted BB and BB was the header of a loop, then the
          // successor is now the header of the loop.
          BB = Succ;
        }

        if (ErasedFromLoopHeaders)
          LoopHeaders.insert(BB);
      }
    }
    EverChanged |= Changed;
  } while (Changed);

  LoopHeaders.clear();
  return EverChanged;
}

/// getJumpThreadDuplicationCost - Return the cost of duplicating this block to
/// thread across it.
Stop scanning the block when passing the threshold. 00239 static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB, 00240 unsigned Threshold) { 00241 /// Ignore PHI nodes, these will be flattened when duplication happens. 00242 BasicBlock::const_iterator I = BB->getFirstNonPHI(); 00243 00244 // FIXME: THREADING will delete values that are just used to compute the 00245 // branch, so they shouldn't count against the duplication cost. 00246 00247 // Sum up the cost of each instruction until we get to the terminator. Don't 00248 // include the terminator because the copy won't include it. 00249 unsigned Size = 0; 00250 for (; !isa<TerminatorInst>(I); ++I) { 00251 00252 // Stop scanning the block if we've reached the threshold. 00253 if (Size > Threshold) 00254 return Size; 00255 00256 // Debugger intrinsics don't incur code size. 00257 if (isa<DbgInfoIntrinsic>(I)) continue; 00258 00259 // If this is a pointer->pointer bitcast, it is free. 00260 if (isa<BitCastInst>(I) && I->getType()->isPointerTy()) 00261 continue; 00262 00263 // All other instructions count for at least one unit. 00264 ++Size; 00265 00266 // Calls are more expensive. If they are non-intrinsic calls, we model them 00267 // as having cost of 4. If they are a non-vector intrinsic, we model them 00268 // as having cost of 2 total, and if they are a vector intrinsic, we model 00269 // them as having cost 1. 00270 if (const CallInst *CI = dyn_cast<CallInst>(I)) { 00271 if (CI->cannotDuplicate()) 00272 // Blocks with NoDuplicate are modelled as having infinite cost, so they 00273 // are never duplicated. 00274 return ~0U; 00275 else if (!isa<IntrinsicInst>(CI)) 00276 Size += 3; 00277 else if (!CI->getType()->isVectorTy()) 00278 Size += 1; 00279 } 00280 } 00281 00282 // Threading through a switch statement is particularly profitable. If this 00283 // block ends in a switch, decrease its cost to make it more likely to happen. 00284 if (isa<SwitchInst>(I)) 00285 Size = Size > 6 ? 
Size-6 : 0; 00286 00287 // The same holds for indirect branches, but slightly more so. 00288 if (isa<IndirectBrInst>(I)) 00289 Size = Size > 8 ? Size-8 : 0; 00290 00291 return Size; 00292 } 00293 00294 /// FindLoopHeaders - We do not want jump threading to turn proper loop 00295 /// structures into irreducible loops. Doing this breaks up the loop nesting 00296 /// hierarchy and pessimizes later transformations. To prevent this from 00297 /// happening, we first have to find the loop headers. Here we approximate this 00298 /// by finding targets of backedges in the CFG. 00299 /// 00300 /// Note that there definitely are cases when we want to allow threading of 00301 /// edges across a loop header. For example, threading a jump from outside the 00302 /// loop (the preheader) to an exit block of the loop is definitely profitable. 00303 /// It is also almost always profitable to thread backedges from within the loop 00304 /// to exit blocks, and is often profitable to thread backedges to other blocks 00305 /// within the loop (forming a nested loop). This simple analysis is not rich 00306 /// enough to track all of these properties and keep it up-to-date as the CFG 00307 /// mutates, so we don't allow any of these transformations. 00308 /// 00309 void JumpThreading::FindLoopHeaders(Function &F) { 00310 SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> Edges; 00311 FindFunctionBackedges(F, Edges); 00312 00313 for (unsigned i = 0, e = Edges.size(); i != e; ++i) 00314 LoopHeaders.insert(const_cast<BasicBlock*>(Edges[i].second)); 00315 } 00316 00317 /// getKnownConstant - Helper method to determine if we can thread over a 00318 /// terminator with the given value as its condition, and if so what value to 00319 /// use for that. What kind of value this is depends on whether we want an 00320 /// integer or a block address, but an undef is always accepted. 00321 /// Returns null if Val is null or not an appropriate constant. 
static Constant *getKnownConstant(Value *Val, ConstantPreference Preference) {
  if (!Val)
    return nullptr;

  // Undef is "known" enough.
  if (UndefValue *U = dyn_cast<UndefValue>(Val))
    return U;

  if (Preference == WantBlockAddress)
    return dyn_cast<BlockAddress>(Val->stripPointerCasts());

  return dyn_cast<ConstantInt>(Val);
}

/// ComputeValueKnownInPredecessors - Given a basic block BB and a value V, see
/// if we can infer that the value is a known ConstantInt/BlockAddress or undef
/// in any of our predecessors.  If so, return the known list of value and pred
/// BB in the result vector.
///
/// \param V          The value whose per-predecessor constants are sought.
/// \param BB         The block whose predecessors are examined.
/// \param Result     Filled with (constant, predecessor) pairs.
/// \param Preference Whether an integer or block-address constant is wanted.
/// \param CxtI       Optional context instruction passed through to LVI.
///
/// This returns true if there were any known values.
///
bool JumpThreading::
ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
                                ConstantPreference Preference,
                                Instruction *CxtI) {
  // This method walks up use-def chains recursively.  Because of this, we
  // could get into an infinite loop going around loops in the use-def chain.
  // To prevent this, keep track of what (value, block) pairs we've already
  // visited and terminate the search if we loop back to them.
  if (!RecursionSet.insert(std::make_pair(V, BB)).second)
    return false;

  // An RAII helper to remove this pair from the recursion set once the
  // recursion stack pops back out again.
  RecursionSetRemover remover(RecursionSet, std::make_pair(V, BB));

  // If V is a constant, then it is known in all predecessors.
  if (Constant *KC = getKnownConstant(V, Preference)) {
    for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
      Result.push_back(std::make_pair(KC, *PI));

    return true;
  }

  // If V is a non-instruction value, or an instruction in a different block,
  // then it can't be derived from a PHI.
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I || I->getParent() != BB) {

    // Okay, if this is a live-in value, see if it has a known value at the end
    // of any of our predecessors.
    //
    // FIXME: This should be an edge property, not a block end property.
    /// TODO: Per PR2563, we could infer value range information about a
    /// predecessor based on its terminator.
    //
    // FIXME: change this to use the more-rich 'getPredicateOnEdge' method if
    // "I" is a non-local compare-with-a-constant instruction.  This would be
    // able to handle value inequalities better, for example if the compare is
    // "X < 4" and "X < 3" is known true but "X < 4" itself is not available.
    // Perhaps getConstantOnEdge should be smart enough to do this?

    for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
      BasicBlock *P = *PI;
      // If the value is known by LazyValueInfo to be a constant in a
      // predecessor, use that information to try to thread this block.
      Constant *PredCst = LVI->getConstantOnEdge(V, P, BB, CxtI);
      if (Constant *KC = getKnownConstant(PredCst, Preference))
        Result.push_back(std::make_pair(KC, P));
    }

    return !Result.empty();
  }

  /// If I is a PHI node, then we know the incoming values for any constants.
  if (PHINode *PN = dyn_cast<PHINode>(I)) {
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
      Value *InVal = PN->getIncomingValue(i);
      if (Constant *KC = getKnownConstant(InVal, Preference)) {
        // The incoming value is itself a suitable constant.
        Result.push_back(std::make_pair(KC, PN->getIncomingBlock(i)));
      } else {
        // Otherwise, ask LVI whether the incoming value is constant on the
        // incoming edge.
        Constant *CI = LVI->getConstantOnEdge(InVal,
                                              PN->getIncomingBlock(i),
                                              BB, CxtI);
        if (Constant *KC = getKnownConstant(CI, Preference))
          Result.push_back(std::make_pair(KC, PN->getIncomingBlock(i)));
      }
    }

    return !Result.empty();
  }

  PredValueInfoTy LHSVals, RHSVals;

  // Handle some boolean conditions.
  if (I->getType()->getPrimitiveSizeInBits() == 1) {
    assert(Preference == WantInteger && "One-bit non-integer type?");
    // X | true -> true
    // X & false -> false
    if (I->getOpcode() == Instruction::Or ||
        I->getOpcode() == Instruction::And) {
      ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals,
                                      WantInteger, CxtI);
      ComputeValueKnownInPredecessors(I->getOperand(1), BB, RHSVals,
                                      WantInteger, CxtI);

      if (LHSVals.empty() && RHSVals.empty())
        return false;

      // The value that short-circuits the operator: true for 'or', false
      // for 'and'.
      ConstantInt *InterestingVal;
      if (I->getOpcode() == Instruction::Or)
        InterestingVal = ConstantInt::getTrue(I->getContext());
      else
        InterestingVal = ConstantInt::getFalse(I->getContext());

      SmallPtrSet<BasicBlock*, 4> LHSKnownBBs;

      // Scan for the sentinel.  If we find an undef, force it to the
      // interesting value: x|undef -> true and x&undef -> false.
      for (unsigned i = 0, e = LHSVals.size(); i != e; ++i)
        if (LHSVals[i].first == InterestingVal ||
            isa<UndefValue>(LHSVals[i].first)) {
          Result.push_back(LHSVals[i]);
          Result.back().first = InterestingVal;
          LHSKnownBBs.insert(LHSVals[i].second);
        }
      for (unsigned i = 0, e = RHSVals.size(); i != e; ++i)
        if (RHSVals[i].first == InterestingVal ||
            isa<UndefValue>(RHSVals[i].first)) {
          // If we already inferred a value for this block on the LHS, don't
          // re-add it.
          if (!LHSKnownBBs.count(RHSVals[i].second)) {
            Result.push_back(RHSVals[i]);
            Result.back().first = InterestingVal;
          }
        }

      return !Result.empty();
    }

    // Handle the NOT form of XOR.
    if (I->getOpcode() == Instruction::Xor &&
        isa<ConstantInt>(I->getOperand(1)) &&
        cast<ConstantInt>(I->getOperand(1))->isOne()) {
      ComputeValueKnownInPredecessors(I->getOperand(0), BB, Result,
                                      WantInteger, CxtI);
      if (Result.empty())
        return false;

      // Invert the known values.
      for (unsigned i = 0, e = Result.size(); i != e; ++i)
        Result[i].first = ConstantExpr::getNot(Result[i].first);

      return true;
    }

  // Try to simplify some other binary operator values.
  } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
    assert(Preference != WantBlockAddress
           && "A binary operator creating a block address?");
    if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
      PredValueInfoTy LHSVals;
      ComputeValueKnownInPredecessors(BO->getOperand(0), BB, LHSVals,
                                      WantInteger, CxtI);

      // Try to use constant folding to simplify the binary operator.
      for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) {
        Constant *V = LHSVals[i].first;
        Constant *Folded = ConstantExpr::get(BO->getOpcode(), V, CI);

        if (Constant *KC = getKnownConstant(Folded, WantInteger))
          Result.push_back(std::make_pair(KC, LHSVals[i].second));
      }
    }

    return !Result.empty();
  }

  // Handle compare with phi operand, where the PHI is defined in this block.
  if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
    assert(Preference == WantInteger && "Compares only produce integers");
    PHINode *PN = dyn_cast<PHINode>(Cmp->getOperand(0));
    if (PN && PN->getParent() == BB) {
      // We can do this simplification if any comparisons fold to true or
      // false.  See if any do.
      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
        BasicBlock *PredBB = PN->getIncomingBlock(i);
        Value *LHS = PN->getIncomingValue(i);
        // Translate the RHS into the predecessor's terms in case it is a PHI
        // in this block as well.
        Value *RHS = Cmp->getOperand(1)->DoPHITranslation(BB, PredBB);

        Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, DL);
        if (!Res) {
          if (!isa<Constant>(RHS))
            continue;

          // The compare did not fold; ask LVI whether the predicate is known
          // on this particular incoming edge.
          LazyValueInfo::Tristate
            ResT = LVI->getPredicateOnEdge(Cmp->getPredicate(), LHS,
                                           cast<Constant>(RHS), PredBB, BB,
                                           CxtI ? CxtI : Cmp);
          if (ResT == LazyValueInfo::Unknown)
            continue;
          Res = ConstantInt::get(Type::getInt1Ty(LHS->getContext()), ResT);
        }

        if (Constant *KC = getKnownConstant(Res, WantInteger))
          Result.push_back(std::make_pair(KC, PredBB));
      }

      return !Result.empty();
    }

    // If comparing a live-in value against a constant, see if we know the
    // live-in value on any predecessors.
    if (isa<Constant>(Cmp->getOperand(1)) && Cmp->getType()->isIntegerTy()) {
      if (!isa<Instruction>(Cmp->getOperand(0)) ||
          cast<Instruction>(Cmp->getOperand(0))->getParent() != BB) {
        Constant *RHSCst = cast<Constant>(Cmp->getOperand(1));

        for (pred_iterator PI = pred_begin(BB), E = pred_end(BB);PI != E; ++PI){
          BasicBlock *P = *PI;
          // If the value is known by LazyValueInfo to be a constant in a
          // predecessor, use that information to try to thread this block.
          LazyValueInfo::Tristate Res =
            LVI->getPredicateOnEdge(Cmp->getPredicate(), Cmp->getOperand(0),
                                    RHSCst, P, BB, CxtI ? CxtI : Cmp);
          if (Res == LazyValueInfo::Unknown)
            continue;

          Constant *ResC = ConstantInt::get(Cmp->getType(), Res);
          Result.push_back(std::make_pair(ResC, P));
        }

        return !Result.empty();
      }

      // Try to find a constant value for the LHS of a comparison,
      // and evaluate it statically if we can.
      if (Constant *CmpConst = dyn_cast<Constant>(Cmp->getOperand(1))) {
        PredValueInfoTy LHSVals;
        ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals,
                                        WantInteger, CxtI);

        for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) {
          Constant *V = LHSVals[i].first;
          Constant *Folded = ConstantExpr::getCompare(Cmp->getPredicate(),
                                                      V, CmpConst);
          if (Constant *KC = getKnownConstant(Folded, WantInteger))
            Result.push_back(std::make_pair(KC, LHSVals[i].second));
        }

        return !Result.empty();
      }
    }
  }

  if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
    // Handle select instructions where at least one operand is a known
    // constant and we can figure out the condition value for any predecessor
    // block.
    Constant *TrueVal = getKnownConstant(SI->getTrueValue(), Preference);
    Constant *FalseVal = getKnownConstant(SI->getFalseValue(), Preference);
    PredValueInfoTy Conds;
    if ((TrueVal || FalseVal) &&
        ComputeValueKnownInPredecessors(SI->getCondition(), BB, Conds,
                                        WantInteger, CxtI)) {
      for (unsigned i = 0, e = Conds.size(); i != e; ++i) {
        Constant *Cond = Conds[i].first;

        // Figure out what value to use for the condition.
        bool KnownCond;
        if (ConstantInt *CI = dyn_cast<ConstantInt>(Cond)) {
          // A known boolean.
          KnownCond = CI->isOne();
        } else {
          assert(isa<UndefValue>(Cond) && "Unexpected condition value");
          // Either operand will do, so be sure to pick the one that's a known
          // constant.
          // FIXME: Do this more cleverly if both values are known constants?
          KnownCond = (TrueVal != nullptr);
        }

        // See if the select has a known constant value for this predecessor.
        if (Constant *Val = KnownCond ? TrueVal : FalseVal)
          Result.push_back(std::make_pair(Val, Conds[i].second));
      }

      return !Result.empty();
    }
  }

  // If all else fails, see if LVI can figure out a constant value for us.
  Constant *CI = LVI->getConstant(V, BB, CxtI);
  if (Constant *KC = getKnownConstant(CI, Preference)) {
    for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
      Result.push_back(std::make_pair(KC, *PI));
  }

  return !Result.empty();
}



/// GetBestDestForJumpOnUndef - If we determine that the specified block ends
/// in an undefined jump, decide which block is best to revector to.
///
/// Since we can pick an arbitrary destination, we pick the successor with the
/// fewest predecessors.  This should reduce the in-degree of the others.
00628 /// 00629 static unsigned GetBestDestForJumpOnUndef(BasicBlock *BB) { 00630 TerminatorInst *BBTerm = BB->getTerminator(); 00631 unsigned MinSucc = 0; 00632 BasicBlock *TestBB = BBTerm->getSuccessor(MinSucc); 00633 // Compute the successor with the minimum number of predecessors. 00634 unsigned MinNumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB)); 00635 for (unsigned i = 1, e = BBTerm->getNumSuccessors(); i != e; ++i) { 00636 TestBB = BBTerm->getSuccessor(i); 00637 unsigned NumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB)); 00638 if (NumPreds < MinNumPreds) { 00639 MinSucc = i; 00640 MinNumPreds = NumPreds; 00641 } 00642 } 00643 00644 return MinSucc; 00645 } 00646 00647 static bool hasAddressTakenAndUsed(BasicBlock *BB) { 00648 if (!BB->hasAddressTaken()) return false; 00649 00650 // If the block has its address taken, it may be a tree of dead constants 00651 // hanging off of it. These shouldn't keep the block alive. 00652 BlockAddress *BA = BlockAddress::get(BB); 00653 BA->removeDeadConstantUsers(); 00654 return !BA->use_empty(); 00655 } 00656 00657 /// ProcessBlock - If there are any predecessors whose control can be threaded 00658 /// through to a successor, transform them now. 00659 bool JumpThreading::ProcessBlock(BasicBlock *BB) { 00660 // If the block is trivially dead, just return and let the caller nuke it. 00661 // This simplifies other transformations. 00662 if (pred_begin(BB) == pred_end(BB) && 00663 BB != &BB->getParent()->getEntryBlock()) 00664 return false; 00665 00666 // If this block has a single predecessor, and if that pred has a single 00667 // successor, merge the blocks. This encourages recursive jump threading 00668 // because now the condition in this block can be threaded through 00669 // predecessors of our predecessor block. 
00670 if (BasicBlock *SinglePred = BB->getSinglePredecessor()) { 00671 if (SinglePred->getTerminator()->getNumSuccessors() == 1 && 00672 SinglePred != BB && !hasAddressTakenAndUsed(BB)) { 00673 // If SinglePred was a loop header, BB becomes one. 00674 if (LoopHeaders.erase(SinglePred)) 00675 LoopHeaders.insert(BB); 00676 00677 LVI->eraseBlock(SinglePred); 00678 MergeBasicBlockIntoOnlyPred(BB); 00679 00680 return true; 00681 } 00682 } 00683 00684 // What kind of constant we're looking for. 00685 ConstantPreference Preference = WantInteger; 00686 00687 // Look to see if the terminator is a conditional branch, switch or indirect 00688 // branch, if not we can't thread it. 00689 Value *Condition; 00690 Instruction *Terminator = BB->getTerminator(); 00691 if (BranchInst *BI = dyn_cast<BranchInst>(Terminator)) { 00692 // Can't thread an unconditional jump. 00693 if (BI->isUnconditional()) return false; 00694 Condition = BI->getCondition(); 00695 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Terminator)) { 00696 Condition = SI->getCondition(); 00697 } else if (IndirectBrInst *IB = dyn_cast<IndirectBrInst>(Terminator)) { 00698 // Can't thread indirect branch with no successors. 00699 if (IB->getNumSuccessors() == 0) return false; 00700 Condition = IB->getAddress()->stripPointerCasts(); 00701 Preference = WantBlockAddress; 00702 } else { 00703 return false; // Must be an invoke. 00704 } 00705 00706 // Run constant folding to see if we can reduce the condition to a simple 00707 // constant. 00708 if (Instruction *I = dyn_cast<Instruction>(Condition)) { 00709 Value *SimpleVal = ConstantFoldInstruction(I, DL, TLI); 00710 if (SimpleVal) { 00711 I->replaceAllUsesWith(SimpleVal); 00712 I->eraseFromParent(); 00713 Condition = SimpleVal; 00714 } 00715 } 00716 00717 // If the terminator is branching on an undef, we can pick any of the 00718 // successors to branch to. Let GetBestDestForJumpOnUndef decide. 
00719 if (isa<UndefValue>(Condition)) { 00720 unsigned BestSucc = GetBestDestForJumpOnUndef(BB); 00721 00722 // Fold the branch/switch. 00723 TerminatorInst *BBTerm = BB->getTerminator(); 00724 for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) { 00725 if (i == BestSucc) continue; 00726 BBTerm->getSuccessor(i)->removePredecessor(BB, true); 00727 } 00728 00729 DEBUG(dbgs() << " In block '" << BB->getName() 00730 << "' folding undef terminator: " << *BBTerm << '\n'); 00731 BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm); 00732 BBTerm->eraseFromParent(); 00733 return true; 00734 } 00735 00736 // If the terminator of this block is branching on a constant, simplify the 00737 // terminator to an unconditional branch. This can occur due to threading in 00738 // other blocks. 00739 if (getKnownConstant(Condition, Preference)) { 00740 DEBUG(dbgs() << " In block '" << BB->getName() 00741 << "' folding terminator: " << *BB->getTerminator() << '\n'); 00742 ++NumFolds; 00743 ConstantFoldTerminator(BB, true); 00744 return true; 00745 } 00746 00747 Instruction *CondInst = dyn_cast<Instruction>(Condition); 00748 00749 // All the rest of our checks depend on the condition being an instruction. 00750 if (!CondInst) { 00751 // FIXME: Unify this with code below. 00752 if (ProcessThreadableEdges(Condition, BB, Preference, Terminator)) 00753 return true; 00754 return false; 00755 } 00756 00757 00758 if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) { 00759 // For a comparison where the LHS is outside this block, it's possible 00760 // that we've branched on it before. Used LVI to see if we can simplify 00761 // the branch based on that. 
00762 BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator()); 00763 Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1)); 00764 pred_iterator PI = pred_begin(BB), PE = pred_end(BB); 00765 if (CondBr && CondConst && CondBr->isConditional() && PI != PE && 00766 (!isa<Instruction>(CondCmp->getOperand(0)) || 00767 cast<Instruction>(CondCmp->getOperand(0))->getParent() != BB)) { 00768 // For predecessor edge, determine if the comparison is true or false 00769 // on that edge. If they're all true or all false, we can simplify the 00770 // branch. 00771 // FIXME: We could handle mixed true/false by duplicating code. 00772 LazyValueInfo::Tristate Baseline = 00773 LVI->getPredicateOnEdge(CondCmp->getPredicate(), CondCmp->getOperand(0), 00774 CondConst, *PI, BB, CondCmp); 00775 if (Baseline != LazyValueInfo::Unknown) { 00776 // Check that all remaining incoming values match the first one. 00777 while (++PI != PE) { 00778 LazyValueInfo::Tristate Ret = 00779 LVI->getPredicateOnEdge(CondCmp->getPredicate(), 00780 CondCmp->getOperand(0), CondConst, *PI, BB, 00781 CondCmp); 00782 if (Ret != Baseline) break; 00783 } 00784 00785 // If we terminated early, then one of the values didn't match. 00786 if (PI == PE) { 00787 unsigned ToRemove = Baseline == LazyValueInfo::True ? 1 : 0; 00788 unsigned ToKeep = Baseline == LazyValueInfo::True ? 0 : 1; 00789 CondBr->getSuccessor(ToRemove)->removePredecessor(BB, true); 00790 BranchInst::Create(CondBr->getSuccessor(ToKeep), CondBr); 00791 CondBr->eraseFromParent(); 00792 return true; 00793 } 00794 } 00795 00796 } else if (CondBr && CondConst && CondBr->isConditional()) { 00797 // There might be an invairant in the same block with the conditional 00798 // that can determine the predicate. 
00799 00800 LazyValueInfo::Tristate Ret = 00801 LVI->getPredicateAt(CondCmp->getPredicate(), CondCmp->getOperand(0), 00802 CondConst, CondCmp); 00803 if (Ret != LazyValueInfo::Unknown) { 00804 unsigned ToRemove = Ret == LazyValueInfo::True ? 1 : 0; 00805 unsigned ToKeep = Ret == LazyValueInfo::True ? 0 : 1; 00806 CondBr->getSuccessor(ToRemove)->removePredecessor(BB, true); 00807 BranchInst::Create(CondBr->getSuccessor(ToKeep), CondBr); 00808 CondBr->eraseFromParent(); 00809 return true; 00810 } 00811 } 00812 00813 if (CondBr && CondConst && TryToUnfoldSelect(CondCmp, BB)) 00814 return true; 00815 } 00816 00817 // Check for some cases that are worth simplifying. Right now we want to look 00818 // for loads that are used by a switch or by the condition for the branch. If 00819 // we see one, check to see if it's partially redundant. If so, insert a PHI 00820 // which can then be used to thread the values. 00821 // 00822 Value *SimplifyValue = CondInst; 00823 if (CmpInst *CondCmp = dyn_cast<CmpInst>(SimplifyValue)) 00824 if (isa<Constant>(CondCmp->getOperand(1))) 00825 SimplifyValue = CondCmp->getOperand(0); 00826 00827 // TODO: There are other places where load PRE would be profitable, such as 00828 // more complex comparisons. 00829 if (LoadInst *LI = dyn_cast<LoadInst>(SimplifyValue)) 00830 if (SimplifyPartiallyRedundantLoad(LI)) 00831 return true; 00832 00833 00834 // Handle a variety of cases where we are branching on something derived from 00835 // a PHI node in the current block. If we can prove that any predecessors 00836 // compute a predictable value based on a PHI node, thread those predecessors. 00837 // 00838 if (ProcessThreadableEdges(CondInst, BB, Preference, Terminator)) 00839 return true; 00840 00841 // If this is an otherwise-unfoldable branch on a phi node in the current 00842 // block, see if we can simplify. 
  if (PHINode *PN = dyn_cast<PHINode>(CondInst))
    if (PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
      return ProcessBranchOnPHI(PN);


  // If this is an otherwise-unfoldable branch on a XOR, see if we can simplify.
  if (CondInst->getOpcode() == Instruction::Xor &&
      CondInst->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
    return ProcessBranchOnXOR(cast<BinaryOperator>(CondInst));


  // TODO: If we have: "br (X > 0)"  and we have a predecessor where we know
  // "(X == 4)", thread through this block.

  return false;
}

/// SimplifyPartiallyRedundantLoad - If LI is an obviously partially redundant
/// load instruction, eliminate it by replacing it with a PHI node.  This is an
/// important optimization that encourages jump threading, and needs to be run
/// interlaced with other jump threading tasks.
///
/// Returns true if the load was eliminated (and LI erased); the IR is updated
/// in place, possibly splitting predecessor edges to insert a single reload.
bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
  // Don't hack volatile/atomic loads.
  if (!LI->isSimple()) return false;

  // If the load is defined in a block with exactly one predecessor, it can't be
  // partially redundant.
  BasicBlock *LoadBB = LI->getParent();
  if (LoadBB->getSinglePredecessor())
    return false;

  // If the load is defined in a landing pad, it can't be partially redundant,
  // because the edges between the invoke and the landing pad cannot have other
  // instructions between them.
  if (LoadBB->isLandingPad())
    return false;

  Value *LoadedPtr = LI->getOperand(0);

  // If the loaded operand is defined in the LoadBB, it can't be available.
  // TODO: Could do simple PHI translation, that would be fun :)
  if (Instruction *PtrOp = dyn_cast<Instruction>(LoadedPtr))
    if (PtrOp->getParent() == LoadBB)
      return false;

  // Scan a few instructions up from the load, to see if it is obviously live at
  // the entry to its block.
  BasicBlock::iterator BBIt = LI;

  if (Value *AvailableVal =
        FindAvailableLoadedValue(LoadedPtr, LoadBB, BBIt, 6)) {
    // If the value of the load is locally available within the block, just use
    // it.  This frequently occurs for reg2mem'd allocas.
    //cerr << "LOAD ELIMINATED:\n" << *BBIt << *LI << "\n";

    // If the returned value is the load itself, replace with an undef. This can
    // only happen in dead loops.
    if (AvailableVal == LI) AvailableVal = UndefValue::get(LI->getType());
    LI->replaceAllUsesWith(AvailableVal);
    LI->eraseFromParent();
    return true;
  }

  // Otherwise, if we scanned the whole block and got to the top of the block,
  // we know the block is locally transparent to the load.  If not, something
  // might clobber its value.
  if (BBIt != LoadBB->begin())
    return false;

  // If all of the loads and stores that feed the value have the same AA tags,
  // then we can propagate them onto any newly inserted loads.
  AAMDNodes AATags;
  LI->getAAMetadata(AATags);

  SmallPtrSet<BasicBlock*, 8> PredsScanned;
  typedef SmallVector<std::pair<BasicBlock*, Value*>, 8> AvailablePredsTy;
  AvailablePredsTy AvailablePreds;
  BasicBlock *OneUnavailablePred = nullptr;

  // If we got here, the loaded value is transparent through to the start of the
  // block.  Check to see if it is available in any of the predecessor blocks.
  for (pred_iterator PI = pred_begin(LoadBB), PE = pred_end(LoadBB);
       PI != PE; ++PI) {
    BasicBlock *PredBB = *PI;

    // If we already scanned this predecessor, skip it.
    if (!PredsScanned.insert(PredBB))
      continue;

    // Scan the predecessor to see if the value is available in the pred.
    BBIt = PredBB->end();
    AAMDNodes ThisAATags;
    Value *PredAvailable = FindAvailableLoadedValue(LoadedPtr, PredBB, BBIt, 6,
                                                    nullptr, &ThisAATags);
    if (!PredAvailable) {
      // Remember one (arbitrary) predecessor lacking the value; used below to
      // recognize the "exactly one unavailable pred" fast path.
      OneUnavailablePred = PredBB;
      continue;
    }

    // If AA tags disagree or are not present, forget about them.
    if (AATags != ThisAATags) AATags = AAMDNodes();

    // If so, this load is partially redundant.  Remember this info so that we
    // can create a PHI node.
    AvailablePreds.push_back(std::make_pair(PredBB, PredAvailable));
  }

  // If the loaded value isn't available in any predecessor, it isn't partially
  // redundant.
  if (AvailablePreds.empty()) return false;

  // Okay, the loaded value is available in at least one (and maybe all!)
  // predecessors.  If the value is unavailable in more than one unique
  // predecessor, we want to insert a merge block for those common predecessors.
  // This ensures that we only have to insert one reload, thus not increasing
  // code size.
  BasicBlock *UnavailablePred = nullptr;

  // If there is exactly one predecessor where the value is unavailable, the
  // already computed 'OneUnavailablePred' block is it.  If it ends in an
  // unconditional branch, we know that it isn't a critical edge.
  if (PredsScanned.size() == AvailablePreds.size()+1 &&
      OneUnavailablePred->getTerminator()->getNumSuccessors() == 1) {
    UnavailablePred = OneUnavailablePred;
  } else if (PredsScanned.size() != AvailablePreds.size()) {
    // Otherwise, we had multiple unavailable predecessors or we had a critical
    // edge from the one.
    SmallVector<BasicBlock*, 8> PredsToSplit;
    SmallPtrSet<BasicBlock*, 8> AvailablePredSet;

    for (unsigned i = 0, e = AvailablePreds.size(); i != e; ++i)
      AvailablePredSet.insert(AvailablePreds[i].first);

    // Add all the unavailable predecessors to the PredsToSplit list.
    for (pred_iterator PI = pred_begin(LoadBB), PE = pred_end(LoadBB);
         PI != PE; ++PI) {
      BasicBlock *P = *PI;
      // If the predecessor is an indirect goto, we can't split the edge.
      if (isa<IndirectBrInst>(P->getTerminator()))
        return false;

      if (!AvailablePredSet.count(P))
        PredsToSplit.push_back(P);
    }

    // Split them out to their own block.
    UnavailablePred =
      SplitBlockPredecessors(LoadBB, PredsToSplit, "thread-pre-split", this);
  }

  // If the value isn't available in all predecessors, then there will be
  // exactly one where it isn't available.  Insert a load on that edge and add
  // it to the AvailablePreds list.
  if (UnavailablePred) {
    assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 &&
           "Can't handle critical edge here!");
    LoadInst *NewVal = new LoadInst(LoadedPtr, LI->getName()+".pr", false,
                                    LI->getAlignment(),
                                    UnavailablePred->getTerminator());
    NewVal->setDebugLoc(LI->getDebugLoc());
    if (AATags)
      NewVal->setAAMetadata(AATags);

    AvailablePreds.push_back(std::make_pair(UnavailablePred, NewVal));
  }

  // Now we know that each predecessor of this block has a value in
  // AvailablePreds, sort them for efficient access as we're walking the preds.
  array_pod_sort(AvailablePreds.begin(), AvailablePreds.end());

  // Create a PHI node at the start of the block for the PRE'd load value.
  pred_iterator PB = pred_begin(LoadBB), PE = pred_end(LoadBB);
  PHINode *PN = PHINode::Create(LI->getType(), std::distance(PB, PE), "",
                                LoadBB->begin());
  PN->takeName(LI);
  PN->setDebugLoc(LI->getDebugLoc());

  // Insert new entries into the PHI for each predecessor.  A single block may
  // have multiple entries here (e.g. a switch with several edges into LoadBB),
  // so look the value up by block with lower_bound on the sorted list.
  for (pred_iterator PI = PB; PI != PE; ++PI) {
    BasicBlock *P = *PI;
    AvailablePredsTy::iterator I =
      std::lower_bound(AvailablePreds.begin(), AvailablePreds.end(),
                       std::make_pair(P, (Value*)nullptr));

    assert(I != AvailablePreds.end() && I->first == P &&
           "Didn't find entry for predecessor!");

    PN->addIncoming(I->second, I->first);
  }

  //cerr << "PRE: " << *LI << *PN << "\n";

  LI->replaceAllUsesWith(PN);
  LI->eraseFromParent();

  return true;
}

/// FindMostPopularDest - The specified list contains multiple possible
/// threadable destinations.  Pick the one that occurs the most frequently in
/// the list.
static BasicBlock *
FindMostPopularDest(BasicBlock *BB,
                    const SmallVectorImpl<std::pair<BasicBlock*,
                                  BasicBlock*> > &PredToDestList) {
  assert(!PredToDestList.empty());

  // Determine popularity.  If there are multiple possible destinations, we
  // explicitly choose to ignore 'undef' destinations.  We prefer to thread
  // blocks with known and real destinations to threading undef.  We'll handle
  // them later if interesting.
  DenseMap<BasicBlock*, unsigned> DestPopularity;
  for (unsigned i = 0, e = PredToDestList.size(); i != e; ++i)
    if (PredToDestList[i].second)
      DestPopularity[PredToDestList[i].second]++;

  // Find the most popular dest.
  DenseMap<BasicBlock*, unsigned>::iterator DPI = DestPopularity.begin();
  BasicBlock *MostPopularDest = DPI->first;
  unsigned Popularity = DPI->second;
  SamePopularity keeps every block tied with the current leader so that a
  // deterministic tie-break can be applied below.
  SmallVector<BasicBlock*, 4> SamePopularity;

  for (++DPI; DPI != DestPopularity.end(); ++DPI) {
    // If the popularity of this entry isn't higher than the popularity we've
    // seen so far, ignore it.
    if (DPI->second < Popularity)
      ; // ignore.
    else if (DPI->second == Popularity) {
      // If it is the same as what we've seen so far, keep track of it.
      SamePopularity.push_back(DPI->first);
    } else {
      // If it is more popular, remember it.
      SamePopularity.clear();
      MostPopularDest = DPI->first;
      Popularity = DPI->second;
    }
  }

  // Okay, now we know the most popular destination.  If there is more than one
  // destination, we need to determine one.  This is arbitrary, but we need
  // to make a deterministic decision.  Pick the first one that appears in the
  // successor list.
  if (!SamePopularity.empty()) {
    SamePopularity.push_back(MostPopularDest);
    TerminatorInst *TI = BB->getTerminator();
    for (unsigned i = 0; ; ++i) {
      assert(i != TI->getNumSuccessors() && "Didn't find any successor!");

      if (std::find(SamePopularity.begin(), SamePopularity.end(),
                    TI->getSuccessor(i)) == SamePopularity.end())
        continue;

      MostPopularDest = TI->getSuccessor(i);
      break;
    }
  }

  // Okay, we have finally picked the most popular destination.
  return MostPopularDest;
}

/// ProcessThreadableEdges - Compute which predecessors force a known value for
/// Cond, map each such predecessor to the successor of BB it would take, and
/// thread the batch going to the most popular destination.  Returns true if
/// the CFG was changed.
bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
                                           ConstantPreference Preference,
                                           Instruction *CxtI) {
  // If threading this would thread across a loop header, don't even try to
  // thread the edge.
  if (LoopHeaders.count(BB))
    return false;

  PredValueInfoTy PredValues;
  if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues, Preference, CxtI))
    return false;

  assert(!PredValues.empty() &&
         "ComputeValueKnownInPredecessors returned true with no values");

  DEBUG(dbgs() << "IN BB: " << *BB;
        for (unsigned i = 0, e = PredValues.size(); i != e; ++i) {
          dbgs() << "  BB '" << BB->getName() << "': FOUND condition = "
            << *PredValues[i].first
            << " for pred '" << PredValues[i].second->getName() << "'.\n";
        });

  // Decide what we want to thread through.  Convert our list of known values to
  // a list of known destinations for each pred.  This also discards duplicate
  // predecessors and keeps track of the undefined inputs (which are represented
  // as a null dest in the PredToDestList).
  SmallPtrSet<BasicBlock*, 16> SeenPreds;
  SmallVector<std::pair<BasicBlock*, BasicBlock*>, 16> PredToDestList;

  BasicBlock *OnlyDest = nullptr;
  // Sentinel (never a real block) marking "more than one distinct dest seen".
  BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL;

  for (unsigned i = 0, e = PredValues.size(); i != e; ++i) {
    BasicBlock *Pred = PredValues[i].second;
    if (!SeenPreds.insert(Pred))
      continue;  // Duplicate predecessor entry.

    // If the predecessor ends with an indirect goto, we can't change its
    // destination.
    if (isa<IndirectBrInst>(Pred->getTerminator()))
      continue;

    Constant *Val = PredValues[i].first;

    // Translate the known condition value into the successor BB's terminator
    // would take; null encodes an undef input (destination is ours to pick).
    BasicBlock *DestBB;
    if (isa<UndefValue>(Val))
      DestBB = nullptr;
    else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()))
      DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero());
    else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
      DestBB = SI->findCaseValue(cast<ConstantInt>(Val)).getCaseSuccessor();
    } else {
      assert(isa<IndirectBrInst>(BB->getTerminator())
              && "Unexpected terminator");
      DestBB = cast<BlockAddress>(Val)->getBasicBlock();
    }

    // If we have exactly one destination, remember it for efficiency below.
    if (PredToDestList.empty())
      OnlyDest = DestBB;
    else if (OnlyDest != DestBB)
      OnlyDest = MultipleDestSentinel;

    PredToDestList.push_back(std::make_pair(Pred, DestBB));
  }

  // If all edges were unthreadable, we fail.
  if (PredToDestList.empty())
    return false;

  // Determine which is the most common successor.  If we have many inputs and
  // this block is a switch, we want to start by threading the batch that goes
  // to the most popular destination first.  If we only know about one
  // threadable destination (the common case) we can avoid this.
  BasicBlock *MostPopularDest = OnlyDest;

  if (MostPopularDest == MultipleDestSentinel)
    MostPopularDest = FindMostPopularDest(BB, PredToDestList);

  // Now that we know what the most popular destination is, factor all
  // predecessors that will jump to it into a single predecessor.
  SmallVector<BasicBlock*, 16> PredsToFactor;
  for (unsigned i = 0, e = PredToDestList.size(); i != e; ++i)
    if (PredToDestList[i].second == MostPopularDest) {
      BasicBlock *Pred = PredToDestList[i].first;

      // This predecessor may be a switch or something else that has multiple
      // edges to the block.  Factor each of these edges by listing them
      // according to # occurrences in PredsToFactor.
      TerminatorInst *PredTI = Pred->getTerminator();
      for (unsigned i = 0, e = PredTI->getNumSuccessors(); i != e; ++i)
        if (PredTI->getSuccessor(i) == BB)
          PredsToFactor.push_back(Pred);
    }

  // If the threadable edges are branching on an undefined value, we get to pick
  // the destination that these predecessors should get to.
  if (!MostPopularDest)
    MostPopularDest = BB->getTerminator()->
                            getSuccessor(GetBestDestForJumpOnUndef(BB));

  // Ok, try to thread it!
  return ThreadEdge(BB, PredsToFactor, MostPopularDest);
}

/// ProcessBranchOnPHI - We have an otherwise unthreadable conditional branch on
/// a PHI node in the current block.  See if there are any simplifications we
/// can do based on inputs to the phi node.
///
bool JumpThreading::ProcessBranchOnPHI(PHINode *PN) {
  BasicBlock *BB = PN->getParent();

  // TODO: We could make use of this to do it once for blocks with common PHI
  // values.
  SmallVector<BasicBlock*, 1> PredBBs;
  PredBBs.resize(1);

  // If any of the predecessor blocks end in an unconditional branch, we can
  // *duplicate* the conditional branch into that block in order to further
  // encourage jump threading and to eliminate cases where we have branch on a
  // phi of an icmp (branch on icmp is much better).
  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
    BasicBlock *PredBB = PN->getIncomingBlock(i);
    if (BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator()))
      if (PredBr->isUnconditional()) {
        PredBBs[0] = PredBB;
        // Try to duplicate BB into PredBB.
        if (DuplicateCondBranchOnPHIIntoPred(BB, PredBBs))
          return true;
      }
  }

  return false;
}

/// ProcessBranchOnXOR - We have an otherwise unthreadable conditional branch on
/// a xor instruction in the current block.  See if there are any
/// simplifications we can do based on inputs to the xor.
///
bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) {
  BasicBlock *BB = BO->getParent();

  // If either the LHS or RHS of the xor is a constant, don't do this
  // optimization.
  if (isa<ConstantInt>(BO->getOperand(0)) ||
      isa<ConstantInt>(BO->getOperand(1)))
    return false;

  // If the first instruction in BB isn't a phi, we won't be able to infer
  // anything special about any particular predecessor.
  if (!isa<PHINode>(BB->front()))
    return false;

  // If we have a xor as the branch input to this block, and we know that the
  // LHS or RHS of the xor in any predecessor is true/false, then we can clone
  // the condition into the predecessor and fix that value to true, saving some
  // logical ops on that path and encouraging other paths to simplify.
  //
  // This copies something like this:
  //
  //  BB:
  //    %X = phi i1 [1],  [%X']
  //    %Y = icmp eq i32 %A, %B
  //    %Z = xor i1 %X, %Y
  //    br i1 %Z, ...
  //
  // Into:
  //  BB':
  //    %Y = icmp ne i32 %A, %B
  //    br i1 %Z, ...

  PredValueInfoTy XorOpValues;
  bool isLHS = true;
  // Prefer predecessors that pin down the LHS; fall back to the RHS.
  if (!ComputeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues,
                                       WantInteger, BO)) {
    assert(XorOpValues.empty());
    if (!ComputeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues,
                                         WantInteger, BO))
      return false;
    isLHS = false;
  }

  assert(!XorOpValues.empty() &&
         "ComputeValueKnownInPredecessors returned true with no values");

  // Scan the information to see which is most popular: true or false.  The
  // predecessors can be of the set true, false, or undef.
  unsigned NumTrue = 0, NumFalse = 0;
  for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) {
    if (isa<UndefValue>(XorOpValues[i].first))
      // Ignore undefs for the count.
      continue;
    if (cast<ConstantInt>(XorOpValues[i].first)->isZero())
      ++NumFalse;
    else
      ++NumTrue;
  }

  // Determine which value to split on, true, false, or undef if neither.
  ConstantInt *SplitVal = nullptr;
  if (NumTrue > NumFalse)
    SplitVal = ConstantInt::getTrue(BB->getContext());
  else if (NumTrue != 0 || NumFalse != 0)
    SplitVal = ConstantInt::getFalse(BB->getContext());

  // Collect all of the blocks that this can be folded into so that we can
  // factor this once and clone it once.
  SmallVector<BasicBlock*, 8> BlocksToFoldInto;
  for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) {
    if (XorOpValues[i].first != SplitVal &&
        !isa<UndefValue>(XorOpValues[i].first))
      continue;

    BlocksToFoldInto.push_back(XorOpValues[i].second);
  }

  // If we inferred a value for all of the predecessors, then duplication won't
  // help us.  However, we can just replace the LHS or RHS with the constant.
  if (BlocksToFoldInto.size() ==
      cast<PHINode>(BB->front()).getNumIncomingValues()) {
    if (!SplitVal) {
      // If all preds provide undef, just nuke the xor, because it is undef too.
      BO->replaceAllUsesWith(UndefValue::get(BO->getType()));
      BO->eraseFromParent();
    } else if (SplitVal->isZero()) {
      // If all preds provide 0, replace the xor with the other input.
      BO->replaceAllUsesWith(BO->getOperand(isLHS));
      BO->eraseFromParent();
    } else {
      // If all preds provide 1, set the computed value to 1.
      BO->setOperand(!isLHS, SplitVal);
    }

    return true;
  }

  // Try to duplicate BB into PredBB.
  return DuplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto);
}


/// AddPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new
/// predecessor to the PHIBB block.  If it has PHI nodes, add entries for
/// NewPred using the entries from OldPred (suitably mapped).
static void AddPHINodeEntriesForMappedBlock(BasicBlock *PHIBB,
                                            BasicBlock *OldPred,
                                            BasicBlock *NewPred,
                                     DenseMap<Instruction*, Value*> &ValueMap) {
  for (BasicBlock::iterator PNI = PHIBB->begin();
       PHINode *PN = dyn_cast<PHINode>(PNI); ++PNI) {
    // Ok, we have a PHI node.  Figure out what the incoming value was for the
    // DestBlock.
    Value *IV = PN->getIncomingValueForBlock(OldPred);

    // Remap the value if necessary.  Values not present in the map were
    // defined outside the duplicated region and can be used unchanged.
    if (Instruction *Inst = dyn_cast<Instruction>(IV)) {
      DenseMap<Instruction*, Value*>::iterator I = ValueMap.find(Inst);
      if (I != ValueMap.end())
        IV = I->second;
    }

    PN->addIncoming(IV, NewPred);
  }
}

/// ThreadEdge - We have decided that it is safe and profitable to factor the
/// blocks in PredBBs to one predecessor, then thread an edge from it to SuccBB
/// across BB.  Transform the IR to reflect this change.
bool JumpThreading::ThreadEdge(BasicBlock *BB,
                               const SmallVectorImpl<BasicBlock*> &PredBBs,
                               BasicBlock *SuccBB) {
  // If threading to the same block as we come from, we would infinite loop.
  if (SuccBB == BB) {
    DEBUG(dbgs() << "  Not threading across BB '" << BB->getName()
          << "' - would thread to self!\n");
    return false;
  }

  // If threading this would thread across a loop header, don't thread the edge.
  // See the comments above FindLoopHeaders for justifications and caveats.
  if (LoopHeaders.count(BB)) {
    DEBUG(dbgs() << "  Not threading across loop header BB '" << BB->getName()
          << "' to dest BB '" << SuccBB->getName()
          << "' - it might create an irreducible loop!\n");
    return false;
  }

  unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB, Threshold);
  if (JumpThreadCost > Threshold) {
    DEBUG(dbgs() << "  Not threading BB '" << BB->getName()
          << "' - Cost is too high: " << JumpThreadCost << "\n");
    return false;
  }

  // And finally, do it!  Start by factoring the predecessors as needed.
  BasicBlock *PredBB;
  if (PredBBs.size() == 1)
    PredBB = PredBBs[0];
  else {
    DEBUG(dbgs() << "  Factoring out " << PredBBs.size()
          << " common predecessors.\n");
    PredBB = SplitBlockPredecessors(BB, PredBBs, ".thr_comm", this);
  }

  // And finally, do it!
  DEBUG(dbgs() << "  Threading edge from '" << PredBB->getName() << "' to '"
        << SuccBB->getName() << "' with cost: " << JumpThreadCost
        << ", across block:\n    "
        << *BB << "\n");

  // Tell LazyValueInfo about the new edge before rewriting the CFG so its
  // cached results stay consistent.
  LVI->threadEdge(PredBB, BB, SuccBB);

  // We are going to have to map operands from the original BB block to the new
  // copy of the block 'NewBB'.  If there are PHI nodes in BB, evaluate them to
  // account for entry from PredBB.
  DenseMap<Instruction*, Value*> ValueMapping;

  BasicBlock *NewBB = BasicBlock::Create(BB->getContext(),
                                         BB->getName()+".thread",
                                         BB->getParent(), BB);
  NewBB->moveAfter(PredBB);

  BasicBlock::iterator BI = BB->begin();
  for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
    ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);

  // Clone the non-phi instructions of BB into NewBB, keeping track of the
  // mapping and using it to remap operands in the cloned instructions.
  for (; !isa<TerminatorInst>(BI); ++BI) {
    Instruction *New = BI->clone();
    New->setName(BI->getName());
    NewBB->getInstList().push_back(New);
    ValueMapping[BI] = New;

    // Remap operands to patch up intra-block references.
    for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
      if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
        DenseMap<Instruction*, Value*>::iterator I = ValueMapping.find(Inst);
        if (I != ValueMapping.end())
          New->setOperand(i, I->second);
      }
  }

  // We didn't copy the terminator from BB over to NewBB, because there is now
  // an unconditional jump to SuccBB.  Insert the unconditional jump.
  BranchInst *NewBI = BranchInst::Create(SuccBB, NewBB);
  NewBI->setDebugLoc(BB->getTerminator()->getDebugLoc());

  // Check to see if SuccBB has PHI nodes. If so, we need to add entries to the
  // PHI nodes for NewBB now.
  AddPHINodeEntriesForMappedBlock(SuccBB, BB, NewBB, ValueMapping);

  // If there were values defined in BB that are used outside the block, then we
  // now have to update all uses of the value to use either the original value,
  // the cloned value, or some PHI derived value.  This can require arbitrary
  // PHI insertion, of which we are prepared to do, clean these up now.
  SSAUpdater SSAUpdate;
  SmallVector<Use*, 16> UsesToRename;
  for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
    // Scan all uses of this instruction to see if it is used outside of its
    // block, and if so, record them in UsesToRename.
    for (Use &U : I->uses()) {
      Instruction *User = cast<Instruction>(U.getUser());
      if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
        // A PHI use in another block is still "local" to BB if the incoming
        // edge comes from BB itself.
        if (UserPN->getIncomingBlock(U) == BB)
          continue;
      } else if (User->getParent() == BB)
        continue;

      UsesToRename.push_back(&U);
    }

    // If there are no uses outside the block, we're done with this instruction.
    if (UsesToRename.empty())
      continue;

    DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *I << "\n");

    // We found a use of I outside of BB.  Rename all uses of I that are outside
    // its block to be uses of the appropriate PHI node etc.  See ValuesInBlocks
    // with the two values we know.
    SSAUpdate.Initialize(I->getType(), I->getName());
    SSAUpdate.AddAvailableValue(BB, I);
    SSAUpdate.AddAvailableValue(NewBB, ValueMapping[I]);

    while (!UsesToRename.empty())
      SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
    DEBUG(dbgs() << "\n");
  }


  // Ok, NewBB is good to go.  Update the terminator of PredBB to jump to
  // NewBB instead of BB.  This eliminates predecessors from BB, which requires
  // us to simplify any PHI nodes in BB.
  TerminatorInst *PredTerm = PredBB->getTerminator();
  for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i)
    if (PredTerm->getSuccessor(i) == BB) {
      BB->removePredecessor(PredBB, true);
      PredTerm->setSuccessor(i, NewBB);
    }

  // At this point, the IR is fully up to date and consistent.  Do a quick scan
  // over the new instructions and zap any that are constants or dead.  This
  // frequently happens because of phi translation.
  SimplifyInstructionsInBlock(NewBB, DL, TLI);

  // Threaded an edge!
  ++NumThreads;
  return true;
}

/// DuplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch
/// to BB which contains an i1 PHI node and a conditional branch on that PHI.
/// If we can duplicate the contents of BB up into PredBB do so now, this
/// improves the odds that the branch will be on an analyzable instruction like
/// a compare.
bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
                                 const SmallVectorImpl<BasicBlock *> &PredBBs) {
  assert(!PredBBs.empty() && "Can't handle an empty set");

  // If BB is a loop header, then duplicating this block outside the loop would
  // cause us to transform this into an irreducible loop, don't do this.
  // See the comments above FindLoopHeaders for justifications and caveats.
  if (LoopHeaders.count(BB)) {
    DEBUG(dbgs() << "  Not duplicating loop header '" << BB->getName()
          << "' into predecessor block '" << PredBBs[0]->getName()
          << "' - it might create an irreducible loop!\n");
    return false;
  }

  unsigned DuplicationCost = getJumpThreadDuplicationCost(BB, Threshold);
  if (DuplicationCost > Threshold) {
    DEBUG(dbgs() << "  Not duplicating BB '" << BB->getName()
          << "' - Cost is too high: " << DuplicationCost << "\n");
    return false;
  }

  // And finally, do it!  Start by factoring the predecessors as needed.
  BasicBlock *PredBB;
  if (PredBBs.size() == 1)
    PredBB = PredBBs[0];
  else {
    DEBUG(dbgs() << "  Factoring out " << PredBBs.size()
          << " common predecessors.\n");
    PredBB = SplitBlockPredecessors(BB, PredBBs, ".thr_comm", this);
  }

  // Okay, we decided to do this!  Clone all the instructions in BB onto the end
  // of PredBB.
  DEBUG(dbgs() << "  Duplicating block '" << BB->getName() << "' into end of '"
        << PredBB->getName() << "' to eliminate branch on phi.  Cost: "
        << DuplicationCost << " block is:" << *BB << "\n");

  // Unless PredBB ends with an unconditional branch, split the edge so that we
  // can just clone the bits from BB into the end of the new PredBB.
  BranchInst *OldPredBranch = dyn_cast<BranchInst>(PredBB->getTerminator());

  if (!OldPredBranch || !OldPredBranch->isUnconditional()) {
    PredBB = SplitEdge(PredBB, BB, this);
    OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
  }

  // We are going to have to map operands from the original BB block into the
  // PredBB block.  Evaluate PHI nodes in BB.
  DenseMap<Instruction*, Value*> ValueMapping;

  BasicBlock::iterator BI = BB->begin();
  for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
    ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);

  // Clone the non-phi instructions of BB into PredBB, keeping track of the
  // mapping and using it to remap operands in the cloned instructions.
  for (; BI != BB->end(); ++BI) {
    Instruction *New = BI->clone();

    // Remap operands to patch up intra-block references.
    for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
      if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
        DenseMap<Instruction*, Value*>::iterator I = ValueMapping.find(Inst);
        if (I != ValueMapping.end())
          New->setOperand(i, I->second);
      }

    // If this instruction can be simplified after the operands are updated,
    // just use the simplified value instead.  This frequently happens due to
    // phi translation.
    if (Value *IV = SimplifyInstruction(New, DL)) {
      delete New;
      ValueMapping[BI] = IV;
    } else {
      // Otherwise, insert the new instruction into the block.
      New->setName(BI->getName());
      PredBB->getInstList().insert(OldPredBranch, New);
      ValueMapping[BI] = New;
    }
  }

  // Check to see if the targets of the branch had PHI nodes. If so, we need to
  // add entries to the PHI nodes for branch from PredBB now.
  BranchInst *BBBranch = cast<BranchInst>(BB->getTerminator());
  AddPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(0), BB, PredBB,
                                  ValueMapping);
  AddPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(1), BB, PredBB,
                                  ValueMapping);

  // If there were values defined in BB that are used outside the block, then we
  // now have to update all uses of the value to use either the original value,
  // the cloned value, or some PHI derived value.  This can require arbitrary
  // PHI insertion, of which we are prepared to do, clean these up now.
  SSAUpdater SSAUpdate;
  SmallVector<Use*, 16> UsesToRename;
  for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
    // Scan all uses of this instruction to see if it is used outside of its
    // block, and if so, record them in UsesToRename.
    for (Use &U : I->uses()) {
      Instruction *User = cast<Instruction>(U.getUser());
      if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
        // A PHI use in another block is still "local" to BB if the incoming
        // edge comes from BB itself.
        if (UserPN->getIncomingBlock(U) == BB)
          continue;
      } else if (User->getParent() == BB)
        continue;

      UsesToRename.push_back(&U);
    }

    // If there are no uses outside the block, we're done with this instruction.
    if (UsesToRename.empty())
      continue;

    DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *I << "\n");

    // We found a use of I outside of BB.  Rename all uses of I that are outside
    // its block to be uses of the appropriate PHI node etc.  See ValuesInBlocks
    // with the two values we know.
    SSAUpdate.Initialize(I->getType(), I->getName());
    SSAUpdate.AddAvailableValue(BB, I);
    SSAUpdate.AddAvailableValue(PredBB, ValueMapping[I]);

    while (!UsesToRename.empty())
      SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
    DEBUG(dbgs() << "\n");
  }

  // PredBB no longer jumps to BB, remove entries in the PHI node for the edge
  // that we nuked.
  BB->removePredecessor(PredBB, true);

  // Remove the unconditional branch at the end of the PredBB block.
  OldPredBranch->eraseFromParent();

  ++NumDupes;
  return true;
}

/// TryToUnfoldSelect - Look for blocks of the form
/// bb1:
///   %a = select
///   br bb
///
/// bb2:
///   %p = phi [%a, %bb] ...
///   %c = icmp %p
///   br i1 %c
///
/// And expand the select into a branch structure if one of its arms allows %c
/// to be folded. This later enables threading from bb1 over bb2.
bool JumpThreading::TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) {
  BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
  PHINode *CondLHS = dyn_cast<PHINode>(CondCmp->getOperand(0));
  // NOTE(review): operand 1 is cast<Constant> unconditionally — the caller
  // presumably guarantees the compare RHS is a constant; confirm at call site.
  Constant *CondRHS = cast<Constant>(CondCmp->getOperand(1));

  // The pattern requires BB to end in a conditional branch and the compare's
  // LHS to be a PHI that lives in BB itself.
  if (!CondBr || !CondBr->isConditional() || !CondLHS ||
      CondLHS->getParent() != BB)
    return false;

  for (unsigned I = 0, E = CondLHS->getNumIncomingValues(); I != E; ++I) {
    BasicBlock *Pred = CondLHS->getIncomingBlock(I);
    SelectInst *SI = dyn_cast<SelectInst>(CondLHS->getIncomingValue(I));

    // Look if one of the incoming values is a select in the corresponding
    // predecessor. The single-use requirement keeps erasing the select below
    // safe.
    if (!SI || SI->getParent() != Pred || !SI->hasOneUse())
      continue;

    // The predecessor must fall through unconditionally so we can reroute it.
    BranchInst *PredTerm = dyn_cast<BranchInst>(Pred->getTerminator());
    if (!PredTerm || !PredTerm->isUnconditional())
      continue;

    // Now check if one of the select values would allow us to constant fold the
    // terminator in BB. We don't do the transform if both sides fold, those
    // cases will be threaded in any case.
    LazyValueInfo::Tristate LHSFolds =
        LVI->getPredicateOnEdge(CondCmp->getPredicate(), SI->getOperand(1),
                                CondRHS, Pred, BB, CondCmp);
    LazyValueInfo::Tristate RHSFolds =
        LVI->getPredicateOnEdge(CondCmp->getPredicate(), SI->getOperand(2),
                                CondRHS, Pred, BB, CondCmp);
    // Transform only when exactly one arm's foldability differs from the
    // other's (and at least one is known).
    if ((LHSFolds != LazyValueInfo::Unknown ||
         RHSFolds != LazyValueInfo::Unknown) &&
        LHSFolds != RHSFolds) {
      // Expand the select.
      //
      // Pred --
      //  |    v
      //  |  NewBB
      //  |    |
      //  |-----
      //  v
      // BB
      BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "select.unfold",
                                             BB->getParent(), BB);
      // Move the unconditional branch to NewBB.
      PredTerm->removeFromParent();
      NewBB->getInstList().insert(NewBB->end(), PredTerm);
      // Create a conditional branch and update PHI nodes: Pred now branches on
      // the select's condition; the false value flows along the old Pred edge
      // and the true value along the new NewBB edge.
      BranchInst::Create(NewBB, BB, SI->getCondition(), Pred);
      CondLHS->setIncomingValue(I, SI->getFalseValue());
      CondLHS->addIncoming(SI->getTrueValue(), NewBB);
      // The select is now dead.
      SI->eraseFromParent();

      // Update any other PHI nodes in BB: NewBB contributes the same value the
      // Pred edge already did.
      for (BasicBlock::iterator BI = BB->begin();
           PHINode *Phi = dyn_cast<PHINode>(BI); ++BI)
        if (Phi != CondLHS)
          Phi->addIncoming(Phi->getIncomingValueForBlock(Pred), NewBB);
      return true;
    }
  }
  return false;
}