00001 //===- GVN.cpp - Eliminate redundant values and loads ---------------------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This pass performs global value numbering to eliminate fully redundant 00011 // instructions. It also performs simple dead load elimination. 00012 // 00013 // Note that this pass does the value numbering itself; it does not use the 00014 // ValueNumbering analysis passes. 00015 // 00016 //===----------------------------------------------------------------------===// 00017 00018 #include "llvm/Transforms/Scalar.h" 00019 #include "llvm/ADT/DenseMap.h" 00020 #include "llvm/ADT/DepthFirstIterator.h" 00021 #include "llvm/ADT/Hashing.h" 00022 #include "llvm/ADT/MapVector.h" 00023 #include "llvm/ADT/SetVector.h" 00024 #include "llvm/ADT/SmallPtrSet.h" 00025 #include "llvm/ADT/Statistic.h" 00026 #include "llvm/Analysis/AliasAnalysis.h" 00027 #include "llvm/Analysis/AssumptionTracker.h" 00028 #include "llvm/Analysis/CFG.h" 00029 #include "llvm/Analysis/ConstantFolding.h" 00030 #include "llvm/Analysis/InstructionSimplify.h" 00031 #include "llvm/Analysis/Loads.h" 00032 #include "llvm/Analysis/MemoryBuiltins.h" 00033 #include "llvm/Analysis/MemoryDependenceAnalysis.h" 00034 #include "llvm/Analysis/PHITransAddr.h" 00035 #include "llvm/Analysis/ValueTracking.h" 00036 #include "llvm/IR/DataLayout.h" 00037 #include "llvm/IR/Dominators.h" 00038 #include "llvm/IR/GlobalVariable.h" 00039 #include "llvm/IR/IRBuilder.h" 00040 #include "llvm/IR/IntrinsicInst.h" 00041 #include "llvm/IR/LLVMContext.h" 00042 #include "llvm/IR/Metadata.h" 00043 #include "llvm/IR/PatternMatch.h" 00044 #include "llvm/Support/Allocator.h" 00045 #include "llvm/Support/CommandLine.h" 00046 #include "llvm/Support/Debug.h" 00047 #include "llvm/Target/TargetLibraryInfo.h" 00048 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 00049 #include "llvm/Transforms/Utils/Local.h" 00050 #include "llvm/Transforms/Utils/SSAUpdater.h" 00051 #include <vector> 00052 using namespace llvm; 00053 using namespace PatternMatch; 00054 00055 #define DEBUG_TYPE "gvn" 00056 00057 STATISTIC(NumGVNInstr, "Number of instructions deleted"); 00058 STATISTIC(NumGVNLoad, "Number of loads deleted"); 00059 STATISTIC(NumGVNPRE, "Number of instructions PRE'd"); 00060 STATISTIC(NumGVNBlocks, "Number of blocks merged"); 00061 STATISTIC(NumGVNSimpl, "Number of instructions simplified"); 00062 STATISTIC(NumGVNEqProp, "Number of equalities propagated"); 00063 STATISTIC(NumPRELoad, "Number of loads PRE'd"); 00064 00065 static cl::opt<bool> EnablePRE("enable-pre", 00066 cl::init(true), cl::Hidden); 00067 static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true)); 00068 00069 // Maximum allowed recursion depth. 00070 static cl::opt<uint32_t> 00071 MaxRecurseDepth("max-recurse-depth", cl::Hidden, cl::init(1000), cl::ZeroOrMore, 00072 cl::desc("Max recurse depth (default = 1000)")); 00073 00074 //===----------------------------------------------------------------------===// 00075 // ValueTable Class 00076 //===----------------------------------------------------------------------===// 00077 00078 /// This class holds the mapping between values and value numbers. It is used 00079 /// as an efficient mechanism to determine the expression-wise equivalence of 00080 /// two values. 
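//===----------------------------------------------------------------------===//
// Editor's aside (illustrative only, not part of GVN.cpp): a minimal sketch of
// the value-numbering idea implemented by the Expression/ValueTable machinery
// below, written against the standard library only. Two computations that
// reduce to the same (opcode, operand-number) tuple receive the same number,
// which is what makes the second one redundant. The names ToyExpr and
// ToyValueTable are hypothetical and exist only for this sketch.
#if 0
#include <cstdint>
#include <map>
#include <vector>

struct ToyExpr {
  unsigned Opcode;
  std::vector<uint32_t> Operands; // operand *value numbers*, not operand Values
  bool operator<(const ToyExpr &O) const {
    return Opcode != O.Opcode ? Opcode < O.Opcode : Operands < O.Operands;
  }
};

class ToyValueTable {
  std::map<ToyExpr, uint32_t> Numbering;
  uint32_t NextNumber = 1;

public:
  // Return the number already assigned to an equivalent expression, or mint a
  // fresh one. GVN's lookup_or_add plays the same role, keyed by the hashed
  // Expression struct defined below.
  uint32_t lookupOrAdd(const ToyExpr &E) {
    auto It = Numbering.find(E);
    if (It != Numbering.end())
      return It->second;                // seen before: the value is redundant
    return Numbering[E] = NextNumber++; // first occurrence: new value number
  }
};
#endif
// End of editor's aside.
//===----------------------------------------------------------------------===//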
00081 namespace { 00082 struct Expression { 00083 uint32_t opcode; 00084 Type *type; 00085 SmallVector<uint32_t, 4> varargs; 00086 00087 Expression(uint32_t o = ~2U) : opcode(o) { } 00088 00089 bool operator==(const Expression &other) const { 00090 if (opcode != other.opcode) 00091 return false; 00092 if (opcode == ~0U || opcode == ~1U) 00093 return true; 00094 if (type != other.type) 00095 return false; 00096 if (varargs != other.varargs) 00097 return false; 00098 return true; 00099 } 00100 00101 friend hash_code hash_value(const Expression &Value) { 00102 return hash_combine(Value.opcode, Value.type, 00103 hash_combine_range(Value.varargs.begin(), 00104 Value.varargs.end())); 00105 } 00106 }; 00107 00108 class ValueTable { 00109 DenseMap<Value*, uint32_t> valueNumbering; 00110 DenseMap<Expression, uint32_t> expressionNumbering; 00111 AliasAnalysis *AA; 00112 MemoryDependenceAnalysis *MD; 00113 DominatorTree *DT; 00114 00115 uint32_t nextValueNumber; 00116 00117 Expression create_expression(Instruction* I); 00118 Expression create_cmp_expression(unsigned Opcode, 00119 CmpInst::Predicate Predicate, 00120 Value *LHS, Value *RHS); 00121 Expression create_extractvalue_expression(ExtractValueInst* EI); 00122 uint32_t lookup_or_add_call(CallInst* C); 00123 public: 00124 ValueTable() : nextValueNumber(1) { } 00125 uint32_t lookup_or_add(Value *V); 00126 uint32_t lookup(Value *V) const; 00127 uint32_t lookup_or_add_cmp(unsigned Opcode, CmpInst::Predicate Pred, 00128 Value *LHS, Value *RHS); 00129 void add(Value *V, uint32_t num); 00130 void clear(); 00131 void erase(Value *v); 00132 void setAliasAnalysis(AliasAnalysis* A) { AA = A; } 00133 AliasAnalysis *getAliasAnalysis() const { return AA; } 00134 void setMemDep(MemoryDependenceAnalysis* M) { MD = M; } 00135 void setDomTree(DominatorTree* D) { DT = D; } 00136 uint32_t getNextUnusedValueNumber() { return nextValueNumber; } 00137 void verifyRemoved(const Value *) const; 00138 }; 00139 } 00140 00141 namespace llvm { 00142 template <> struct DenseMapInfo<Expression> { 00143 static inline Expression getEmptyKey() { 00144 return ~0U; 00145 } 00146 00147 static inline Expression getTombstoneKey() { 00148 return ~1U; 00149 } 00150 00151 static unsigned getHashValue(const Expression e) { 00152 using llvm::hash_value; 00153 return static_cast<unsigned>(hash_value(e)); 00154 } 00155 static bool isEqual(const Expression &LHS, const Expression &RHS) { 00156 return LHS == RHS; 00157 } 00158 }; 00159 00160 } 00161 00162 //===----------------------------------------------------------------------===// 00163 // ValueTable Internal Functions 00164 //===----------------------------------------------------------------------===// 00165 00166 Expression ValueTable::create_expression(Instruction *I) { 00167 Expression e; 00168 e.type = I->getType(); 00169 e.opcode = I->getOpcode(); 00170 for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end(); 00171 OI != OE; ++OI) 00172 e.varargs.push_back(lookup_or_add(*OI)); 00173 if (I->isCommutative()) { 00174 // Ensure that commutative instructions that only differ by a permutation 00175 // of their operands get the same value number by sorting the operand value 00176 // numbers. Since all commutative instructions have two operands it is more 00177 // efficient to sort by hand rather than using, say, std::sort. 
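    // Editor's note (illustrative example): given "%a = add i32 %x, %y" and
    // "%b = add i32 %y, %x", both end up with the same sorted pair of operand
    // value numbers after the swap below, so they produce equal Expressions
    // and receive the same value number.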
00178 assert(I->getNumOperands() == 2 && "Unsupported commutative instruction!"); 00179 if (e.varargs[0] > e.varargs[1]) 00180 std::swap(e.varargs[0], e.varargs[1]); 00181 } 00182 00183 if (CmpInst *C = dyn_cast<CmpInst>(I)) { 00184 // Sort the operand value numbers so x<y and y>x get the same value number. 00185 CmpInst::Predicate Predicate = C->getPredicate(); 00186 if (e.varargs[0] > e.varargs[1]) { 00187 std::swap(e.varargs[0], e.varargs[1]); 00188 Predicate = CmpInst::getSwappedPredicate(Predicate); 00189 } 00190 e.opcode = (C->getOpcode() << 8) | Predicate; 00191 } else if (InsertValueInst *E = dyn_cast<InsertValueInst>(I)) { 00192 for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end(); 00193 II != IE; ++II) 00194 e.varargs.push_back(*II); 00195 } 00196 00197 return e; 00198 } 00199 00200 Expression ValueTable::create_cmp_expression(unsigned Opcode, 00201 CmpInst::Predicate Predicate, 00202 Value *LHS, Value *RHS) { 00203 assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) && 00204 "Not a comparison!"); 00205 Expression e; 00206 e.type = CmpInst::makeCmpResultType(LHS->getType()); 00207 e.varargs.push_back(lookup_or_add(LHS)); 00208 e.varargs.push_back(lookup_or_add(RHS)); 00209 00210 // Sort the operand value numbers so x<y and y>x get the same value number. 00211 if (e.varargs[0] > e.varargs[1]) { 00212 std::swap(e.varargs[0], e.varargs[1]); 00213 Predicate = CmpInst::getSwappedPredicate(Predicate); 00214 } 00215 e.opcode = (Opcode << 8) | Predicate; 00216 return e; 00217 } 00218 00219 Expression ValueTable::create_extractvalue_expression(ExtractValueInst *EI) { 00220 assert(EI && "Not an ExtractValueInst?"); 00221 Expression e; 00222 e.type = EI->getType(); 00223 e.opcode = 0; 00224 00225 IntrinsicInst *I = dyn_cast<IntrinsicInst>(EI->getAggregateOperand()); 00226 if (I != nullptr && EI->getNumIndices() == 1 && *EI->idx_begin() == 0 ) { 00227 // EI might be an extract from one of our recognised intrinsics. If it 00228 // is we'll synthesize a semantically equivalent expression instead on 00229 // an extract value expression. 00230 switch (I->getIntrinsicID()) { 00231 case Intrinsic::sadd_with_overflow: 00232 case Intrinsic::uadd_with_overflow: 00233 e.opcode = Instruction::Add; 00234 break; 00235 case Intrinsic::ssub_with_overflow: 00236 case Intrinsic::usub_with_overflow: 00237 e.opcode = Instruction::Sub; 00238 break; 00239 case Intrinsic::smul_with_overflow: 00240 case Intrinsic::umul_with_overflow: 00241 e.opcode = Instruction::Mul; 00242 break; 00243 default: 00244 break; 00245 } 00246 00247 if (e.opcode != 0) { 00248 // Intrinsic recognized. Grab its args to finish building the expression. 00249 assert(I->getNumArgOperands() == 2 && 00250 "Expect two args for recognised intrinsics."); 00251 e.varargs.push_back(lookup_or_add(I->getArgOperand(0))); 00252 e.varargs.push_back(lookup_or_add(I->getArgOperand(1))); 00253 return e; 00254 } 00255 } 00256 00257 // Not a recognised intrinsic. Fall back to producing an extract value 00258 // expression. 
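  // Editor's note (illustrative example): for instance, extracting the
  // overflow bit -- "%ov = extractvalue {i32, i1} %agg, 1" -- is not mapped to
  // an arithmetic opcode above, so it is numbered below as a plain
  // extractvalue over vn(%agg) plus the literal index 1.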
00259 e.opcode = EI->getOpcode(); 00260 for (Instruction::op_iterator OI = EI->op_begin(), OE = EI->op_end(); 00261 OI != OE; ++OI) 00262 e.varargs.push_back(lookup_or_add(*OI)); 00263 00264 for (ExtractValueInst::idx_iterator II = EI->idx_begin(), IE = EI->idx_end(); 00265 II != IE; ++II) 00266 e.varargs.push_back(*II); 00267 00268 return e; 00269 } 00270 00271 //===----------------------------------------------------------------------===// 00272 // ValueTable External Functions 00273 //===----------------------------------------------------------------------===// 00274 00275 /// add - Insert a value into the table with a specified value number. 00276 void ValueTable::add(Value *V, uint32_t num) { 00277 valueNumbering.insert(std::make_pair(V, num)); 00278 } 00279 00280 uint32_t ValueTable::lookup_or_add_call(CallInst *C) { 00281 if (AA->doesNotAccessMemory(C)) { 00282 Expression exp = create_expression(C); 00283 uint32_t &e = expressionNumbering[exp]; 00284 if (!e) e = nextValueNumber++; 00285 valueNumbering[C] = e; 00286 return e; 00287 } else if (AA->onlyReadsMemory(C)) { 00288 Expression exp = create_expression(C); 00289 uint32_t &e = expressionNumbering[exp]; 00290 if (!e) { 00291 e = nextValueNumber++; 00292 valueNumbering[C] = e; 00293 return e; 00294 } 00295 if (!MD) { 00296 e = nextValueNumber++; 00297 valueNumbering[C] = e; 00298 return e; 00299 } 00300 00301 MemDepResult local_dep = MD->getDependency(C); 00302 00303 if (!local_dep.isDef() && !local_dep.isNonLocal()) { 00304 valueNumbering[C] = nextValueNumber; 00305 return nextValueNumber++; 00306 } 00307 00308 if (local_dep.isDef()) { 00309 CallInst* local_cdep = cast<CallInst>(local_dep.getInst()); 00310 00311 if (local_cdep->getNumArgOperands() != C->getNumArgOperands()) { 00312 valueNumbering[C] = nextValueNumber; 00313 return nextValueNumber++; 00314 } 00315 00316 for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) { 00317 uint32_t c_vn = lookup_or_add(C->getArgOperand(i)); 00318 uint32_t cd_vn = lookup_or_add(local_cdep->getArgOperand(i)); 00319 if (c_vn != cd_vn) { 00320 valueNumbering[C] = nextValueNumber; 00321 return nextValueNumber++; 00322 } 00323 } 00324 00325 uint32_t v = lookup_or_add(local_cdep); 00326 valueNumbering[C] = v; 00327 return v; 00328 } 00329 00330 // Non-local case. 00331 const MemoryDependenceAnalysis::NonLocalDepInfo &deps = 00332 MD->getNonLocalCallDependency(CallSite(C)); 00333 // FIXME: Move the checking logic to MemDep! 00334 CallInst* cdep = nullptr; 00335 00336 // Check to see if we have a single dominating call instruction that is 00337 // identical to C. 00338 for (unsigned i = 0, e = deps.size(); i != e; ++i) { 00339 const NonLocalDepEntry *I = &deps[i]; 00340 if (I->getResult().isNonLocal()) 00341 continue; 00342 00343 // We don't handle non-definitions. If we already have a call, reject 00344 // instruction dependencies. 00345 if (!I->getResult().isDef() || cdep != nullptr) { 00346 cdep = nullptr; 00347 break; 00348 } 00349 00350 CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->getResult().getInst()); 00351 // FIXME: All duplicated with non-local case. 
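        // Editor's note (illustrative): NonLocalDepCall is the candidate
        // defining call for this dependency entry. The search succeeds only if
        // every entry collapses to a single such call that properly dominates
        // C (its argument value numbers are compared just below the loop); a
        // clobber, a non-call definition, or more than one defining entry
        // clears cdep and C receives a fresh value number instead.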
00352 if (NonLocalDepCall && DT->properlyDominates(I->getBB(), C->getParent())){ 00353 cdep = NonLocalDepCall; 00354 continue; 00355 } 00356 00357 cdep = nullptr; 00358 break; 00359 } 00360 00361 if (!cdep) { 00362 valueNumbering[C] = nextValueNumber; 00363 return nextValueNumber++; 00364 } 00365 00366 if (cdep->getNumArgOperands() != C->getNumArgOperands()) { 00367 valueNumbering[C] = nextValueNumber; 00368 return nextValueNumber++; 00369 } 00370 for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) { 00371 uint32_t c_vn = lookup_or_add(C->getArgOperand(i)); 00372 uint32_t cd_vn = lookup_or_add(cdep->getArgOperand(i)); 00373 if (c_vn != cd_vn) { 00374 valueNumbering[C] = nextValueNumber; 00375 return nextValueNumber++; 00376 } 00377 } 00378 00379 uint32_t v = lookup_or_add(cdep); 00380 valueNumbering[C] = v; 00381 return v; 00382 00383 } else { 00384 valueNumbering[C] = nextValueNumber; 00385 return nextValueNumber++; 00386 } 00387 } 00388 00389 /// lookup_or_add - Returns the value number for the specified value, assigning 00390 /// it a new number if it did not have one before. 00391 uint32_t ValueTable::lookup_or_add(Value *V) { 00392 DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V); 00393 if (VI != valueNumbering.end()) 00394 return VI->second; 00395 00396 if (!isa<Instruction>(V)) { 00397 valueNumbering[V] = nextValueNumber; 00398 return nextValueNumber++; 00399 } 00400 00401 Instruction* I = cast<Instruction>(V); 00402 Expression exp; 00403 switch (I->getOpcode()) { 00404 case Instruction::Call: 00405 return lookup_or_add_call(cast<CallInst>(I)); 00406 case Instruction::Add: 00407 case Instruction::FAdd: 00408 case Instruction::Sub: 00409 case Instruction::FSub: 00410 case Instruction::Mul: 00411 case Instruction::FMul: 00412 case Instruction::UDiv: 00413 case Instruction::SDiv: 00414 case Instruction::FDiv: 00415 case Instruction::URem: 00416 case Instruction::SRem: 00417 case Instruction::FRem: 00418 case Instruction::Shl: 00419 case Instruction::LShr: 00420 case Instruction::AShr: 00421 case Instruction::And: 00422 case Instruction::Or: 00423 case Instruction::Xor: 00424 case Instruction::ICmp: 00425 case Instruction::FCmp: 00426 case Instruction::Trunc: 00427 case Instruction::ZExt: 00428 case Instruction::SExt: 00429 case Instruction::FPToUI: 00430 case Instruction::FPToSI: 00431 case Instruction::UIToFP: 00432 case Instruction::SIToFP: 00433 case Instruction::FPTrunc: 00434 case Instruction::FPExt: 00435 case Instruction::PtrToInt: 00436 case Instruction::IntToPtr: 00437 case Instruction::BitCast: 00438 case Instruction::Select: 00439 case Instruction::ExtractElement: 00440 case Instruction::InsertElement: 00441 case Instruction::ShuffleVector: 00442 case Instruction::InsertValue: 00443 case Instruction::GetElementPtr: 00444 exp = create_expression(I); 00445 break; 00446 case Instruction::ExtractValue: 00447 exp = create_extractvalue_expression(cast<ExtractValueInst>(I)); 00448 break; 00449 default: 00450 valueNumbering[V] = nextValueNumber; 00451 return nextValueNumber++; 00452 } 00453 00454 uint32_t& e = expressionNumbering[exp]; 00455 if (!e) e = nextValueNumber++; 00456 valueNumbering[V] = e; 00457 return e; 00458 } 00459 00460 /// lookup - Returns the value number of the specified value. Fails if 00461 /// the value has not yet been numbered. 
00462 uint32_t ValueTable::lookup(Value *V) const { 00463 DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V); 00464 assert(VI != valueNumbering.end() && "Value not numbered?"); 00465 return VI->second; 00466 } 00467 00468 /// lookup_or_add_cmp - Returns the value number of the given comparison, 00469 /// assigning it a new number if it did not have one before. Useful when 00470 /// we deduced the result of a comparison, but don't immediately have an 00471 /// instruction realizing that comparison to hand. 00472 uint32_t ValueTable::lookup_or_add_cmp(unsigned Opcode, 00473 CmpInst::Predicate Predicate, 00474 Value *LHS, Value *RHS) { 00475 Expression exp = create_cmp_expression(Opcode, Predicate, LHS, RHS); 00476 uint32_t& e = expressionNumbering[exp]; 00477 if (!e) e = nextValueNumber++; 00478 return e; 00479 } 00480 00481 /// clear - Remove all entries from the ValueTable. 00482 void ValueTable::clear() { 00483 valueNumbering.clear(); 00484 expressionNumbering.clear(); 00485 nextValueNumber = 1; 00486 } 00487 00488 /// erase - Remove a value from the value numbering. 00489 void ValueTable::erase(Value *V) { 00490 valueNumbering.erase(V); 00491 } 00492 00493 /// verifyRemoved - Verify that the value is removed from all internal data 00494 /// structures. 00495 void ValueTable::verifyRemoved(const Value *V) const { 00496 for (DenseMap<Value*, uint32_t>::const_iterator 00497 I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) { 00498 assert(I->first != V && "Inst still occurs in value numbering map!"); 00499 } 00500 } 00501 00502 //===----------------------------------------------------------------------===// 00503 // GVN Pass 00504 //===----------------------------------------------------------------------===// 00505 00506 namespace { 00507 class GVN; 00508 struct AvailableValueInBlock { 00509 /// BB - The basic block in question. 00510 BasicBlock *BB; 00511 enum ValType { 00512 SimpleVal, // A simple offsetted value that is accessed. 00513 LoadVal, // A value produced by a load. 00514 MemIntrin, // A memory intrinsic which is loaded from. 00515 UndefVal // A UndefValue representing a value from dead block (which 00516 // is not yet physically removed from the CFG). 00517 }; 00518 00519 /// V - The value that is live out of the block. 00520 PointerIntPair<Value *, 2, ValType> Val; 00521 00522 /// Offset - The byte offset in Val that is interesting for the load query. 
00523 unsigned Offset; 00524 00525 static AvailableValueInBlock get(BasicBlock *BB, Value *V, 00526 unsigned Offset = 0) { 00527 AvailableValueInBlock Res; 00528 Res.BB = BB; 00529 Res.Val.setPointer(V); 00530 Res.Val.setInt(SimpleVal); 00531 Res.Offset = Offset; 00532 return Res; 00533 } 00534 00535 static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI, 00536 unsigned Offset = 0) { 00537 AvailableValueInBlock Res; 00538 Res.BB = BB; 00539 Res.Val.setPointer(MI); 00540 Res.Val.setInt(MemIntrin); 00541 Res.Offset = Offset; 00542 return Res; 00543 } 00544 00545 static AvailableValueInBlock getLoad(BasicBlock *BB, LoadInst *LI, 00546 unsigned Offset = 0) { 00547 AvailableValueInBlock Res; 00548 Res.BB = BB; 00549 Res.Val.setPointer(LI); 00550 Res.Val.setInt(LoadVal); 00551 Res.Offset = Offset; 00552 return Res; 00553 } 00554 00555 static AvailableValueInBlock getUndef(BasicBlock *BB) { 00556 AvailableValueInBlock Res; 00557 Res.BB = BB; 00558 Res.Val.setPointer(nullptr); 00559 Res.Val.setInt(UndefVal); 00560 Res.Offset = 0; 00561 return Res; 00562 } 00563 00564 bool isSimpleValue() const { return Val.getInt() == SimpleVal; } 00565 bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; } 00566 bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; } 00567 bool isUndefValue() const { return Val.getInt() == UndefVal; } 00568 00569 Value *getSimpleValue() const { 00570 assert(isSimpleValue() && "Wrong accessor"); 00571 return Val.getPointer(); 00572 } 00573 00574 LoadInst *getCoercedLoadValue() const { 00575 assert(isCoercedLoadValue() && "Wrong accessor"); 00576 return cast<LoadInst>(Val.getPointer()); 00577 } 00578 00579 MemIntrinsic *getMemIntrinValue() const { 00580 assert(isMemIntrinValue() && "Wrong accessor"); 00581 return cast<MemIntrinsic>(Val.getPointer()); 00582 } 00583 00584 /// MaterializeAdjustedValue - Emit code into this block to adjust the value 00585 /// defined here to the specified type. This handles various coercion cases. 00586 Value *MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const; 00587 }; 00588 00589 class GVN : public FunctionPass { 00590 bool NoLoads; 00591 MemoryDependenceAnalysis *MD; 00592 DominatorTree *DT; 00593 const DataLayout *DL; 00594 const TargetLibraryInfo *TLI; 00595 AssumptionTracker *AT; 00596 SetVector<BasicBlock *> DeadBlocks; 00597 00598 ValueTable VN; 00599 00600 /// LeaderTable - A mapping from value numbers to lists of Value*'s that 00601 /// have that value number. Use findLeader to query it. 00602 struct LeaderTableEntry { 00603 Value *Val; 00604 const BasicBlock *BB; 00605 LeaderTableEntry *Next; 00606 }; 00607 DenseMap<uint32_t, LeaderTableEntry> LeaderTable; 00608 BumpPtrAllocator TableAllocator; 00609 00610 SmallVector<Instruction*, 8> InstrsToErase; 00611 00612 typedef SmallVector<NonLocalDepResult, 64> LoadDepVect; 00613 typedef SmallVector<AvailableValueInBlock, 64> AvailValInBlkVect; 00614 typedef SmallVector<BasicBlock*, 64> UnavailBlkVect; 00615 00616 public: 00617 static char ID; // Pass identification, replacement for typeid 00618 explicit GVN(bool noloads = false) 00619 : FunctionPass(ID), NoLoads(noloads), MD(nullptr) { 00620 initializeGVNPass(*PassRegistry::getPassRegistry()); 00621 } 00622 00623 bool runOnFunction(Function &F) override; 00624 00625 /// markInstructionForDeletion - This removes the specified instruction from 00626 /// our various maps and marks it for deletion. 
00627 void markInstructionForDeletion(Instruction *I) { 00628 VN.erase(I); 00629 InstrsToErase.push_back(I); 00630 } 00631 00632 const DataLayout *getDataLayout() const { return DL; } 00633 DominatorTree &getDominatorTree() const { return *DT; } 00634 AliasAnalysis *getAliasAnalysis() const { return VN.getAliasAnalysis(); } 00635 MemoryDependenceAnalysis &getMemDep() const { return *MD; } 00636 private: 00637 /// addToLeaderTable - Push a new Value to the LeaderTable onto the list for 00638 /// its value number. 00639 void addToLeaderTable(uint32_t N, Value *V, const BasicBlock *BB) { 00640 LeaderTableEntry &Curr = LeaderTable[N]; 00641 if (!Curr.Val) { 00642 Curr.Val = V; 00643 Curr.BB = BB; 00644 return; 00645 } 00646 00647 LeaderTableEntry *Node = TableAllocator.Allocate<LeaderTableEntry>(); 00648 Node->Val = V; 00649 Node->BB = BB; 00650 Node->Next = Curr.Next; 00651 Curr.Next = Node; 00652 } 00653 00654 /// removeFromLeaderTable - Scan the list of values corresponding to a given 00655 /// value number, and remove the given instruction if encountered. 00656 void removeFromLeaderTable(uint32_t N, Instruction *I, BasicBlock *BB) { 00657 LeaderTableEntry* Prev = nullptr; 00658 LeaderTableEntry* Curr = &LeaderTable[N]; 00659 00660 while (Curr->Val != I || Curr->BB != BB) { 00661 Prev = Curr; 00662 Curr = Curr->Next; 00663 } 00664 00665 if (Prev) { 00666 Prev->Next = Curr->Next; 00667 } else { 00668 if (!Curr->Next) { 00669 Curr->Val = nullptr; 00670 Curr->BB = nullptr; 00671 } else { 00672 LeaderTableEntry* Next = Curr->Next; 00673 Curr->Val = Next->Val; 00674 Curr->BB = Next->BB; 00675 Curr->Next = Next->Next; 00676 } 00677 } 00678 } 00679 00680 // List of critical edges to be split between iterations. 00681 SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit; 00682 00683 // This transformation requires dominator postdominator info 00684 void getAnalysisUsage(AnalysisUsage &AU) const override { 00685 AU.addRequired<AssumptionTracker>(); 00686 AU.addRequired<DominatorTreeWrapperPass>(); 00687 AU.addRequired<TargetLibraryInfo>(); 00688 if (!NoLoads) 00689 AU.addRequired<MemoryDependenceAnalysis>(); 00690 AU.addRequired<AliasAnalysis>(); 00691 00692 AU.addPreserved<DominatorTreeWrapperPass>(); 00693 AU.addPreserved<AliasAnalysis>(); 00694 } 00695 00696 00697 // Helper fuctions of redundant load elimination 00698 bool processLoad(LoadInst *L); 00699 bool processNonLocalLoad(LoadInst *L); 00700 void AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps, 00701 AvailValInBlkVect &ValuesPerBlock, 00702 UnavailBlkVect &UnavailableBlocks); 00703 bool PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, 00704 UnavailBlkVect &UnavailableBlocks); 00705 00706 // Other helper routines 00707 bool processInstruction(Instruction *I); 00708 bool processBlock(BasicBlock *BB); 00709 void dump(DenseMap<uint32_t, Value*> &d); 00710 bool iterateOnFunction(Function &F); 00711 bool performPRE(Function &F); 00712 Value *findLeader(const BasicBlock *BB, uint32_t num); 00713 void cleanupGlobalSets(); 00714 void verifyRemoved(const Instruction *I) const; 00715 bool splitCriticalEdges(); 00716 BasicBlock *splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ); 00717 unsigned replaceAllDominatedUsesWith(Value *From, Value *To, 00718 const BasicBlockEdge &Root); 00719 bool propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root); 00720 bool processFoldableCondBr(BranchInst *BI); 00721 void addDeadBlock(BasicBlock *BB); 00722 void assignValNumForDeadCode(); 00723 }; 00724 00725 char 
GVN::ID = 0; 00726 } 00727 00728 // createGVNPass - The public interface to this file... 00729 FunctionPass *llvm::createGVNPass(bool NoLoads) { 00730 return new GVN(NoLoads); 00731 } 00732 00733 INITIALIZE_PASS_BEGIN(GVN, "gvn", "Global Value Numbering", false, false) 00734 INITIALIZE_PASS_DEPENDENCY(AssumptionTracker) 00735 INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis) 00736 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 00737 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) 00738 INITIALIZE_AG_DEPENDENCY(AliasAnalysis) 00739 INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false) 00740 00741 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 00742 void GVN::dump(DenseMap<uint32_t, Value*>& d) { 00743 errs() << "{\n"; 00744 for (DenseMap<uint32_t, Value*>::iterator I = d.begin(), 00745 E = d.end(); I != E; ++I) { 00746 errs() << I->first << "\n"; 00747 I->second->dump(); 00748 } 00749 errs() << "}\n"; 00750 } 00751 #endif 00752 00753 /// IsValueFullyAvailableInBlock - Return true if we can prove that the value 00754 /// we're analyzing is fully available in the specified block. As we go, keep 00755 /// track of which blocks we know are fully alive in FullyAvailableBlocks. This 00756 /// map is actually a tri-state map with the following values: 00757 /// 0) we know the block *is not* fully available. 00758 /// 1) we know the block *is* fully available. 00759 /// 2) we do not know whether the block is fully available or not, but we are 00760 /// currently speculating that it will be. 00761 /// 3) we are speculating for this block and have used that to speculate for 00762 /// other blocks. 00763 static bool IsValueFullyAvailableInBlock(BasicBlock *BB, 00764 DenseMap<BasicBlock*, char> &FullyAvailableBlocks, 00765 uint32_t RecurseDepth) { 00766 if (RecurseDepth > MaxRecurseDepth) 00767 return false; 00768 00769 // Optimistically assume that the block is fully available and check to see 00770 // if we already know about this block in one lookup. 00771 std::pair<DenseMap<BasicBlock*, char>::iterator, char> IV = 00772 FullyAvailableBlocks.insert(std::make_pair(BB, 2)); 00773 00774 // If the entry already existed for this block, return the precomputed value. 00775 if (!IV.second) { 00776 // If this is a speculative "available" value, mark it as being used for 00777 // speculation of other blocks. 00778 if (IV.first->second == 2) 00779 IV.first->second = 3; 00780 return IV.first->second != 0; 00781 } 00782 00783 // Otherwise, see if it is fully available in all predecessors. 00784 pred_iterator PI = pred_begin(BB), PE = pred_end(BB); 00785 00786 // If this block has no predecessors, it isn't live-in here. 00787 if (PI == PE) 00788 goto SpeculationFailure; 00789 00790 for (; PI != PE; ++PI) 00791 // If the value isn't fully available in one of our predecessors, then it 00792 // isn't fully available in this block either. Undo our previous 00793 // optimistic assumption and bail out. 00794 if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks,RecurseDepth+1)) 00795 goto SpeculationFailure; 00796 00797 return true; 00798 00799 // SpeculationFailure - If we get here, we found out that this is not, after 00800 // all, a fully-available block. We have a problem if we speculated on this and 00801 // used the speculation to mark other blocks as available. 00802 SpeculationFailure: 00803 char &BBVal = FullyAvailableBlocks[BB]; 00804 00805 // If we didn't speculate on this, just return with it set to false. 
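  // Editor's note (illustrative recap of the encoding): 0 = known unavailable,
  // 1 = known fully available, 2 = speculatively assumed available but not yet
  // relied upon, 3 = speculatively assumed available and already used to mark
  // other blocks. Only state 3 forces the corrective walk below.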
00806 if (BBVal == 2) { 00807 BBVal = 0; 00808 return false; 00809 } 00810 00811 // If we did speculate on this value, we could have blocks set to 1 that are 00812 // incorrect. Walk the (transitive) successors of this block and mark them as 00813 // 0 if set to one. 00814 SmallVector<BasicBlock*, 32> BBWorklist; 00815 BBWorklist.push_back(BB); 00816 00817 do { 00818 BasicBlock *Entry = BBWorklist.pop_back_val(); 00819 // Note that this sets blocks to 0 (unavailable) if they happen to not 00820 // already be in FullyAvailableBlocks. This is safe. 00821 char &EntryVal = FullyAvailableBlocks[Entry]; 00822 if (EntryVal == 0) continue; // Already unavailable. 00823 00824 // Mark as unavailable. 00825 EntryVal = 0; 00826 00827 BBWorklist.append(succ_begin(Entry), succ_end(Entry)); 00828 } while (!BBWorklist.empty()); 00829 00830 return false; 00831 } 00832 00833 00834 /// CanCoerceMustAliasedValueToLoad - Return true if 00835 /// CoerceAvailableValueToLoadType will succeed. 00836 static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal, 00837 Type *LoadTy, 00838 const DataLayout &DL) { 00839 // If the loaded or stored value is an first class array or struct, don't try 00840 // to transform them. We need to be able to bitcast to integer. 00841 if (LoadTy->isStructTy() || LoadTy->isArrayTy() || 00842 StoredVal->getType()->isStructTy() || 00843 StoredVal->getType()->isArrayTy()) 00844 return false; 00845 00846 // The store has to be at least as big as the load. 00847 if (DL.getTypeSizeInBits(StoredVal->getType()) < 00848 DL.getTypeSizeInBits(LoadTy)) 00849 return false; 00850 00851 return true; 00852 } 00853 00854 /// CoerceAvailableValueToLoadType - If we saw a store of a value to memory, and 00855 /// then a load from a must-aliased pointer of a different type, try to coerce 00856 /// the stored value. LoadedTy is the type of the load we want to replace and 00857 /// InsertPt is the place to insert new instructions. 00858 /// 00859 /// If we can't do it, return null. 00860 static Value *CoerceAvailableValueToLoadType(Value *StoredVal, 00861 Type *LoadedTy, 00862 Instruction *InsertPt, 00863 const DataLayout &DL) { 00864 if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, DL)) 00865 return nullptr; 00866 00867 // If this is already the right type, just return it. 00868 Type *StoredValTy = StoredVal->getType(); 00869 00870 uint64_t StoreSize = DL.getTypeSizeInBits(StoredValTy); 00871 uint64_t LoadSize = DL.getTypeSizeInBits(LoadedTy); 00872 00873 // If the store and reload are the same size, we can always reuse it. 00874 if (StoreSize == LoadSize) { 00875 // Pointer to Pointer -> use bitcast. 00876 if (StoredValTy->getScalarType()->isPointerTy() && 00877 LoadedTy->getScalarType()->isPointerTy()) 00878 return new BitCastInst(StoredVal, LoadedTy, "", InsertPt); 00879 00880 // Convert source pointers to integers, which can be bitcast. 00881 if (StoredValTy->getScalarType()->isPointerTy()) { 00882 StoredValTy = DL.getIntPtrType(StoredValTy); 00883 StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt); 00884 } 00885 00886 Type *TypeToCastTo = LoadedTy; 00887 if (TypeToCastTo->getScalarType()->isPointerTy()) 00888 TypeToCastTo = DL.getIntPtrType(TypeToCastTo); 00889 00890 if (StoredValTy != TypeToCastTo) 00891 StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt); 00892 00893 // Cast to pointer if the load needs a pointer type. 
00894 if (LoadedTy->getScalarType()->isPointerTy()) 00895 StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt); 00896 00897 return StoredVal; 00898 } 00899 00900 // If the loaded value is smaller than the available value, then we can 00901 // extract out a piece from it. If the available value is too small, then we 00902 // can't do anything. 00903 assert(StoreSize >= LoadSize && "CanCoerceMustAliasedValueToLoad fail"); 00904 00905 // Convert source pointers to integers, which can be manipulated. 00906 if (StoredValTy->getScalarType()->isPointerTy()) { 00907 StoredValTy = DL.getIntPtrType(StoredValTy); 00908 StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt); 00909 } 00910 00911 // Convert vectors and fp to integer, which can be manipulated. 00912 if (!StoredValTy->isIntegerTy()) { 00913 StoredValTy = IntegerType::get(StoredValTy->getContext(), StoreSize); 00914 StoredVal = new BitCastInst(StoredVal, StoredValTy, "", InsertPt); 00915 } 00916 00917 // If this is a big-endian system, we need to shift the value down to the low 00918 // bits so that a truncate will work. 00919 if (DL.isBigEndian()) { 00920 Constant *Val = ConstantInt::get(StoredVal->getType(), StoreSize-LoadSize); 00921 StoredVal = BinaryOperator::CreateLShr(StoredVal, Val, "tmp", InsertPt); 00922 } 00923 00924 // Truncate the integer to the right size now. 00925 Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize); 00926 StoredVal = new TruncInst(StoredVal, NewIntTy, "trunc", InsertPt); 00927 00928 if (LoadedTy == NewIntTy) 00929 return StoredVal; 00930 00931 // If the result is a pointer, inttoptr. 00932 if (LoadedTy->getScalarType()->isPointerTy()) 00933 return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt); 00934 00935 // Otherwise, bitcast. 00936 return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt); 00937 } 00938 00939 /// AnalyzeLoadFromClobberingWrite - This function is called when we have a 00940 /// memdep query of a load that ends up being a clobbering memory write (store, 00941 /// memset, memcpy, memmove). This means that the write *may* provide bits used 00942 /// by the load but we can't be sure because the pointers don't mustalias. 00943 /// 00944 /// Check this case to see if there is anything more we can do before we give 00945 /// up. This returns -1 if we have to give up, or a byte number in the stored 00946 /// value of the piece that feeds the load. 00947 static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr, 00948 Value *WritePtr, 00949 uint64_t WriteSizeInBits, 00950 const DataLayout &DL) { 00951 // If the loaded or stored value is a first class array or struct, don't try 00952 // to transform them. We need to be able to bitcast to integer. 00953 if (LoadTy->isStructTy() || LoadTy->isArrayTy()) 00954 return -1; 00955 00956 int64_t StoreOffset = 0, LoadOffset = 0; 00957 Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr,StoreOffset,&DL); 00958 Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, &DL); 00959 if (StoreBase != LoadBase) 00960 return -1; 00961 00962 // If the load and store are to the exact same address, they should have been 00963 // a must alias. AA must have gotten confused. 00964 // FIXME: Study to see if/when this happens. One case is forwarding a memset 00965 // to a load from the base of the memset. 
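//===----------------------------------------------------------------------===//
// Editor's aside (illustrative only): the checks that follow reduce to plain
// interval arithmetic on byte ranges measured from the common base pointer. A
// standalone sketch, under the same assumptions (whole-byte sizes, offsets
// relative to the same base); the function name is hypothetical.
#if 0
#include <cstdint>

// Returns the byte offset of the load inside the store, or -1 if the store
// does not cover every byte of the load.
static int loadOffsetInStore(int64_t StoreOffset, uint64_t StoreSizeBytes,
                             int64_t LoadOffset, uint64_t LoadSizeBytes) {
  // Disjoint ranges: the store feeds nothing to the load.
  if (StoreOffset + (int64_t)StoreSizeBytes <= LoadOffset ||
      LoadOffset + (int64_t)LoadSizeBytes <= StoreOffset)
    return -1;
  // Partial overlap: the load reads bytes the store did not write.
  if (StoreOffset > LoadOffset ||
      StoreOffset + (int64_t)StoreSizeBytes <
          LoadOffset + (int64_t)LoadSizeBytes)
    return -1;
  // Fully contained: e.g. an 8-byte store at offset 0 and a 2-byte load at
  // offset 6 yield 6, the position of the loaded bytes within the store.
  return (int)(LoadOffset - StoreOffset);
}
#endif
// End of editor's aside.
//===----------------------------------------------------------------------===//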
00966 #if 0 00967 if (LoadOffset == StoreOffset) { 00968 dbgs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n" 00969 << "Base = " << *StoreBase << "\n" 00970 << "Store Ptr = " << *WritePtr << "\n" 00971 << "Store Offs = " << StoreOffset << "\n" 00972 << "Load Ptr = " << *LoadPtr << "\n"; 00973 abort(); 00974 } 00975 #endif 00976 00977 // If the load and store don't overlap at all, the store doesn't provide 00978 // anything to the load. In this case, they really don't alias at all, AA 00979 // must have gotten confused. 00980 uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy); 00981 00982 if ((WriteSizeInBits & 7) | (LoadSize & 7)) 00983 return -1; 00984 uint64_t StoreSize = WriteSizeInBits >> 3; // Convert to bytes. 00985 LoadSize >>= 3; 00986 00987 00988 bool isAAFailure = false; 00989 if (StoreOffset < LoadOffset) 00990 isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset; 00991 else 00992 isAAFailure = LoadOffset+int64_t(LoadSize) <= StoreOffset; 00993 00994 if (isAAFailure) { 00995 #if 0 00996 dbgs() << "STORE LOAD DEP WITH COMMON BASE:\n" 00997 << "Base = " << *StoreBase << "\n" 00998 << "Store Ptr = " << *WritePtr << "\n" 00999 << "Store Offs = " << StoreOffset << "\n" 01000 << "Load Ptr = " << *LoadPtr << "\n"; 01001 abort(); 01002 #endif 01003 return -1; 01004 } 01005 01006 // If the Load isn't completely contained within the stored bits, we don't 01007 // have all the bits to feed it. We could do something crazy in the future 01008 // (issue a smaller load then merge the bits in) but this seems unlikely to be 01009 // valuable. 01010 if (StoreOffset > LoadOffset || 01011 StoreOffset+StoreSize < LoadOffset+LoadSize) 01012 return -1; 01013 01014 // Okay, we can do this transformation. Return the number of bytes into the 01015 // store that the load is. 01016 return LoadOffset-StoreOffset; 01017 } 01018 01019 /// AnalyzeLoadFromClobberingStore - This function is called when we have a 01020 /// memdep query of a load that ends up being a clobbering store. 01021 static int AnalyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr, 01022 StoreInst *DepSI, 01023 const DataLayout &DL) { 01024 // Cannot handle reading from store of first-class aggregate yet. 01025 if (DepSI->getValueOperand()->getType()->isStructTy() || 01026 DepSI->getValueOperand()->getType()->isArrayTy()) 01027 return -1; 01028 01029 Value *StorePtr = DepSI->getPointerOperand(); 01030 uint64_t StoreSize =DL.getTypeSizeInBits(DepSI->getValueOperand()->getType()); 01031 return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, 01032 StorePtr, StoreSize, DL); 01033 } 01034 01035 /// AnalyzeLoadFromClobberingLoad - This function is called when we have a 01036 /// memdep query of a load that ends up being clobbered by another load. See if 01037 /// the other load can feed into the second load. 01038 static int AnalyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, 01039 LoadInst *DepLI, const DataLayout &DL){ 01040 // Cannot handle reading from store of first-class aggregate yet. 01041 if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy()) 01042 return -1; 01043 01044 Value *DepPtr = DepLI->getPointerOperand(); 01045 uint64_t DepSize = DL.getTypeSizeInBits(DepLI->getType()); 01046 int R = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, DL); 01047 if (R != -1) return R; 01048 01049 // If we have a load/load clobber an DepLI can be widened to cover this load, 01050 // then we should widen it! 
01051 int64_t LoadOffs = 0; 01052 const Value *LoadBase = 01053 GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, &DL); 01054 unsigned LoadSize = DL.getTypeStoreSize(LoadTy); 01055 01056 unsigned Size = MemoryDependenceAnalysis:: 01057 getLoadLoadClobberFullWidthSize(LoadBase, LoadOffs, LoadSize, DepLI, DL); 01058 if (Size == 0) return -1; 01059 01060 return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size*8, DL); 01061 } 01062 01063 01064 01065 static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr, 01066 MemIntrinsic *MI, 01067 const DataLayout &DL) { 01068 // If the mem operation is a non-constant size, we can't handle it. 01069 ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength()); 01070 if (!SizeCst) return -1; 01071 uint64_t MemSizeInBits = SizeCst->getZExtValue()*8; 01072 01073 // If this is memset, we just need to see if the offset is valid in the size 01074 // of the memset.. 01075 if (MI->getIntrinsicID() == Intrinsic::memset) 01076 return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(), 01077 MemSizeInBits, DL); 01078 01079 // If we have a memcpy/memmove, the only case we can handle is if this is a 01080 // copy from constant memory. In that case, we can read directly from the 01081 // constant memory. 01082 MemTransferInst *MTI = cast<MemTransferInst>(MI); 01083 01084 Constant *Src = dyn_cast<Constant>(MTI->getSource()); 01085 if (!Src) return -1; 01086 01087 GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, &DL)); 01088 if (!GV || !GV->isConstant()) return -1; 01089 01090 // See if the access is within the bounds of the transfer. 01091 int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, 01092 MI->getDest(), MemSizeInBits, DL); 01093 if (Offset == -1) 01094 return Offset; 01095 01096 unsigned AS = Src->getType()->getPointerAddressSpace(); 01097 // Otherwise, see if we can constant fold a load from the constant with the 01098 // offset applied as appropriate. 01099 Src = ConstantExpr::getBitCast(Src, 01100 Type::getInt8PtrTy(Src->getContext(), AS)); 01101 Constant *OffsetCst = 01102 ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); 01103 Src = ConstantExpr::getGetElementPtr(Src, OffsetCst); 01104 Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS)); 01105 if (ConstantFoldLoadFromConstPtr(Src, &DL)) 01106 return Offset; 01107 return -1; 01108 } 01109 01110 01111 /// GetStoreValueForLoad - This function is called when we have a 01112 /// memdep query of a load that ends up being a clobbering store. This means 01113 /// that the store provides bits used by the load but we the pointers don't 01114 /// mustalias. Check this case to see if there is anything more we can do 01115 /// before we give up. 01116 static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset, 01117 Type *LoadTy, 01118 Instruction *InsertPt, const DataLayout &DL){ 01119 LLVMContext &Ctx = SrcVal->getType()->getContext(); 01120 01121 uint64_t StoreSize = (DL.getTypeSizeInBits(SrcVal->getType()) + 7) / 8; 01122 uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy) + 7) / 8; 01123 01124 IRBuilder<> Builder(InsertPt->getParent(), InsertPt); 01125 01126 // Compute which bits of the stored value are being used by the load. Convert 01127 // to an integer type to start with. 
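  // Editor's note (worked example): with an 8-byte store, a 4-byte load and
  // Offset == 4, the code below shifts by 4*8 = 32 bits on a little-endian
  // target (the wanted bytes are the high half) and by (8-4-4)*8 = 0 bits on a
  // big-endian target, then truncates to 32 bits.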
01128 if (SrcVal->getType()->getScalarType()->isPointerTy()) 01129 SrcVal = Builder.CreatePtrToInt(SrcVal, 01130 DL.getIntPtrType(SrcVal->getType())); 01131 if (!SrcVal->getType()->isIntegerTy()) 01132 SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8)); 01133 01134 // Shift the bits to the least significant depending on endianness. 01135 unsigned ShiftAmt; 01136 if (DL.isLittleEndian()) 01137 ShiftAmt = Offset*8; 01138 else 01139 ShiftAmt = (StoreSize-LoadSize-Offset)*8; 01140 01141 if (ShiftAmt) 01142 SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt); 01143 01144 if (LoadSize != StoreSize) 01145 SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8)); 01146 01147 return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, DL); 01148 } 01149 01150 /// GetLoadValueForLoad - This function is called when we have a 01151 /// memdep query of a load that ends up being a clobbering load. This means 01152 /// that the load *may* provide bits used by the load but we can't be sure 01153 /// because the pointers don't mustalias. Check this case to see if there is 01154 /// anything more we can do before we give up. 01155 static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, 01156 Type *LoadTy, Instruction *InsertPt, 01157 GVN &gvn) { 01158 const DataLayout &DL = *gvn.getDataLayout(); 01159 // If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to 01160 // widen SrcVal out to a larger load. 01161 unsigned SrcValSize = DL.getTypeStoreSize(SrcVal->getType()); 01162 unsigned LoadSize = DL.getTypeStoreSize(LoadTy); 01163 if (Offset+LoadSize > SrcValSize) { 01164 assert(SrcVal->isSimple() && "Cannot widen volatile/atomic load!"); 01165 assert(SrcVal->getType()->isIntegerTy() && "Can't widen non-integer load"); 01166 // If we have a load/load clobber an DepLI can be widened to cover this 01167 // load, then we should widen it to the next power of 2 size big enough! 01168 unsigned NewLoadSize = Offset+LoadSize; 01169 if (!isPowerOf2_32(NewLoadSize)) 01170 NewLoadSize = NextPowerOf2(NewLoadSize); 01171 01172 Value *PtrVal = SrcVal->getPointerOperand(); 01173 01174 // Insert the new load after the old load. This ensures that subsequent 01175 // memdep queries will find the new load. We can't easily remove the old 01176 // load completely because it is already in the value numbering table. 01177 IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal)); 01178 Type *DestPTy = 01179 IntegerType::get(LoadTy->getContext(), NewLoadSize*8); 01180 DestPTy = PointerType::get(DestPTy, 01181 PtrVal->getType()->getPointerAddressSpace()); 01182 Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc()); 01183 PtrVal = Builder.CreateBitCast(PtrVal, DestPTy); 01184 LoadInst *NewLoad = Builder.CreateLoad(PtrVal); 01185 NewLoad->takeName(SrcVal); 01186 NewLoad->setAlignment(SrcVal->getAlignment()); 01187 01188 DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n"); 01189 DEBUG(dbgs() << "TO: " << *NewLoad << "\n"); 01190 01191 // Replace uses of the original load with the wider load. On a big endian 01192 // system, we need to shift down to get the relevant bits. 
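    // Editor's note (worked example): widening an existing i16 load when the
    // query needs Offset == 3 and LoadSize == 2 gives NewLoadSize = 5, rounded
    // up to the next power of two, 8; the i16 value is then recovered from the
    // new i64 load by the shift (big-endian only) and trunc below before the
    // old load's uses are rewritten.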
01193 Value *RV = NewLoad; 01194 if (DL.isBigEndian()) 01195 RV = Builder.CreateLShr(RV, 01196 NewLoadSize*8-SrcVal->getType()->getPrimitiveSizeInBits()); 01197 RV = Builder.CreateTrunc(RV, SrcVal->getType()); 01198 SrcVal->replaceAllUsesWith(RV); 01199 01200 // We would like to use gvn.markInstructionForDeletion here, but we can't 01201 // because the load is already memoized into the leader map table that GVN 01202 // tracks. It is potentially possible to remove the load from the table, 01203 // but then there all of the operations based on it would need to be 01204 // rehashed. Just leave the dead load around. 01205 gvn.getMemDep().removeInstruction(SrcVal); 01206 SrcVal = NewLoad; 01207 } 01208 01209 return GetStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, DL); 01210 } 01211 01212 01213 /// GetMemInstValueForLoad - This function is called when we have a 01214 /// memdep query of a load that ends up being a clobbering mem intrinsic. 01215 static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, 01216 Type *LoadTy, Instruction *InsertPt, 01217 const DataLayout &DL){ 01218 LLVMContext &Ctx = LoadTy->getContext(); 01219 uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy)/8; 01220 01221 IRBuilder<> Builder(InsertPt->getParent(), InsertPt); 01222 01223 // We know that this method is only called when the mem transfer fully 01224 // provides the bits for the load. 01225 if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) { 01226 // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and 01227 // independently of what the offset is. 01228 Value *Val = MSI->getValue(); 01229 if (LoadSize != 1) 01230 Val = Builder.CreateZExt(Val, IntegerType::get(Ctx, LoadSize*8)); 01231 01232 Value *OneElt = Val; 01233 01234 // Splat the value out to the right number of bits. 01235 for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize; ) { 01236 // If we can double the number of bytes set, do it. 01237 if (NumBytesSet*2 <= LoadSize) { 01238 Value *ShVal = Builder.CreateShl(Val, NumBytesSet*8); 01239 Val = Builder.CreateOr(Val, ShVal); 01240 NumBytesSet <<= 1; 01241 continue; 01242 } 01243 01244 // Otherwise insert one byte at a time. 01245 Value *ShVal = Builder.CreateShl(Val, 1*8); 01246 Val = Builder.CreateOr(OneElt, ShVal); 01247 ++NumBytesSet; 01248 } 01249 01250 return CoerceAvailableValueToLoadType(Val, LoadTy, InsertPt, DL); 01251 } 01252 01253 // Otherwise, this is a memcpy/memmove from a constant global. 01254 MemTransferInst *MTI = cast<MemTransferInst>(SrcInst); 01255 Constant *Src = cast<Constant>(MTI->getSource()); 01256 unsigned AS = Src->getType()->getPointerAddressSpace(); 01257 01258 // Otherwise, see if we can constant fold a load from the constant with the 01259 // offset applied as appropriate. 01260 Src = ConstantExpr::getBitCast(Src, 01261 Type::getInt8PtrTy(Src->getContext(), AS)); 01262 Constant *OffsetCst = 01263 ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); 01264 Src = ConstantExpr::getGetElementPtr(Src, OffsetCst); 01265 Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS)); 01266 return ConstantFoldLoadFromConstPtr(Src, &DL); 01267 } 01268 01269 01270 /// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock, 01271 /// construct SSA form, allowing us to eliminate LI. This returns the value 01272 /// that should be used at LI's definition site. 
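// Editor's note (illustrative example): if the load's block has two
// predecessors and ValuesPerBlock records %v1 available in the first and %v2
// in the second, the SSAUpdater below materializes "phi [%v1, pred1],
// [%v2, pred2]" at the top of the load's block and that PHI replaces the load.
// A single entry that already properly dominates the load is returned
// directly.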
01273 static Value *ConstructSSAForLoadSet(LoadInst *LI, 01274 SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock, 01275 GVN &gvn) { 01276 // Check for the fully redundant, dominating load case. In this case, we can 01277 // just use the dominating value directly. 01278 if (ValuesPerBlock.size() == 1 && 01279 gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB, 01280 LI->getParent())) { 01281 assert(!ValuesPerBlock[0].isUndefValue() && "Dead BB dominate this block"); 01282 return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), gvn); 01283 } 01284 01285 // Otherwise, we have to construct SSA form. 01286 SmallVector<PHINode*, 8> NewPHIs; 01287 SSAUpdater SSAUpdate(&NewPHIs); 01288 SSAUpdate.Initialize(LI->getType(), LI->getName()); 01289 01290 Type *LoadTy = LI->getType(); 01291 01292 for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) { 01293 const AvailableValueInBlock &AV = ValuesPerBlock[i]; 01294 BasicBlock *BB = AV.BB; 01295 01296 if (SSAUpdate.HasValueForBlock(BB)) 01297 continue; 01298 01299 SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, gvn)); 01300 } 01301 01302 // Perform PHI construction. 01303 Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent()); 01304 01305 // If new PHI nodes were created, notify alias analysis. 01306 if (V->getType()->getScalarType()->isPointerTy()) { 01307 AliasAnalysis *AA = gvn.getAliasAnalysis(); 01308 01309 for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) 01310 AA->copyValue(LI, NewPHIs[i]); 01311 01312 // Now that we've copied information to the new PHIs, scan through 01313 // them again and inform alias analysis that we've added potentially 01314 // escaping uses to any values that are operands to these PHIs. 01315 for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) { 01316 PHINode *P = NewPHIs[i]; 01317 for (unsigned ii = 0, ee = P->getNumIncomingValues(); ii != ee; ++ii) { 01318 unsigned jj = PHINode::getOperandNumForIncomingValue(ii); 01319 AA->addEscapingUse(P->getOperandUse(jj)); 01320 } 01321 } 01322 } 01323 01324 return V; 01325 } 01326 01327 Value *AvailableValueInBlock::MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const { 01328 Value *Res; 01329 if (isSimpleValue()) { 01330 Res = getSimpleValue(); 01331 if (Res->getType() != LoadTy) { 01332 const DataLayout *DL = gvn.getDataLayout(); 01333 assert(DL && "Need target data to handle type mismatch case"); 01334 Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(), 01335 *DL); 01336 01337 DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " " 01338 << *getSimpleValue() << '\n' 01339 << *Res << '\n' << "\n\n\n"); 01340 } 01341 } else if (isCoercedLoadValue()) { 01342 LoadInst *Load = getCoercedLoadValue(); 01343 if (Load->getType() == LoadTy && Offset == 0) { 01344 Res = Load; 01345 } else { 01346 Res = GetLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator(), 01347 gvn); 01348 01349 DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " " 01350 << *getCoercedLoadValue() << '\n' 01351 << *Res << '\n' << "\n\n\n"); 01352 } 01353 } else if (isMemIntrinValue()) { 01354 const DataLayout *DL = gvn.getDataLayout(); 01355 assert(DL && "Need target data to handle type mismatch case"); 01356 Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset, 01357 LoadTy, BB->getTerminator(), *DL); 01358 DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset 01359 << " " << *getMemIntrinValue() << '\n' 01360 << *Res << '\n' << "\n\n\n"); 01361 } else { 01362 assert(isUndefValue() 
&& "Should be UndefVal"); 01363 DEBUG(dbgs() << "GVN COERCED NONLOCAL Undef:\n";); 01364 return UndefValue::get(LoadTy); 01365 } 01366 return Res; 01367 } 01368 01369 static bool isLifetimeStart(const Instruction *Inst) { 01370 if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst)) 01371 return II->getIntrinsicID() == Intrinsic::lifetime_start; 01372 return false; 01373 } 01374 01375 void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps, 01376 AvailValInBlkVect &ValuesPerBlock, 01377 UnavailBlkVect &UnavailableBlocks) { 01378 01379 // Filter out useless results (non-locals, etc). Keep track of the blocks 01380 // where we have a value available in repl, also keep track of whether we see 01381 // dependencies that produce an unknown value for the load (such as a call 01382 // that could potentially clobber the load). 01383 unsigned NumDeps = Deps.size(); 01384 for (unsigned i = 0, e = NumDeps; i != e; ++i) { 01385 BasicBlock *DepBB = Deps[i].getBB(); 01386 MemDepResult DepInfo = Deps[i].getResult(); 01387 01388 if (DeadBlocks.count(DepBB)) { 01389 // Dead dependent mem-op disguise as a load evaluating the same value 01390 // as the load in question. 01391 ValuesPerBlock.push_back(AvailableValueInBlock::getUndef(DepBB)); 01392 continue; 01393 } 01394 01395 if (!DepInfo.isDef() && !DepInfo.isClobber()) { 01396 UnavailableBlocks.push_back(DepBB); 01397 continue; 01398 } 01399 01400 if (DepInfo.isClobber()) { 01401 // The address being loaded in this non-local block may not be the same as 01402 // the pointer operand of the load if PHI translation occurs. Make sure 01403 // to consider the right address. 01404 Value *Address = Deps[i].getAddress(); 01405 01406 // If the dependence is to a store that writes to a superset of the bits 01407 // read by the load, we can extract the bits we need for the load from the 01408 // stored value. 01409 if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) { 01410 if (DL && Address) { 01411 int Offset = AnalyzeLoadFromClobberingStore(LI->getType(), Address, 01412 DepSI, *DL); 01413 if (Offset != -1) { 01414 ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, 01415 DepSI->getValueOperand(), 01416 Offset)); 01417 continue; 01418 } 01419 } 01420 } 01421 01422 // Check to see if we have something like this: 01423 // load i32* P 01424 // load i8* (P+1) 01425 // if we have this, replace the later with an extraction from the former. 01426 if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInfo.getInst())) { 01427 // If this is a clobber and L is the first instruction in its block, then 01428 // we have the first instruction in the entry block. 01429 if (DepLI != LI && Address && DL) { 01430 int Offset = AnalyzeLoadFromClobberingLoad(LI->getType(), Address, 01431 DepLI, *DL); 01432 01433 if (Offset != -1) { 01434 ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB,DepLI, 01435 Offset)); 01436 continue; 01437 } 01438 } 01439 } 01440 01441 // If the clobbering value is a memset/memcpy/memmove, see if we can 01442 // forward a value on from it. 
01443 if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) { 01444 if (DL && Address) { 01445 int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address, 01446 DepMI, *DL); 01447 if (Offset != -1) { 01448 ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI, 01449 Offset)); 01450 continue; 01451 } 01452 } 01453 } 01454 01455 UnavailableBlocks.push_back(DepBB); 01456 continue; 01457 } 01458 01459 // DepInfo.isDef() here 01460 01461 Instruction *DepInst = DepInfo.getInst(); 01462 01463 // Loading the allocation -> undef. 01464 if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst, TLI) || 01465 // Loading immediately after lifetime begin -> undef. 01466 isLifetimeStart(DepInst)) { 01467 ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, 01468 UndefValue::get(LI->getType()))); 01469 continue; 01470 } 01471 01472 // Loading from calloc (which zero initializes memory) -> zero 01473 if (isCallocLikeFn(DepInst, TLI)) { 01474 ValuesPerBlock.push_back(AvailableValueInBlock::get( 01475 DepBB, Constant::getNullValue(LI->getType()))); 01476 continue; 01477 } 01478 01479 if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) { 01480 // Reject loads and stores that are to the same address but are of 01481 // different types if we have to. 01482 if (S->getValueOperand()->getType() != LI->getType()) { 01483 // If the stored value is larger or equal to the loaded value, we can 01484 // reuse it. 01485 if (!DL || !CanCoerceMustAliasedValueToLoad(S->getValueOperand(), 01486 LI->getType(), *DL)) { 01487 UnavailableBlocks.push_back(DepBB); 01488 continue; 01489 } 01490 } 01491 01492 ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, 01493 S->getValueOperand())); 01494 continue; 01495 } 01496 01497 if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) { 01498 // If the types mismatch and we can't handle it, reject reuse of the load. 01499 if (LD->getType() != LI->getType()) { 01500 // If the stored value is larger or equal to the loaded value, we can 01501 // reuse it. 01502 if (!DL || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*DL)) { 01503 UnavailableBlocks.push_back(DepBB); 01504 continue; 01505 } 01506 } 01507 ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB, LD)); 01508 continue; 01509 } 01510 01511 UnavailableBlocks.push_back(DepBB); 01512 } 01513 } 01514 01515 bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, 01516 UnavailBlkVect &UnavailableBlocks) { 01517 // Okay, we have *some* definitions of the value. This means that the value 01518 // is available in some of our (transitive) predecessors. Lets think about 01519 // doing PRE of this load. This will involve inserting a new load into the 01520 // predecessor when it's not available. We could do this in general, but 01521 // prefer to not increase code size. As such, we only do this when we know 01522 // that we only have to insert *one* load (which means we're basically moving 01523 // the load, not inserting a new one). 01524 01525 SmallPtrSet<BasicBlock *, 4> Blockers; 01526 for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i) 01527 Blockers.insert(UnavailableBlocks[i]); 01528 01529 // Let's find the first basic block with more than one predecessor. Walk 01530 // backwards through predecessors if needed. 01531 BasicBlock *LoadBB = LI->getParent(); 01532 BasicBlock *TmpBB = LoadBB; 01533 01534 while (TmpBB->getSinglePredecessor()) { 01535 TmpBB = TmpBB->getSinglePredecessor(); 01536 if (TmpBB == LoadBB) // Infinite (unreachable) loop. 
01537 return false; 01538 if (Blockers.count(TmpBB)) 01539 return false; 01540 01541 // If any of these blocks has more than one successor (i.e. if the edge we 01542 // just traversed was critical), then there are other paths through this 01543 // block along which the load may not be anticipated. Hoisting the load 01544 // above this block would be adding the load to execution paths along 01545 // which it was not previously executed. 01546 if (TmpBB->getTerminator()->getNumSuccessors() != 1) 01547 return false; 01548 } 01549 01550 assert(TmpBB); 01551 LoadBB = TmpBB; 01552 01553 // Check to see how many predecessors have the loaded value fully 01554 // available. 01555 MapVector<BasicBlock *, Value *> PredLoads; 01556 DenseMap<BasicBlock*, char> FullyAvailableBlocks; 01557 for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) 01558 FullyAvailableBlocks[ValuesPerBlock[i].BB] = true; 01559 for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i) 01560 FullyAvailableBlocks[UnavailableBlocks[i]] = false; 01561 01562 SmallVector<BasicBlock *, 4> CriticalEdgePred; 01563 for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB); 01564 PI != E; ++PI) { 01565 BasicBlock *Pred = *PI; 01566 if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks, 0)) { 01567 continue; 01568 } 01569 01570 if (Pred->getTerminator()->getNumSuccessors() != 1) { 01571 if (isa<IndirectBrInst>(Pred->getTerminator())) { 01572 DEBUG(dbgs() << "COULD NOT PRE LOAD BECAUSE OF INDBR CRITICAL EDGE '" 01573 << Pred->getName() << "': " << *LI << '\n'); 01574 return false; 01575 } 01576 01577 if (LoadBB->isLandingPad()) { 01578 DEBUG(dbgs() 01579 << "COULD NOT PRE LOAD BECAUSE OF LANDING PAD CRITICAL EDGE '" 01580 << Pred->getName() << "': " << *LI << '\n'); 01581 return false; 01582 } 01583 01584 CriticalEdgePred.push_back(Pred); 01585 } else { 01586 // Only add the predecessors that will not be split for now. 01587 PredLoads[Pred] = nullptr; 01588 } 01589 } 01590 01591 // Decide whether PRE is profitable for this load. 01592 unsigned NumUnavailablePreds = PredLoads.size() + CriticalEdgePred.size(); 01593 assert(NumUnavailablePreds != 0 && 01594 "Fully available value should already be eliminated!"); 01595 01596 // If this load is unavailable in multiple predecessors, reject it. 01597 // FIXME: If we could restructure the CFG, we could make a common pred with 01598 // all the preds that don't have an available LI and insert a new load into 01599 // that one block. 01600 if (NumUnavailablePreds != 1) 01601 return false; 01602 01603 // Split critical edges, and update the unavailable predecessors accordingly. 01604 for (BasicBlock *OrigPred : CriticalEdgePred) { 01605 BasicBlock *NewPred = splitCriticalEdges(OrigPred, LoadBB); 01606 assert(!PredLoads.count(OrigPred) && "Split edges shouldn't be in map!"); 01607 PredLoads[NewPred] = nullptr; 01608 DEBUG(dbgs() << "Split critical edge " << OrigPred->getName() << "->" 01609 << LoadBB->getName() << '\n'); 01610 } 01611 01612 // Check if the load can safely be moved to all the unavailable predecessors. 01613 bool CanDoPRE = true; 01614 SmallVector<Instruction*, 8> NewInsts; 01615 for (auto &PredLoad : PredLoads) { 01616 BasicBlock *UnavailablePred = PredLoad.first; 01617 01618 // Do PHI translation to get its value in the predecessor if necessary. The 01619 // returned pointer (if non-null) is guaranteed to dominate UnavailablePred. 
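    // Rough illustration (hypothetical values): if the loaded address is
    //   %addr = getelementptr i32* %base, i64 %idx
    // and %idx is a PHI in LoadBB, then in UnavailablePred the address must be
    // re-expressed with that predecessor's incoming value of %idx;
    // PHITranslateWithInsertion below may insert the required GEP into the
    // predecessor and record it in NewInsts.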
01620 01621 // If all preds have a single successor, then we know it is safe to insert 01622 // the load on the pred (?!?), so we can insert code to materialize the 01623 // pointer if it is not available. 01624 PHITransAddr Address(LI->getPointerOperand(), DL, AT); 01625 Value *LoadPtr = nullptr; 01626 LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred, 01627 *DT, NewInsts); 01628 01629 // If we couldn't find or insert a computation of this phi translated value, 01630 // we fail PRE. 01631 if (!LoadPtr) { 01632 DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: " 01633 << *LI->getPointerOperand() << "\n"); 01634 CanDoPRE = false; 01635 break; 01636 } 01637 01638 PredLoad.second = LoadPtr; 01639 } 01640 01641 if (!CanDoPRE) { 01642 while (!NewInsts.empty()) { 01643 Instruction *I = NewInsts.pop_back_val(); 01644 if (MD) MD->removeInstruction(I); 01645 I->eraseFromParent(); 01646 } 01647 // HINT: Don't revert the edge-splitting as following transformation may 01648 // also need to split these critical edges. 01649 return !CriticalEdgePred.empty(); 01650 } 01651 01652 // Okay, we can eliminate this load by inserting a reload in the predecessor 01653 // and using PHI construction to get the value in the other predecessors, do 01654 // it. 01655 DEBUG(dbgs() << "GVN REMOVING PRE LOAD: " << *LI << '\n'); 01656 DEBUG(if (!NewInsts.empty()) 01657 dbgs() << "INSERTED " << NewInsts.size() << " INSTS: " 01658 << *NewInsts.back() << '\n'); 01659 01660 // Assign value numbers to the new instructions. 01661 for (unsigned i = 0, e = NewInsts.size(); i != e; ++i) { 01662 // FIXME: We really _ought_ to insert these value numbers into their 01663 // parent's availability map. However, in doing so, we risk getting into 01664 // ordering issues. If a block hasn't been processed yet, we would be 01665 // marking a value as AVAIL-IN, which isn't what we intend. 01666 VN.lookup_or_add(NewInsts[i]); 01667 } 01668 01669 for (const auto &PredLoad : PredLoads) { 01670 BasicBlock *UnavailablePred = PredLoad.first; 01671 Value *LoadPtr = PredLoad.second; 01672 01673 Instruction *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false, 01674 LI->getAlignment(), 01675 UnavailablePred->getTerminator()); 01676 01677 // Transfer the old load's AA tags to the new load. 01678 AAMDNodes Tags; 01679 LI->getAAMetadata(Tags); 01680 if (Tags) 01681 NewLoad->setAAMetadata(Tags); 01682 01683 // Transfer DebugLoc. 01684 NewLoad->setDebugLoc(LI->getDebugLoc()); 01685 01686 // Add the newly created load. 01687 ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred, 01688 NewLoad)); 01689 MD->invalidateCachedPointerInfo(LoadPtr); 01690 DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n'); 01691 } 01692 01693 // Perform PHI construction. 01694 Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this); 01695 LI->replaceAllUsesWith(V); 01696 if (isa<PHINode>(V)) 01697 V->takeName(LI); 01698 if (V->getType()->getScalarType()->isPointerTy()) 01699 MD->invalidateCachedPointerInfo(V); 01700 markInstructionForDeletion(LI); 01701 ++NumPRELoad; 01702 return true; 01703 } 01704 01705 /// processNonLocalLoad - Attempt to eliminate a load whose dependencies are 01706 /// non-local by performing PHI construction. 01707 bool GVN::processNonLocalLoad(LoadInst *LI) { 01708 // Step 1: Find the non-local dependencies of the load. 
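  // Illustrative CFG (hypothetical, simplified):
  //       BB1: store i32 %x, i32* %P     BB2: call void @foo()
  //                 \                       /
  //                  v                     v
  //       BB3: %v = load i32* %P
  // Here the dependency list is expected to hold a def (the store) for BB1 and
  // a clobber (the call) for BB2; the steps below then decide whether %v can
  // be rewritten as a PHI of known values or PRE'd into the blocking block.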
01709   LoadDepVect Deps;
01710   AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI);
01711   MD->getNonLocalPointerDependency(Loc, true, LI->getParent(), Deps);
01712 
01713   // If we had to process more than one hundred blocks to find the
01714   // dependencies, this load isn't worth worrying about.  Optimizing
01715   // it will be too expensive.
01716   unsigned NumDeps = Deps.size();
01717   if (NumDeps > 100)
01718     return false;
01719 
01720   // If we had a phi translation failure, we'll have a single entry which is a
01721   // clobber in the current block.  Reject this early.
01722   if (NumDeps == 1 &&
01723       !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) {
01724     DEBUG(
01725       dbgs() << "GVN: non-local load ";
01726       LI->printAsOperand(dbgs());
01727       dbgs() << " has unknown dependencies\n";
01728     );
01729     return false;
01730   }
01731 
01732   // Step 2: Analyze the availability of the load
01733   AvailValInBlkVect ValuesPerBlock;
01734   UnavailBlkVect UnavailableBlocks;
01735   AnalyzeLoadAvailability(LI, Deps, ValuesPerBlock, UnavailableBlocks);
01736 
01737   // If we have no predecessors that produce a known value for this load, exit
01738   // early.
01739   if (ValuesPerBlock.empty())
01740     return false;
01741 
01742   // Step 3: Eliminate full redundancy.
01743   //
01744   // If all of the instructions we depend on produce a known value for this
01745   // load, then it is fully redundant and we can use PHI insertion to compute
01746   // its value.  Insert PHIs and remove the fully redundant value now.
01747   if (UnavailableBlocks.empty()) {
01748     DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');
01749 
01750     // Perform PHI construction.
01751     Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this);
01752     LI->replaceAllUsesWith(V);
01753 
01754     if (isa<PHINode>(V))
01755       V->takeName(LI);
01756     if (V->getType()->getScalarType()->isPointerTy())
01757       MD->invalidateCachedPointerInfo(V);
01758     markInstructionForDeletion(LI);
01759     ++NumGVNLoad;
01760     return true;
01761   }
01762 
01763   // Step 4: Eliminate partial redundancy.
01764   if (!EnablePRE || !EnableLoadPRE)
01765     return false;
01766 
01767   return PerformLoadPRE(LI, ValuesPerBlock, UnavailableBlocks);
01768 }
01769 
01770 
01771 static void patchReplacementInstruction(Instruction *I, Value *Repl) {
01772   // Patch the replacement so that it is not more restrictive than the value
01773   // being replaced.
01774   BinaryOperator *Op = dyn_cast<BinaryOperator>(I);
01775   BinaryOperator *ReplOp = dyn_cast<BinaryOperator>(Repl);
01776   if (Op && ReplOp && isa<OverflowingBinaryOperator>(Op) &&
01777       isa<OverflowingBinaryOperator>(ReplOp)) {
01778     if (ReplOp->hasNoSignedWrap() && !Op->hasNoSignedWrap())
01779       ReplOp->setHasNoSignedWrap(false);
01780     if (ReplOp->hasNoUnsignedWrap() && !Op->hasNoUnsignedWrap())
01781       ReplOp->setHasNoUnsignedWrap(false);
01782   }
01783   if (Instruction *ReplInst = dyn_cast<Instruction>(Repl)) {
01784     // FIXME: If both the original and replacement value are part of the
01785     // same control-flow region (meaning that the execution of one
01786     // guarantees the execution of the other), then we can combine the
01787     // noalias scopes here and do better than the general conservative
01788     // answer used in combineMetadata().
01789 
01790     // In general, GVN unifies expressions over different control-flow
01791     // regions, and so we need a conservative combination of the noalias
01792     // scopes.
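    // Roughly speaking (hedged sketch): if the replacement instruction carries
    //   !tbaa !0 and !range !1
    // while the instruction being replaced carries only !tbaa !0,
    // combineMetadata() drops !range from the replacement and keeps just the
    // metadata that is valid for both instructions.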
01793 unsigned KnownIDs[] = { 01794 LLVMContext::MD_tbaa, 01795 LLVMContext::MD_alias_scope, 01796 LLVMContext::MD_noalias, 01797 LLVMContext::MD_range, 01798 LLVMContext::MD_fpmath, 01799 LLVMContext::MD_invariant_load, 01800 }; 01801 combineMetadata(ReplInst, I, KnownIDs); 01802 } 01803 } 01804 01805 static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) { 01806 patchReplacementInstruction(I, Repl); 01807 I->replaceAllUsesWith(Repl); 01808 } 01809 01810 /// processLoad - Attempt to eliminate a load, first by eliminating it 01811 /// locally, and then attempting non-local elimination if that fails. 01812 bool GVN::processLoad(LoadInst *L) { 01813 if (!MD) 01814 return false; 01815 01816 if (!L->isSimple()) 01817 return false; 01818 01819 if (L->use_empty()) { 01820 markInstructionForDeletion(L); 01821 return true; 01822 } 01823 01824 // ... to a pointer that has been loaded from before... 01825 MemDepResult Dep = MD->getDependency(L); 01826 01827 // If we have a clobber and target data is around, see if this is a clobber 01828 // that we can fix up through code synthesis. 01829 if (Dep.isClobber() && DL) { 01830 // Check to see if we have something like this: 01831 // store i32 123, i32* %P 01832 // %A = bitcast i32* %P to i8* 01833 // %B = gep i8* %A, i32 1 01834 // %C = load i8* %B 01835 // 01836 // We could do that by recognizing if the clobber instructions are obviously 01837 // a common base + constant offset, and if the previous store (or memset) 01838 // completely covers this load. This sort of thing can happen in bitfield 01839 // access code. 01840 Value *AvailVal = nullptr; 01841 if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst())) { 01842 int Offset = AnalyzeLoadFromClobberingStore(L->getType(), 01843 L->getPointerOperand(), 01844 DepSI, *DL); 01845 if (Offset != -1) 01846 AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset, 01847 L->getType(), L, *DL); 01848 } 01849 01850 // Check to see if we have something like this: 01851 // load i32* P 01852 // load i8* (P+1) 01853 // if we have this, replace the later with an extraction from the former. 01854 if (LoadInst *DepLI = dyn_cast<LoadInst>(Dep.getInst())) { 01855 // If this is a clobber and L is the first instruction in its block, then 01856 // we have the first instruction in the entry block. 01857 if (DepLI == L) 01858 return false; 01859 01860 int Offset = AnalyzeLoadFromClobberingLoad(L->getType(), 01861 L->getPointerOperand(), 01862 DepLI, *DL); 01863 if (Offset != -1) 01864 AvailVal = GetLoadValueForLoad(DepLI, Offset, L->getType(), L, *this); 01865 } 01866 01867 // If the clobbering value is a memset/memcpy/memmove, see if we can forward 01868 // a value on from it. 01869 if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) { 01870 int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(), 01871 L->getPointerOperand(), 01872 DepMI, *DL); 01873 if (Offset != -1) 01874 AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L, *DL); 01875 } 01876 01877 if (AvailVal) { 01878 DEBUG(dbgs() << "GVN COERCED INST:\n" << *Dep.getInst() << '\n' 01879 << *AvailVal << '\n' << *L << "\n\n\n"); 01880 01881 // Replace the load! 01882 L->replaceAllUsesWith(AvailVal); 01883 if (AvailVal->getType()->getScalarType()->isPointerTy()) 01884 MD->invalidateCachedPointerInfo(AvailVal); 01885 markInstructionForDeletion(L); 01886 ++NumGVNLoad; 01887 return true; 01888 } 01889 } 01890 01891 // If the value isn't available, don't do anything! 
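  // E.g. (hypothetical): a clobber such as
  //   call void @unknown_callee(i8* %P)
  // that memory dependence analysis could not see through ends up here and
  // simply blocks local elimination of this load.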
01892 if (Dep.isClobber()) { 01893 DEBUG( 01894 // fast print dep, using operator<< on instruction is too slow. 01895 dbgs() << "GVN: load "; 01896 L->printAsOperand(dbgs()); 01897 Instruction *I = Dep.getInst(); 01898 dbgs() << " is clobbered by " << *I << '\n'; 01899 ); 01900 return false; 01901 } 01902 01903 // If it is defined in another block, try harder. 01904 if (Dep.isNonLocal()) 01905 return processNonLocalLoad(L); 01906 01907 if (!Dep.isDef()) { 01908 DEBUG( 01909 // fast print dep, using operator<< on instruction is too slow. 01910 dbgs() << "GVN: load "; 01911 L->printAsOperand(dbgs()); 01912 dbgs() << " has unknown dependence\n"; 01913 ); 01914 return false; 01915 } 01916 01917 Instruction *DepInst = Dep.getInst(); 01918 if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) { 01919 Value *StoredVal = DepSI->getValueOperand(); 01920 01921 // The store and load are to a must-aliased pointer, but they may not 01922 // actually have the same type. See if we know how to reuse the stored 01923 // value (depending on its type). 01924 if (StoredVal->getType() != L->getType()) { 01925 if (DL) { 01926 StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(), 01927 L, *DL); 01928 if (!StoredVal) 01929 return false; 01930 01931 DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal 01932 << '\n' << *L << "\n\n\n"); 01933 } 01934 else 01935 return false; 01936 } 01937 01938 // Remove it! 01939 L->replaceAllUsesWith(StoredVal); 01940 if (StoredVal->getType()->getScalarType()->isPointerTy()) 01941 MD->invalidateCachedPointerInfo(StoredVal); 01942 markInstructionForDeletion(L); 01943 ++NumGVNLoad; 01944 return true; 01945 } 01946 01947 if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInst)) { 01948 Value *AvailableVal = DepLI; 01949 01950 // The loads are of a must-aliased pointer, but they may not actually have 01951 // the same type. See if we know how to reuse the previously loaded value 01952 // (depending on its type). 01953 if (DepLI->getType() != L->getType()) { 01954 if (DL) { 01955 AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), 01956 L, *DL); 01957 if (!AvailableVal) 01958 return false; 01959 01960 DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal 01961 << "\n" << *L << "\n\n\n"); 01962 } 01963 else 01964 return false; 01965 } 01966 01967 // Remove it! 01968 patchAndReplaceAllUsesWith(L, AvailableVal); 01969 if (DepLI->getType()->getScalarType()->isPointerTy()) 01970 MD->invalidateCachedPointerInfo(DepLI); 01971 markInstructionForDeletion(L); 01972 ++NumGVNLoad; 01973 return true; 01974 } 01975 01976 // If this load really doesn't depend on anything, then we must be loading an 01977 // undef value. This can happen when loading for a fresh allocation with no 01978 // intervening stores, for example. 01979 if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst, TLI)) { 01980 L->replaceAllUsesWith(UndefValue::get(L->getType())); 01981 markInstructionForDeletion(L); 01982 ++NumGVNLoad; 01983 return true; 01984 } 01985 01986 // If this load occurs either right after a lifetime begin, 01987 // then the loaded value is undefined. 
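  // E.g. (illustrative):
  //   call void @llvm.lifetime.start(i64 4, i8* %p)
  //   %v = load i32* %q        ; where %q must-aliases %p
  // Nothing has been stored since the lifetime began, so %v may be folded to
  // undef.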
01988 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(DepInst)) { 01989 if (II->getIntrinsicID() == Intrinsic::lifetime_start) { 01990 L->replaceAllUsesWith(UndefValue::get(L->getType())); 01991 markInstructionForDeletion(L); 01992 ++NumGVNLoad; 01993 return true; 01994 } 01995 } 01996 01997 // If this load follows a calloc (which zero initializes memory), 01998 // then the loaded value is zero 01999 if (isCallocLikeFn(DepInst, TLI)) { 02000 L->replaceAllUsesWith(Constant::getNullValue(L->getType())); 02001 markInstructionForDeletion(L); 02002 ++NumGVNLoad; 02003 return true; 02004 } 02005 02006 return false; 02007 } 02008 02009 // findLeader - In order to find a leader for a given value number at a 02010 // specific basic block, we first obtain the list of all Values for that number, 02011 // and then scan the list to find one whose block dominates the block in 02012 // question. This is fast because dominator tree queries consist of only 02013 // a few comparisons of DFS numbers. 02014 Value *GVN::findLeader(const BasicBlock *BB, uint32_t num) { 02015 LeaderTableEntry Vals = LeaderTable[num]; 02016 if (!Vals.Val) return nullptr; 02017 02018 Value *Val = nullptr; 02019 if (DT->dominates(Vals.BB, BB)) { 02020 Val = Vals.Val; 02021 if (isa<Constant>(Val)) return Val; 02022 } 02023 02024 LeaderTableEntry* Next = Vals.Next; 02025 while (Next) { 02026 if (DT->dominates(Next->BB, BB)) { 02027 if (isa<Constant>(Next->Val)) return Next->Val; 02028 if (!Val) Val = Next->Val; 02029 } 02030 02031 Next = Next->Next; 02032 } 02033 02034 return Val; 02035 } 02036 02037 /// replaceAllDominatedUsesWith - Replace all uses of 'From' with 'To' if the 02038 /// use is dominated by the given basic block. Returns the number of uses that 02039 /// were replaced. 02040 unsigned GVN::replaceAllDominatedUsesWith(Value *From, Value *To, 02041 const BasicBlockEdge &Root) { 02042 unsigned Count = 0; 02043 for (Value::use_iterator UI = From->use_begin(), UE = From->use_end(); 02044 UI != UE; ) { 02045 Use &U = *UI++; 02046 02047 if (DT->dominates(Root, U)) { 02048 U.set(To); 02049 ++Count; 02050 } 02051 } 02052 return Count; 02053 } 02054 02055 /// isOnlyReachableViaThisEdge - There is an edge from 'Src' to 'Dst'. Return 02056 /// true if every path from the entry block to 'Dst' passes via this edge. In 02057 /// particular 'Dst' must not be reachable via another edge from 'Src'. 02058 static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E, 02059 DominatorTree *DT) { 02060 // While in theory it is interesting to consider the case in which Dst has 02061 // more than one predecessor, because Dst might be part of a loop which is 02062 // only reachable from Src, in practice it is pointless since at the time 02063 // GVN runs all such loops have preheaders, which means that Dst will have 02064 // been changed to have only one predecessor, namely Src. 02065 const BasicBlock *Pred = E.getEnd()->getSinglePredecessor(); 02066 const BasicBlock *Src = E.getStart(); 02067 assert((!Pred || Pred == Src) && "No edge between these basic blocks!"); 02068 (void)Src; 02069 return Pred != nullptr; 02070 } 02071 02072 /// propagateEquality - The given values are known to be equal in every block 02073 /// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with 02074 /// 'RHS' everywhere in the scope. Returns whether a change was made. 
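/// Illustration (hypothetical IR): given
///   %c = icmp eq i32 %a, %b
///   br i1 %c, label %then, label %else
/// propagating "%c == true" along the edge to %then also queues the derived
/// equality %a == %b, so uses of the younger value that are dominated by the
/// edge can be rewritten in terms of the older one.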
02075 bool GVN::propagateEquality(Value *LHS, Value *RHS, 02076 const BasicBlockEdge &Root) { 02077 SmallVector<std::pair<Value*, Value*>, 4> Worklist; 02078 Worklist.push_back(std::make_pair(LHS, RHS)); 02079 bool Changed = false; 02080 // For speed, compute a conservative fast approximation to 02081 // DT->dominates(Root, Root.getEnd()); 02082 bool RootDominatesEnd = isOnlyReachableViaThisEdge(Root, DT); 02083 02084 while (!Worklist.empty()) { 02085 std::pair<Value*, Value*> Item = Worklist.pop_back_val(); 02086 LHS = Item.first; RHS = Item.second; 02087 02088 if (LHS == RHS) continue; 02089 assert(LHS->getType() == RHS->getType() && "Equality but unequal types!"); 02090 02091 // Don't try to propagate equalities between constants. 02092 if (isa<Constant>(LHS) && isa<Constant>(RHS)) continue; 02093 02094 // Prefer a constant on the right-hand side, or an Argument if no constants. 02095 if (isa<Constant>(LHS) || (isa<Argument>(LHS) && !isa<Constant>(RHS))) 02096 std::swap(LHS, RHS); 02097 assert((isa<Argument>(LHS) || isa<Instruction>(LHS)) && "Unexpected value!"); 02098 02099 // If there is no obvious reason to prefer the left-hand side over the right- 02100 // hand side, ensure the longest lived term is on the right-hand side, so the 02101 // shortest lived term will be replaced by the longest lived. This tends to 02102 // expose more simplifications. 02103 uint32_t LVN = VN.lookup_or_add(LHS); 02104 if ((isa<Argument>(LHS) && isa<Argument>(RHS)) || 02105 (isa<Instruction>(LHS) && isa<Instruction>(RHS))) { 02106 // Move the 'oldest' value to the right-hand side, using the value number as 02107 // a proxy for age. 02108 uint32_t RVN = VN.lookup_or_add(RHS); 02109 if (LVN < RVN) { 02110 std::swap(LHS, RHS); 02111 LVN = RVN; 02112 } 02113 } 02114 02115 // If value numbering later sees that an instruction in the scope is equal 02116 // to 'LHS' then ensure it will be turned into 'RHS'. In order to preserve 02117 // the invariant that instructions only occur in the leader table for their 02118 // own value number (this is used by removeFromLeaderTable), do not do this 02119 // if RHS is an instruction (if an instruction in the scope is morphed into 02120 // LHS then it will be turned into RHS by the next GVN iteration anyway, so 02121 // using the leader table is about compiling faster, not optimizing better). 02122 // The leader table only tracks basic blocks, not edges. Only add to if we 02123 // have the simple case where the edge dominates the end. 02124 if (RootDominatesEnd && !isa<Instruction>(RHS)) 02125 addToLeaderTable(LVN, RHS, Root.getEnd()); 02126 02127 // Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope. As 02128 // LHS always has at least one use that is not dominated by Root, this will 02129 // never do anything if LHS has only one use. 02130 if (!LHS->hasOneUse()) { 02131 unsigned NumReplacements = replaceAllDominatedUsesWith(LHS, RHS, Root); 02132 Changed |= NumReplacements > 0; 02133 NumGVNEqProp += NumReplacements; 02134 } 02135 02136 // Now try to deduce additional equalities from this one. For example, if the 02137 // known equality was "(A != B)" == "false" then it follows that A and B are 02138 // equal in the scope. Only boolean equalities with an explicit true or false 02139 // RHS are currently supported. 02140 if (!RHS->getType()->isIntegerTy(1)) 02141 // Not a boolean equality - bail out. 02142 continue; 02143 ConstantInt *CI = dyn_cast<ConstantInt>(RHS); 02144 if (!CI) 02145 // RHS neither 'true' nor 'false' - bail out. 
02146 continue; 02147 // Whether RHS equals 'true'. Otherwise it equals 'false'. 02148 bool isKnownTrue = CI->isAllOnesValue(); 02149 bool isKnownFalse = !isKnownTrue; 02150 02151 // If "A && B" is known true then both A and B are known true. If "A || B" 02152 // is known false then both A and B are known false. 02153 Value *A, *B; 02154 if ((isKnownTrue && match(LHS, m_And(m_Value(A), m_Value(B)))) || 02155 (isKnownFalse && match(LHS, m_Or(m_Value(A), m_Value(B))))) { 02156 Worklist.push_back(std::make_pair(A, RHS)); 02157 Worklist.push_back(std::make_pair(B, RHS)); 02158 continue; 02159 } 02160 02161 // If we are propagating an equality like "(A == B)" == "true" then also 02162 // propagate the equality A == B. When propagating a comparison such as 02163 // "(A >= B)" == "true", replace all instances of "A < B" with "false". 02164 if (ICmpInst *Cmp = dyn_cast<ICmpInst>(LHS)) { 02165 Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1); 02166 02167 // If "A == B" is known true, or "A != B" is known false, then replace 02168 // A with B everywhere in the scope. 02169 if ((isKnownTrue && Cmp->getPredicate() == CmpInst::ICMP_EQ) || 02170 (isKnownFalse && Cmp->getPredicate() == CmpInst::ICMP_NE)) 02171 Worklist.push_back(std::make_pair(Op0, Op1)); 02172 02173 // If "A >= B" is known true, replace "A < B" with false everywhere. 02174 CmpInst::Predicate NotPred = Cmp->getInversePredicate(); 02175 Constant *NotVal = ConstantInt::get(Cmp->getType(), isKnownFalse); 02176 // Since we don't have the instruction "A < B" immediately to hand, work out 02177 // the value number that it would have and use that to find an appropriate 02178 // instruction (if any). 02179 uint32_t NextNum = VN.getNextUnusedValueNumber(); 02180 uint32_t Num = VN.lookup_or_add_cmp(Cmp->getOpcode(), NotPred, Op0, Op1); 02181 // If the number we were assigned was brand new then there is no point in 02182 // looking for an instruction realizing it: there cannot be one! 02183 if (Num < NextNum) { 02184 Value *NotCmp = findLeader(Root.getEnd(), Num); 02185 if (NotCmp && isa<Instruction>(NotCmp)) { 02186 unsigned NumReplacements = 02187 replaceAllDominatedUsesWith(NotCmp, NotVal, Root); 02188 Changed |= NumReplacements > 0; 02189 NumGVNEqProp += NumReplacements; 02190 } 02191 } 02192 // Ensure that any instruction in scope that gets the "A < B" value number 02193 // is replaced with false. 02194 // The leader table only tracks basic blocks, not edges. Only add to if we 02195 // have the simple case where the edge dominates the end. 02196 if (RootDominatesEnd) 02197 addToLeaderTable(Num, NotVal, Root.getEnd()); 02198 02199 continue; 02200 } 02201 } 02202 02203 return Changed; 02204 } 02205 02206 /// processInstruction - When calculating availability, handle an instruction 02207 /// by inserting it into the appropriate sets 02208 bool GVN::processInstruction(Instruction *I) { 02209 // Ignore dbg info intrinsics. 02210 if (isa<DbgInfoIntrinsic>(I)) 02211 return false; 02212 02213 // If the instruction can be easily simplified then do so now in preference 02214 // to value numbering it. Value numbering often exposes redundancies, for 02215 // example if it determines that %y is equal to %x then the instruction 02216 // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify. 
02217 if (Value *V = SimplifyInstruction(I, DL, TLI, DT, AT)) { 02218 I->replaceAllUsesWith(V); 02219 if (MD && V->getType()->getScalarType()->isPointerTy()) 02220 MD->invalidateCachedPointerInfo(V); 02221 markInstructionForDeletion(I); 02222 ++NumGVNSimpl; 02223 return true; 02224 } 02225 02226 if (LoadInst *LI = dyn_cast<LoadInst>(I)) { 02227 if (processLoad(LI)) 02228 return true; 02229 02230 unsigned Num = VN.lookup_or_add(LI); 02231 addToLeaderTable(Num, LI, LI->getParent()); 02232 return false; 02233 } 02234 02235 // For conditional branches, we can perform simple conditional propagation on 02236 // the condition value itself. 02237 if (BranchInst *BI = dyn_cast<BranchInst>(I)) { 02238 if (!BI->isConditional()) 02239 return false; 02240 02241 if (isa<Constant>(BI->getCondition())) 02242 return processFoldableCondBr(BI); 02243 02244 Value *BranchCond = BI->getCondition(); 02245 BasicBlock *TrueSucc = BI->getSuccessor(0); 02246 BasicBlock *FalseSucc = BI->getSuccessor(1); 02247 // Avoid multiple edges early. 02248 if (TrueSucc == FalseSucc) 02249 return false; 02250 02251 BasicBlock *Parent = BI->getParent(); 02252 bool Changed = false; 02253 02254 Value *TrueVal = ConstantInt::getTrue(TrueSucc->getContext()); 02255 BasicBlockEdge TrueE(Parent, TrueSucc); 02256 Changed |= propagateEquality(BranchCond, TrueVal, TrueE); 02257 02258 Value *FalseVal = ConstantInt::getFalse(FalseSucc->getContext()); 02259 BasicBlockEdge FalseE(Parent, FalseSucc); 02260 Changed |= propagateEquality(BranchCond, FalseVal, FalseE); 02261 02262 return Changed; 02263 } 02264 02265 // For switches, propagate the case values into the case destinations. 02266 if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) { 02267 Value *SwitchCond = SI->getCondition(); 02268 BasicBlock *Parent = SI->getParent(); 02269 bool Changed = false; 02270 02271 // Remember how many outgoing edges there are to every successor. 02272 SmallDenseMap<BasicBlock *, unsigned, 16> SwitchEdges; 02273 for (unsigned i = 0, n = SI->getNumSuccessors(); i != n; ++i) 02274 ++SwitchEdges[SI->getSuccessor(i)]; 02275 02276 for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); 02277 i != e; ++i) { 02278 BasicBlock *Dst = i.getCaseSuccessor(); 02279 // If there is only a single edge, propagate the case value into it. 02280 if (SwitchEdges.lookup(Dst) == 1) { 02281 BasicBlockEdge E(Parent, Dst); 02282 Changed |= propagateEquality(SwitchCond, i.getCaseValue(), E); 02283 } 02284 } 02285 return Changed; 02286 } 02287 02288 // Instructions with void type don't return a value, so there's 02289 // no point in trying to find redundancies in them. 02290 if (I->getType()->isVoidTy()) return false; 02291 02292 uint32_t NextNum = VN.getNextUnusedValueNumber(); 02293 unsigned Num = VN.lookup_or_add(I); 02294 02295 // Allocations are always uniquely numbered, so we can save time and memory 02296 // by fast failing them. 02297 if (isa<AllocaInst>(I) || isa<TerminatorInst>(I) || isa<PHINode>(I)) { 02298 addToLeaderTable(Num, I, I->getParent()); 02299 return false; 02300 } 02301 02302 // If the number we were assigned was a brand new VN, then we don't 02303 // need to do a lookup to see if the number already exists 02304 // somewhere in the domtree: it can't! 02305 if (Num >= NextNum) { 02306 addToLeaderTable(Num, I, I->getParent()); 02307 return false; 02308 } 02309 02310 // Perform fast-path value-number based elimination of values inherited from 02311 // dominators. 
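  // E.g. (names invented): if a dominating block already computed
  //   %a = add i32 %x, %y
  // and the current instruction is another "add i32 %x, %y" with the same
  // value number, findLeader returns %a and the instruction below is replaced
  // by it.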
02312 Value *repl = findLeader(I->getParent(), Num); 02313 if (!repl) { 02314 // Failure, just remember this instance for future use. 02315 addToLeaderTable(Num, I, I->getParent()); 02316 return false; 02317 } 02318 02319 // Remove it! 02320 patchAndReplaceAllUsesWith(I, repl); 02321 if (MD && repl->getType()->getScalarType()->isPointerTy()) 02322 MD->invalidateCachedPointerInfo(repl); 02323 markInstructionForDeletion(I); 02324 return true; 02325 } 02326 02327 /// runOnFunction - This is the main transformation entry point for a function. 02328 bool GVN::runOnFunction(Function& F) { 02329 if (skipOptnoneFunction(F)) 02330 return false; 02331 02332 if (!NoLoads) 02333 MD = &getAnalysis<MemoryDependenceAnalysis>(); 02334 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 02335 DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); 02336 DL = DLP ? &DLP->getDataLayout() : nullptr; 02337 AT = &getAnalysis<AssumptionTracker>(); 02338 TLI = &getAnalysis<TargetLibraryInfo>(); 02339 VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>()); 02340 VN.setMemDep(MD); 02341 VN.setDomTree(DT); 02342 02343 bool Changed = false; 02344 bool ShouldContinue = true; 02345 02346 // Merge unconditional branches, allowing PRE to catch more 02347 // optimization opportunities. 02348 for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) { 02349 BasicBlock *BB = FI++; 02350 02351 bool removedBlock = MergeBlockIntoPredecessor(BB, this); 02352 if (removedBlock) ++NumGVNBlocks; 02353 02354 Changed |= removedBlock; 02355 } 02356 02357 unsigned Iteration = 0; 02358 while (ShouldContinue) { 02359 DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n"); 02360 ShouldContinue = iterateOnFunction(F); 02361 Changed |= ShouldContinue; 02362 ++Iteration; 02363 } 02364 02365 if (EnablePRE) { 02366 // Fabricate val-num for dead-code in order to suppress assertion in 02367 // performPRE(). 02368 assignValNumForDeadCode(); 02369 bool PREChanged = true; 02370 while (PREChanged) { 02371 PREChanged = performPRE(F); 02372 Changed |= PREChanged; 02373 } 02374 } 02375 02376 // FIXME: Should perform GVN again after PRE does something. PRE can move 02377 // computations into blocks where they become fully redundant. Note that 02378 // we can't do this until PRE's critical edge splitting updates memdep. 02379 // Actually, when this happens, we should just fully integrate PRE into GVN. 02380 02381 cleanupGlobalSets(); 02382 // Do not cleanup DeadBlocks in cleanupGlobalSets() as it's called for each 02383 // iteration. 02384 DeadBlocks.clear(); 02385 02386 return Changed; 02387 } 02388 02389 02390 bool GVN::processBlock(BasicBlock *BB) { 02391 // FIXME: Kill off InstrsToErase by doing erasing eagerly in a helper function 02392 // (and incrementing BI before processing an instruction). 02393 assert(InstrsToErase.empty() && 02394 "We expect InstrsToErase to be empty across iterations"); 02395 if (DeadBlocks.count(BB)) 02396 return false; 02397 02398 bool ChangedFunction = false; 02399 02400 for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); 02401 BI != BE;) { 02402 ChangedFunction |= processInstruction(BI); 02403 if (InstrsToErase.empty()) { 02404 ++BI; 02405 continue; 02406 } 02407 02408 // If we need some instructions deleted, do it now. 02409 NumGVNInstr += InstrsToErase.size(); 02410 02411 // Avoid iterator invalidation. 
02412 bool AtStart = BI == BB->begin(); 02413 if (!AtStart) 02414 --BI; 02415 02416 for (SmallVectorImpl<Instruction *>::iterator I = InstrsToErase.begin(), 02417 E = InstrsToErase.end(); I != E; ++I) { 02418 DEBUG(dbgs() << "GVN removed: " << **I << '\n'); 02419 if (MD) MD->removeInstruction(*I); 02420 DEBUG(verifyRemoved(*I)); 02421 (*I)->eraseFromParent(); 02422 } 02423 InstrsToErase.clear(); 02424 02425 if (AtStart) 02426 BI = BB->begin(); 02427 else 02428 ++BI; 02429 } 02430 02431 return ChangedFunction; 02432 } 02433 02434 /// performPRE - Perform a purely local form of PRE that looks for diamond 02435 /// control flow patterns and attempts to perform simple PRE at the join point. 02436 bool GVN::performPRE(Function &F) { 02437 bool Changed = false; 02438 SmallVector<std::pair<Value*, BasicBlock*>, 8> predMap; 02439 for (BasicBlock *CurrentBlock : depth_first(&F.getEntryBlock())) { 02440 // Nothing to PRE in the entry block. 02441 if (CurrentBlock == &F.getEntryBlock()) continue; 02442 02443 // Don't perform PRE on a landing pad. 02444 if (CurrentBlock->isLandingPad()) continue; 02445 02446 for (BasicBlock::iterator BI = CurrentBlock->begin(), 02447 BE = CurrentBlock->end(); BI != BE; ) { 02448 Instruction *CurInst = BI++; 02449 02450 if (isa<AllocaInst>(CurInst) || 02451 isa<TerminatorInst>(CurInst) || isa<PHINode>(CurInst) || 02452 CurInst->getType()->isVoidTy() || 02453 CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() || 02454 isa<DbgInfoIntrinsic>(CurInst)) 02455 continue; 02456 02457 // Don't do PRE on compares. The PHI would prevent CodeGenPrepare from 02458 // sinking the compare again, and it would force the code generator to 02459 // move the i1 from processor flags or predicate registers into a general 02460 // purpose register. 02461 if (isa<CmpInst>(CurInst)) 02462 continue; 02463 02464 // We don't currently value number ANY inline asm calls. 02465 if (CallInst *CallI = dyn_cast<CallInst>(CurInst)) 02466 if (CallI->isInlineAsm()) 02467 continue; 02468 02469 uint32_t ValNo = VN.lookup(CurInst); 02470 02471 // Look for the predecessors for PRE opportunities. We're 02472 // only trying to solve the basic diamond case, where 02473 // a value is computed in the successor and one predecessor, 02474 // but not the other. We also explicitly disallow cases 02475 // where the successor is its own predecessor, because they're 02476 // more complicated to get right. 02477 unsigned NumWith = 0; 02478 unsigned NumWithout = 0; 02479 BasicBlock *PREPred = nullptr; 02480 predMap.clear(); 02481 02482 for (pred_iterator PI = pred_begin(CurrentBlock), 02483 PE = pred_end(CurrentBlock); PI != PE; ++PI) { 02484 BasicBlock *P = *PI; 02485 // We're not interested in PRE where the block is its 02486 // own predecessor, or in blocks with predecessors 02487 // that are not reachable. 02488 if (P == CurrentBlock) { 02489 NumWithout = 2; 02490 break; 02491 } else if (!DT->isReachableFromEntry(P)) { 02492 NumWithout = 2; 02493 break; 02494 } 02495 02496 Value* predV = findLeader(P, ValNo); 02497 if (!predV) { 02498 predMap.push_back(std::make_pair(static_cast<Value *>(nullptr), P)); 02499 PREPred = P; 02500 ++NumWithout; 02501 } else if (predV == CurInst) { 02502 /* CurInst dominates this predecessor. */ 02503 NumWithout = 2; 02504 break; 02505 } else { 02506 predMap.push_back(std::make_pair(predV, P)); 02507 ++NumWith; 02508 } 02509 } 02510 02511 // Don't do PRE when it might increase code size, i.e. when 02512 // we would need to insert instructions in more than one pred. 
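      // Diamond sketch (hypothetical) of the single case handled here:
      //       BB1: %v1 = add i32 %x, %y     BB2: (no equivalent add)
      //                \                       /
      //                 v                     v
      //       BB3: %v3 = add i32 %x, %y
      // NumWith == 1 (BB1) and NumWithout == 1 (BB2), so the add is cloned
      // into BB2 and %v3 is rebuilt as a PHI of %v1 and the clone.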
02513 if (NumWithout != 1 || NumWith == 0) 02514 continue; 02515 02516 // Don't do PRE across indirect branch. 02517 if (isa<IndirectBrInst>(PREPred->getTerminator())) 02518 continue; 02519 02520 // We can't do PRE safely on a critical edge, so instead we schedule 02521 // the edge to be split and perform the PRE the next time we iterate 02522 // on the function. 02523 unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock); 02524 if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) { 02525 toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum)); 02526 continue; 02527 } 02528 02529 // Instantiate the expression in the predecessor that lacked it. 02530 // Because we are going top-down through the block, all value numbers 02531 // will be available in the predecessor by the time we need them. Any 02532 // that weren't originally present will have been instantiated earlier 02533 // in this loop. 02534 Instruction *PREInstr = CurInst->clone(); 02535 bool success = true; 02536 for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) { 02537 Value *Op = PREInstr->getOperand(i); 02538 if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op)) 02539 continue; 02540 02541 if (Value *V = findLeader(PREPred, VN.lookup(Op))) { 02542 PREInstr->setOperand(i, V); 02543 } else { 02544 success = false; 02545 break; 02546 } 02547 } 02548 02549 // Fail out if we encounter an operand that is not available in 02550 // the PRE predecessor. This is typically because of loads which 02551 // are not value numbered precisely. 02552 if (!success) { 02553 DEBUG(verifyRemoved(PREInstr)); 02554 delete PREInstr; 02555 continue; 02556 } 02557 02558 PREInstr->insertBefore(PREPred->getTerminator()); 02559 PREInstr->setName(CurInst->getName() + ".pre"); 02560 PREInstr->setDebugLoc(CurInst->getDebugLoc()); 02561 VN.add(PREInstr, ValNo); 02562 ++NumGVNPRE; 02563 02564 // Update the availability map to include the new instruction. 02565 addToLeaderTable(ValNo, PREInstr, PREPred); 02566 02567 // Create a PHI to make the value available in this block. 02568 PHINode* Phi = PHINode::Create(CurInst->getType(), predMap.size(), 02569 CurInst->getName() + ".pre-phi", 02570 CurrentBlock->begin()); 02571 for (unsigned i = 0, e = predMap.size(); i != e; ++i) { 02572 if (Value *V = predMap[i].first) 02573 Phi->addIncoming(V, predMap[i].second); 02574 else 02575 Phi->addIncoming(PREInstr, PREPred); 02576 } 02577 02578 VN.add(Phi, ValNo); 02579 addToLeaderTable(ValNo, Phi, CurrentBlock); 02580 Phi->setDebugLoc(CurInst->getDebugLoc()); 02581 CurInst->replaceAllUsesWith(Phi); 02582 if (Phi->getType()->getScalarType()->isPointerTy()) { 02583 // Because we have added a PHI-use of the pointer value, it has now 02584 // "escaped" from alias analysis' perspective. We need to inform 02585 // AA of this. 
02586 for (unsigned ii = 0, ee = Phi->getNumIncomingValues(); ii != ee; 02587 ++ii) { 02588 unsigned jj = PHINode::getOperandNumForIncomingValue(ii); 02589 VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(jj)); 02590 } 02591 02592 if (MD) 02593 MD->invalidateCachedPointerInfo(Phi); 02594 } 02595 VN.erase(CurInst); 02596 removeFromLeaderTable(ValNo, CurInst, CurrentBlock); 02597 02598 DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n'); 02599 if (MD) MD->removeInstruction(CurInst); 02600 DEBUG(verifyRemoved(CurInst)); 02601 CurInst->eraseFromParent(); 02602 Changed = true; 02603 } 02604 } 02605 02606 if (splitCriticalEdges()) 02607 Changed = true; 02608 02609 return Changed; 02610 } 02611 02612 /// Split the critical edge connecting the given two blocks, and return 02613 /// the block inserted to the critical edge. 02614 BasicBlock *GVN::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) { 02615 BasicBlock *BB = SplitCriticalEdge(Pred, Succ, this); 02616 if (MD) 02617 MD->invalidateCachedPredecessors(); 02618 return BB; 02619 } 02620 02621 /// splitCriticalEdges - Split critical edges found during the previous 02622 /// iteration that may enable further optimization. 02623 bool GVN::splitCriticalEdges() { 02624 if (toSplit.empty()) 02625 return false; 02626 do { 02627 std::pair<TerminatorInst*, unsigned> Edge = toSplit.pop_back_val(); 02628 SplitCriticalEdge(Edge.first, Edge.second, this); 02629 } while (!toSplit.empty()); 02630 if (MD) MD->invalidateCachedPredecessors(); 02631 return true; 02632 } 02633 02634 /// iterateOnFunction - Executes one iteration of GVN 02635 bool GVN::iterateOnFunction(Function &F) { 02636 cleanupGlobalSets(); 02637 02638 // Top-down walk of the dominator tree 02639 bool Changed = false; 02640 #if 0 02641 // Needed for value numbering with phi construction to work. 02642 ReversePostOrderTraversal<Function*> RPOT(&F); 02643 for (ReversePostOrderTraversal<Function*>::rpo_iterator RI = RPOT.begin(), 02644 RE = RPOT.end(); RI != RE; ++RI) 02645 Changed |= processBlock(*RI); 02646 #else 02647 // Save the blocks this function have before transformation begins. GVN may 02648 // split critical edge, and hence may invalidate the RPO/DT iterator. 02649 // 02650 std::vector<BasicBlock *> BBVect; 02651 BBVect.reserve(256); 02652 for (DomTreeNode *X : depth_first(DT->getRootNode())) 02653 BBVect.push_back(X->getBlock()); 02654 02655 for (std::vector<BasicBlock *>::iterator I = BBVect.begin(), E = BBVect.end(); 02656 I != E; I++) 02657 Changed |= processBlock(*I); 02658 #endif 02659 02660 return Changed; 02661 } 02662 02663 void GVN::cleanupGlobalSets() { 02664 VN.clear(); 02665 LeaderTable.clear(); 02666 TableAllocator.Reset(); 02667 } 02668 02669 /// verifyRemoved - Verify that the specified instruction does not occur in our 02670 /// internal data structures. 02671 void GVN::verifyRemoved(const Instruction *Inst) const { 02672 VN.verifyRemoved(Inst); 02673 02674 // Walk through the value number scope to make sure the instruction isn't 02675 // ferreted away in it. 02676 for (DenseMap<uint32_t, LeaderTableEntry>::const_iterator 02677 I = LeaderTable.begin(), E = LeaderTable.end(); I != E; ++I) { 02678 const LeaderTableEntry *Node = &I->second; 02679 assert(Node->Val != Inst && "Inst still in value numbering scope!"); 02680 02681 while (Node->Next) { 02682 Node = Node->Next; 02683 assert(Node->Val != Inst && "Inst still in value numbering scope!"); 02684 } 02685 } 02686 } 02687 02688 // BB is declared dead, which implied other blocks become dead as well. 
This
02689 // function adds all of these blocks to "DeadBlocks". For the dead blocks'
02690 // live successors, update their phi nodes by replacing the operands
02691 // corresponding to dead blocks with UndefVal.
02692 //
02693 void GVN::addDeadBlock(BasicBlock *BB) {
02694   SmallVector<BasicBlock *, 4> NewDead;
02695   SmallSetVector<BasicBlock *, 4> DF;
02696 
02697   NewDead.push_back(BB);
02698   while (!NewDead.empty()) {
02699     BasicBlock *D = NewDead.pop_back_val();
02700     if (DeadBlocks.count(D))
02701       continue;
02702 
02703     // All blocks dominated by D are dead.
02704     SmallVector<BasicBlock *, 8> Dom;
02705     DT->getDescendants(D, Dom);
02706     DeadBlocks.insert(Dom.begin(), Dom.end());
02707 
02708     // Figure out the dominance-frontier(D).
02709     for (SmallVectorImpl<BasicBlock *>::iterator I = Dom.begin(),
02710            E = Dom.end(); I != E; I++) {
02711       BasicBlock *B = *I;
02712       for (succ_iterator SI = succ_begin(B), SE = succ_end(B); SI != SE; SI++) {
02713         BasicBlock *S = *SI;
02714         if (DeadBlocks.count(S))
02715           continue;
02716 
02717         bool AllPredDead = true;
02718         for (pred_iterator PI = pred_begin(S), PE = pred_end(S); PI != PE; PI++)
02719           if (!DeadBlocks.count(*PI)) {
02720             AllPredDead = false;
02721             break;
02722           }
02723 
02724         if (!AllPredDead) {
02725           // S could still be proved dead later on. That is why we don't update
02726           // its phi operands at this point.
02727           DF.insert(S);
02728         } else {
02729           // Although S is not dominated by D, it is nevertheless dead by now.
02730           // This can happen if S already had a dead predecessor before D was
02731           // declared dead.
02732           NewDead.push_back(S);
02733         }
02734       }
02735     }
02736   }
02737 
02738   // For the dead blocks' live successors, update their phi nodes by replacing
02739   // the operands corresponding to dead blocks with UndefVal.
02740   for (SmallSetVector<BasicBlock *, 4>::iterator I = DF.begin(), E = DF.end();
02741        I != E; I++) {
02742     BasicBlock *B = *I;
02743     if (DeadBlocks.count(B))
02744       continue;
02745 
02746     SmallVector<BasicBlock *, 4> Preds(pred_begin(B), pred_end(B));
02747     for (SmallVectorImpl<BasicBlock *>::iterator PI = Preds.begin(),
02748            PE = Preds.end(); PI != PE; PI++) {
02749       BasicBlock *P = *PI;
02750 
02751       if (!DeadBlocks.count(P))
02752         continue;
02753 
02754       if (isCriticalEdge(P->getTerminator(), GetSuccessorNumber(P, B))) {
02755         if (BasicBlock *S = splitCriticalEdges(P, B))
02756           DeadBlocks.insert(P = S);
02757       }
02758 
02759       for (BasicBlock::iterator II = B->begin(); isa<PHINode>(II); ++II) {
02760         PHINode &Phi = cast<PHINode>(*II);
02761         Phi.setIncomingValue(Phi.getBasicBlockIndex(P),
02762                              UndefValue::get(Phi.getType()));
02763       }
02764     }
02765   }
02766 }
02767 
02768 // If the given branch is recognized as a foldable branch (i.e. a conditional
02769 // branch with a constant condition), this performs the following analyses and
02770 // transformations:
02771 //  1) If the dead outgoing edge is a critical edge, split it. Let
02772 //     R be the target of the dead outgoing edge.
02773 //  2) Identify the set of dead blocks implied by the branch's dead outgoing
02774 //     edge. The result of this step will be {X | X is dominated by R}.
02775 //  3) Identify those blocks which have at least one dead predecessor. The
02776 //     result of this step will be dominance-frontier(R).
02777 //  4) Update the PHIs in DF(R) by replacing the operands corresponding to
02778 //     dead blocks with "UndefVal", in the hope that these PHIs will be optimized away.
02779 //
02780 // Return true iff *NEW* dead code is found.
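// Illustration (hypothetical IR): for
//   br i1 true, label %live, label %dead
// the %dead successor (after splitting the edge if it is critical) becomes the
// root R; every block dominated by R is added to DeadBlocks, and PHIs in the
// dominance frontier of R get their incoming values from dead blocks replaced
// with undef.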
02781 bool GVN::processFoldableCondBr(BranchInst *BI) { 02782 if (!BI || BI->isUnconditional()) 02783 return false; 02784 02785 ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition()); 02786 if (!Cond) 02787 return false; 02788 02789 BasicBlock *DeadRoot = Cond->getZExtValue() ? 02790 BI->getSuccessor(1) : BI->getSuccessor(0); 02791 if (DeadBlocks.count(DeadRoot)) 02792 return false; 02793 02794 if (!DeadRoot->getSinglePredecessor()) 02795 DeadRoot = splitCriticalEdges(BI->getParent(), DeadRoot); 02796 02797 addDeadBlock(DeadRoot); 02798 return true; 02799 } 02800 02801 // performPRE() will trigger assert if it comes across an instruction without 02802 // associated val-num. As it normally has far more live instructions than dead 02803 // instructions, it makes more sense just to "fabricate" a val-number for the 02804 // dead code than checking if instruction involved is dead or not. 02805 void GVN::assignValNumForDeadCode() { 02806 for (SetVector<BasicBlock *>::iterator I = DeadBlocks.begin(), 02807 E = DeadBlocks.end(); I != E; I++) { 02808 BasicBlock *BB = *I; 02809 for (BasicBlock::iterator II = BB->begin(), EE = BB->end(); 02810 II != EE; II++) { 02811 Instruction *Inst = &*II; 02812 unsigned ValNum = VN.lookup_or_add(Inst); 02813 addToLeaderTable(ValNum, Inst, BB); 02814 } 02815 } 02816 }
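// Usage sketch (illustrative only; assumes the legacy pass manager interfaces
// of this LLVM version):
//   #include "llvm/PassManager.h"
//   #include "llvm/Transforms/Scalar.h"
//   ...
//   PassManager PM;
//   PM.add(llvm::createGVNPass(/*NoLoads=*/false));
//   PM.run(M);
// Alternatively, the pass can be exercised from the command line with
// "opt -gvn input.ll".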