//===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements inline cost analysis.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/InlineCost.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "inline-cost"

STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");

namespace {

class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
  typedef InstVisitor<CallAnalyzer, bool> Base;
  friend class InstVisitor<CallAnalyzer, bool>;

  // DataLayout if available, or null.
  const DataLayout *const DL;

  /// The TargetTransformInfo available for this compilation.
  const TargetTransformInfo &TTI;

  /// The cache of @llvm.assume intrinsics.
  AssumptionTracker *AT;

  // The called function.
  Function &F;

  int Threshold;
  int Cost;

  bool IsCallerRecursive;
  bool IsRecursiveCall;
  bool ExposesReturnsTwice;
  bool HasDynamicAlloca;
  bool ContainsNoDuplicateCall;
  bool HasReturn;
  bool HasIndirectBr;

  /// Number of bytes allocated statically by the callee.
  uint64_t AllocatedSize;
  unsigned NumInstructions, NumVectorInstructions;
  int FiftyPercentVectorBonus, TenPercentVectorBonus;
  int VectorBonus;

  // While we walk the potentially-inlined instructions, we build up and
  // maintain a mapping of simplified values specific to this callsite. The
  // idea is to propagate any special information we have about arguments to
  // this call through the inlinable section of the function, and account for
  // likely simplifications post-inlining. The most important aspect we track
  // is CFG altering simplifications -- when we prove a basic block dead, that
  // can cause dramatic shifts in the cost of inlining a function.
  DenseMap<Value *, Constant *> SimplifiedValues;

  // Keep track of the values which map back (through function arguments) to
  // allocas on the caller stack which could be simplified through SROA.
  DenseMap<Value *, Value *> SROAArgValues;

  // The mapping of caller Alloca values to their accumulated cost savings. If
  // we have to disable SROA for one of the allocas, this tells us how much
  // cost must be added.
  DenseMap<Value *, int> SROAArgCosts;

  // Keep track of values which map to a pointer base and constant offset.
  DenseMap<Value *, std::pair<Value *, APInt> > ConstantOffsetPtrs;

  // Custom simplification helper routines.
  bool isAllocaDerivedArg(Value *V);
  bool lookupSROAArgAndCost(Value *V, Value *&Arg,
                            DenseMap<Value *, int>::iterator &CostIt);
  void disableSROA(DenseMap<Value *, int>::iterator CostIt);
  void disableSROA(Value *V);
  void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
                          int InstructionCost);
  bool isGEPOffsetConstant(GetElementPtrInst &GEP);
  bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
  bool simplifyCallSite(Function *F, CallSite CS);
  ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);

  // Custom analysis routines.
  bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues);

  // Disable several entry points to the visitor so we don't accidentally use
  // them by declaring but not defining them here.
  void visit(Module *); void visit(Module &);
  void visit(Function *); void visit(Function &);
  void visit(BasicBlock *); void visit(BasicBlock &);

  // Provide base case for our instruction visit.
  bool visitInstruction(Instruction &I);

  // Our visit overrides.
  bool visitAlloca(AllocaInst &I);
  bool visitPHI(PHINode &I);
  bool visitGetElementPtr(GetElementPtrInst &I);
  bool visitBitCast(BitCastInst &I);
  bool visitPtrToInt(PtrToIntInst &I);
  bool visitIntToPtr(IntToPtrInst &I);
  bool visitCastInst(CastInst &I);
  bool visitUnaryInstruction(UnaryInstruction &I);
  bool visitCmpInst(CmpInst &I);
  bool visitSub(BinaryOperator &I);
  bool visitBinaryOperator(BinaryOperator &I);
  bool visitLoad(LoadInst &I);
  bool visitStore(StoreInst &I);
  bool visitExtractValue(ExtractValueInst &I);
  bool visitInsertValue(InsertValueInst &I);
  bool visitCallSite(CallSite CS);
  bool visitReturnInst(ReturnInst &RI);
  bool visitBranchInst(BranchInst &BI);
  bool visitSwitchInst(SwitchInst &SI);
  bool visitIndirectBrInst(IndirectBrInst &IBI);
  bool visitResumeInst(ResumeInst &RI);
  bool visitUnreachableInst(UnreachableInst &I);

public:
  CallAnalyzer(const DataLayout *DL, const TargetTransformInfo &TTI,
               AssumptionTracker *AT, Function &Callee, int Threshold)
      : DL(DL), TTI(TTI), AT(AT), F(Callee), Threshold(Threshold), Cost(0),
        IsCallerRecursive(false), IsRecursiveCall(false),
        ExposesReturnsTwice(false), HasDynamicAlloca(false),
        ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
        AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0),
        FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0),
        NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
        NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
        NumInstructionsSimplified(0), SROACostSavings(0),
        SROACostSavingsLost(0) {}

  bool analyzeCall(CallSite CS);

  int getThreshold() { return Threshold; }
  int getCost() { return Cost; }

  // Keep a bunch of stats about the cost savings found so we can print them
  // out when debugging.
  unsigned NumConstantArgs;
  unsigned NumConstantOffsetPtrArgs;
  unsigned NumAllocaArgs;
  unsigned NumConstantPtrCmps;
  unsigned NumConstantPtrDiffs;
  unsigned NumInstructionsSimplified;
  unsigned SROACostSavings;
  unsigned SROACostSavingsLost;

  void dump();
};

} // namespace

/// \brief Test whether the given value is an Alloca-derived function argument.
bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
  return SROAArgValues.count(V);
}

/// \brief Lookup the SROA-candidate argument and cost iterator which V maps to.
/// Returns false if V does not map to a SROA-candidate.
bool CallAnalyzer::lookupSROAArgAndCost(
    Value *V, Value *&Arg, DenseMap<Value *, int>::iterator &CostIt) {
  if (SROAArgValues.empty() || SROAArgCosts.empty())
    return false;

  DenseMap<Value *, Value *>::iterator ArgIt = SROAArgValues.find(V);
  if (ArgIt == SROAArgValues.end())
    return false;

  Arg = ArgIt->second;
  CostIt = SROAArgCosts.find(Arg);
  return CostIt != SROAArgCosts.end();
}

/// \brief Disable SROA for the candidate marked by this cost iterator.
///
/// This marks the candidate as no longer viable for SROA, and adds the cost
/// savings associated with it back into the inline cost measurement.
void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) {
  // If we're no longer able to perform SROA we need to undo its cost savings
  // and prevent subsequent analysis.
  Cost += CostIt->second;
  SROACostSavings -= CostIt->second;
  SROACostSavingsLost += CostIt->second;
  SROAArgCosts.erase(CostIt);
}

/// \brief If 'V' maps to a SROA candidate, disable SROA for it.
void CallAnalyzer::disableSROA(Value *V) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(V, SROAArg, CostIt))
    disableSROA(CostIt);
}

/// \brief Accumulate the given cost for a particular SROA candidate.
void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
                                      int InstructionCost) {
  CostIt->second += InstructionCost;
  SROACostSavings += InstructionCost;
}

/// \brief Check whether a GEP's indices are all constant.
///
/// Respects any simplified values known during the analysis of this callsite.
bool CallAnalyzer::isGEPOffsetConstant(GetElementPtrInst &GEP) {
  for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
    if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I))
      return false;

  return true;
}

/// \brief Accumulate a constant GEP offset into an APInt if possible.
///
/// Returns false if unable to compute the offset for any reason. Respects any
/// simplified values known during the analysis of this callsite.
bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
  if (!DL)
    return false;

  unsigned IntPtrWidth = DL->getPointerSizeInBits();
  assert(IntPtrWidth == Offset.getBitWidth());

  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
       GTI != GTE; ++GTI) {
    ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
    if (!OpC)
      if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
        OpC = dyn_cast<ConstantInt>(SimpleOp);
    if (!OpC)
      return false;
    if (OpC->isZero()) continue;

    // Handle a struct index, which adds its field offset to the pointer.
    if (StructType *STy = dyn_cast<StructType>(*GTI)) {
      unsigned ElementIdx = OpC->getZExtValue();
      const StructLayout *SL = DL->getStructLayout(STy);
      Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
      continue;
    }

    APInt TypeSize(IntPtrWidth, DL->getTypeAllocSize(GTI.getIndexedType()));
    Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
  }
  return true;
}

bool CallAnalyzer::visitAlloca(AllocaInst &I) {
  // Check whether inlining will turn a dynamic alloca into a static
  // alloca, and handle that case.
  if (I.isArrayAllocation()) {
    if (Constant *Size = SimplifiedValues.lookup(I.getArraySize())) {
      ConstantInt *AllocSize = dyn_cast<ConstantInt>(Size);
      assert(AllocSize && "Allocation size not a constant int?");
      Type *Ty = I.getAllocatedType();
      AllocatedSize += Ty->getPrimitiveSizeInBits() * AllocSize->getZExtValue();
      return Base::visitAlloca(I);
    }
  }

  // Accumulate the allocated size.
  if (I.isStaticAlloca()) {
    Type *Ty = I.getAllocatedType();
    AllocatedSize += (DL ? DL->getTypeAllocSize(Ty) :
                           Ty->getPrimitiveSizeInBits());
  }

  // We will happily inline static alloca instructions.
  if (I.isStaticAlloca())
    return Base::visitAlloca(I);

  // FIXME: This is overly conservative. Dynamic allocas are inefficient for
  // a variety of reasons, and so we would like to not inline them into
  // functions which don't currently have a dynamic alloca. This simply
  // disables inlining altogether in the presence of a dynamic alloca.
  HasDynamicAlloca = true;
  return false;
}

bool CallAnalyzer::visitPHI(PHINode &I) {
  // FIXME: We should potentially be tracking values through phi nodes,
  // especially when they collapse to a single value due to deleted CFG edges
  // during inlining.

  // FIXME: We need to propagate SROA *disabling* through phi nodes, even
  // though we don't want to propagate its bonuses. The idea is to disable
  // SROA if it *might* be used in an inappropriate manner.

  // Phi nodes are always zero-cost.
  return true;
}

bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  bool SROACandidate = lookupSROAArgAndCost(I.getPointerOperand(),
                                            SROAArg, CostIt);

  // Try to fold GEPs of constant-offset call site argument pointers. This
  // requires target data and inbounds GEPs.
  if (DL && I.isInBounds()) {
    // Check if we have a base + offset for the pointer.
    Value *Ptr = I.getPointerOperand();
    std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr);
    if (BaseAndOffset.first) {
      // Check if the offset of this GEP is constant, and if so accumulate it
      // into Offset.
      if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second)) {
        // Non-constant GEPs aren't folded, and disable SROA.
        if (SROACandidate)
          disableSROA(CostIt);
        return false;
      }

      // Add the result as a new mapping to Base + Offset.
      ConstantOffsetPtrs[&I] = BaseAndOffset;

      // Also handle SROA candidates here, we already know that the GEP is
      // all-constant indexed.
      if (SROACandidate)
        SROAArgValues[&I] = SROAArg;

      return true;
    }
  }

  if (isGEPOffsetConstant(I)) {
    if (SROACandidate)
      SROAArgValues[&I] = SROAArg;

    // Constant GEPs are modeled as free.
    return true;
  }

  // Variable GEPs will require math and will disable SROA.
  if (SROACandidate)
    disableSROA(CostIt);
  return false;
}

bool CallAnalyzer::visitBitCast(BitCastInst &I) {
  // Propagate constants through bitcasts.
  Constant *COp = dyn_cast<Constant>(I.getOperand(0));
  if (!COp)
    COp = SimplifiedValues.lookup(I.getOperand(0));
  if (COp)
    if (Constant *C = ConstantExpr::getBitCast(COp, I.getType())) {
      SimplifiedValues[&I] = C;
      return true;
    }

  // Track base/offsets through casts
  std::pair<Value *, APInt> BaseAndOffset
    = ConstantOffsetPtrs.lookup(I.getOperand(0));
  // Casts don't change the offset, just wrap it up.
  if (BaseAndOffset.first)
    ConstantOffsetPtrs[&I] = BaseAndOffset;

  // Also look for SROA candidates here.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
    SROAArgValues[&I] = SROAArg;

  // Bitcasts are always zero cost.
  return true;
}

bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
  const DataLayout *DL = I.getDataLayout();
  // Propagate constants through ptrtoint.
  Constant *COp = dyn_cast<Constant>(I.getOperand(0));
  if (!COp)
    COp = SimplifiedValues.lookup(I.getOperand(0));
  if (COp)
    if (Constant *C = ConstantExpr::getPtrToInt(COp, I.getType())) {
      SimplifiedValues[&I] = C;
      return true;
    }

  // Track base/offset pairs when converted to a plain integer provided the
  // integer is large enough to represent the pointer.
  unsigned IntegerSize = I.getType()->getScalarSizeInBits();
  if (DL && IntegerSize >= DL->getPointerSizeInBits()) {
    std::pair<Value *, APInt> BaseAndOffset
      = ConstantOffsetPtrs.lookup(I.getOperand(0));
    if (BaseAndOffset.first)
      ConstantOffsetPtrs[&I] = BaseAndOffset;
  }

  // This is really weird. Technically, ptrtoint will disable SROA. However,
  // unless that ptrtoint is *used* somewhere in the live basic blocks after
  // inlining, it will be nuked, and SROA should proceed. All of the uses which
  // would block SROA would also block SROA if applied directly to a pointer,
  // and so we can just add the integer in here. The only places where SROA is
  // preserved either cannot fire on an integer, or won't in-and-of themselves
  // disable SROA (ext) w/o some later use that we would see and disable.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
    SROAArgValues[&I] = SROAArg;

  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
}

bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
  const DataLayout *DL = I.getDataLayout();
  // Propagate constants through inttoptr.
  Constant *COp = dyn_cast<Constant>(I.getOperand(0));
  if (!COp)
    COp = SimplifiedValues.lookup(I.getOperand(0));
  if (COp)
    if (Constant *C = ConstantExpr::getIntToPtr(COp, I.getType())) {
      SimplifiedValues[&I] = C;
      return true;
    }

  // Track base/offset pairs when round-tripped through a pointer without
  // modifications provided the integer is not too large.
  Value *Op = I.getOperand(0);
  unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
  if (DL && IntegerSize <= DL->getPointerSizeInBits()) {
    std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
    if (BaseAndOffset.first)
      ConstantOffsetPtrs[&I] = BaseAndOffset;
  }

  // "Propagate" SROA here in the same manner as we do for ptrtoint above.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(Op, SROAArg, CostIt))
    SROAArgValues[&I] = SROAArg;

  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
}

bool CallAnalyzer::visitCastInst(CastInst &I) {
  // Propagate constants through casts.
  Constant *COp = dyn_cast<Constant>(I.getOperand(0));
  if (!COp)
    COp = SimplifiedValues.lookup(I.getOperand(0));
  if (COp)
    if (Constant *C = ConstantExpr::getCast(I.getOpcode(), COp, I.getType())) {
      SimplifiedValues[&I] = C;
      return true;
    }

  // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere.
  disableSROA(I.getOperand(0));

  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
}

bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
  Value *Operand = I.getOperand(0);
  Constant *COp = dyn_cast<Constant>(Operand);
  if (!COp)
    COp = SimplifiedValues.lookup(Operand);
  if (COp)
    if (Constant *C = ConstantFoldInstOperands(I.getOpcode(), I.getType(),
                                               COp, DL)) {
      SimplifiedValues[&I] = C;
      return true;
    }

  // Disable any SROA on the argument to arbitrary unary operators.
  disableSROA(Operand);

  return false;
}

bool CallAnalyzer::visitCmpInst(CmpInst &I) {
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  // First try to handle simplified comparisons.
  if (!isa<Constant>(LHS))
    if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
      LHS = SimpleLHS;
  if (!isa<Constant>(RHS))
    if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
      RHS = SimpleRHS;
  if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
    if (Constant *CRHS = dyn_cast<Constant>(RHS))
      if (Constant *C = ConstantExpr::getCompare(I.getPredicate(), CLHS, CRHS)) {
        SimplifiedValues[&I] = C;
        return true;
      }
  }

  if (I.getOpcode() == Instruction::FCmp)
    return false;

  // Otherwise look for a comparison between constant offset pointers with
  // a common base.
  Value *LHSBase, *RHSBase;
  APInt LHSOffset, RHSOffset;
  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
  if (LHSBase) {
    std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
    if (RHSBase && LHSBase == RHSBase) {
      // We have common bases, fold the icmp to a constant based on the
      // offsets.
      Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
      Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
      if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
        SimplifiedValues[&I] = C;
        ++NumConstantPtrCmps;
        return true;
      }
    }
  }

  // If the comparison is an equality comparison with null, we can simplify it
  // for any alloca-derived argument.
  if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)))
    if (isAllocaDerivedArg(I.getOperand(0))) {
      // We can actually predict the result of comparisons between an
      // alloca-derived value and null. Note that this fires regardless of
      // SROA firing.
      bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
      SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
                                        : ConstantInt::getFalse(I.getType());
      return true;
    }

  // Finally check for SROA candidates in comparisons.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
    if (isa<ConstantPointerNull>(I.getOperand(1))) {
      accumulateSROACost(CostIt, InlineConstants::InstrCost);
      return true;
    }

    disableSROA(CostIt);
  }

  return false;
}

bool CallAnalyzer::visitSub(BinaryOperator &I) {
  // Try to handle a special case: we can fold computing the difference of two
  // constant-related pointers.
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  Value *LHSBase, *RHSBase;
  APInt LHSOffset, RHSOffset;
  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
  if (LHSBase) {
    std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
    if (RHSBase && LHSBase == RHSBase) {
      // We have common bases, fold the subtract to a constant based on the
      // offsets.
      Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
      Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
      if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) {
        SimplifiedValues[&I] = C;
        ++NumConstantPtrDiffs;
        return true;
      }
    }
  }

  // Otherwise, fall back to the generic logic for simplifying and handling
  // instructions.
  return Base::visitSub(I);
}

bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  if (!isa<Constant>(LHS))
    if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
      LHS = SimpleLHS;
  if (!isa<Constant>(RHS))
    if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
      RHS = SimpleRHS;
  Value *SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, DL);
  if (Constant *C = dyn_cast_or_null<Constant>(SimpleV)) {
    SimplifiedValues[&I] = C;
    return true;
  }

  // Disable any SROA on arguments to arbitrary, unsimplified binary operators.
  disableSROA(LHS);
  disableSROA(RHS);

  return false;
}

bool CallAnalyzer::visitLoad(LoadInst &I) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
    if (I.isSimple()) {
      accumulateSROACost(CostIt, InlineConstants::InstrCost);
      return true;
    }

    disableSROA(CostIt);
  }

  return false;
}

bool CallAnalyzer::visitStore(StoreInst &I) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
    if (I.isSimple()) {
      accumulateSROACost(CostIt, InlineConstants::InstrCost);
      return true;
    }

    disableSROA(CostIt);
  }

  return false;
}

bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
  // Constant folding for extract value is trivial.
  Constant *C = dyn_cast<Constant>(I.getAggregateOperand());
  if (!C)
    C = SimplifiedValues.lookup(I.getAggregateOperand());
  if (C) {
    SimplifiedValues[&I] = ConstantExpr::getExtractValue(C, I.getIndices());
    return true;
  }

  // SROA can look through these but give them a cost.
  return false;
}

bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
  // Constant folding for insert value is trivial.
  Constant *AggC = dyn_cast<Constant>(I.getAggregateOperand());
  if (!AggC)
    AggC = SimplifiedValues.lookup(I.getAggregateOperand());
  Constant *InsertedC = dyn_cast<Constant>(I.getInsertedValueOperand());
  if (!InsertedC)
    InsertedC = SimplifiedValues.lookup(I.getInsertedValueOperand());
  if (AggC && InsertedC) {
    SimplifiedValues[&I] = ConstantExpr::getInsertValue(AggC, InsertedC,
                                                        I.getIndices());
    return true;
  }

  // SROA can look through these but give them a cost.
  return false;
}

/// \brief Try to simplify a call site.
///
/// Takes a concrete function and callsite and tries to actually simplify it by
/// analyzing the arguments and call itself with instsimplify. Returns true if
/// it has simplified the callsite to some other entity (a constant), making it
/// free.
bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) {
  // FIXME: Using the instsimplify logic directly for this is inefficient
  // because we have to continually rebuild the argument list even when no
  // simplifications can be performed. Until that is fixed with remapping
  // inside of instsimplify, directly constant fold calls here.
  if (!canConstantFoldCallTo(F))
    return false;

  // Try to re-map the arguments to constants.
  SmallVector<Constant *, 4> ConstantArgs;
  ConstantArgs.reserve(CS.arg_size());
  for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
       I != E; ++I) {
    Constant *C = dyn_cast<Constant>(*I);
    if (!C)
      C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(*I));
    if (!C)
      return false; // This argument doesn't map to a constant.

    ConstantArgs.push_back(C);
  }
  if (Constant *C = ConstantFoldCall(F, ConstantArgs)) {
    SimplifiedValues[CS.getInstruction()] = C;
    return true;
  }

  return false;
}

bool CallAnalyzer::visitCallSite(CallSite CS) {
  if (CS.hasFnAttr(Attribute::ReturnsTwice) &&
      !F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
                                      Attribute::ReturnsTwice)) {
    // This aborts the entire analysis.
    ExposesReturnsTwice = true;
    return false;
  }
  if (CS.isCall() &&
      cast<CallInst>(CS.getInstruction())->cannotDuplicate())
    ContainsNoDuplicateCall = true;

  if (Function *F = CS.getCalledFunction()) {
    // When we have a concrete function, first try to simplify it directly.
    if (simplifyCallSite(F, CS))
      return true;

    // Next check if it is an intrinsic we know about.
    // FIXME: Lift this into part of the InstVisitor.
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
      switch (II->getIntrinsicID()) {
      default:
        return Base::visitCallSite(CS);

      case Intrinsic::memset:
      case Intrinsic::memcpy:
      case Intrinsic::memmove:
        // SROA can usually chew through these intrinsics, but they aren't free.
        return false;
      }
    }

    if (F == CS.getInstruction()->getParent()->getParent()) {
      // This flag will fully abort the analysis, so don't bother with anything
      // else.
      IsRecursiveCall = true;
      return false;
    }

    if (TTI.isLoweredToCall(F)) {
      // We account for the average 1 instruction per call argument setup
      // here.
      Cost += CS.arg_size() * InlineConstants::InstrCost;

      // Everything other than inline ASM will also have a significant cost
      // merely from making the call.
      if (!isa<InlineAsm>(CS.getCalledValue()))
        Cost += InlineConstants::CallPenalty;
    }

    return Base::visitCallSite(CS);
  }

  // Otherwise we're in a very special case -- an indirect function call. See
  // if we can be particularly clever about this.
  Value *Callee = CS.getCalledValue();

  // First, pay the price of the argument setup. We account for the average
  // 1 instruction per call argument setup here.
  Cost += CS.arg_size() * InlineConstants::InstrCost;

  // Next, check if this happens to be an indirect function call to a known
  // function in this inline context. If not, we've done all we can.
  Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
  if (!F)
    return Base::visitCallSite(CS);

  // If we have a constant that we are calling as a function, we can peer
  // through it and see the function target. This happens not infrequently
  // during devirtualization and so we want to give it a hefty bonus for
  // inlining, but cap that bonus in the event that inlining wouldn't pan
  // out. Pretend to inline the function, with a custom threshold.
  CallAnalyzer CA(DL, TTI, AT, *F, InlineConstants::IndirectCallThreshold);
  if (CA.analyzeCall(CS)) {
    // We were able to inline the indirect call! Subtract the cost from the
    // bonus we want to apply, but don't go below zero.
    Cost -= std::max(0, InlineConstants::IndirectCallThreshold - CA.getCost());
  }

  return Base::visitCallSite(CS);
}

bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
  // At least one return instruction will be free after inlining.
  bool Free = !HasReturn;
  HasReturn = true;
  return Free;
}

bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
  // We model unconditional branches as essentially free -- they really
  // shouldn't exist at all, but handling them makes the behavior of the
  // inliner more regular and predictable. Interestingly, conditional branches
  // which will fold away are also free.
  return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
         dyn_cast_or_null<ConstantInt>(
             SimplifiedValues.lookup(BI.getCondition()));
}

bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
  // We model unconditional switches as free, see the comments on handling
  // branches.
  if (isa<ConstantInt>(SI.getCondition()))
    return true;
  if (Value *V = SimplifiedValues.lookup(SI.getCondition()))
    if (isa<ConstantInt>(V))
      return true;

  // Otherwise, we need to accumulate a cost proportional to the number of
  // distinct successor blocks. This fan-out in the CFG cannot be represented
  // for free even if we can represent the core switch as a jumptable that
  // takes a single instruction.
  //
  // NB: We convert large switches which are just used to initialize large phi
  // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent
  // inlining those. It will prevent inlining in cases where the optimization
  // does not (yet) fire.
  SmallPtrSet<BasicBlock *, 8> SuccessorBlocks;
  SuccessorBlocks.insert(SI.getDefaultDest());
  for (auto I = SI.case_begin(), E = SI.case_end(); I != E; ++I)
    SuccessorBlocks.insert(I.getCaseSuccessor());
  // Add cost corresponding to the number of distinct destinations. The first
  // we model as free because of fallthrough.
  Cost += (SuccessorBlocks.size() - 1) * InlineConstants::InstrCost;
  return false;
}

bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
  // We never want to inline functions that contain an indirectbr. This is
  // incorrect because all the blockaddress's (in static global initializers
  // for example) would be referring to the original function, and this
  // indirect jump would jump from the inlined copy of the function into the
  // original function which is extremely undefined behavior.
  // FIXME: This logic isn't really right; we can safely inline functions with
  // indirectbr's as long as no other function or global references the
  // blockaddress of a block within the current function.
  HasIndirectBr = true;
  return false;
}

bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
  // FIXME: It's not clear that a single instruction is an accurate model for
  // the inline cost of a resume instruction.
  return false;
}

bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
  // FIXME: It might be reasonable to discount the cost of instructions leading
  // to unreachable as they have the lowest possible impact on both runtime and
  // code size.
  return true; // No actual code is needed for unreachable.
}

bool CallAnalyzer::visitInstruction(Instruction &I) {
  // Some instructions are free. All of the free intrinsics can also be
  // handled by SROA, etc.
  if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&I))
    return true;

  // We found something we don't understand or can't handle. Mark any SROA-able
  // values in the operand list as no longer viable.
  for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI)
    disableSROA(*OI);

  return false;
}


/// \brief Analyze a basic block for its contribution to the inline cost.
///
/// This method walks the analyzer over every instruction in the given basic
/// block and accounts for their cost during inlining at this callsite. It
/// aborts early if the threshold has been exceeded or an impossible to inline
/// construct has been detected. It returns false if inlining is no longer
/// viable, and true if inlining remains viable.
bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
                                SmallPtrSetImpl<const Value *> &EphValues) {
  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
    // FIXME: Currently, the number of instructions in a function, regardless
    // of our ability to simplify them during inlining to constants or dead
    // code, is actually used by the vector bonus heuristic. As long as that's
    // true, we have to special case debug intrinsics here to prevent
    // differences in inlining due to debug symbols. Eventually, the number of
    // unsimplified instructions shouldn't factor into the cost computation,
    // but until then, hack around it here.
    if (isa<DbgInfoIntrinsic>(I))
      continue;

    // Skip ephemeral values.
    if (EphValues.count(I))
      continue;

    ++NumInstructions;
    if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
      ++NumVectorInstructions;

    // If the instruction simplified to a constant, there is no cost to this
    // instruction. Visit the instructions using our InstVisitor to account for
    // all of the per-instruction logic. The visit tree returns true if we
    // consumed the instruction in any way, and false if the instruction's base
    // cost should count against inlining.
    if (Base::visit(I))
      ++NumInstructionsSimplified;
    else
      Cost += InlineConstants::InstrCost;

    // If visiting this instruction detected an uninlinable pattern, abort.
    if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
        HasIndirectBr)
      return false;

    // If the caller is a recursive function then we don't want to inline
    // functions which allocate a lot of stack space because it would increase
    // the caller stack usage dramatically.
    if (IsCallerRecursive &&
        AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
      return false;

    if (NumVectorInstructions > NumInstructions/2)
      VectorBonus = FiftyPercentVectorBonus;
    else if (NumVectorInstructions > NumInstructions/10)
      VectorBonus = TenPercentVectorBonus;
    else
      VectorBonus = 0;

    // Check if we've passed the threshold so we don't spin in huge basic
    // blocks that will never inline.
    if (Cost > (Threshold + VectorBonus))
      return false;
  }

  return true;
}

/// \brief Compute the base pointer and cumulative constant offsets for V.
///
/// This strips all constant offsets off of V, leaving it the base pointer, and
/// accumulates the total constant offset applied in the returned constant. It
/// returns 0 if V is not a pointer, and returns the constant '0' if there are
/// no constant offsets applied.
ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
  if (!DL || !V->getType()->isPointerTy())
    return nullptr;

  unsigned IntPtrWidth = DL->getPointerSizeInBits();
  APInt Offset = APInt::getNullValue(IntPtrWidth);

  // Even though we don't look through PHI nodes, we could be called on an
  // instruction in an unreachable block, which may be on a cycle.
  SmallPtrSet<Value *, 4> Visited;
  Visited.insert(V);
  do {
    if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
      if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
        return nullptr;
      V = GEP->getPointerOperand();
    } else if (Operator::getOpcode(V) == Instruction::BitCast) {
      V = cast<Operator>(V)->getOperand(0);
    } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
      if (GA->mayBeOverridden())
        break;
      V = GA->getAliasee();
    } else {
      break;
    }
    assert(V->getType()->isPointerTy() && "Unexpected operand type!");
  } while (Visited.insert(V));

  Type *IntPtrTy = DL->getIntPtrType(V->getContext());
  return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset));
}

/// \brief Analyze a call site for potential inlining.
///
/// Returns true if inlining this call is viable, and false if it is not
/// viable. It computes the cost and adjusts the threshold based on numerous
/// factors and heuristics. If this method returns false but the computed cost
/// is below the computed threshold, then inlining was forcibly disabled by
/// some artifact of the routine.
bool CallAnalyzer::analyzeCall(CallSite CS) {
  ++NumCallsAnalyzed;

  // Track whether the post-inlining function would have more than one basic
  // block. A single basic block is often intended for inlining. Balloon the
  // threshold by 50% until we pass the single-BB phase.
  bool SingleBB = true;
  int SingleBBBonus = Threshold / 2;
  Threshold += SingleBBBonus;

  // Perform some tweaks to the cost and threshold based on the direct
  // callsite information.

  // We want to more aggressively inline vector-dense kernels, so up the
  // threshold, and we'll lower it if the % of vector instructions gets too
  // low.
  assert(NumInstructions == 0);
  assert(NumVectorInstructions == 0);
  FiftyPercentVectorBonus = Threshold;
  TenPercentVectorBonus = Threshold / 2;

  // Give out bonuses per argument, as the instructions setting them up will
  // be gone after inlining.
  for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
    if (DL && CS.isByValArgument(I)) {
      // We approximate the number of loads and stores needed by dividing the
      // size of the byval type by the target's pointer size.
      PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
      unsigned TypeSize = DL->getTypeSizeInBits(PTy->getElementType());
      unsigned PointerSize = DL->getPointerSizeInBits();
      // Ceiling division.
      unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;

      // If it generates more than 8 stores it is likely to be expanded as an
      // inline memcpy so we take that as an upper bound. Otherwise we assume
      // one load and one store per word copied.
      // FIXME: The maxStoresPerMemcpy setting from the target should be used
      // here instead of a magic number of 8, but it's not available via
      // DataLayout.
      NumStores = std::min(NumStores, 8U);

      Cost -= 2 * NumStores * InlineConstants::InstrCost;
    } else {
      // For non-byval arguments subtract off one instruction per call
      // argument.
      Cost -= InlineConstants::InstrCost;
    }
  }

  // If there is only one call of the function, and it has internal linkage,
  // the cost of inlining it drops dramatically.
  bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneUse() &&
    &F == CS.getCalledFunction();
  if (OnlyOneCallAndLocalLinkage)
    Cost += InlineConstants::LastCallToStaticBonus;

  // If the instruction after the call, or if the normal destination of the
  // invoke is an unreachable instruction, the function is noreturn. As such,
  // there is little point in inlining this unless there is literally zero
  // cost.
  Instruction *Instr = CS.getInstruction();
  if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
    if (isa<UnreachableInst>(II->getNormalDest()->begin()))
      Threshold = 1;
  } else if (isa<UnreachableInst>(++BasicBlock::iterator(Instr)))
    Threshold = 1;

  // If this function uses the coldcc calling convention, prefer not to inline
  // it.
  if (F.getCallingConv() == CallingConv::Cold)
    Cost += InlineConstants::ColdccPenalty;

  // Check if we're done. This can happen due to bonuses and penalties.
  if (Cost > Threshold)
    return false;

  if (F.empty())
    return true;

  Function *Caller = CS.getInstruction()->getParent()->getParent();
  // Check if the caller function is recursive itself.
  for (User *U : Caller->users()) {
    CallSite Site(U);
    if (!Site)
      continue;
    Instruction *I = Site.getInstruction();
    if (I->getParent()->getParent() == Caller) {
      IsCallerRecursive = true;
      break;
    }
  }

  // Populate our simplified values by mapping from function arguments to call
  // arguments with known important simplifications.
  CallSite::arg_iterator CAI = CS.arg_begin();
  for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end();
       FAI != FAE; ++FAI, ++CAI) {
    assert(CAI != CS.arg_end());
    if (Constant *C = dyn_cast<Constant>(CAI))
      SimplifiedValues[FAI] = C;

    Value *PtrArg = *CAI;
    if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
      ConstantOffsetPtrs[FAI] = std::make_pair(PtrArg, C->getValue());

      // We can SROA any pointer arguments derived from alloca instructions.
      if (isa<AllocaInst>(PtrArg)) {
        SROAArgValues[FAI] = PtrArg;
        SROAArgCosts[PtrArg] = 0;
      }
    }
  }
  NumConstantArgs = SimplifiedValues.size();
  NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
  NumAllocaArgs = SROAArgValues.size();

  // FIXME: If a caller has multiple calls to a callee, we end up recomputing
  // the ephemeral values multiple times (and they're completely determined by
  // the callee, so this is purely duplicate work).
  SmallPtrSet<const Value *, 32> EphValues;
  CodeMetrics::collectEphemeralValues(&F, AT, EphValues);

  // The worklist of live basic blocks in the callee *after* inlining. We avoid
  // adding basic blocks of the callee which can be proven to be dead for this
  // particular call site in order to get more accurate cost estimates. This
  // requires a somewhat heavyweight iteration pattern: we need to walk the
  // basic blocks in a breadth-first order as we insert live successors. To
  // accomplish this, prioritizing for small iterations because we exit after
  // crossing our threshold, we use a small-size optimized SetVector.
  typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
                    SmallPtrSet<BasicBlock *, 16> > BBSetVector;
  BBSetVector BBWorklist;
  BBWorklist.insert(&F.getEntryBlock());
  // Note that we *must not* cache the size, this loop grows the worklist.
  for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
    // Bail out the moment we cross the threshold. This means we'll under-count
    // the cost, but only when undercounting doesn't matter.
    if (Cost > (Threshold + VectorBonus))
      break;

    BasicBlock *BB = BBWorklist[Idx];
    if (BB->empty())
      continue;

    // Disallow inlining a blockaddress. A blockaddress only has defined
    // behavior for an indirect branch in the same function, and we do not
    // currently support inlining indirect branches. But, the inliner may not
    // see an indirect branch that ends up being dead code at a particular call
    // site. If the blockaddress escapes the function, e.g., via a global
    // variable, inlining may lead to an invalid cross-function reference.
    if (BB->hasAddressTaken())
      return false;

    // Analyze the cost of this block. If we blow through the threshold, this
    // returns false, and we can bail on out.
    if (!analyzeBlock(BB, EphValues)) {
      if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
          HasIndirectBr)
        return false;

      // If the caller is a recursive function then we don't want to inline
      // functions which allocate a lot of stack space because it would
      // increase the caller stack usage dramatically.
      if (IsCallerRecursive &&
          AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
        return false;

      break;
    }

    TerminatorInst *TI = BB->getTerminator();

    // Add in the live successors by first checking whether we have a
    // terminator that may be simplified based on the values simplified by
    // this call.
    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
      if (BI->isConditional()) {
        Value *Cond = BI->getCondition();
        if (ConstantInt *SimpleCond
              = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
          BBWorklist.insert(BI->getSuccessor(SimpleCond->isZero() ? 1 : 0));
          continue;
        }
      }
    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
      Value *Cond = SI->getCondition();
      if (ConstantInt *SimpleCond
            = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
        BBWorklist.insert(SI->findCaseValue(SimpleCond).getCaseSuccessor());
        continue;
      }
    }

    // If we're unable to select a particular successor, just count all of
    // them.
    for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
         ++TIdx)
      BBWorklist.insert(TI->getSuccessor(TIdx));

    // If we had any successors at this point, then post-inlining is likely to
    // have them as well. Note that we assume any basic blocks which existed
    // due to branches or switches which folded above will also fold after
    // inlining.
    if (SingleBB && TI->getNumSuccessors() > 1) {
      // Take off the bonus we applied to the threshold.
      Threshold -= SingleBBBonus;
      SingleBB = false;
    }
  }

  // If this is a noduplicate call, we can still inline as long as
  // inlining this would cause the removal of the caller (so the instruction
  // is not actually duplicated, just moved).
  if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
    return false;

  Threshold += VectorBonus;

  return Cost < Threshold;
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// \brief Dump stats about this call's analysis.
void CallAnalyzer::dump() {
#define DEBUG_PRINT_STAT(x) dbgs() << " " #x ": " << x << "\n"
  DEBUG_PRINT_STAT(NumConstantArgs);
  DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
  DEBUG_PRINT_STAT(NumAllocaArgs);
  DEBUG_PRINT_STAT(NumConstantPtrCmps);
  DEBUG_PRINT_STAT(NumConstantPtrDiffs);
  DEBUG_PRINT_STAT(NumInstructionsSimplified);
  DEBUG_PRINT_STAT(SROACostSavings);
  DEBUG_PRINT_STAT(SROACostSavingsLost);
  DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
  DEBUG_PRINT_STAT(Cost);
  DEBUG_PRINT_STAT(Threshold);
  DEBUG_PRINT_STAT(VectorBonus);
#undef DEBUG_PRINT_STAT
}
#endif

INITIALIZE_PASS_BEGIN(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis",
                      true, true)
INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
INITIALIZE_PASS_END(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis",
                    true, true)

char InlineCostAnalysis::ID = 0;

InlineCostAnalysis::InlineCostAnalysis() : CallGraphSCCPass(ID) {}

InlineCostAnalysis::~InlineCostAnalysis() {}

void InlineCostAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesAll();
  AU.addRequired<AssumptionTracker>();
  AU.addRequired<TargetTransformInfo>();
  CallGraphSCCPass::getAnalysisUsage(AU);
}

bool InlineCostAnalysis::runOnSCC(CallGraphSCC &SCC) {
  TTI = &getAnalysis<TargetTransformInfo>();
  AT = &getAnalysis<AssumptionTracker>();
  return false;
}

InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, int Threshold) {
  return getInlineCost(CS, CS.getCalledFunction(), Threshold);
}

/// \brief Test whether two functions either both have or both lack the given
/// attribute.
static bool attributeMatches(Function *F1, Function *F2,
                             Attribute::AttrKind Attr) {
  return F1->hasFnAttribute(Attr) == F2->hasFnAttribute(Attr);
}

/// \brief Test that there are no attribute conflicts between Caller and Callee
/// that prevent inlining.
static bool functionsHaveCompatibleAttributes(Function *Caller,
                                              Function *Callee) {
  return attributeMatches(Caller, Callee, Attribute::SanitizeAddress) &&
         attributeMatches(Caller, Callee, Attribute::SanitizeMemory) &&
         attributeMatches(Caller, Callee, Attribute::SanitizeThread);
}

InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee,
                                             int Threshold) {
  // Cannot inline indirect calls.
  if (!Callee)
    return llvm::InlineCost::getNever();

  // Calls to functions with always-inline attributes should be inlined
  // whenever possible.
  if (CS.hasFnAttr(Attribute::AlwaysInline)) {
    if (isInlineViable(*Callee))
      return llvm::InlineCost::getAlways();
    return llvm::InlineCost::getNever();
  }

  // Never inline functions with conflicting attributes (unless callee has
  // always-inline attribute).
  if (!functionsHaveCompatibleAttributes(CS.getCaller(), Callee))
    return llvm::InlineCost::getNever();

  // Don't inline this call if the caller has the optnone attribute.
  if (CS.getCaller()->hasFnAttribute(Attribute::OptimizeNone))
    return llvm::InlineCost::getNever();

  // Don't inline functions which can be redefined at link-time to mean
  // something else. Don't inline functions marked noinline or call sites
  // marked noinline.
  if (Callee->mayBeOverridden() ||
      Callee->hasFnAttribute(Attribute::NoInline) || CS.isNoInline())
    return llvm::InlineCost::getNever();

  DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
        << "...\n");

  CallAnalyzer CA(Callee->getDataLayout(), *TTI, AT, *Callee, Threshold);
  bool ShouldInline = CA.analyzeCall(CS);

  DEBUG(CA.dump());

  // Check if there was a reason to force inlining or no inlining.
  if (!ShouldInline && CA.getCost() < CA.getThreshold())
    return InlineCost::getNever();
  if (ShouldInline && CA.getCost() >= CA.getThreshold())
    return InlineCost::getAlways();

  return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
}

bool InlineCostAnalysis::isInlineViable(Function &F) {
  bool ReturnsTwice =
    F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
                                   Attribute::ReturnsTwice);
  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
    // Disallow inlining of functions which contain indirect branches or
    // blockaddresses.
    if (isa<IndirectBrInst>(BI->getTerminator()) || BI->hasAddressTaken())
      return false;

    for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
         ++II) {
      CallSite CS(II);
      if (!CS)
        continue;

      // Disallow recursive calls.
      if (&F == CS.getCalledFunction())
        return false;

      // Disallow calls which expose returns-twice to a function not previously
      // attributed as such.
      if (!ReturnsTwice && CS.isCall() &&
          cast<CallInst>(CS.getInstruction())->canReturnTwice())
        return false;
    }
  }

  return true;
}