
InlineCost.cpp
00001 //===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file implements inline cost analysis.
00011 //
00012 //===----------------------------------------------------------------------===//
00013 
00014 #include "llvm/Analysis/InlineCost.h"
00015 #include "llvm/ADT/STLExtras.h"
00016 #include "llvm/ADT/SetVector.h"
00017 #include "llvm/ADT/SmallPtrSet.h"
00018 #include "llvm/ADT/SmallVector.h"
00019 #include "llvm/ADT/Statistic.h"
00020 #include "llvm/Analysis/AssumptionTracker.h"
00021 #include "llvm/Analysis/ConstantFolding.h"
00022 #include "llvm/Analysis/CodeMetrics.h"
00023 #include "llvm/Analysis/InstructionSimplify.h"
00024 #include "llvm/Analysis/TargetTransformInfo.h"
00025 #include "llvm/IR/CallSite.h"
00026 #include "llvm/IR/CallingConv.h"
00027 #include "llvm/IR/DataLayout.h"
00028 #include "llvm/IR/GetElementPtrTypeIterator.h"
00029 #include "llvm/IR/GlobalAlias.h"
00030 #include "llvm/IR/InstVisitor.h"
00031 #include "llvm/IR/IntrinsicInst.h"
00032 #include "llvm/IR/Operator.h"
00033 #include "llvm/Support/Debug.h"
00034 #include "llvm/Support/raw_ostream.h"
00035 
00036 using namespace llvm;
00037 
00038 #define DEBUG_TYPE "inline-cost"
00039 
00040 STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");
00041 
00042 namespace {
00043 
00044 class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
00045   typedef InstVisitor<CallAnalyzer, bool> Base;
00046   friend class InstVisitor<CallAnalyzer, bool>;
00047 
00048   // DataLayout if available, or null.
00049   const DataLayout *const DL;
00050 
00051   /// The TargetTransformInfo available for this compilation.
00052   const TargetTransformInfo &TTI;
00053 
00054   /// The cache of @llvm.assume intrinsics.
00055   AssumptionTracker *AT;
00056 
00057   // The called function.
00058   Function &F;
00059 
00060   int Threshold;
00061   int Cost;
00062 
00063   bool IsCallerRecursive;
00064   bool IsRecursiveCall;
00065   bool ExposesReturnsTwice;
00066   bool HasDynamicAlloca;
00067   bool ContainsNoDuplicateCall;
00068   bool HasReturn;
00069   bool HasIndirectBr;
00070 
00071   /// Number of bytes allocated statically by the callee.
00072   uint64_t AllocatedSize;
00073   unsigned NumInstructions, NumVectorInstructions;
00074   int FiftyPercentVectorBonus, TenPercentVectorBonus;
00075   int VectorBonus;
00076 
00077   // While we walk the potentially-inlined instructions, we build up and
00078   // maintain a mapping of simplified values specific to this callsite. The
00079   // idea is to propagate any special information we have about arguments to
00080   // this call through the inlinable section of the function, and account for
00081   // likely simplifications post-inlining. The most important aspect we track
00082   // is CFG altering simplifications -- when we prove a basic block dead, that
00083   // can cause dramatic shifts in the cost of inlining a function.
00084   DenseMap<Value *, Constant *> SimplifiedValues;
00085 
00086   // Keep track of the values which map back (through function arguments) to
00087   // allocas on the caller stack which could be simplified through SROA.
00088   DenseMap<Value *, Value *> SROAArgValues;
00089 
00090   // The mapping of caller Alloca values to their accumulated cost savings. If
00091   // we have to disable SROA for one of the allocas, this tells us how much
00092   // cost must be added.
00093   DenseMap<Value *, int> SROAArgCosts;
00094 
00095   // Keep track of values which map to a pointer base and constant offset.
00096   DenseMap<Value *, std::pair<Value *, APInt> > ConstantOffsetPtrs;
00097 
00098   // Custom simplification helper routines.
00099   bool isAllocaDerivedArg(Value *V);
00100   bool lookupSROAArgAndCost(Value *V, Value *&Arg,
00101                             DenseMap<Value *, int>::iterator &CostIt);
00102   void disableSROA(DenseMap<Value *, int>::iterator CostIt);
00103   void disableSROA(Value *V);
00104   void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
00105                           int InstructionCost);
00106   bool isGEPOffsetConstant(GetElementPtrInst &GEP);
00107   bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
00108   bool simplifyCallSite(Function *F, CallSite CS);
00109   ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
00110 
00111   // Custom analysis routines.
00112   bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues);
00113 
00114   // Disable several entry points to the visitor so we don't accidentally use
00115   // them by declaring but not defining them here.
00116   void visit(Module *);     void visit(Module &);
00117   void visit(Function *);   void visit(Function &);
00118   void visit(BasicBlock *); void visit(BasicBlock &);
00119 
00120   // Provide base case for our instruction visit.
00121   bool visitInstruction(Instruction &I);
00122 
00123   // Our visit overrides.
00124   bool visitAlloca(AllocaInst &I);
00125   bool visitPHI(PHINode &I);
00126   bool visitGetElementPtr(GetElementPtrInst &I);
00127   bool visitBitCast(BitCastInst &I);
00128   bool visitPtrToInt(PtrToIntInst &I);
00129   bool visitIntToPtr(IntToPtrInst &I);
00130   bool visitCastInst(CastInst &I);
00131   bool visitUnaryInstruction(UnaryInstruction &I);
00132   bool visitCmpInst(CmpInst &I);
00133   bool visitSub(BinaryOperator &I);
00134   bool visitBinaryOperator(BinaryOperator &I);
00135   bool visitLoad(LoadInst &I);
00136   bool visitStore(StoreInst &I);
00137   bool visitExtractValue(ExtractValueInst &I);
00138   bool visitInsertValue(InsertValueInst &I);
00139   bool visitCallSite(CallSite CS);
00140   bool visitReturnInst(ReturnInst &RI);
00141   bool visitBranchInst(BranchInst &BI);
00142   bool visitSwitchInst(SwitchInst &SI);
00143   bool visitIndirectBrInst(IndirectBrInst &IBI);
00144   bool visitResumeInst(ResumeInst &RI);
00145   bool visitUnreachableInst(UnreachableInst &I);
00146 
00147 public:
00148   CallAnalyzer(const DataLayout *DL, const TargetTransformInfo &TTI,
00149                AssumptionTracker *AT, Function &Callee, int Threshold)
00150       : DL(DL), TTI(TTI), AT(AT), F(Callee), Threshold(Threshold), Cost(0),
00151         IsCallerRecursive(false), IsRecursiveCall(false),
00152         ExposesReturnsTwice(false), HasDynamicAlloca(false),
00153         ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
00154         AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0),
00155         FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0),
00156         NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
00157         NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
00158         NumInstructionsSimplified(0), SROACostSavings(0),
00159         SROACostSavingsLost(0) {}
00160 
00161   bool analyzeCall(CallSite CS);
00162 
00163   int getThreshold() { return Threshold; }
00164   int getCost() { return Cost; }
00165 
00166   // Keep a bunch of stats about the cost savings found so we can print them
00167   // out when debugging.
00168   unsigned NumConstantArgs;
00169   unsigned NumConstantOffsetPtrArgs;
00170   unsigned NumAllocaArgs;
00171   unsigned NumConstantPtrCmps;
00172   unsigned NumConstantPtrDiffs;
00173   unsigned NumInstructionsSimplified;
00174   unsigned SROACostSavings;
00175   unsigned SROACostSavingsLost;
00176 
00177   void dump();
00178 };
00179 
00180 } // namespace
00181 
00182 /// \brief Test whether the given value is an Alloca-derived function argument.
00183 bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
00184   return SROAArgValues.count(V);
00185 }
00186 
00187 /// \brief Lookup the SROA-candidate argument and cost iterator which V maps to.
00188 /// Returns false if V does not map to a SROA-candidate.
00189 bool CallAnalyzer::lookupSROAArgAndCost(
00190     Value *V, Value *&Arg, DenseMap<Value *, int>::iterator &CostIt) {
00191   if (SROAArgValues.empty() || SROAArgCosts.empty())
00192     return false;
00193 
00194   DenseMap<Value *, Value *>::iterator ArgIt = SROAArgValues.find(V);
00195   if (ArgIt == SROAArgValues.end())
00196     return false;
00197 
00198   Arg = ArgIt->second;
00199   CostIt = SROAArgCosts.find(Arg);
00200   return CostIt != SROAArgCosts.end();
00201 }
00202 
00203 /// \brief Disable SROA for the candidate marked by this cost iterator.
00204 ///
00205 /// This marks the candidate as no longer viable for SROA, and adds the cost
00206 /// savings associated with it back into the inline cost measurement.
00207 void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) {
00208   // If we're no longer able to perform SROA we need to undo its cost savings
00209   // and prevent subsequent analysis.
00210   Cost += CostIt->second;
00211   SROACostSavings -= CostIt->second;
00212   SROACostSavingsLost += CostIt->second;
00213   SROAArgCosts.erase(CostIt);
00214 }
00215 
00216 /// \brief If 'V' maps to a SROA candidate, disable SROA for it.
00217 void CallAnalyzer::disableSROA(Value *V) {
00218   Value *SROAArg;
00219   DenseMap<Value *, int>::iterator CostIt;
00220   if (lookupSROAArgAndCost(V, SROAArg, CostIt))
00221     disableSROA(CostIt);
00222 }
00223 
00224 /// \brief Accumulate the given cost for a particular SROA candidate.
00225 void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
00226                                       int InstructionCost) {
00227   CostIt->second += InstructionCost;
00228   SROACostSavings += InstructionCost;
00229 }
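
To make the bookkeeping above concrete, here is a minimal standalone sketch, not part of InlineCost.cpp, using plain std:: containers and made-up candidate ids in place of the analyzer's real types: savings accrue per SROA candidate, and disabling a candidate charges those savings back into the running cost.

#include <cassert>
#include <map>

struct SROABookkeeping {
  int Cost = 0;
  int SROACostSavings = 0;
  int SROACostSavingsLost = 0;
  std::map<int, int> SROAArgCosts; // candidate id -> accumulated savings

  void accumulate(int Candidate, int InstructionCost) {
    SROAArgCosts[Candidate] += InstructionCost;
    SROACostSavings += InstructionCost;
  }

  void disable(int Candidate) {
    std::map<int, int>::iterator It = SROAArgCosts.find(Candidate);
    if (It == SROAArgCosts.end())
      return;
    Cost += It->second;             // charge the lost savings back into Cost
    SROACostSavings -= It->second;
    SROACostSavingsLost += It->second;
    SROAArgCosts.erase(It);
  }
};

int main() {
  SROABookkeeping B;
  B.accumulate(1, 5); // e.g. a simple load on candidate 1
  B.accumulate(1, 5); // and a simple store on the same candidate
  B.disable(1);       // a use we cannot see through appears
  assert(B.Cost == 10 && B.SROACostSavings == 0 && B.SROACostSavingsLost == 10);
  return 0;
}
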
00230 
00231 /// \brief Check whether a GEP's indices are all constant.
00232 ///
00233 /// Respects any simplified values known during the analysis of this callsite.
00234 bool CallAnalyzer::isGEPOffsetConstant(GetElementPtrInst &GEP) {
00235   for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
00236     if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I))
00237       return false;
00238 
00239   return true;
00240 }
00241 
00242 /// \brief Accumulate a constant GEP offset into an APInt if possible.
00243 ///
00244 /// Returns false if unable to compute the offset for any reason. Respects any
00245 /// simplified values known during the analysis of this callsite.
00246 bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
00247   if (!DL)
00248     return false;
00249 
00250   unsigned IntPtrWidth = DL->getPointerSizeInBits();
00251   assert(IntPtrWidth == Offset.getBitWidth());
00252 
00253   for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
00254        GTI != GTE; ++GTI) {
00255     ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
00256     if (!OpC)
00257       if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
00258         OpC = dyn_cast<ConstantInt>(SimpleOp);
00259     if (!OpC)
00260       return false;
00261     if (OpC->isZero()) continue;
00262 
00263     // Handle a struct index, which adds its field offset to the pointer.
00264     if (StructType *STy = dyn_cast<StructType>(*GTI)) {
00265       unsigned ElementIdx = OpC->getZExtValue();
00266       const StructLayout *SL = DL->getStructLayout(STy);
00267       Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
00268       continue;
00269     }
00270 
00271     APInt TypeSize(IntPtrWidth, DL->getTypeAllocSize(GTI.getIndexedType()));
00272     Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
00273   }
00274   return true;
00275 }
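
The loop above is the usual "field offset plus index times element size" arithmetic. A minimal host-side sketch, assuming a made-up struct and index and using sizeof/offsetof as stand-ins for the DataLayout queries:

#include <cstddef>
#include <cstdio>

struct Elem { int Key; long long Payload; };

int main() {
  // Equivalent of a GEP with all-constant indices: &Arr[3].Payload.
  unsigned Idx = 3;
  size_t Offset = Idx * sizeof(Elem)        // array index scales by element size
                + offsetof(Elem, Payload);  // struct index adds its field offset
  std::printf("accumulated constant offset: %zu bytes\n", Offset);
  return 0;
}
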
00276 
00277 bool CallAnalyzer::visitAlloca(AllocaInst &I) {
00278   // Check whether inlining will turn a dynamic alloca into a static
00279   // alloca, and handle that case.
00280   if (I.isArrayAllocation()) {
00281     if (Constant *Size = SimplifiedValues.lookup(I.getArraySize())) {
00282       ConstantInt *AllocSize = dyn_cast<ConstantInt>(Size);
00283       assert(AllocSize && "Allocation size not a constant int?");
00284       Type *Ty = I.getAllocatedType();
00285       AllocatedSize += Ty->getPrimitiveSizeInBits() * AllocSize->getZExtValue();
00286       return Base::visitAlloca(I);
00287     }
00288   }
00289 
00290   // Accumulate the allocated size.
00291   if (I.isStaticAlloca()) {
00292     Type *Ty = I.getAllocatedType();
00293     AllocatedSize += (DL ? DL->getTypeAllocSize(Ty) :
00294                       Ty->getPrimitiveSizeInBits());
00295   }
00296 
00297   // We will happily inline static alloca instructions.
00298   if (I.isStaticAlloca())
00299     return Base::visitAlloca(I);
00300 
00301   // FIXME: This is overly conservative. Dynamic allocas are inefficient for
00302   // a variety of reasons, and so we would like to not inline them into
00303   // functions which don't currently have a dynamic alloca. This simply
00304   // disables inlining altogether in the presence of a dynamic alloca.
00305   HasDynamicAlloca = true;
00306   return false;
00307 }
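
The AllocatedSize total accumulated here feeds the recursive-caller guard used later in analyzeBlock and analyzeCall. A small sketch of that interaction, with a made-up allocation pattern; the 1024-byte limit is assumed to match InlineConstants::TotalAllocaSizeRecursiveCaller in this version:

#include <cstdio>

int main() {
  unsigned long long AllocatedSize = 0;
  // Hypothetical callee: three static allocas of 512 bytes each.
  for (int I = 0; I < 3; ++I)
    AllocatedSize += 512;
  const unsigned long long TotalAllocaSizeRecursiveCaller = 1024; // assumed
  bool IsCallerRecursive = true;
  if (IsCallerRecursive && AllocatedSize > TotalAllocaSizeRecursiveCaller)
    std::printf("refusing to inline: would grow a recursive caller's frame\n");
  return 0;
}
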
00308 
00309 bool CallAnalyzer::visitPHI(PHINode &I) {
00310   // FIXME: We should potentially be tracking values through phi nodes,
00311   // especially when they collapse to a single value due to deleted CFG edges
00312   // during inlining.
00313 
00314   // FIXME: We need to propagate SROA *disabling* through phi nodes, even
00315   // though we don't want to propagate its bonuses. The idea is to disable
00316   // SROA if it *might* be used in an inappropriate manner.
00317 
00318   // Phi nodes are always zero-cost.
00319   return true;
00320 }
00321 
00322 bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
00323   Value *SROAArg;
00324   DenseMap<Value *, int>::iterator CostIt;
00325   bool SROACandidate = lookupSROAArgAndCost(I.getPointerOperand(),
00326                                             SROAArg, CostIt);
00327 
00328   // Try to fold GEPs of constant-offset call site argument pointers. This
00329   // requires target data and inbounds GEPs.
00330   if (DL && I.isInBounds()) {
00331     // Check if we have a base + offset for the pointer.
00332     Value *Ptr = I.getPointerOperand();
00333     std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr);
00334     if (BaseAndOffset.first) {
00335       // Check if the offset of this GEP is constant, and if so accumulate it
00336       // into Offset.
00337       if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second)) {
00338         // Non-constant GEPs aren't folded, and disable SROA.
00339         if (SROACandidate)
00340           disableSROA(CostIt);
00341         return false;
00342       }
00343 
00344       // Add the result as a new mapping to Base + Offset.
00345       ConstantOffsetPtrs[&I] = BaseAndOffset;
00346 
00347       // Also handle SROA candidates here; we already know that the GEP is
00348       // all-constant indexed.
00349       if (SROACandidate)
00350         SROAArgValues[&I] = SROAArg;
00351 
00352       return true;
00353     }
00354   }
00355 
00356   if (isGEPOffsetConstant(I)) {
00357     if (SROACandidate)
00358       SROAArgValues[&I] = SROAArg;
00359 
00360     // Constant GEPs are modeled as free.
00361     return true;
00362   }
00363 
00364   // Variable GEPs will require math and will disable SROA.
00365   if (SROACandidate)
00366     disableSROA(CostIt);
00367   return false;
00368 }
00369 
00370 bool CallAnalyzer::visitBitCast(BitCastInst &I) {
00371   // Propagate constants through bitcasts.
00372   Constant *COp = dyn_cast<Constant>(I.getOperand(0));
00373   if (!COp)
00374     COp = SimplifiedValues.lookup(I.getOperand(0));
00375   if (COp)
00376     if (Constant *C = ConstantExpr::getBitCast(COp, I.getType())) {
00377       SimplifiedValues[&I] = C;
00378       return true;
00379     }
00380 
00381   // Track base/offsets through casts
00382   std::pair<Value *, APInt> BaseAndOffset
00383     = ConstantOffsetPtrs.lookup(I.getOperand(0));
00384   // Casts don't change the offset, just wrap it up.
00385   if (BaseAndOffset.first)
00386     ConstantOffsetPtrs[&I] = BaseAndOffset;
00387 
00388   // Also look for SROA candidates here.
00389   Value *SROAArg;
00390   DenseMap<Value *, int>::iterator CostIt;
00391   if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
00392     SROAArgValues[&I] = SROAArg;
00393 
00394   // Bitcasts are always zero cost.
00395   return true;
00396 }
00397 
00398 bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
00399   const DataLayout *DL = I.getDataLayout();
00400   // Propagate constants through ptrtoint.
00401   Constant *COp = dyn_cast<Constant>(I.getOperand(0));
00402   if (!COp)
00403     COp = SimplifiedValues.lookup(I.getOperand(0));
00404   if (COp)
00405     if (Constant *C = ConstantExpr::getPtrToInt(COp, I.getType())) {
00406       SimplifiedValues[&I] = C;
00407       return true;
00408     }
00409 
00410   // Track base/offset pairs when converted to a plain integer provided the
00411   // integer is large enough to represent the pointer.
00412   unsigned IntegerSize = I.getType()->getScalarSizeInBits();
00413   if (DL && IntegerSize >= DL->getPointerSizeInBits()) {
00414     std::pair<Value *, APInt> BaseAndOffset
00415       = ConstantOffsetPtrs.lookup(I.getOperand(0));
00416     if (BaseAndOffset.first)
00417       ConstantOffsetPtrs[&I] = BaseAndOffset;
00418   }
00419 
00420   // This is really weird. Technically, ptrtoint will disable SROA. However,
00421   // unless that ptrtoint is *used* somewhere in the live basic blocks after
00422   // inlining, it will be nuked, and SROA should proceed. All of the uses which
00423   // would block SROA would also block SROA if applied directly to a pointer,
00424   // and so we can just add the integer in here. The only places where SROA is
00425   // preserved either cannot fire on an integer, or won't in and of themselves
00426   // disable SROA (ext) without some later use that we would see and disable.
00427   Value *SROAArg;
00428   DenseMap<Value *, int>::iterator CostIt;
00429   if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
00430     SROAArgValues[&I] = SROAArg;
00431 
00432   return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
00433 }
00434 
00435 bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
00436   const DataLayout *DL = I.getDataLayout();
00437   // Propagate constants through inttoptr.
00438   Constant *COp = dyn_cast<Constant>(I.getOperand(0));
00439   if (!COp)
00440     COp = SimplifiedValues.lookup(I.getOperand(0));
00441   if (COp)
00442     if (Constant *C = ConstantExpr::getIntToPtr(COp, I.getType())) {
00443       SimplifiedValues[&I] = C;
00444       return true;
00445     }
00446 
00447   // Track base/offset pairs when round-tripped through a pointer without
00448   // modifications provided the integer is not too large.
00449   Value *Op = I.getOperand(0);
00450   unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
00451   if (DL && IntegerSize <= DL->getPointerSizeInBits()) {
00452     std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
00453     if (BaseAndOffset.first)
00454       ConstantOffsetPtrs[&I] = BaseAndOffset;
00455   }
00456 
00457   // "Propagate" SROA here in the same manner as we do for ptrtoint above.
00458   Value *SROAArg;
00459   DenseMap<Value *, int>::iterator CostIt;
00460   if (lookupSROAArgAndCost(Op, SROAArg, CostIt))
00461     SROAArgValues[&I] = SROAArg;
00462 
00463   return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
00464 }
00465 
00466 bool CallAnalyzer::visitCastInst(CastInst &I) {
00467   // Propagate constants through casts.
00468   Constant *COp = dyn_cast<Constant>(I.getOperand(0));
00469   if (!COp)
00470     COp = SimplifiedValues.lookup(I.getOperand(0));
00471   if (COp)
00472     if (Constant *C = ConstantExpr::getCast(I.getOpcode(), COp, I.getType())) {
00473       SimplifiedValues[&I] = C;
00474       return true;
00475     }
00476 
00477   // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere.
00478   disableSROA(I.getOperand(0));
00479 
00480   return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
00481 }
00482 
00483 bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
00484   Value *Operand = I.getOperand(0);
00485   Constant *COp = dyn_cast<Constant>(Operand);
00486   if (!COp)
00487     COp = SimplifiedValues.lookup(Operand);
00488   if (COp)
00489     if (Constant *C = ConstantFoldInstOperands(I.getOpcode(), I.getType(),
00490                                                COp, DL)) {
00491       SimplifiedValues[&I] = C;
00492       return true;
00493     }
00494 
00495   // Disable any SROA on the argument to arbitrary unary operators.
00496   disableSROA(Operand);
00497 
00498   return false;
00499 }
00500 
00501 bool CallAnalyzer::visitCmpInst(CmpInst &I) {
00502   Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
00503   // First try to handle simplified comparisons.
00504   if (!isa<Constant>(LHS))
00505     if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
00506       LHS = SimpleLHS;
00507   if (!isa<Constant>(RHS))
00508     if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
00509       RHS = SimpleRHS;
00510   if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
00511     if (Constant *CRHS = dyn_cast<Constant>(RHS))
00512       if (Constant *C = ConstantExpr::getCompare(I.getPredicate(), CLHS, CRHS)) {
00513         SimplifiedValues[&I] = C;
00514         return true;
00515       }
00516   }
00517 
00518   if (I.getOpcode() == Instruction::FCmp)
00519     return false;
00520 
00521   // Otherwise look for a comparison between constant offset pointers with
00522   // a common base.
00523   Value *LHSBase, *RHSBase;
00524   APInt LHSOffset, RHSOffset;
00525   std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
00526   if (LHSBase) {
00527     std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
00528     if (RHSBase && LHSBase == RHSBase) {
00529       // We have common bases, fold the icmp to a constant based on the
00530       // offsets.
00531       Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
00532       Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
00533       if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
00534         SimplifiedValues[&I] = C;
00535         ++NumConstantPtrCmps;
00536         return true;
00537       }
00538     }
00539   }
00540 
00541   // If the comparison is an equality comparison with null, we can simplify it
00542   // for any alloca-derived argument.
00543   if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)))
00544     if (isAllocaDerivedArg(I.getOperand(0))) {
00545       // We can actually predict the result of comparisons between an
00546       // alloca-derived value and null. Note that this fires regardless of
00547       // SROA firing.
00548       bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
00549       SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
00550                                         : ConstantInt::getFalse(I.getType());
00551       return true;
00552     }
00553 
00554   // Finally check for SROA candidates in comparisons.
00555   Value *SROAArg;
00556   DenseMap<Value *, int>::iterator CostIt;
00557   if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
00558     if (isa<ConstantPointerNull>(I.getOperand(1))) {
00559       accumulateSROACost(CostIt, InlineConstants::InstrCost);
00560       return true;
00561     }
00562 
00563     disableSROA(CostIt);
00564   }
00565 
00566   return false;
00567 }
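
The common-base folding above never needs the pointers themselves; it compares the tracked constant offsets. A standalone sketch with hypothetical offsets, not part of the LLVM source:

#include <cstdint>
#include <cstdio>

int main() {
  char Buffer[64];
  char *Base = Buffer;
  // Offsets as ConstantOffsetPtrs would record them for two derived pointers.
  uint64_t LHSOffset = 8, RHSOffset = 24;
  char *LHS = Base + LHSOffset, *RHS = Base + RHSOffset;
  // Folding the icmp: same base, so the result depends only on the offsets.
  bool Folded = LHSOffset < RHSOffset;
  std::printf("LHS < RHS folds to %d (direct comparison: %d)\n",
              (int)Folded, (int)(LHS < RHS));
  return 0;
}
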
00568 
00569 bool CallAnalyzer::visitSub(BinaryOperator &I) {
00570   // Try to handle a special case: we can fold computing the difference of two
00571   // constant-related pointers.
00572   Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
00573   Value *LHSBase, *RHSBase;
00574   APInt LHSOffset, RHSOffset;
00575   std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
00576   if (LHSBase) {
00577     std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
00578     if (RHSBase && LHSBase == RHSBase) {
00579       // We have common bases, fold the subtract to a constant based on the
00580       // offsets.
00581       Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
00582       Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
00583       if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) {
00584         SimplifiedValues[&I] = C;
00585         ++NumConstantPtrDiffs;
00586         return true;
00587       }
00588     }
00589   }
00590 
00591   // Otherwise, fall back to the generic logic for simplifying and handling
00592   // instructions.
00593   return Base::visitSub(I);
00594 }
00595 
00596 bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
00597   Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
00598   if (!isa<Constant>(LHS))
00599     if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
00600       LHS = SimpleLHS;
00601   if (!isa<Constant>(RHS))
00602     if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
00603       RHS = SimpleRHS;
00604   Value *SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, DL);
00605   if (Constant *C = dyn_cast_or_null<Constant>(SimpleV)) {
00606     SimplifiedValues[&I] = C;
00607     return true;
00608   }
00609 
00610   // Disable any SROA on arguments to arbitrary, unsimplified binary operators.
00611   disableSROA(LHS);
00612   disableSROA(RHS);
00613 
00614   return false;
00615 }
00616 
00617 bool CallAnalyzer::visitLoad(LoadInst &I) {
00618   Value *SROAArg;
00619   DenseMap<Value *, int>::iterator CostIt;
00620   if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
00621     if (I.isSimple()) {
00622       accumulateSROACost(CostIt, InlineConstants::InstrCost);
00623       return true;
00624     }
00625 
00626     disableSROA(CostIt);
00627   }
00628 
00629   return false;
00630 }
00631 
00632 bool CallAnalyzer::visitStore(StoreInst &I) {
00633   Value *SROAArg;
00634   DenseMap<Value *, int>::iterator CostIt;
00635   if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
00636     if (I.isSimple()) {
00637       accumulateSROACost(CostIt, InlineConstants::InstrCost);
00638       return true;
00639     }
00640 
00641     disableSROA(CostIt);
00642   }
00643 
00644   return false;
00645 }
00646 
00647 bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
00648   // Constant folding for extract value is trivial.
00649   Constant *C = dyn_cast<Constant>(I.getAggregateOperand());
00650   if (!C)
00651     C = SimplifiedValues.lookup(I.getAggregateOperand());
00652   if (C) {
00653     SimplifiedValues[&I] = ConstantExpr::getExtractValue(C, I.getIndices());
00654     return true;
00655   }
00656 
00657   // SROA can look through these but give them a cost.
00658   return false;
00659 }
00660 
00661 bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
00662   // Constant folding for insert value is trivial.
00663   Constant *AggC = dyn_cast<Constant>(I.getAggregateOperand());
00664   if (!AggC)
00665     AggC = SimplifiedValues.lookup(I.getAggregateOperand());
00666   Constant *InsertedC = dyn_cast<Constant>(I.getInsertedValueOperand());
00667   if (!InsertedC)
00668     InsertedC = SimplifiedValues.lookup(I.getInsertedValueOperand());
00669   if (AggC && InsertedC) {
00670     SimplifiedValues[&I] = ConstantExpr::getInsertValue(AggC, InsertedC,
00671                                                         I.getIndices());
00672     return true;
00673   }
00674 
00675   // SROA can look through these but give them a cost.
00676   return false;
00677 }
00678 
00679 /// \brief Try to simplify a call site.
00680 ///
00681 /// Takes a concrete function and callsite and tries to actually simplify it by
00682 /// analyzing the arguments and call itself with instsimplify. Returns true if
00683 /// it has simplified the callsite to some other entity (a constant), making it
00684 /// free.
00685 bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) {
00686   // FIXME: Using the instsimplify logic directly for this is inefficient
00687   // because we have to continually rebuild the argument list even when no
00688   // simplifications can be performed. Until that is fixed with remapping
00689   // inside of instsimplify, directly constant fold calls here.
00690   if (!canConstantFoldCallTo(F))
00691     return false;
00692 
00693   // Try to re-map the arguments to constants.
00694   SmallVector<Constant *, 4> ConstantArgs;
00695   ConstantArgs.reserve(CS.arg_size());
00696   for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
00697        I != E; ++I) {
00698     Constant *C = dyn_cast<Constant>(*I);
00699     if (!C)
00700       C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(*I));
00701     if (!C)
00702       return false; // This argument doesn't map to a constant.
00703 
00704     ConstantArgs.push_back(C);
00705   }
00706   if (Constant *C = ConstantFoldCall(F, ConstantArgs)) {
00707     SimplifiedValues[CS.getInstruction()] = C;
00708     return true;
00709   }
00710 
00711   return false;
00712 }
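
The same "every argument must map to a constant" requirement, sketched without the LLVM types; the callee, the argument encoding, and the helper below are all hypothetical:

#include <cstdio>
#include <vector>

struct Arg { bool IsConstant; int Value; };

static int Add(int A, int B) { return A + B; }

// Fold a two-argument call when both arguments are known constants; a single
// unknown argument defeats the fold, just as in simplifyCallSite.
static bool tryFoldCall(int (*Callee)(int, int), const std::vector<Arg> &Args,
                        int &Result) {
  for (size_t I = 0; I < Args.size(); ++I)
    if (!Args[I].IsConstant)
      return false;
  Result = Callee(Args[0].Value, Args[1].Value);
  return true;
}

int main() {
  std::vector<Arg> Args;
  Args.push_back(Arg{true, 2});
  Args.push_back(Arg{true, 40});
  int R = 0;
  if (tryFoldCall(Add, Args, R))
    std::printf("call folded to the constant %d\n", R);
  return 0;
}
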
00713 
00714 bool CallAnalyzer::visitCallSite(CallSite CS) {
00715   if (CS.hasFnAttr(Attribute::ReturnsTwice) &&
00716       !F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
00717                                       Attribute::ReturnsTwice)) {
00718     // This aborts the entire analysis.
00719     ExposesReturnsTwice = true;
00720     return false;
00721   }
00722   if (CS.isCall() &&
00723       cast<CallInst>(CS.getInstruction())->cannotDuplicate())
00724     ContainsNoDuplicateCall = true;
00725 
00726   if (Function *F = CS.getCalledFunction()) {
00727     // When we have a concrete function, first try to simplify it directly.
00728     if (simplifyCallSite(F, CS))
00729       return true;
00730 
00731     // Next check if it is an intrinsic we know about.
00732     // FIXME: Lift this into part of the InstVisitor.
00733     if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
00734       switch (II->getIntrinsicID()) {
00735       default:
00736         return Base::visitCallSite(CS);
00737 
00738       case Intrinsic::memset:
00739       case Intrinsic::memcpy:
00740       case Intrinsic::memmove:
00741         // SROA can usually chew through these intrinsics, but they aren't free.
00742         return false;
00743       }
00744     }
00745 
00746     if (F == CS.getInstruction()->getParent()->getParent()) {
00747       // This flag will fully abort the analysis, so don't bother with anything
00748       // else.
00749       IsRecursiveCall = true;
00750       return false;
00751     }
00752 
00753     if (TTI.isLoweredToCall(F)) {
00754       // We account for the average 1 instruction per call argument setup
00755       // here.
00756       Cost += CS.arg_size() * InlineConstants::InstrCost;
00757 
00758       // Everything other than inline ASM will also have a significant cost
00759       // merely from making the call.
00760       if (!isa<InlineAsm>(CS.getCalledValue()))
00761         Cost += InlineConstants::CallPenalty;
00762     }
00763 
00764     return Base::visitCallSite(CS);
00765   }
00766 
00767   // Otherwise we're in a very special case -- an indirect function call. See
00768   // if we can be particularly clever about this.
00769   Value *Callee = CS.getCalledValue();
00770 
00771   // First, pay the price of the argument setup. We account for the average
00772   // 1 instruction per call argument setup here.
00773   Cost += CS.arg_size() * InlineConstants::InstrCost;
00774 
00775   // Next, check if this happens to be an indirect function call to a known
00776   // function in this inline context. If not, we've done all we can.
00777   Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
00778   if (!F)
00779     return Base::visitCallSite(CS);
00780 
00781   // If we have a constant that we are calling as a function, we can peer
00782   // through it and see the function target. This happens not infrequently
00783   // during devirtualization and so we want to give it a hefty bonus for
00784   // inlining, but cap that bonus in the event that inlining wouldn't pan
00785   // out. Pretend to inline the function, with a custom threshold.
00786   CallAnalyzer CA(DL, TTI, AT, *F, InlineConstants::IndirectCallThreshold);
00787   if (CA.analyzeCall(CS)) {
00788     // We were able to inline the indirect call! Subtract the cost from the
00789     // bonus we want to apply, but don't go below zero.
00790     Cost -= std::max(0, InlineConstants::IndirectCallThreshold - CA.getCost());
00791   }
00792 
00793   return Base::visitCallSite(CS);
00794 }
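
The devirtualization bonus at the end works out to simple arithmetic: the cheaper the discovered callee, the bigger the discount, capped so it never goes negative. A sketch with assumed numbers; IndirectCallThreshold is believed to be 100 in this version's InlineConstants, but treat the values as illustrative:

#include <algorithm>
#include <cstdio>

int main() {
  const int IndirectCallThreshold = 100; // assumed value of the constant
  int Cost = 250;        // running cost of the outer analysis
  int NestedCost = 30;   // cost the nested CallAnalyzer computed for the target
  Cost -= std::max(0, IndirectCallThreshold - NestedCost);
  std::printf("cost after devirtualization bonus: %d\n", Cost); // 180
  return 0;
}
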
00795 
00796 bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
00797   // At least one return instruction will be free after inlining.
00798   bool Free = !HasReturn;
00799   HasReturn = true;
00800   return Free;
00801 }
00802 
00803 bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
00804   // We model unconditional branches as essentially free -- they really
00805   // shouldn't exist at all, but handling them makes the behavior of the
00806   // inliner more regular and predictable. Interestingly, conditional branches
00807   // which will fold away are also free.
00808   return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
00809          dyn_cast_or_null<ConstantInt>(
00810              SimplifiedValues.lookup(BI.getCondition()));
00811 }
00812 
00813 bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
00814   // We model unconditional switches as free, see the comments on handling
00815   // branches.
00816   if (isa<ConstantInt>(SI.getCondition()))
00817     return true;
00818   if (Value *V = SimplifiedValues.lookup(SI.getCondition()))
00819     if (isa<ConstantInt>(V))
00820       return true;
00821 
00822   // Otherwise, we need to accumulate a cost proportional to the number of
00823   // distinct successor blocks. This fan-out in the CFG cannot be represented
00824   // for free even if we can represent the core switch as a jumptable that
00825   // takes a single instruction.
00826   //
00827   // NB: We convert large switches which are just used to initialize large phi
00828   // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent
00829   // inlining those. It will prevent inlining in cases where the optimization
00830   // does not (yet) fire.
00831   SmallPtrSet<BasicBlock *, 8> SuccessorBlocks;
00832   SuccessorBlocks.insert(SI.getDefaultDest());
00833   for (auto I = SI.case_begin(), E = SI.case_end(); I != E; ++I)
00834     SuccessorBlocks.insert(I.getCaseSuccessor());
00835   // Add cost corresponding to the number of distinct destinations. The first
00836   // we model as free because of fallthrough.
00837   Cost += (SuccessorBlocks.size() - 1) * InlineConstants::InstrCost;
00838   return false;
00839 }
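
The accounting above charges one instruction per distinct successor beyond the first. A sketch with hypothetical block ids; InstrCost is assumed to be 5, matching InlineConstants::InstrCost in this version:

#include <cstdio>
#include <set>

int main() {
  std::set<int> SuccessorBlocks;
  SuccessorBlocks.insert(0);       // default destination
  int CaseDests[] = {1, 2, 2, 3};  // two cases share a destination block
  for (int Dest : CaseDests)
    SuccessorBlocks.insert(Dest);
  const int InstrCost = 5; // assumed
  int Cost = (static_cast<int>(SuccessorBlocks.size()) - 1) * InstrCost;
  std::printf("switch cost: %d\n", Cost); // 4 distinct blocks -> (4-1)*5 == 15
  return 0;
}
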
00840 
00841 bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
00842   // We never want to inline functions that contain an indirectbr.  This is
00843   // incorrect because all the blockaddresses (in static global initializers
00844   // for example) would be referring to the original function, and this
00845   // indirect jump would jump from the inlined copy of the function into the
00846   // original function which is extremely undefined behavior.
00847   // FIXME: This logic isn't really right; we can safely inline functions with
00848   // indirectbr's as long as no other function or global references the
00849   // blockaddress of a block within the current function.
00850   HasIndirectBr = true;
00851   return false;
00852 }
00853 
00854 bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
00855   // FIXME: It's not clear that a single instruction is an accurate model for
00856   // the inline cost of a resume instruction.
00857   return false;
00858 }
00859 
00860 bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
00861   // FIXME: It might be reasonable to discount the cost of instructions leading
00862   // to unreachable as they have the lowest possible impact on both runtime and
00863   // code size.
00864   return true; // No actual code is needed for unreachable.
00865 }
00866 
00867 bool CallAnalyzer::visitInstruction(Instruction &I) {
00868   // Some instructions are free. All of the free intrinsics can also be
00869   // handled by SROA, etc.
00870   if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&I))
00871     return true;
00872 
00873   // We found something we don't understand or can't handle. Mark any SROA-able
00874   // values in the operand list as no longer viable.
00875   for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI)
00876     disableSROA(*OI);
00877 
00878   return false;
00879 }
00880 
00881 
00882 /// \brief Analyze a basic block for its contribution to the inline cost.
00883 ///
00884 /// This method walks the analyzer over every instruction in the given basic
00885 /// block and accounts for their cost during inlining at this callsite. It
00886 /// aborts early if the threshold has been exceeded or an impossible to inline
00887 /// construct has been detected. It returns false if inlining is no longer
00888 /// viable, and true if inlining remains viable.
00889 bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
00890                                 SmallPtrSetImpl<const Value *> &EphValues) {
00891   for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
00892     // FIXME: Currently, the number of instructions in a function, regardless of
00893     // our ability to simplify them during inlining into constants or dead code,
00894     // is actually used by the vector bonus heuristic. As long as that's true,
00895     // we have to special case debug intrinsics here to prevent differences in
00896     // inlining due to debug symbols. Eventually, the number of unsimplified
00897     // instructions shouldn't factor into the cost computation, but until then,
00898     // hack around it here.
00899     if (isa<DbgInfoIntrinsic>(I))
00900       continue;
00901 
00902     // Skip ephemeral values.
00903     if (EphValues.count(I))
00904       continue;
00905 
00906     ++NumInstructions;
00907     if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
00908       ++NumVectorInstructions;
00909 
00910     // If the instruction simplified to a constant, there is no cost to this
00911     // instruction. Visit the instructions using our InstVisitor to account for
00912     // all of the per-instruction logic. The visit tree returns true if we
00913     // consumed the instruction in any way, and false if the instruction's base
00914     // cost should count against inlining.
00915     if (Base::visit(I))
00916       ++NumInstructionsSimplified;
00917     else
00918       Cost += InlineConstants::InstrCost;
00919 
00920     // If visiting this instruction detected an uninlinable pattern, abort.
00921     if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
00922         HasIndirectBr)
00923       return false;
00924 
00925     // If the caller is a recursive function then we don't want to inline
00926     // functions which allocate a lot of stack space because it would increase
00927     // the caller stack usage dramatically.
00928     if (IsCallerRecursive &&
00929         AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
00930       return false;
00931 
00932     if (NumVectorInstructions > NumInstructions/2)
00933       VectorBonus = FiftyPercentVectorBonus;
00934     else if (NumVectorInstructions > NumInstructions/10)
00935       VectorBonus = TenPercentVectorBonus;
00936     else
00937       VectorBonus = 0;
00938 
00939     // Check if we've passed the threshold so we don't spin in huge basic
00940     // blocks that will never inline.
00941     if (Cost > (Threshold + VectorBonus))
00942       return false;
00943   }
00944 
00945   return true;
00946 }
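
The vector bonus chosen in the loop above depends only on the running ratio of vector to total instructions. A sketch of that selection; the tier values below stand in for Threshold and Threshold/2 and are assumptions:

#include <cstdio>

static int selectVectorBonus(unsigned NumInstructions,
                             unsigned NumVectorInstructions,
                             int FiftyPercentVectorBonus,
                             int TenPercentVectorBonus) {
  if (NumVectorInstructions > NumInstructions / 2)
    return FiftyPercentVectorBonus;
  if (NumVectorInstructions > NumInstructions / 10)
    return TenPercentVectorBonus;
  return 0;
}

int main() {
  // 40 of 100 instructions are vector: only the ten-percent tier applies.
  std::printf("bonus = %d\n", selectVectorBonus(100, 40, 225, 112));
  return 0;
}
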
00947 
00948 /// \brief Compute the base pointer and cumulative constant offsets for V.
00949 ///
00950 /// This strips all constant offsets off of V, leaving it the base pointer, and
00951 /// accumulates the total constant offset applied in the returned constant. It
00952 /// returns 0 if V is not a pointer, and returns the constant '0' if there are
00953 /// no constant offsets applied.
00954 ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
00955   if (!DL || !V->getType()->isPointerTy())
00956     return nullptr;
00957 
00958   unsigned IntPtrWidth = DL->getPointerSizeInBits();
00959   APInt Offset = APInt::getNullValue(IntPtrWidth);
00960 
00961   // Even though we don't look through PHI nodes, we could be called on an
00962   // instruction in an unreachable block, which may be on a cycle.
00963   SmallPtrSet<Value *, 4> Visited;
00964   Visited.insert(V);
00965   do {
00966     if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
00967       if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
00968         return nullptr;
00969       V = GEP->getPointerOperand();
00970     } else if (Operator::getOpcode(V) == Instruction::BitCast) {
00971       V = cast<Operator>(V)->getOperand(0);
00972     } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
00973       if (GA->mayBeOverridden())
00974         break;
00975       V = GA->getAliasee();
00976     } else {
00977       break;
00978     }
00979     assert(V->getType()->isPointerTy() && "Unexpected operand type!");
00980   } while (Visited.insert(V));
00981 
00982   Type *IntPtrTy = DL->getIntPtrType(V->getContext());
00983   return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset));
00984 }
00985 
00986 /// \brief Analyze a call site for potential inlining.
00987 ///
00988 /// Returns true if inlining this call is viable, and false if it is not
00989 /// viable. It computes the cost and adjusts the threshold based on numerous
00990 /// factors and heuristics. If this method returns false but the computed cost
00991 /// is below the computed threshold, then inlining was forcibly disabled by
00992 /// some artifact of the routine.
00993 bool CallAnalyzer::analyzeCall(CallSite CS) {
00994   ++NumCallsAnalyzed;
00995 
00996   // Track whether the post-inlining function would have more than one basic
00997   // block. A single basic block is often intended for inlining. Balloon the
00998   // threshold by 50% until we pass the single-BB phase.
00999   bool SingleBB = true;
01000   int SingleBBBonus = Threshold / 2;
01001   Threshold += SingleBBBonus;
01002 
01003   // Perform some tweaks to the cost and threshold based on the direct
01004   // callsite information.
01005 
01006   // We want to more aggressively inline vector-dense kernels, so up the
01007   // threshold, and we'll lower it if the % of vector instructions gets too
01008   // low.
01009   assert(NumInstructions == 0);
01010   assert(NumVectorInstructions == 0);
01011   FiftyPercentVectorBonus = Threshold;
01012   TenPercentVectorBonus = Threshold / 2;
01013 
01014   // Give out bonuses per argument, as the instructions setting them up will
01015   // be gone after inlining.
01016   for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
01017     if (DL && CS.isByValArgument(I)) {
01018       // We approximate the number of loads and stores needed by dividing the
01019       // size of the byval type by the target's pointer size.
01020       PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
01021       unsigned TypeSize = DL->getTypeSizeInBits(PTy->getElementType());
01022       unsigned PointerSize = DL->getPointerSizeInBits();
01023       // Ceiling division.
01024       unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
01025 
01026       // If it generates more than 8 stores it is likely to be expanded as an
01027       // inline memcpy so we take that as an upper bound. Otherwise we assume
01028       // one load and one store per word copied.
01029       // FIXME: The maxStoresPerMemcpy setting from the target should be used
01030       // here instead of a magic number of 8, but it's not available via
01031       // DataLayout.
01032       NumStores = std::min(NumStores, 8U);
01033 
01034       Cost -= 2 * NumStores * InlineConstants::InstrCost;
01035     } else {
01036       // For non-byval arguments subtract off one instruction per call
01037       // argument.
01038       Cost -= InlineConstants::InstrCost;
01039     }
01040   }
01041 
01042   // If there is only one call of the function, and it has internal linkage,
01043   // the cost of inlining it drops dramatically.
01044   bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneUse() &&
01045     &F == CS.getCalledFunction();
01046   if (OnlyOneCallAndLocalLinkage)
01047     Cost += InlineConstants::LastCallToStaticBonus;
01048 
01049   // If the instruction after the call, or the normal destination of the
01050   // invoke, is an unreachable instruction, the function is noreturn. As such,
01051   // there is little point in inlining this unless there is literally zero
01052   // cost.
01053   Instruction *Instr = CS.getInstruction();
01054   if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
01055     if (isa<UnreachableInst>(II->getNormalDest()->begin()))
01056       Threshold = 1;
01057   } else if (isa<UnreachableInst>(++BasicBlock::iterator(Instr)))
01058     Threshold = 1;
01059 
01060   // If this function uses the coldcc calling convention, prefer not to inline
01061   // it.
01062   if (F.getCallingConv() == CallingConv::Cold)
01063     Cost += InlineConstants::ColdccPenalty;
01064 
01065   // Check if we're done. This can happen due to bonuses and penalties.
01066   if (Cost > Threshold)
01067     return false;
01068 
01069   if (F.empty())
01070     return true;
01071 
01072   Function *Caller = CS.getInstruction()->getParent()->getParent();
01073   // Check if the caller function is recursive itself.
01074   for (User *U : Caller->users()) {
01075     CallSite Site(U);
01076     if (!Site)
01077       continue;
01078     Instruction *I = Site.getInstruction();
01079     if (I->getParent()->getParent() == Caller) {
01080       IsCallerRecursive = true;
01081       break;
01082     }
01083   }
01084 
01085   // Populate our simplified values by mapping from function arguments to call
01086   // arguments with known important simplifications.
01087   CallSite::arg_iterator CAI = CS.arg_begin();
01088   for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end();
01089        FAI != FAE; ++FAI, ++CAI) {
01090     assert(CAI != CS.arg_end());
01091     if (Constant *C = dyn_cast<Constant>(CAI))
01092       SimplifiedValues[FAI] = C;
01093 
01094     Value *PtrArg = *CAI;
01095     if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
01096       ConstantOffsetPtrs[FAI] = std::make_pair(PtrArg, C->getValue());
01097 
01098       // We can SROA any pointer arguments derived from alloca instructions.
01099       if (isa<AllocaInst>(PtrArg)) {
01100         SROAArgValues[FAI] = PtrArg;
01101         SROAArgCosts[PtrArg] = 0;
01102       }
01103     }
01104   }
01105   NumConstantArgs = SimplifiedValues.size();
01106   NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
01107   NumAllocaArgs = SROAArgValues.size();
01108 
01109   // FIXME: If a caller has multiple calls to a callee, we end up recomputing
01110   // the ephemeral values multiple times (and they're completely determined by
01111   // the callee, so this is purely duplicate work).
01112   SmallPtrSet<const Value *, 32> EphValues;
01113   CodeMetrics::collectEphemeralValues(&F, AT, EphValues);
01114 
01115   // The worklist of live basic blocks in the callee *after* inlining. We avoid
01116   // adding basic blocks of the callee which can be proven to be dead for this
01117   // particular call site in order to get more accurate cost estimates. This
01118   // requires a somewhat heavyweight iteration pattern: we need to walk the
01119   // basic blocks in a breadth-first order as we insert live successors. To
01120   // accomplish this, prioritizing for small iterations because we exit after
01121   // crossing our threshold, we use a small-size optimized SetVector.
01122   typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
01123                                   SmallPtrSet<BasicBlock *, 16> > BBSetVector;
01124   BBSetVector BBWorklist;
01125   BBWorklist.insert(&F.getEntryBlock());
01126   // Note that we *must not* cache the size, this loop grows the worklist.
01127   for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
01128     // Bail out the moment we cross the threshold. This means we'll under-count
01129     // the cost, but only when undercounting doesn't matter.
01130     if (Cost > (Threshold + VectorBonus))
01131       break;
01132 
01133     BasicBlock *BB = BBWorklist[Idx];
01134     if (BB->empty())
01135       continue;
01136 
01137     // Disallow inlining a blockaddress. A blockaddress only has defined
01138     // behavior for an indirect branch in the same function, and we do not
01139     // currently support inlining indirect branches. But, the inliner may not
01140     // see an indirect branch that ends up being dead code at a particular call
01141     // site. If the blockaddress escapes the function, e.g., via a global
01142     // variable, inlining may lead to an invalid cross-function reference.
01143     if (BB->hasAddressTaken())
01144       return false;
01145 
01146     // Analyze the cost of this block. If we blow through the threshold, this
01147     // returns false, and we can bail out.
01148     if (!analyzeBlock(BB, EphValues)) {
01149       if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
01150           HasIndirectBr)
01151         return false;
01152 
01153       // If the caller is a recursive function then we don't want to inline
01154       // functions which allocate a lot of stack space because it would increase
01155       // the caller stack usage dramatically.
01156       if (IsCallerRecursive &&
01157           AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
01158         return false;
01159 
01160       break;
01161     }
01162 
01163     TerminatorInst *TI = BB->getTerminator();
01164 
01165     // Add in the live successors by first checking whether we have terminator
01166     // that may be simplified based on the values simplified by this call.
01167     if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
01168       if (BI->isConditional()) {
01169         Value *Cond = BI->getCondition();
01170         if (ConstantInt *SimpleCond
01171               = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
01172           BBWorklist.insert(BI->getSuccessor(SimpleCond->isZero() ? 1 : 0));
01173           continue;
01174         }
01175       }
01176     } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
01177       Value *Cond = SI->getCondition();
01178       if (ConstantInt *SimpleCond
01179             = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
01180         BBWorklist.insert(SI->findCaseValue(SimpleCond).getCaseSuccessor());
01181         continue;
01182       }
01183     }
01184 
01185     // If we're unable to select a particular successor, just count all of
01186     // them.
01187     for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
01188          ++TIdx)
01189       BBWorklist.insert(TI->getSuccessor(TIdx));
01190 
01191     // If we had any successors at this point, then post-inlining is likely to
01192     // have them as well. Note that we assume any basic blocks which existed
01193     // due to branches or switches which folded above will also fold after
01194     // inlining.
01195     if (SingleBB && TI->getNumSuccessors() > 1) {
01196       // Take off the bonus we applied to the threshold.
01197       Threshold -= SingleBBBonus;
01198       SingleBB = false;
01199     }
01200   }
01201 
01202   // If this is a noduplicate call, we can still inline as long as
01203   // inlining this would cause the removal of the caller (so the instruction
01204   // is not actually duplicated, just moved).
01205   if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
01206     return false;
01207 
01208   Threshold += VectorBonus;
01209 
01210   return Cost < Threshold;
01211 }
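
Two of the adjustments in analyzeCall are pure arithmetic and worth seeing with numbers: the byval copy estimate (ceiling division by the pointer size, capped at eight words) and the final cost-versus-threshold decision. A sketch with assumed sizes and constants, not taken from the LLVM headers:

#include <algorithm>
#include <cstdio>

int main() {
  const int InstrCost = 5; // assumed InlineConstants::InstrCost
  // A hypothetical 200-byte byval struct passed on a 64-bit target.
  unsigned TypeSize = 200 * 8, PointerSize = 64;
  unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize; // ceil -> 25
  NumStores = std::min(NumStores, 8U); // larger copies lower to memcpy anyway
  int ByvalBonus = 2 * NumStores * InstrCost; // one load + one store per word
  std::printf("byval argument bonus: %d\n", ByvalBonus); // 80

  // The final decision is a strict comparison once the vector bonus lands.
  int Cost = 215, Threshold = 225, VectorBonus = 0;
  std::printf("inline viable: %s\n",
              Cost < Threshold + VectorBonus ? "yes" : "no");
  return 0;
}
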
01212 
01213 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
01214 /// \brief Dump stats about this call's analysis.
01215 void CallAnalyzer::dump() {
01216 #define DEBUG_PRINT_STAT(x) dbgs() << "      " #x ": " << x << "\n"
01217   DEBUG_PRINT_STAT(NumConstantArgs);
01218   DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
01219   DEBUG_PRINT_STAT(NumAllocaArgs);
01220   DEBUG_PRINT_STAT(NumConstantPtrCmps);
01221   DEBUG_PRINT_STAT(NumConstantPtrDiffs);
01222   DEBUG_PRINT_STAT(NumInstructionsSimplified);
01223   DEBUG_PRINT_STAT(SROACostSavings);
01224   DEBUG_PRINT_STAT(SROACostSavingsLost);
01225   DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
01226   DEBUG_PRINT_STAT(Cost);
01227   DEBUG_PRINT_STAT(Threshold);
01228   DEBUG_PRINT_STAT(VectorBonus);
01229 #undef DEBUG_PRINT_STAT
01230 }
01231 #endif
01232 
01233 INITIALIZE_PASS_BEGIN(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis",
01234                       true, true)
01235 INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
01236 INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
01237 INITIALIZE_PASS_END(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis",
01238                     true, true)
01239 
01240 char InlineCostAnalysis::ID = 0;
01241 
01242 InlineCostAnalysis::InlineCostAnalysis() : CallGraphSCCPass(ID) {}
01243 
01244 InlineCostAnalysis::~InlineCostAnalysis() {}
01245 
01246 void InlineCostAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
01247   AU.setPreservesAll();
01248   AU.addRequired<AssumptionTracker>();
01249   AU.addRequired<TargetTransformInfo>();
01250   CallGraphSCCPass::getAnalysisUsage(AU);
01251 }
01252 
01253 bool InlineCostAnalysis::runOnSCC(CallGraphSCC &SCC) {
01254   TTI = &getAnalysis<TargetTransformInfo>();
01255   AT = &getAnalysis<AssumptionTracker>();
01256   return false;
01257 }
01258 
01259 InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, int Threshold) {
01260   return getInlineCost(CS, CS.getCalledFunction(), Threshold);
01261 }
01262 
01263 /// \brief Test whether two functions either both have or both lack the given
01264 ///        attribute.
01265 static bool attributeMatches(Function *F1, Function *F2,
01266                              Attribute::AttrKind Attr) {
01267   return F1->hasFnAttribute(Attr) == F2->hasFnAttribute(Attr);
01268 }
01269 
01270 /// \brief Test that there are no attribute conflicts between Caller and Callee
01271 ///        that prevent inlining.
01272 static bool functionsHaveCompatibleAttributes(Function *Caller,
01273                                               Function *Callee) {
01274   return attributeMatches(Caller, Callee, Attribute::SanitizeAddress) &&
01275          attributeMatches(Caller, Callee, Attribute::SanitizeMemory) &&
01276          attributeMatches(Caller, Callee, Attribute::SanitizeThread);
01277 }
01278 
01279 InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee,
01280                                              int Threshold) {
01281   // Cannot inline indirect calls.
01282   if (!Callee)
01283     return llvm::InlineCost::getNever();
01284 
01285   // Calls to functions with always-inline attributes should be inlined
01286   // whenever possible.
01287   if (CS.hasFnAttr(Attribute::AlwaysInline)) {
01288     if (isInlineViable(*Callee))
01289       return llvm::InlineCost::getAlways();
01290     return llvm::InlineCost::getNever();
01291   }
01292 
01293   // Never inline functions with conflicting attributes (unless callee has
01294   // always-inline attribute).
01295   if (!functionsHaveCompatibleAttributes(CS.getCaller(), Callee))
01296     return llvm::InlineCost::getNever();
01297 
01298   // Don't inline this call if the caller has the optnone attribute.
01299   if (CS.getCaller()->hasFnAttribute(Attribute::OptimizeNone))
01300     return llvm::InlineCost::getNever();
01301 
01302   // Don't inline functions which can be redefined at link-time to mean
01303   // something else.  Don't inline functions marked noinline or call sites
01304   // marked noinline.
01305   if (Callee->mayBeOverridden() ||
01306       Callee->hasFnAttribute(Attribute::NoInline) || CS.isNoInline())
01307     return llvm::InlineCost::getNever();
01308 
01309   DEBUG(llvm::dbgs() << "      Analyzing call of " << Callee->getName()
01310         << "...\n");
01311 
01312   CallAnalyzer CA(Callee->getDataLayout(), *TTI, AT, *Callee, Threshold);
01313   bool ShouldInline = CA.analyzeCall(CS);
01314 
01315   DEBUG(CA.dump());
01316 
01317   // Check if there was a reason to force inlining or no inlining.
01318   if (!ShouldInline && CA.getCost() < CA.getThreshold())
01319     return InlineCost::getNever();
01320   if (ShouldInline && CA.getCost() >= CA.getThreshold())
01321     return InlineCost::getAlways();
01322 
01323   return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
01324 }
01325 
01326 bool InlineCostAnalysis::isInlineViable(Function &F) {
01327   bool ReturnsTwice =
01328     F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
01329                                    Attribute::ReturnsTwice);
01330   for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
01331     // Disallow inlining of functions which contain indirect branches or
01332     // blockaddresses.
01333     if (isa<IndirectBrInst>(BI->getTerminator()) || BI->hasAddressTaken())
01334       return false;
01335 
01336     for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
01337          ++II) {
01338       CallSite CS(II);
01339       if (!CS)
01340         continue;
01341 
01342       // Disallow recursive calls.
01343       if (&F == CS.getCalledFunction())
01344         return false;
01345 
01346       // Disallow calls which expose returns-twice to a function not previously
01347       // attributed as such.
01348       if (!ReturnsTwice && CS.isCall() &&
01349           cast<CallInst>(CS.getInstruction())->canReturnTwice())
01350         return false;
01351     }
01352   }
01353 
01354   return true;
01355 }