LLVM API Documentation
//===- BasicTargetTransformInfo.cpp - Basic target-independent TTI impl ---===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// This file provides the implementation of a basic TargetTransformInfo pass
/// predicated on the target abstractions present in the target independent
/// code generator. It uses these (primarily TargetLowering) to model as much
/// of the TTI query interface as possible. It is included by most targets so
/// that they can specialize only a small subset of the query space.
///
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/Passes.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <utility>
using namespace llvm;

static cl::opt<unsigned>
PartialUnrollingThreshold("partial-unrolling-threshold", cl::init(0),
                          cl::desc("Threshold for partial unrolling"),
                          cl::Hidden);

#define DEBUG_TYPE "basictti"

namespace {

class BasicTTI final : public ImmutablePass, public TargetTransformInfo {
  const TargetMachine *TM;

  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
  /// are set if the result needs to be inserted and/or extracted from vectors.
  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;

  /// Estimate the cost overhead of SK_Alternate shuffle.
  unsigned getAltShuffleOverhead(Type *Ty) const;

  const TargetLoweringBase *getTLI() const {
    return TM->getSubtargetImpl()->getTargetLowering();
  }

public:
  BasicTTI() : ImmutablePass(ID), TM(nullptr) {
    llvm_unreachable("This pass cannot be directly constructed");
  }

  BasicTTI(const TargetMachine *TM) : ImmutablePass(ID), TM(TM) {
    initializeBasicTTIPass(*PassRegistry::getPassRegistry());
  }

  void initializePass() override {
    pushTTIStack(this);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    TargetTransformInfo::getAnalysisUsage(AU);
  }

  /// Pass identification.
  static char ID;

  /// Provide necessary pointer adjustments for the two base classes.
  void *getAdjustedAnalysisPointer(const void *ID) override {
    if (ID == &TargetTransformInfo::ID)
      return (TargetTransformInfo*)this;
    return this;
  }

  bool hasBranchDivergence() const override;

  /// \name Scalar TTI Implementations
  /// @{

  bool isLegalAddImmediate(int64_t imm) const override;
  bool isLegalICmpImmediate(int64_t imm) const override;
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
                             int64_t BaseOffset, bool HasBaseReg,
                             int64_t Scale) const override;
  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                           int64_t BaseOffset, bool HasBaseReg,
                           int64_t Scale) const override;
  bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
  bool isTypeLegal(Type *Ty) const override;
  unsigned getJumpBufAlignment() const override;
  unsigned getJumpBufSize() const override;
  bool shouldBuildLookupTables() const override;
  bool haveFastSqrt(Type *Ty) const override;
  void getUnrollingPreferences(const Function *F, Loop *L,
                               UnrollingPreferences &UP) const override;

  /// @}

  /// \name Vector TTI Implementations
  /// @{

  unsigned getNumberOfRegisters(bool Vector) const override;
  unsigned getMaxInterleaveFactor() const override;
  unsigned getRegisterBitWidth(bool Vector) const override;
  unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind,
                                  OperandValueKind, OperandValueProperties,
                                  OperandValueProperties) const override;
  unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
                          int Index, Type *SubTp) const override;
  unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
                            Type *Src) const override;
  unsigned getCFInstrCost(unsigned Opcode) const override;
  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                              Type *CondTy) const override;
  unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
                              unsigned Index) const override;
  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                           unsigned AddressSpace) const override;
  unsigned getIntrinsicInstrCost(Intrinsic::ID, Type *RetTy,
                                 ArrayRef<Type*> Tys) const override;
  unsigned getNumberOfParts(Type *Tp) const override;
  unsigned getAddressComputationCost(Type *Ty, bool IsComplex) const override;
  unsigned getReductionCost(unsigned Opcode, Type *Ty,
                            bool IsPairwise) const override;

  /// @}
};

}

INITIALIZE_AG_PASS(BasicTTI, TargetTransformInfo, "basictti",
                   "Target independent code generator's TTI", true, true,
                   false)
char BasicTTI::ID = 0;

ImmutablePass *
llvm::createBasicTargetTransformInfoPass(const TargetMachine *TM) {
  return new BasicTTI(TM);
}
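// A minimal usage sketch for the factory above, assuming a legacy
// PassManager pipeline of this era; the addTTI wrapper and the PM/TM
// variables are illustrative, not part of this file:
//
//   void addTTI(llvm::PassManagerBase &PM, const llvm::TargetMachine *TM) {
//     // BasicTTI is registered in the TargetTransformInfo analysis group
//     // (see INITIALIZE_AG_PASS above), so TTI queries from later passes
//     // resolve to it unless a target pushes its own implementation.
//     PM.add(llvm::createBasicTargetTransformInfoPass(TM));
//   }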
bool BasicTTI::hasBranchDivergence() const { return false; }

bool BasicTTI::isLegalAddImmediate(int64_t imm) const {
  return getTLI()->isLegalAddImmediate(imm);
}

bool BasicTTI::isLegalICmpImmediate(int64_t imm) const {
  return getTLI()->isLegalICmpImmediate(imm);
}

bool BasicTTI::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
                                     int64_t BaseOffset, bool HasBaseReg,
                                     int64_t Scale) const {
  TargetLoweringBase::AddrMode AM;
  AM.BaseGV = BaseGV;
  AM.BaseOffs = BaseOffset;
  AM.HasBaseReg = HasBaseReg;
  AM.Scale = Scale;
  return getTLI()->isLegalAddressingMode(AM, Ty);
}
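// A usage sketch for the query above: testing whether an address of the
// form "GV + 16 + BaseReg + 4*Index" is legal for i32 accesses. TTI, GV,
// and Ctx are assumed to be in scope; the implementation fills
// TargetLoweringBase::AddrMode exactly as shown above.
//
//   bool Legal = TTI.isLegalAddressingMode(llvm::Type::getInt32Ty(Ctx),
//                                          /*BaseGV=*/GV, /*BaseOffset=*/16,
//                                          /*HasBaseReg=*/true, /*Scale=*/4);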
int BasicTTI::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                   int64_t BaseOffset, bool HasBaseReg,
                                   int64_t Scale) const {
  TargetLoweringBase::AddrMode AM;
  AM.BaseGV = BaseGV;
  AM.BaseOffs = BaseOffset;
  AM.HasBaseReg = HasBaseReg;
  AM.Scale = Scale;
  return getTLI()->getScalingFactorCost(AM, Ty);
}

bool BasicTTI::isTruncateFree(Type *Ty1, Type *Ty2) const {
  return getTLI()->isTruncateFree(Ty1, Ty2);
}

bool BasicTTI::isTypeLegal(Type *Ty) const {
  EVT T = getTLI()->getValueType(Ty);
  return getTLI()->isTypeLegal(T);
}

unsigned BasicTTI::getJumpBufAlignment() const {
  return getTLI()->getJumpBufAlignment();
}

unsigned BasicTTI::getJumpBufSize() const {
  return getTLI()->getJumpBufSize();
}

bool BasicTTI::shouldBuildLookupTables() const {
  const TargetLoweringBase *TLI = getTLI();
  return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
         TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
}

bool BasicTTI::haveFastSqrt(Type *Ty) const {
  const TargetLoweringBase *TLI = getTLI();
  EVT VT = TLI->getValueType(Ty);
  return TLI->isTypeLegal(VT) && TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
}

void BasicTTI::getUnrollingPreferences(const Function *F, Loop *L,
                                       UnrollingPreferences &UP) const {
  // This unrolling functionality is target independent, but to provide some
  // motivation for its intended use, for x86:

  // According to the Intel 64 and IA-32 Architectures Optimization Reference
  // Manual, Intel Core models and later have a loop stream detector (and
  // associated uop queue) that can benefit from partial unrolling.
  // The relevant requirements are:
  //  - The loop must have no more than 4 (8 for Nehalem and later) branches
  //    taken, and none of them may be calls.
  //  - The loop can have no more than 18 (28 for Nehalem and later) uops.

  // According to the Software Optimization Guide for AMD Family 15h
  // Processors, models 30h-4fh (Steamroller and later) have a loop predictor
  // and loop buffer which can benefit from partial unrolling.
  // The relevant requirements are:
  //  - The loop must have fewer than 16 branches.
  //  - The loop must have fewer than 40 uops in all executed loop branches.

  // The number of taken branches in a loop is hard to estimate here, and
  // benchmarking has revealed that it is better not to be conservative when
  // estimating the branch count. As a result, we'll ignore the branch limits
  // until someone finds a case where it matters in practice.

  unsigned MaxOps;
  const TargetSubtargetInfo *ST = &TM->getSubtarget<TargetSubtargetInfo>(F);
  if (PartialUnrollingThreshold.getNumOccurrences() > 0)
    MaxOps = PartialUnrollingThreshold;
  else if (ST->getSchedModel().LoopMicroOpBufferSize > 0)
    MaxOps = ST->getSchedModel().LoopMicroOpBufferSize;
  else
    return;

  // Scan the loop: don't unroll loops with calls.
  for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
       I != E; ++I) {
    BasicBlock *BB = *I;

    for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J)
      if (isa<CallInst>(J) || isa<InvokeInst>(J)) {
        ImmutableCallSite CS(J);
        if (const Function *F = CS.getCalledFunction()) {
          if (!TopTTI->isLoweredToCall(F))
            continue;
        }

        return;
      }
  }

  // Enable runtime and partial unrolling up to the specified size.
  UP.Partial = UP.Runtime = true;
  UP.PartialThreshold = UP.PartialOptSizeThreshold = MaxOps;
}
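// To make the threshold resolution above concrete: on a subtarget whose
// scheduling model reports LoopMicroOpBufferSize = 28 (roughly a
// Nehalem-class loop buffer), a call-free loop gets UP.Partial and
// UP.Runtime set with a partial-unrolling threshold of 28 uops; passing
// -partial-unrolling-threshold=40 on the command line (the hidden cl::opt
// declared at the top of this file) overrides the model-derived value, and
// a model reporting a zero buffer size leaves the preferences untouched.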
//===----------------------------------------------------------------------===//
//
// Calls used by the vectorizers.
//
//===----------------------------------------------------------------------===//

unsigned BasicTTI::getScalarizationOverhead(Type *Ty, bool Insert,
                                            bool Extract) const {
  assert(Ty->isVectorTy() && "Can only scalarize vectors");
  unsigned Cost = 0;

  for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
    if (Insert)
      Cost += TopTTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
    if (Extract)
      Cost += TopTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
  }

  return Cost;
}
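// A worked example of the overhead computed above, assuming <4 x float> is
// legal so each insert/extract costs 1 (getVectorInstrCost returns
// LT.first == 1 below):
//
//   getScalarizationOverhead(<4 x float>, /*Insert=*/true, /*Extract=*/true)
//     = 4 insertelement + 4 extractelement = 8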
unsigned BasicTTI::getNumberOfRegisters(bool Vector) const {
  return 1;
}

unsigned BasicTTI::getRegisterBitWidth(bool Vector) const {
  return 32;
}

unsigned BasicTTI::getMaxInterleaveFactor() const {
  return 1;
}

unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
                                          OperandValueKind, OperandValueKind,
                                          OperandValueProperties,
                                          OperandValueProperties) const {
  // Check if any of the operands are vector operands.
  const TargetLoweringBase *TLI = getTLI();
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");

  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);

  bool IsFloat = Ty->getScalarType()->isFloatingPointTy();
  // Assume that floating point arithmetic operations cost twice as much as
  // integer operations.
  unsigned OpCost = (IsFloat ? 2 : 1);

  if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
    // The operation is legal. Assume it costs 1.
    // If the type is split to multiple registers, assume that there is some
    // overhead to this.
    // TODO: Once we have extract/insert subvector cost we need to use them.
    if (LT.first > 1)
      return LT.first * 2 * OpCost;
    return LT.first * 1 * OpCost;
  }

  if (!TLI->isOperationExpand(ISD, LT.second)) {
    // If the operation is custom lowered, then assume that the code is twice
    // as expensive.
    return LT.first * 2 * OpCost;
  }

  // Else, assume that we need to scalarize this op.
  if (Ty->isVectorTy()) {
    unsigned Num = Ty->getVectorNumElements();
    unsigned Cost = TopTTI->getArithmeticInstrCost(Opcode, Ty->getScalarType());
    // Return the cost of multiple scalar invocations plus the cost of
    // inserting and extracting the values.
    return getScalarizationOverhead(Ty, true, true) + Num * Cost;
  }

  // We don't know anything about this scalar instruction.
  return OpCost;
}
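// A worked example of the branches above, assuming a target with legal
// 128-bit vector registers:
//
//   fadd <8 x float>: the type splits into two <4 x float> parts, so
//   LT.first == 2; FADD is legal on v4f32 and the operand is floating point
//   (OpCost == 2), so the split-type branch returns 2 * 2 * 2 = 8.
//
//   udiv <4 x i32>: if UDIV must expand on v4i32, the scalarization branch
//   returns getScalarizationOverhead(v4i32, true, true) plus 4 times the
//   scalar udiv cost.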
unsigned BasicTTI::getAltShuffleOverhead(Type *Ty) const {
  assert(Ty->isVectorTy() && "Can only shuffle vectors");
  unsigned Cost = 0;
  // Shuffle cost is equal to the cost of extracting elements from the
  // arguments plus the cost of inserting them onto the result vector.

  // e.g. a <4 x float> shuffle with a mask of <0,5,2,7> extracts from index
  // 0 of the first vector, index 1 of the second vector, index 2 of the
  // first vector, and finally index 3 of the second vector, and inserts the
  // results at indices <0,1,2,3> of the result vector.
  for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
    Cost += TopTTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
    Cost += TopTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
  }
  return Cost;
}

unsigned BasicTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
                                  Type *SubTp) const {
  if (Kind == SK_Alternate) {
    return getAltShuffleOverhead(Tp);
  }
  return 1;
}

unsigned BasicTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
                                    Type *Src) const {
  const TargetLoweringBase *TLI = getTLI();
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");

  std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(Src);
  std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(Dst);

  // Check for NOOP conversions.
  if (SrcLT.first == DstLT.first &&
      SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {

    // Bitcasts between types that are legalized to the same type are free.
    if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc)
      return 0;
  }

  if (Opcode == Instruction::Trunc &&
      TLI->isTruncateFree(SrcLT.second, DstLT.second))
    return 0;

  if (Opcode == Instruction::ZExt &&
      TLI->isZExtFree(SrcLT.second, DstLT.second))
    return 0;

  // If the cast is marked as legal (or promote) then assume low cost.
  if (SrcLT.first == DstLT.first &&
      TLI->isOperationLegalOrPromote(ISD, DstLT.second))
    return 1;

  // Handle scalar conversions.
  if (!Src->isVectorTy() && !Dst->isVectorTy()) {

    // Scalar bitcasts are usually free.
    if (Opcode == Instruction::BitCast)
      return 0;

    // Just check the op cost. If the operation is legal then assume it
    // costs 1.
    if (!TLI->isOperationExpand(ISD, DstLT.second))
      return 1;

    // Assume that illegal scalar instructions are expensive.
    return 4;
  }

  // Check vector-to-vector casts.
  if (Dst->isVectorTy() && Src->isVectorTy()) {

    // If the cast is between same-sized registers, then the check is simple.
    if (SrcLT.first == DstLT.first &&
        SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {

      // Assume that Zext is done using AND.
      if (Opcode == Instruction::ZExt)
        return 1;

      // Assume that sext is done using SHL and SRA.
      if (Opcode == Instruction::SExt)
        return 2;

      // Just check the op cost. If the operation is legal then assume it
      // costs 1 and multiply by the type-legalization overhead.
      if (!TLI->isOperationExpand(ISD, DstLT.second))
        return SrcLT.first * 1;
    }

    // If we are converting vectors and the operation is illegal, or
    // if the vectors are legalized to different types, estimate the
    // scalarization costs.
    unsigned Num = Dst->getVectorNumElements();
    unsigned Cost = TopTTI->getCastInstrCost(Opcode, Dst->getScalarType(),
                                             Src->getScalarType());

    // Return the cost of multiple scalar invocations plus the cost of
    // inserting and extracting the values.
    return getScalarizationOverhead(Dst, true, true) + Num * Cost;
  }

  // We already handled vector-to-vector and scalar-to-scalar conversions.
  // This is where we handle bitcasts between vectors and scalars. We need to
  // assume that the conversion is scalarized in one way or another.
  if (Opcode == Instruction::BitCast)
    // Illegal bitcasts are done by storing and loading from a stack slot.
    return (Src->isVectorTy() ? getScalarizationOverhead(Src, false, true) : 0) +
           (Dst->isVectorTy() ? getScalarizationOverhead(Dst, true, false) : 0);

  llvm_unreachable("Unhandled cast");
}
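// A worked example of the same-size vector case above, assuming a target
// that promotes <4 x i16> to <4 x i32> and does not mark vector SIGN_EXTEND
// as Legal or Promote (so the earlier low-cost branch does not fire):
//
//   sext <4 x i16> to <4 x i32>: both sides legalize to one v4i32 register
//   (SrcLT.first == DstLT.first == 1, equal bit widths), so the SExt branch
//   charges 2 for the SHL + SRA pair used to re-extend the promoted lanes.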
unsigned BasicTTI::getCFInstrCost(unsigned Opcode) const {
  // Branches are assumed to be predicted.
  return 0;
}

unsigned BasicTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                      Type *CondTy) const {
  const TargetLoweringBase *TLI = getTLI();
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");

  // Selects on vectors are actually vector selects.
  if (ISD == ISD::SELECT) {
    assert(CondTy && "CondTy must exist");
    if (CondTy->isVectorTy())
      ISD = ISD::VSELECT;
  }

  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);

  if (!(ValTy->isVectorTy() && !LT.second.isVector()) &&
      !TLI->isOperationExpand(ISD, LT.second)) {
    // The operation is legal. Assume it costs 1. Multiply
    // by the type-legalization overhead.
    return LT.first * 1;
  }

  // Otherwise, assume that the cast is scalarized.
  if (ValTy->isVectorTy()) {
    unsigned Num = ValTy->getVectorNumElements();
    if (CondTy)
      CondTy = CondTy->getScalarType();
    unsigned Cost = TopTTI->getCmpSelInstrCost(Opcode, ValTy->getScalarType(),
                                               CondTy);

    // Return the cost of multiple scalar invocations plus the cost of
    // inserting and extracting the values.
    return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
  }

  // Unknown scalar opcode.
  return 1;
}

unsigned BasicTTI::getVectorInstrCost(unsigned Opcode, Type *Val,
                                      unsigned Index) const {
  std::pair<unsigned, MVT> LT =
      getTLI()->getTypeLegalizationCost(Val->getScalarType());

  return LT.first;
}

unsigned BasicTTI::getMemoryOpCost(unsigned Opcode, Type *Src,
                                   unsigned Alignment,
                                   unsigned AddressSpace) const {
  assert(!Src->isVoidTy() && "Invalid type");
  std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Src);

  // Assume that all loads of legal types cost 1.
  unsigned Cost = LT.first;

  if (Src->isVectorTy() &&
      Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()) {
    // This is a vector load that legalizes to a larger type than the vector
    // itself. Unless the corresponding extending load or truncating store is
    // legal, this will scalarize.
    TargetLowering::LegalizeAction LA = TargetLowering::Expand;
    EVT MemVT = getTLI()->getValueType(Src, true);
    if (MemVT.isSimple() && MemVT != MVT::Other) {
      if (Opcode == Instruction::Store)
        LA = getTLI()->getTruncStoreAction(LT.second, MemVT.getSimpleVT());
      else
        LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, MemVT.getSimpleVT());
    }

    if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) {
      // This is a vector load/store for some illegal type that is scalarized.
      // We must account for the cost of building or decomposing the vector.
      Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store,
                                       Opcode == Instruction::Store);
    }
  }

  return Cost;
}
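// A worked example of the extending-load path above, assuming a target
// whose smallest legal vector type is 128 bits wide:
//
//   load <4 x i8>: the 32-bit vector legalizes to a wider register type, so
//   the code checks getLoadExtAction(ISD::EXTLOAD, v4i8). If that extload
//   is neither Legal nor Custom, the returned cost is LT.first plus the
//   insertion overhead of rebuilding the vector from scalar loads
//   (Insert == true, Extract == false, since Opcode != Instruction::Store).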
unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
                                         ArrayRef<Type *> Tys) const {
  unsigned ISD = 0;
  switch (IID) {
  default: {
    // Assume that we need to scalarize this intrinsic.
    unsigned ScalarizationCost = 0;
    unsigned ScalarCalls = 1;
    if (RetTy->isVectorTy()) {
      ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
      ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
    }
    for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
      if (Tys[i]->isVectorTy()) {
        ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
        ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements());
      }
    }

    return ScalarCalls + ScalarizationCost;
  }
  // Look for intrinsics that can be lowered directly or turned into a scalar
  // intrinsic call.
  case Intrinsic::sqrt:      ISD = ISD::FSQRT;      break;
  case Intrinsic::sin:       ISD = ISD::FSIN;       break;
  case Intrinsic::cos:       ISD = ISD::FCOS;       break;
  case Intrinsic::exp:       ISD = ISD::FEXP;       break;
  case Intrinsic::exp2:      ISD = ISD::FEXP2;      break;
  case Intrinsic::log:       ISD = ISD::FLOG;       break;
  case Intrinsic::log10:     ISD = ISD::FLOG10;     break;
  case Intrinsic::log2:      ISD = ISD::FLOG2;      break;
  case Intrinsic::fabs:      ISD = ISD::FABS;       break;
  case Intrinsic::copysign:  ISD = ISD::FCOPYSIGN;  break;
  case Intrinsic::floor:     ISD = ISD::FFLOOR;     break;
  case Intrinsic::ceil:      ISD = ISD::FCEIL;      break;
  case Intrinsic::trunc:     ISD = ISD::FTRUNC;     break;
  case Intrinsic::nearbyint: ISD = ISD::FNEARBYINT; break;
  case Intrinsic::rint:      ISD = ISD::FRINT;      break;
  case Intrinsic::round:     ISD = ISD::FROUND;     break;
  case Intrinsic::pow:       ISD = ISD::FPOW;       break;
  case Intrinsic::fma:       ISD = ISD::FMA;        break;
  case Intrinsic::fmuladd:   ISD = ISD::FMA;        break;
  // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.
  case Intrinsic::lifetime_start:
  case Intrinsic::lifetime_end:
    return 0;
  }

  const TargetLoweringBase *TLI = getTLI();
  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(RetTy);

  if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
    // The operation is legal. Assume it costs 1.
    // If the type is split to multiple registers, assume that there is some
    // overhead to this.
    // TODO: Once we have extract/insert subvector cost we need to use them.
    if (LT.first > 1)
      return LT.first * 2;
    return LT.first * 1;
  }

  if (!TLI->isOperationExpand(ISD, LT.second)) {
    // If the operation is custom lowered, then assume that the code is twice
    // as expensive.
    return LT.first * 2;
  }

  // If we can't lower fmuladd into an FMA, estimate the cost as a floating
  // point mul followed by an add.
  if (IID == Intrinsic::fmuladd)
    return TopTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) +
           TopTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy);

  // Else, assume that we need to scalarize this intrinsic. For math builtins
  // this will emit a costly libcall, adding call overhead and spills. Make it
  // very expensive.
  if (RetTy->isVectorTy()) {
    unsigned Num = RetTy->getVectorNumElements();
    unsigned Cost = TopTTI->getIntrinsicInstrCost(IID, RetTy->getScalarType(),
                                                  Tys);
    return 10 * Cost * Num;
  }

  // This is going to be turned into a library call, make it expensive.
  return 10;
}

unsigned BasicTTI::getNumberOfParts(Type *Tp) const {
  std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Tp);
  return LT.first;
}

unsigned BasicTTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
  return 0;
}

unsigned BasicTTI::getReductionCost(unsigned Opcode, Type *Ty,
                                    bool IsPairwise) const {
  assert(Ty->isVectorTy() && "Expect a vector type");
  unsigned NumVecElts = Ty->getVectorNumElements();
  unsigned NumReduxLevels = Log2_32(NumVecElts);
  unsigned ArithCost = NumReduxLevels *
                       TopTTI->getArithmeticInstrCost(Opcode, Ty);
  // Assume the pairwise shuffles add a cost.
  unsigned ShuffleCost =
      NumReduxLevels * (IsPairwise + 1) *
      TopTTI->getShuffleCost(SK_ExtractSubvector, Ty, NumVecElts / 2, Ty);
  return ShuffleCost + ArithCost + getScalarizationOverhead(Ty, false, true);
}
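// A worked example of the reduction formula above, assuming unit costs for
// the arithmetic, shuffle, and extract queries: for an fadd reduction of
// <8 x float>:
//
//   NumReduxLevels   = Log2_32(8) = 3
//   ArithCost        = 3 * 1 = 3
//   ShuffleCost      = 3 * (IsPairwise + 1) * 1 = 3 (split) or 6 (pairwise)
//   extract overhead = getScalarizationOverhead(v8f32, false, true) = 8
//   total            = 14 for a split reduction, 17 for a pairwise one.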