LLVM API Documentation

PPCTargetTransformInfo.cpp
Go to the documentation of this file.
00001 //===-- PPCTargetTransformInfo.cpp - PPC specific TTI pass ----------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 /// \file
00010 /// This file implements a TargetTransformInfo analysis pass specific to the
00011 /// PPC target machine. It uses the target's detailed information to provide
00012 /// more precise answers to certain TTI queries, while letting the target
00013 /// independent and default TTI implementations handle the rest.
00014 ///
00015 //===----------------------------------------------------------------------===//
00016 
00017 #include "PPC.h"
00018 #include "PPCTargetMachine.h"
00019 #include "llvm/Analysis/TargetTransformInfo.h"
00020 #include "llvm/Support/CommandLine.h"
00021 #include "llvm/Support/Debug.h"
00022 #include "llvm/Target/CostTable.h"
00023 #include "llvm/Target/TargetLowering.h"
00024 using namespace llvm;
00025 
00026 #define DEBUG_TYPE "ppctti"
00027 
00028 static cl::opt<bool> DisablePPCConstHoist("disable-ppc-constant-hoisting",
00029 cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden);
00030 
00031 // Declare the pass initialization routine locally as target-specific passes
00032 // don't have a target-wide initialization entry point, and so we rely on the
00033 // pass constructor initialization.
00034 namespace llvm {
00035 void initializePPCTTIPass(PassRegistry &);
00036 }
00037 
00038 namespace {
00039 
00040 class PPCTTI final : public ImmutablePass, public TargetTransformInfo {
00041   const TargetMachine *TM;
00042   const PPCSubtarget *ST;
00043   const PPCTargetLowering *TLI;
00044 
00045 public:
00046   PPCTTI() : ImmutablePass(ID), ST(nullptr), TLI(nullptr) {
00047     llvm_unreachable("This pass cannot be directly constructed");
00048   }
00049 
00050   PPCTTI(const PPCTargetMachine *TM)
00051       : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
00052         TLI(TM->getSubtargetImpl()->getTargetLowering()) {
00053     initializePPCTTIPass(*PassRegistry::getPassRegistry());
00054   }
00055 
00056   void initializePass() override {
00057     pushTTIStack(this);
00058   }
00059 
00060   void getAnalysisUsage(AnalysisUsage &AU) const override {
00061     TargetTransformInfo::getAnalysisUsage(AU);
00062   }
00063 
00064   /// Pass identification.
00065   static char ID;
00066 
00067   /// Provide necessary pointer adjustments for the two base classes.
00068   void *getAdjustedAnalysisPointer(const void *ID) override {
00069     if (ID == &TargetTransformInfo::ID)
00070       return (TargetTransformInfo*)this;
00071     return this;
00072   }
00073 
00074   /// \name Scalar TTI Implementations
00075   /// @{
00076   unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override;
00077 
00078   unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
00079                          Type *Ty) const override;
00080   unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
00081                          Type *Ty) const override;
00082 
00083   PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override;
00084   void getUnrollingPreferences(const Function *F, Loop *L,
00085                                UnrollingPreferences &UP) const override;
00086 
00087   /// @}
00088 
00089   /// \name Vector TTI Implementations
00090   /// @{
00091 
00092   unsigned getNumberOfRegisters(bool Vector) const override;
00093   unsigned getRegisterBitWidth(bool Vector) const override;
00094   unsigned getMaxInterleaveFactor() const override;
00095   unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind,
00096                                   OperandValueKind, OperandValueProperties,
00097                                   OperandValueProperties) const override;
00098   unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
00099                           int Index, Type *SubTp) const override;
00100   unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
00101                             Type *Src) const override;
00102   unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
00103                               Type *CondTy) const override;
00104   unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
00105                               unsigned Index) const override;
00106   unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
00107                            unsigned AddressSpace) const override;
00108 
00109   /// @}
00110 };
00111 
00112 } // end anonymous namespace
00113 
00114 INITIALIZE_AG_PASS(PPCTTI, TargetTransformInfo, "ppctti",
00115                    "PPC Target Transform Info", true, true, false)
00116 char PPCTTI::ID = 0;
00117 
00118 ImmutablePass *
00119 llvm::createPPCTargetTransformInfoPass(const PPCTargetMachine *TM) {
00120   return new PPCTTI(TM);
00121 }
00122 
00123 
00124 //===----------------------------------------------------------------------===//
00125 //
00126 // PPC cost model.
00127 //
00128 //===----------------------------------------------------------------------===//
00129 
00130 PPCTTI::PopcntSupportKind PPCTTI::getPopcntSupport(unsigned TyWidth) const {
00131   assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
00132   if (ST->hasPOPCNTD() && TyWidth <= 64)
00133     return PSK_FastHardware;
00134   return PSK_Software;
00135 }
00136 
00137 unsigned PPCTTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
00138   if (DisablePPCConstHoist)
00139     return TargetTransformInfo::getIntImmCost(Imm, Ty);
00140 
00141   assert(Ty->isIntegerTy());
00142 
00143   unsigned BitSize = Ty->getPrimitiveSizeInBits();
00144   if (BitSize == 0)
00145     return ~0U;
00146 
00147   if (Imm == 0)
00148     return TCC_Free;
00149 
00150   if (Imm.getBitWidth() <= 64) {
00151     if (isInt<16>(Imm.getSExtValue()))
00152       return TCC_Basic;
00153 
00154     if (isInt<32>(Imm.getSExtValue())) {
00155       // A constant that can be materialized using lis.
00156       if ((Imm.getZExtValue() & 0xFFFF) == 0)
00157         return TCC_Basic;
00158 
00159       return 2 * TCC_Basic;
00160     }
00161   }
00162 
00163   return 4 * TCC_Basic;
00164 }
00165 
00166 unsigned PPCTTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
00167                                const APInt &Imm, Type *Ty) const {
00168   if (DisablePPCConstHoist)
00169     return TargetTransformInfo::getIntImmCost(IID, Idx, Imm, Ty);
00170 
00171   assert(Ty->isIntegerTy());
00172 
00173   unsigned BitSize = Ty->getPrimitiveSizeInBits();
00174   if (BitSize == 0)
00175     return ~0U;
00176 
00177   switch (IID) {
00178   default: return TCC_Free;
00179   case Intrinsic::sadd_with_overflow:
00180   case Intrinsic::uadd_with_overflow:
00181   case Intrinsic::ssub_with_overflow:
00182   case Intrinsic::usub_with_overflow:
00183     if ((Idx == 1) && Imm.getBitWidth() <= 64 && isInt<16>(Imm.getSExtValue()))
00184       return TCC_Free;
00185     break;
00186   }
00187   return PPCTTI::getIntImmCost(Imm, Ty);
00188 }
00189 
00190 unsigned PPCTTI::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
00191                                Type *Ty) const {
00192   if (DisablePPCConstHoist)
00193     return TargetTransformInfo::getIntImmCost(Opcode, Idx, Imm, Ty);
00194 
00195   assert(Ty->isIntegerTy());
00196 
00197   unsigned BitSize = Ty->getPrimitiveSizeInBits();
00198   if (BitSize == 0)
00199     return ~0U;
00200 
00201   unsigned ImmIdx = ~0U;
00202   bool ShiftedFree = false, RunFree = false, UnsignedFree = false,
00203        ZeroFree = false;
00204   switch (Opcode) {
00205   default: return TCC_Free;
00206   case Instruction::GetElementPtr:
00207     // Always hoist the base address of a GetElementPtr. This prevents the
00208     // creation of new constants for every base constant that gets constant
00209     // folded with the offset.
00210     if (Idx == 0)
00211       return 2 * TCC_Basic;
00212     return TCC_Free;
00213   case Instruction::And:
00214     RunFree = true; // (for the rotate-and-mask instructions)
00215     // Fallthrough...
00216   case Instruction::Add:
00217   case Instruction::Or:
00218   case Instruction::Xor:
00219     ShiftedFree = true;
00220     // Fallthrough...
00221   case Instruction::Sub:
00222   case Instruction::Mul:
00223   case Instruction::Shl:
00224   case Instruction::LShr:
00225   case Instruction::AShr:
00226     ImmIdx = 1;
00227     break;
00228   case Instruction::ICmp:
00229     UnsignedFree = true;
00230     ImmIdx = 1;
00231     // Fallthrough... (zero comparisons can use record-form instructions)
00232   case Instruction::Select:
00233     ZeroFree = true;
00234     break;
00235   case Instruction::PHI:
00236   case Instruction::Call:
00237   case Instruction::Ret:
00238   case Instruction::Load:
00239   case Instruction::Store:
00240     break;
00241   }
00242 
00243   if (ZeroFree && Imm == 0)
00244     return TCC_Free;
00245 
00246   if (Idx == ImmIdx && Imm.getBitWidth() <= 64) {
00247     if (isInt<16>(Imm.getSExtValue()))
00248       return TCC_Free;
00249 
00250     if (RunFree) {
00251       if (Imm.getBitWidth() <= 32 &&
00252           (isShiftedMask_32(Imm.getZExtValue()) ||
00253            isShiftedMask_32(~Imm.getZExtValue())))
00254         return TCC_Free;
00255 
00256 
00257       if (ST->isPPC64() &&
00258           (isShiftedMask_64(Imm.getZExtValue()) ||
00259            isShiftedMask_64(~Imm.getZExtValue())))
00260         return TCC_Free;
00261     }
00262 
00263     if (UnsignedFree && isUInt<16>(Imm.getZExtValue()))
00264       return TCC_Free;
00265 
00266     if (ShiftedFree && (Imm.getZExtValue() & 0xFFFF) == 0)
00267       return TCC_Free;
00268   }
00269 
00270   return PPCTTI::getIntImmCost(Imm, Ty);
00271 }
00272 
00273 void PPCTTI::getUnrollingPreferences(const Function *F, Loop *L,
00274                                      UnrollingPreferences &UP) const {
00275   if (TM->getSubtarget<PPCSubtarget>(F).getDarwinDirective() == PPC::DIR_A2) {
00276     // The A2 is in-order with a deep pipeline, and concatenation unrolling
00277     // helps expose latency-hiding opportunities to the instruction scheduler.
00278     UP.Partial = UP.Runtime = true;
00279   }
00280 }
00281 
00282 unsigned PPCTTI::getNumberOfRegisters(bool Vector) const {
00283   if (Vector && !ST->hasAltivec())
00284     return 0;
00285   return ST->hasVSX() ? 64 : 32;
00286 }
00287 
00288 unsigned PPCTTI::getRegisterBitWidth(bool Vector) const {
00289   if (Vector) {
00290     if (ST->hasAltivec()) return 128;
00291     return 0;
00292   }
00293 
00294   if (ST->isPPC64())
00295     return 64;
00296   return 32;
00297 
00298 }
00299 
00300 unsigned PPCTTI::getMaxInterleaveFactor() const {
00301   unsigned Directive = ST->getDarwinDirective();
00302   // The 440 has no SIMD support, but floating-point instructions
00303   // have a 5-cycle latency, so unroll by 5x for latency hiding.
00304   if (Directive == PPC::DIR_440)
00305     return 5;
00306 
00307   // The A2 has no SIMD support, but floating-point instructions
00308   // have a 6-cycle latency, so unroll by 6x for latency hiding.
00309   if (Directive == PPC::DIR_A2)
00310     return 6;
00311 
00312   // FIXME: For lack of any better information, do no harm...
00313   if (Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500)
00314     return 1;
00315 
00316   // For most things, modern systems have two execution units (and
00317   // out-of-order execution).
00318   return 2;
00319 }
00320 
00321 unsigned PPCTTI::getArithmeticInstrCost(
00322     unsigned Opcode, Type *Ty, OperandValueKind Op1Info,
00323     OperandValueKind Op2Info, OperandValueProperties Opd1PropInfo,
00324     OperandValueProperties Opd2PropInfo) const {
00325   assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
00326 
00327   // Fallback to the default implementation.
00328   return TargetTransformInfo::getArithmeticInstrCost(
00329       Opcode, Ty, Op1Info, Op2Info, Opd1PropInfo, Opd2PropInfo);
00330 }
00331 
00332 unsigned PPCTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
00333                                 Type *SubTp) const {
00334   return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
00335 }
00336 
00337 unsigned PPCTTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
00338   assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
00339 
00340   return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
00341 }
00342 
00343 unsigned PPCTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
00344                                     Type *CondTy) const {
00345   return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
00346 }
00347 
00348 unsigned PPCTTI::getVectorInstrCost(unsigned Opcode, Type *Val,
00349                                     unsigned Index) const {
00350   assert(Val->isVectorTy() && "This must be a vector type");
00351 
00352   int ISD = TLI->InstructionOpcodeToISD(Opcode);
00353   assert(ISD && "Invalid opcode");
00354 
00355   if (ST->hasVSX() && Val->getScalarType()->isDoubleTy()) {
00356     // Double-precision scalars are already located in index #0.
00357     if (Index == 0)
00358       return 0;
00359 
00360     return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
00361   }
00362 
00363   // Estimated cost of a load-hit-store delay.  This was obtained
00364   // experimentally as a minimum needed to prevent unprofitable
00365   // vectorization for the paq8p benchmark.  It may need to be
00366   // raised further if other unprofitable cases remain.
00367   unsigned LHSPenalty = 2;
00368   if (ISD == ISD::INSERT_VECTOR_ELT)
00369     LHSPenalty += 7;
00370 
00371   // Vector element insert/extract with Altivec is very expensive,
00372   // because they require store and reload with the attendant
00373   // processor stall for load-hit-store.  Until VSX is available,
00374   // these need to be estimated as very costly.
00375   if (ISD == ISD::EXTRACT_VECTOR_ELT ||
00376       ISD == ISD::INSERT_VECTOR_ELT)
00377     return LHSPenalty +
00378       TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
00379 
00380   return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
00381 }
00382 
00383 unsigned PPCTTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
00384                                  unsigned AddressSpace) const {
00385   // Legalize the type.
00386   std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
00387   assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
00388          "Invalid Opcode");
00389 
00390   unsigned Cost =
00391     TargetTransformInfo::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
00392 
00393   // VSX loads/stores support unaligned access.
00394   if (ST->hasVSX()) {
00395     if (LT.second == MVT::v2f64 || LT.second == MVT::v2i64)
00396       return Cost;
00397   }
00398 
00399   bool UnalignedAltivec =
00400     Src->isVectorTy() &&
00401     Src->getPrimitiveSizeInBits() >= LT.second.getSizeInBits() &&
00402     LT.second.getSizeInBits() == 128 &&
00403     Opcode == Instruction::Load;
00404 
00405   // PPC in general does not support unaligned loads and stores. They'll need
00406   // to be decomposed based on the alignment factor.
00407   unsigned SrcBytes = LT.second.getStoreSize();
00408   if (SrcBytes && Alignment && Alignment < SrcBytes && !UnalignedAltivec) {
00409     Cost += LT.first*(SrcBytes/Alignment-1);
00410 
00411     // For a vector type, there is also scalarization overhead (only for
00412     // stores, loads are expanded using the vector-load + permutation sequence,
00413     // which is much less expensive).
00414     if (Src->isVectorTy() && Opcode == Instruction::Store)
00415       for (int i = 0, e = Src->getVectorNumElements(); i < e; ++i)
00416         Cost += getVectorInstrCost(Instruction::ExtractElement, Src, i);
00417   }
00418 
00419   return Cost;
00420 }
00421