LLVM API Documentation

ARMTargetTransformInfo.cpp
Go to the documentation of this file.
00001 //===-- ARMTargetTransformInfo.cpp - ARM specific TTI pass ----------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 /// \file
00010 /// This file implements a TargetTransformInfo analysis pass specific to the
00011 /// ARM target machine. It uses the target's detailed information to provide
00012 /// more precise answers to certain TTI queries, while letting the target
00013 /// independent and default TTI implementations handle the rest.
00014 ///
00015 //===----------------------------------------------------------------------===//
00016 
00017 #include "ARM.h"
00018 #include "ARMTargetMachine.h"
00019 #include "llvm/Analysis/TargetTransformInfo.h"
00020 #include "llvm/Support/Debug.h"
00021 #include "llvm/Target/CostTable.h"
00022 #include "llvm/Target/TargetLowering.h"
00023 using namespace llvm;
00024 
00025 #define DEBUG_TYPE "armtti"
00026 
00027 // Declare the pass initialization routine locally as target-specific passes
00028 // don't have a target-wide initialization entry point, and so we rely on the
00029 // pass constructor initialization.
00030 namespace llvm {
00031 void initializeARMTTIPass(PassRegistry &);
00032 }
00033 
00034 namespace {
00035 
00036 class ARMTTI final : public ImmutablePass, public TargetTransformInfo {
00037   const ARMBaseTargetMachine *TM;
00038   const ARMSubtarget *ST;
00039   const ARMTargetLowering *TLI;
00040 
00041   /// Estimate the overhead of scalarizing an instruction. Insert and Extract
00042   /// are set if the result needs to be inserted and/or extracted from vectors.
00043   unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
00044 
00045 public:
00046   ARMTTI() : ImmutablePass(ID), TM(nullptr), ST(nullptr), TLI(nullptr) {
00047     llvm_unreachable("This pass cannot be directly constructed");
00048   }
00049 
00050   ARMTTI(const ARMBaseTargetMachine *TM)
00051       : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
00052         TLI(TM->getSubtargetImpl()->getTargetLowering()) {
00053     initializeARMTTIPass(*PassRegistry::getPassRegistry());
00054   }
00055 
00056   void initializePass() override {
00057     pushTTIStack(this);
00058   }
00059 
00060   void getAnalysisUsage(AnalysisUsage &AU) const override {
00061     TargetTransformInfo::getAnalysisUsage(AU);
00062   }
00063 
00064   /// Pass identification.
00065   static char ID;
00066 
00067   /// Provide necessary pointer adjustments for the two base classes.
00068   void *getAdjustedAnalysisPointer(const void *ID) override {
00069     if (ID == &TargetTransformInfo::ID)
00070       return (TargetTransformInfo*)this;
00071     return this;
00072   }
00073 
00074   /// \name Scalar TTI Implementations
00075   /// @{
00076   using TargetTransformInfo::getIntImmCost;
00077   unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override;
00078 
00079   /// @}
00080 
00081 
00082   /// \name Vector TTI Implementations
00083   /// @{
00084 
00085   unsigned getNumberOfRegisters(bool Vector) const override {
00086     if (Vector) {
00087       if (ST->hasNEON())
00088         return 16;
00089       return 0;
00090     }
00091 
00092     if (ST->isThumb1Only())
00093       return 8;
00094     return 13;
00095   }
00096 
00097   unsigned getRegisterBitWidth(bool Vector) const override {
00098     if (Vector) {
00099       if (ST->hasNEON())
00100         return 128;
00101       return 0;
00102     }
00103 
00104     return 32;
00105   }
00106 
00107   unsigned getMaxInterleaveFactor() const override {
00108     // These are out of order CPUs:
00109     if (ST->isCortexA15() || ST->isSwift())
00110       return 2;
00111     return 1;
00112   }
00113 
00114   unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
00115                           int Index, Type *SubTp) const override;
00116 
00117   unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
00118                             Type *Src) const override;
00119 
00120   unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
00121                               Type *CondTy) const override;
00122 
00123   unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
00124                               unsigned Index) const override;
00125 
00126   unsigned getAddressComputationCost(Type *Val,
00127                                      bool IsComplex) const override;
00128 
00129   unsigned getArithmeticInstrCost(
00130       unsigned Opcode, Type *Ty, OperandValueKind Op1Info = OK_AnyValue,
00131       OperandValueKind Op2Info = OK_AnyValue,
00132       OperandValueProperties Opd1PropInfo = OP_None,
00133       OperandValueProperties Opd2PropInfo = OP_None) const override;
00134 
00135   unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
00136                            unsigned AddressSpace) const override;
00137   /// @}
00138 };
00139 
00140 } // end anonymous namespace
00141 
00142 INITIALIZE_AG_PASS(ARMTTI, TargetTransformInfo, "armtti",
00143                    "ARM Target Transform Info", true, true, false)
00144 char ARMTTI::ID = 0;
00145 
00146 ImmutablePass *
00147 llvm::createARMTargetTransformInfoPass(const ARMBaseTargetMachine *TM) {
00148   return new ARMTTI(TM);
00149 }
00150 
00151 
00152 unsigned ARMTTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
00153   assert(Ty->isIntegerTy());
00154 
00155   unsigned Bits = Ty->getPrimitiveSizeInBits();
00156   if (Bits == 0 || Bits > 32)
00157     return 4;
00158 
00159   int32_t SImmVal = Imm.getSExtValue();
00160   uint32_t ZImmVal = Imm.getZExtValue();
00161   if (!ST->isThumb()) {
00162     if ((SImmVal >= 0 && SImmVal < 65536) ||
00163         (ARM_AM::getSOImmVal(ZImmVal) != -1) ||
00164         (ARM_AM::getSOImmVal(~ZImmVal) != -1))
00165       return 1;
00166     return ST->hasV6T2Ops() ? 2 : 3;
00167   }
00168   if (ST->isThumb2()) {
00169     if ((SImmVal >= 0 && SImmVal < 65536) ||
00170         (ARM_AM::getT2SOImmVal(ZImmVal) != -1) ||
00171         (ARM_AM::getT2SOImmVal(~ZImmVal) != -1))
00172       return 1;
00173     return ST->hasV6T2Ops() ? 2 : 3;
00174   }
00175   // Thumb1.
00176   if (SImmVal >= 0 && SImmVal < 256)
00177     return 1;
00178   if ((~ZImmVal < 256) || ARM_AM::isThumbImmShiftedVal(ZImmVal))
00179     return 2;
00180   // Load from constantpool.
00181   return 3;
00182 }
00183 
00184 unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
00185                                   Type *Src) const {
00186   int ISD = TLI->InstructionOpcodeToISD(Opcode);
00187   assert(ISD && "Invalid opcode");
00188 
00189   // Single to/from double precision conversions.
00190   static const CostTblEntry<MVT::SimpleValueType> NEONFltDblTbl[] = {
00191     // Vector fptrunc/fpext conversions.
00192     { ISD::FP_ROUND,   MVT::v2f64, 2 },
00193     { ISD::FP_EXTEND,  MVT::v2f32, 2 },
00194     { ISD::FP_EXTEND,  MVT::v4f32, 4 }
00195   };
00196 
00197   if (Src->isVectorTy() && ST->hasNEON() && (ISD == ISD::FP_ROUND ||
00198                                           ISD == ISD::FP_EXTEND)) {
00199     std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
00200     int Idx = CostTableLookup(NEONFltDblTbl, ISD, LT.second);
00201     if (Idx != -1)
00202       return LT.first * NEONFltDblTbl[Idx].Cost;
00203   }
00204 
00205   EVT SrcTy = TLI->getValueType(Src);
00206   EVT DstTy = TLI->getValueType(Dst);
00207 
00208   if (!SrcTy.isSimple() || !DstTy.isSimple())
00209     return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
00210 
00211   // Some arithmetic, load and store operations have specific instructions
00212   // to cast up/down their types automatically at no extra cost.
00213   // TODO: Get these tables to know at least what the related operations are.
00214   static const TypeConversionCostTblEntry<MVT::SimpleValueType>
00215   NEONVectorConversionTbl[] = {
00216     { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
00217     { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
00218     { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
00219     { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
00220     { ISD::TRUNCATE,    MVT::v4i32, MVT::v4i64, 0 },
00221     { ISD::TRUNCATE,    MVT::v4i16, MVT::v4i32, 1 },
00222 
00223     // The number of vmovl instructions for the extension.
00224     { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
00225     { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
00226     { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
00227     { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
00228     { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
00229     { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
00230     { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
00231     { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
00232     { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
00233     { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
00234 
00235     // Operations that we legalize using splitting.
00236     { ISD::TRUNCATE,    MVT::v16i8, MVT::v16i32, 6 },
00237     { ISD::TRUNCATE,    MVT::v8i8, MVT::v8i32, 3 },
00238 
00239     // Vector float <-> i32 conversions.
00240     { ISD::SINT_TO_FP,  MVT::v4f32, MVT::v4i32, 1 },
00241     { ISD::UINT_TO_FP,  MVT::v4f32, MVT::v4i32, 1 },
00242 
00243     { ISD::SINT_TO_FP,  MVT::v2f32, MVT::v2i8, 3 },
00244     { ISD::UINT_TO_FP,  MVT::v2f32, MVT::v2i8, 3 },
00245     { ISD::SINT_TO_FP,  MVT::v2f32, MVT::v2i16, 2 },
00246     { ISD::UINT_TO_FP,  MVT::v2f32, MVT::v2i16, 2 },
00247     { ISD::SINT_TO_FP,  MVT::v2f32, MVT::v2i32, 1 },
00248     { ISD::UINT_TO_FP,  MVT::v2f32, MVT::v2i32, 1 },
00249     { ISD::SINT_TO_FP,  MVT::v4f32, MVT::v4i1, 3 },
00250     { ISD::UINT_TO_FP,  MVT::v4f32, MVT::v4i1, 3 },
00251     { ISD::SINT_TO_FP,  MVT::v4f32, MVT::v4i8, 3 },
00252     { ISD::UINT_TO_FP,  MVT::v4f32, MVT::v4i8, 3 },
00253     { ISD::SINT_TO_FP,  MVT::v4f32, MVT::v4i16, 2 },
00254     { ISD::UINT_TO_FP,  MVT::v4f32, MVT::v4i16, 2 },
00255     { ISD::SINT_TO_FP,  MVT::v8f32, MVT::v8i16, 4 },
00256     { ISD::UINT_TO_FP,  MVT::v8f32, MVT::v8i16, 4 },
00257     { ISD::SINT_TO_FP,  MVT::v8f32, MVT::v8i32, 2 },
00258     { ISD::UINT_TO_FP,  MVT::v8f32, MVT::v8i32, 2 },
00259     { ISD::SINT_TO_FP,  MVT::v16f32, MVT::v16i16, 8 },
00260     { ISD::UINT_TO_FP,  MVT::v16f32, MVT::v16i16, 8 },
00261     { ISD::SINT_TO_FP,  MVT::v16f32, MVT::v16i32, 4 },
00262     { ISD::UINT_TO_FP,  MVT::v16f32, MVT::v16i32, 4 },
00263 
00264     { ISD::FP_TO_SINT,  MVT::v4i32, MVT::v4f32, 1 },
00265     { ISD::FP_TO_UINT,  MVT::v4i32, MVT::v4f32, 1 },
00266     { ISD::FP_TO_SINT,  MVT::v4i8, MVT::v4f32, 3 },
00267     { ISD::FP_TO_UINT,  MVT::v4i8, MVT::v4f32, 3 },
00268     { ISD::FP_TO_SINT,  MVT::v4i16, MVT::v4f32, 2 },
00269     { ISD::FP_TO_UINT,  MVT::v4i16, MVT::v4f32, 2 },
00270 
00271     // Vector double <-> i32 conversions.
00272     { ISD::SINT_TO_FP,  MVT::v2f64, MVT::v2i32, 2 },
00273     { ISD::UINT_TO_FP,  MVT::v2f64, MVT::v2i32, 2 },
00274 
00275     { ISD::SINT_TO_FP,  MVT::v2f64, MVT::v2i8, 4 },
00276     { ISD::UINT_TO_FP,  MVT::v2f64, MVT::v2i8, 4 },
00277     { ISD::SINT_TO_FP,  MVT::v2f64, MVT::v2i16, 3 },
00278     { ISD::UINT_TO_FP,  MVT::v2f64, MVT::v2i16, 3 },
00279     { ISD::SINT_TO_FP,  MVT::v2f64, MVT::v2i32, 2 },
00280     { ISD::UINT_TO_FP,  MVT::v2f64, MVT::v2i32, 2 },
00281 
00282     { ISD::FP_TO_SINT,  MVT::v2i32, MVT::v2f64, 2 },
00283     { ISD::FP_TO_UINT,  MVT::v2i32, MVT::v2f64, 2 },
00284     { ISD::FP_TO_SINT,  MVT::v8i16, MVT::v8f32, 4 },
00285     { ISD::FP_TO_UINT,  MVT::v8i16, MVT::v8f32, 4 },
00286     { ISD::FP_TO_SINT,  MVT::v16i16, MVT::v16f32, 8 },
00287     { ISD::FP_TO_UINT,  MVT::v16i16, MVT::v16f32, 8 }
00288   };
00289 
00290   if (SrcTy.isVector() && ST->hasNEON()) {
00291     int Idx = ConvertCostTableLookup(NEONVectorConversionTbl, ISD,
00292                                      DstTy.getSimpleVT(), SrcTy.getSimpleVT());
00293     if (Idx != -1)
00294       return NEONVectorConversionTbl[Idx].Cost;
00295   }
00296 
00297   // Scalar float to integer conversions.
00298   static const TypeConversionCostTblEntry<MVT::SimpleValueType>
00299   NEONFloatConversionTbl[] = {
00300     { ISD::FP_TO_SINT,  MVT::i1, MVT::f32, 2 },
00301     { ISD::FP_TO_UINT,  MVT::i1, MVT::f32, 2 },
00302     { ISD::FP_TO_SINT,  MVT::i1, MVT::f64, 2 },
00303     { ISD::FP_TO_UINT,  MVT::i1, MVT::f64, 2 },
00304     { ISD::FP_TO_SINT,  MVT::i8, MVT::f32, 2 },
00305     { ISD::FP_TO_UINT,  MVT::i8, MVT::f32, 2 },
00306     { ISD::FP_TO_SINT,  MVT::i8, MVT::f64, 2 },
00307     { ISD::FP_TO_UINT,  MVT::i8, MVT::f64, 2 },
00308     { ISD::FP_TO_SINT,  MVT::i16, MVT::f32, 2 },
00309     { ISD::FP_TO_UINT,  MVT::i16, MVT::f32, 2 },
00310     { ISD::FP_TO_SINT,  MVT::i16, MVT::f64, 2 },
00311     { ISD::FP_TO_UINT,  MVT::i16, MVT::f64, 2 },
00312     { ISD::FP_TO_SINT,  MVT::i32, MVT::f32, 2 },
00313     { ISD::FP_TO_UINT,  MVT::i32, MVT::f32, 2 },
00314     { ISD::FP_TO_SINT,  MVT::i32, MVT::f64, 2 },
00315     { ISD::FP_TO_UINT,  MVT::i32, MVT::f64, 2 },
00316     { ISD::FP_TO_SINT,  MVT::i64, MVT::f32, 10 },
00317     { ISD::FP_TO_UINT,  MVT::i64, MVT::f32, 10 },
00318     { ISD::FP_TO_SINT,  MVT::i64, MVT::f64, 10 },
00319     { ISD::FP_TO_UINT,  MVT::i64, MVT::f64, 10 }
00320   };
00321   if (SrcTy.isFloatingPoint() && ST->hasNEON()) {
00322     int Idx = ConvertCostTableLookup(NEONFloatConversionTbl, ISD,
00323                                      DstTy.getSimpleVT(), SrcTy.getSimpleVT());
00324     if (Idx != -1)
00325         return NEONFloatConversionTbl[Idx].Cost;
00326   }
00327 
00328   // Scalar integer to float conversions.
00329   static const TypeConversionCostTblEntry<MVT::SimpleValueType>
00330   NEONIntegerConversionTbl[] = {
00331     { ISD::SINT_TO_FP,  MVT::f32, MVT::i1, 2 },
00332     { ISD::UINT_TO_FP,  MVT::f32, MVT::i1, 2 },
00333     { ISD::SINT_TO_FP,  MVT::f64, MVT::i1, 2 },
00334     { ISD::UINT_TO_FP,  MVT::f64, MVT::i1, 2 },
00335     { ISD::SINT_TO_FP,  MVT::f32, MVT::i8, 2 },
00336     { ISD::UINT_TO_FP,  MVT::f32, MVT::i8, 2 },
00337     { ISD::SINT_TO_FP,  MVT::f64, MVT::i8, 2 },
00338     { ISD::UINT_TO_FP,  MVT::f64, MVT::i8, 2 },
00339     { ISD::SINT_TO_FP,  MVT::f32, MVT::i16, 2 },
00340     { ISD::UINT_TO_FP,  MVT::f32, MVT::i16, 2 },
00341     { ISD::SINT_TO_FP,  MVT::f64, MVT::i16, 2 },
00342     { ISD::UINT_TO_FP,  MVT::f64, MVT::i16, 2 },
00343     { ISD::SINT_TO_FP,  MVT::f32, MVT::i32, 2 },
00344     { ISD::UINT_TO_FP,  MVT::f32, MVT::i32, 2 },
00345     { ISD::SINT_TO_FP,  MVT::f64, MVT::i32, 2 },
00346     { ISD::UINT_TO_FP,  MVT::f64, MVT::i32, 2 },
00347     { ISD::SINT_TO_FP,  MVT::f32, MVT::i64, 10 },
00348     { ISD::UINT_TO_FP,  MVT::f32, MVT::i64, 10 },
00349     { ISD::SINT_TO_FP,  MVT::f64, MVT::i64, 10 },
00350     { ISD::UINT_TO_FP,  MVT::f64, MVT::i64, 10 }
00351   };
00352 
00353   if (SrcTy.isInteger() && ST->hasNEON()) {
00354     int Idx = ConvertCostTableLookup(NEONIntegerConversionTbl, ISD,
00355                                      DstTy.getSimpleVT(), SrcTy.getSimpleVT());
00356     if (Idx != -1)
00357       return NEONIntegerConversionTbl[Idx].Cost;
00358   }
00359 
00360   // Scalar integer conversion costs.
00361   static const TypeConversionCostTblEntry<MVT::SimpleValueType>
00362   ARMIntegerConversionTbl[] = {
00363     // i16 -> i64 requires two dependent operations.
00364     { ISD::SIGN_EXTEND, MVT::i64, MVT::i16, 2 },
00365 
00366     // Truncates on i64 are assumed to be free.
00367     { ISD::TRUNCATE,    MVT::i32, MVT::i64, 0 },
00368     { ISD::TRUNCATE,    MVT::i16, MVT::i64, 0 },
00369     { ISD::TRUNCATE,    MVT::i8,  MVT::i64, 0 },
00370     { ISD::TRUNCATE,    MVT::i1,  MVT::i64, 0 }
00371   };
00372 
00373   if (SrcTy.isInteger()) {
00374     int Idx = ConvertCostTableLookup(ARMIntegerConversionTbl, ISD,
00375                                      DstTy.getSimpleVT(), SrcTy.getSimpleVT());
00376     if (Idx != -1)
00377       return ARMIntegerConversionTbl[Idx].Cost;
00378   }
00379 
00380   return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
00381 }
00382 
00383 unsigned ARMTTI::getVectorInstrCost(unsigned Opcode, Type *ValTy,
00384                                     unsigned Index) const {
00385   // Penalize inserting into an D-subregister. We end up with a three times
00386   // lower estimated throughput on swift.
00387   if (ST->isSwift() &&
00388       Opcode == Instruction::InsertElement &&
00389       ValTy->isVectorTy() &&
00390       ValTy->getScalarSizeInBits() <= 32)
00391     return 3;
00392 
00393   // Cross-class copies are expensive on many microarchitectures,
00394   // so assume they are expensive by default.
00395   if ((Opcode == Instruction::InsertElement ||
00396        Opcode == Instruction::ExtractElement) &&
00397       ValTy->getVectorElementType()->isIntegerTy())
00398     return 3;
00399 
00400   return TargetTransformInfo::getVectorInstrCost(Opcode, ValTy, Index);
00401 }
00402 
00403 unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
00404                                     Type *CondTy) const {
00405 
00406   int ISD = TLI->InstructionOpcodeToISD(Opcode);
00407   // On NEON a a vector select gets lowered to vbsl.
00408   if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) {
00409     // Lowering of some vector selects is currently far from perfect.
00410     static const TypeConversionCostTblEntry<MVT::SimpleValueType>
00411     NEONVectorSelectTbl[] = {
00412       { ISD::SELECT, MVT::v16i1, MVT::v16i16, 2*16 + 1 + 3*1 + 4*1 },
00413       { ISD::SELECT, MVT::v8i1, MVT::v8i32, 4*8 + 1*3 + 1*4 + 1*2 },
00414       { ISD::SELECT, MVT::v16i1, MVT::v16i32, 4*16 + 1*6 + 1*8 + 1*4 },
00415       { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 },
00416       { ISD::SELECT, MVT::v8i1, MVT::v8i64, 50 },
00417       { ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 }
00418     };
00419 
00420     EVT SelCondTy = TLI->getValueType(CondTy);
00421     EVT SelValTy = TLI->getValueType(ValTy);
00422     if (SelCondTy.isSimple() && SelValTy.isSimple()) {
00423       int Idx = ConvertCostTableLookup(NEONVectorSelectTbl, ISD,
00424                                        SelCondTy.getSimpleVT(),
00425                                        SelValTy.getSimpleVT());
00426       if (Idx != -1)
00427         return NEONVectorSelectTbl[Idx].Cost;
00428     }
00429 
00430     std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
00431     return LT.first;
00432   }
00433 
00434   return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
00435 }
00436 
00437 unsigned ARMTTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
00438   // Address computations in vectorized code with non-consecutive addresses will
00439   // likely result in more instructions compared to scalar code where the
00440   // computation can more often be merged into the index mode. The resulting
00441   // extra micro-ops can significantly decrease throughput.
00442   unsigned NumVectorInstToHideOverhead = 10;
00443 
00444   if (Ty->isVectorTy() && IsComplex)
00445     return NumVectorInstToHideOverhead;
00446 
00447   // In many cases the address computation is not merged into the instruction
00448   // addressing mode.
00449   return 1;
00450 }
00451 
00452 unsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
00453                                 Type *SubTp) const {
00454   // We only handle costs of reverse and alternate shuffles for now.
00455   if (Kind != SK_Reverse && Kind != SK_Alternate)
00456     return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
00457 
00458   if (Kind == SK_Reverse) {
00459     static const CostTblEntry<MVT::SimpleValueType> NEONShuffleTbl[] = {
00460         // Reverse shuffle cost one instruction if we are shuffling within a
00461         // double word (vrev) or two if we shuffle a quad word (vrev, vext).
00462         {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
00463         {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
00464         {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
00465         {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
00466 
00467         {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
00468         {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
00469         {ISD::VECTOR_SHUFFLE, MVT::v8i16, 2},
00470         {ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}};
00471 
00472     std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
00473 
00474     int Idx = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
00475     if (Idx == -1)
00476       return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
00477 
00478     return LT.first * NEONShuffleTbl[Idx].Cost;
00479   }
00480   if (Kind == SK_Alternate) {
00481     static const CostTblEntry<MVT::SimpleValueType> NEONAltShuffleTbl[] = {
00482         // Alt shuffle cost table for ARM. Cost is the number of instructions
00483         // required to create the shuffled vector.
00484 
00485         {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
00486         {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
00487         {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
00488         {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
00489 
00490         {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
00491         {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
00492         {ISD::VECTOR_SHUFFLE, MVT::v4i16, 2},
00493 
00494         {ISD::VECTOR_SHUFFLE, MVT::v8i16, 16},
00495 
00496         {ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}};
00497 
00498     std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
00499     int Idx =
00500         CostTableLookup(NEONAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
00501     if (Idx == -1)
00502       return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
00503     return LT.first * NEONAltShuffleTbl[Idx].Cost;
00504   }
00505   return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
00506 }
00507 
00508 unsigned ARMTTI::getArithmeticInstrCost(
00509     unsigned Opcode, Type *Ty, OperandValueKind Op1Info,
00510     OperandValueKind Op2Info, OperandValueProperties Opd1PropInfo,
00511     OperandValueProperties Opd2PropInfo) const {
00512 
00513   int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
00514   std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
00515 
00516   const unsigned FunctionCallDivCost = 20;
00517   const unsigned ReciprocalDivCost = 10;
00518   static const CostTblEntry<MVT::SimpleValueType> CostTbl[] = {
00519     // Division.
00520     // These costs are somewhat random. Choose a cost of 20 to indicate that
00521     // vectorizing devision (added function call) is going to be very expensive.
00522     // Double registers types.
00523     { ISD::SDIV, MVT::v1i64, 1 * FunctionCallDivCost},
00524     { ISD::UDIV, MVT::v1i64, 1 * FunctionCallDivCost},
00525     { ISD::SREM, MVT::v1i64, 1 * FunctionCallDivCost},
00526     { ISD::UREM, MVT::v1i64, 1 * FunctionCallDivCost},
00527     { ISD::SDIV, MVT::v2i32, 2 * FunctionCallDivCost},
00528     { ISD::UDIV, MVT::v2i32, 2 * FunctionCallDivCost},
00529     { ISD::SREM, MVT::v2i32, 2 * FunctionCallDivCost},
00530     { ISD::UREM, MVT::v2i32, 2 * FunctionCallDivCost},
00531     { ISD::SDIV, MVT::v4i16,     ReciprocalDivCost},
00532     { ISD::UDIV, MVT::v4i16,     ReciprocalDivCost},
00533     { ISD::SREM, MVT::v4i16, 4 * FunctionCallDivCost},
00534     { ISD::UREM, MVT::v4i16, 4 * FunctionCallDivCost},
00535     { ISD::SDIV, MVT::v8i8,      ReciprocalDivCost},
00536     { ISD::UDIV, MVT::v8i8,      ReciprocalDivCost},
00537     { ISD::SREM, MVT::v8i8,  8 * FunctionCallDivCost},
00538     { ISD::UREM, MVT::v8i8,  8 * FunctionCallDivCost},
00539     // Quad register types.
00540     { ISD::SDIV, MVT::v2i64, 2 * FunctionCallDivCost},
00541     { ISD::UDIV, MVT::v2i64, 2 * FunctionCallDivCost},
00542     { ISD::SREM, MVT::v2i64, 2 * FunctionCallDivCost},
00543     { ISD::UREM, MVT::v2i64, 2 * FunctionCallDivCost},
00544     { ISD::SDIV, MVT::v4i32, 4 * FunctionCallDivCost},
00545     { ISD::UDIV, MVT::v4i32, 4 * FunctionCallDivCost},
00546     { ISD::SREM, MVT::v4i32, 4 * FunctionCallDivCost},
00547     { ISD::UREM, MVT::v4i32, 4 * FunctionCallDivCost},
00548     { ISD::SDIV, MVT::v8i16, 8 * FunctionCallDivCost},
00549     { ISD::UDIV, MVT::v8i16, 8 * FunctionCallDivCost},
00550     { ISD::SREM, MVT::v8i16, 8 * FunctionCallDivCost},
00551     { ISD::UREM, MVT::v8i16, 8 * FunctionCallDivCost},
00552     { ISD::SDIV, MVT::v16i8, 16 * FunctionCallDivCost},
00553     { ISD::UDIV, MVT::v16i8, 16 * FunctionCallDivCost},
00554     { ISD::SREM, MVT::v16i8, 16 * FunctionCallDivCost},
00555     { ISD::UREM, MVT::v16i8, 16 * FunctionCallDivCost},
00556     // Multiplication.
00557   };
00558 
00559   int Idx = -1;
00560 
00561   if (ST->hasNEON())
00562     Idx = CostTableLookup(CostTbl, ISDOpcode, LT.second);
00563 
00564   if (Idx != -1)
00565     return LT.first * CostTbl[Idx].Cost;
00566 
00567   unsigned Cost = TargetTransformInfo::getArithmeticInstrCost(
00568       Opcode, Ty, Op1Info, Op2Info, Opd1PropInfo, Opd2PropInfo);
00569 
00570   // This is somewhat of a hack. The problem that we are facing is that SROA
00571   // creates a sequence of shift, and, or instructions to construct values.
00572   // These sequences are recognized by the ISel and have zero-cost. Not so for
00573   // the vectorized code. Because we have support for v2i64 but not i64 those
00574   // sequences look particularly beneficial to vectorize.
00575   // To work around this we increase the cost of v2i64 operations to make them
00576   // seem less beneficial.
00577   if (LT.second == MVT::v2i64 &&
00578       Op2Info == TargetTransformInfo::OK_UniformConstantValue)
00579     Cost += 4;
00580 
00581   return Cost;
00582 }
00583 
00584 unsigned ARMTTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
00585                                  unsigned AddressSpace) const {
00586   std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
00587 
00588   if (Src->isVectorTy() && Alignment != 16 &&
00589       Src->getVectorElementType()->isDoubleTy()) {
00590     // Unaligned loads/stores are extremely inefficient.
00591     // We need 4 uops for vst.1/vld.1 vs 1uop for vldr/vstr.
00592     return LT.first * 4;
00593   }
00594   return LT.first;
00595 }