//===-- AArch64TargetTransformInfo.cpp - AArch64 specific TTI pass --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements a TargetTransformInfo analysis pass specific to the
/// AArch64 target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target-independent and default TTI implementations handle the rest.
///
//===----------------------------------------------------------------------===//

#include "AArch64.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/CostTable.h"
#include "llvm/Target/TargetLowering.h"
#include <algorithm>
using namespace llvm;

#define DEBUG_TYPE "aarch64tti"

// Declare the pass initialization routine locally as target-specific passes
// don't have a target-wide initialization entry point, and so we rely on the
// pass constructor initialization.
namespace llvm {
void initializeAArch64TTIPass(PassRegistry &);
}

namespace {

class AArch64TTI final : public ImmutablePass, public TargetTransformInfo {
  const AArch64TargetMachine *TM;
  const AArch64Subtarget *ST;
  const AArch64TargetLowering *TLI;

  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
  /// are set if the result needs to be inserted and/or extracted from vectors.
  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;

public:
  AArch64TTI() : ImmutablePass(ID), TM(nullptr), ST(nullptr), TLI(nullptr) {
    llvm_unreachable("This pass cannot be directly constructed");
  }

  AArch64TTI(const AArch64TargetMachine *TM)
      : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
        TLI(TM->getSubtargetImpl()->getTargetLowering()) {
    initializeAArch64TTIPass(*PassRegistry::getPassRegistry());
  }

  void initializePass() override { pushTTIStack(this); }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    TargetTransformInfo::getAnalysisUsage(AU);
  }

  /// Pass identification.
  static char ID;

  /// Provide necessary pointer adjustments for the two base classes.
  void *getAdjustedAnalysisPointer(const void *ID) override {
    if (ID == &TargetTransformInfo::ID)
      return (TargetTransformInfo *)this;
    return this;
  }

  /// \name Scalar TTI Implementations
  /// @{
  unsigned getIntImmCost(int64_t Val) const;
  unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override;
  unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
                         Type *Ty) const override;
  unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                         Type *Ty) const override;
  PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override;

  /// @}

  /// \name Vector TTI Implementations
  /// @{

  unsigned getNumberOfRegisters(bool Vector) const override {
    if (Vector) {
      if (ST->hasNEON())
        return 32;
      return 0;
    }
    return 31;
  }

  unsigned getRegisterBitWidth(bool Vector) const override {
    if (Vector) {
      if (ST->hasNEON())
        return 128;
      return 0;
    }
    return 64;
  }

  unsigned getMaxInterleaveFactor() const override;

  unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const
      override;

  unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const
      override;

  unsigned getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
      OperandValueKind Opd2Info = OK_AnyValue,
      OperandValueProperties Opd1PropInfo = OP_None,
      OperandValueProperties Opd2PropInfo = OP_None) const override;

  unsigned getAddressComputationCost(Type *Ty, bool IsComplex) const override;

  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) const
      override;

  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                           unsigned AddressSpace) const override;

  unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys) const override;

  /// @}
};

} // end anonymous namespace

INITIALIZE_AG_PASS(AArch64TTI, TargetTransformInfo, "aarch64tti",
                   "AArch64 Target Transform Info", true, true, false)
char AArch64TTI::ID = 0;

ImmutablePass *
llvm::createAArch64TargetTransformInfoPass(const AArch64TargetMachine *TM) {
  return new AArch64TTI(TM);
}

/// \brief Calculate the cost of materializing a 64-bit value. This helper
/// method might only calculate a fraction of a larger immediate. Therefore it
/// is valid to return a cost of ZERO.
unsigned AArch64TTI::getIntImmCost(int64_t Val) const {
  // Check if the immediate can be encoded within an instruction.
  if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, 64))
    return 0;

  if (Val < 0)
    Val = ~Val;

  // Calculate how many moves we will need to materialize this constant.
  unsigned LZ = countLeadingZeros((uint64_t)Val);
  return (64 - LZ + 15) / 16;
}
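
// Worked example (illustrative): Val = 0x1234567812345678 is not a logical
// immediate and has 3 leading zero bits, so the formula above yields
// (64 - 3 + 15) / 16 = 4, i.e. one MOVZ plus three MOVK instructions, one
// per 16-bit chunk. For the negative value ~0x12345 the bits are flipped
// first; 0x12345 has 47 leading zeros, giving (64 - 47 + 15) / 16 = 2
// (a MOVN plus one MOVK).
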
/// \brief Calculate the cost of materializing the given constant.
unsigned AArch64TTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  if (BitSize == 0)
    return ~0U;

  // Sign-extend all constants to a multiple of 64 bits.
  APInt ImmVal = Imm;
  if (BitSize & 0x3f)
    ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);

  // Split the constant into 64-bit chunks and calculate the cost for each
  // chunk.
  unsigned Cost = 0;
  for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
    APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64);
    int64_t Val = Tmp.getSExtValue();
    Cost += getIntImmCost(Val);
  }
  // We need at least one instruction to materialize the constant.
  return std::max(1U, Cost);
}

unsigned AArch64TTI::getIntImmCost(unsigned Opcode, unsigned Idx,
                                   const APInt &Imm, Type *Ty) const {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  // There is no cost model for constants with a bit size of 0. Return
  // TCC_Free here, so that constant hoisting will ignore this constant.
  if (BitSize == 0)
    return TCC_Free;

  unsigned ImmIdx = ~0U;
  switch (Opcode) {
  default:
    return TCC_Free;
  case Instruction::GetElementPtr:
    // Always hoist the base address of a GetElementPtr.
    if (Idx == 0)
      return 2 * TCC_Basic;
    return TCC_Free;
  case Instruction::Store:
    ImmIdx = 0;
    break;
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::ICmp:
    ImmIdx = 1;
    break;
  // Always return TCC_Free for the shift value of a shift instruction.
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    if (Idx == 1)
      return TCC_Free;
    break;
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::IntToPtr:
  case Instruction::PtrToInt:
  case Instruction::BitCast:
  case Instruction::PHI:
  case Instruction::Call:
  case Instruction::Select:
  case Instruction::Ret:
  case Instruction::Load:
    break;
  }

  if (Idx == ImmIdx) {
    unsigned NumConstants = (BitSize + 63) / 64;
    unsigned Cost = AArch64TTI::getIntImmCost(Imm, Ty);
    return (Cost <= NumConstants * TCC_Basic)
               ? static_cast<unsigned>(TCC_Free) : Cost;
  }
  return AArch64TTI::getIntImmCost(Imm, Ty);
}
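
// Worked example of the hoisting decision above (illustrative): for an i64
// add of the constant 0xffff, NumConstants is 1 and the materialization cost
// is 1 (a single MOVZ), so Cost <= NumConstants * TCC_Basic holds and the
// immediate is reported as TCC_Free -- constant hoisting leaves it in place.
// A constant needing four MOVZ/MOVK chunks costs 4 > TCC_Basic, so its real
// cost is reported and it becomes a hoisting candidate.
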
unsigned AArch64TTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
                                   const APInt &Imm, Type *Ty) const {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  // There is no cost model for constants with a bit size of 0. Return
  // TCC_Free here, so that constant hoisting will ignore this constant.
  if (BitSize == 0)
    return TCC_Free;

  switch (IID) {
  default:
    return TCC_Free;
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow:
    if (Idx == 1) {
      unsigned NumConstants = (BitSize + 63) / 64;
      unsigned Cost = AArch64TTI::getIntImmCost(Imm, Ty);
      return (Cost <= NumConstants * TCC_Basic)
                 ? static_cast<unsigned>(TCC_Free) : Cost;
    }
    break;
  case Intrinsic::experimental_stackmap:
    if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
      return TCC_Free;
    break;
  case Intrinsic::experimental_patchpoint_void:
  case Intrinsic::experimental_patchpoint_i64:
    if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
      return TCC_Free;
    break;
  }
  return AArch64TTI::getIntImmCost(Imm, Ty);
}

AArch64TTI::PopcntSupportKind
AArch64TTI::getPopcntSupport(unsigned TyWidth) const {
  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
  if (TyWidth == 32 || TyWidth == 64)
    return PSK_FastHardware;
  // TODO: AArch64TargetLowering::LowerCTPOP() supports 128-bit popcount.
  return PSK_Software;
}
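
// Background for the PSK_FastHardware answer above (illustrative note):
// AArch64 has no scalar popcount instruction; i32/i64 CTPOP is lowered
// through the vector unit using the NEON CNT (per-byte popcount) instruction
// followed by a horizontal add, which is still only a handful of
// instructions.
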
unsigned AArch64TTI::getCastInstrCost(unsigned Opcode, Type *Dst,
                                      Type *Src) const {
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");

  EVT SrcTy = TLI->getValueType(Src);
  EVT DstTy = TLI->getValueType(Dst);

  if (!SrcTy.isSimple() || !DstTy.isSimple())
    return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);

  static const TypeConversionCostTblEntry<MVT> ConversionTbl[] = {
    // LowerVectorINT_TO_FP:
    { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
    { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
    { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
    { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },

    // Complex: to v2f32
    { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8,  3 },
    { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
    { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },
    { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8,  3 },
    { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
    { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },

    // Complex: to v4f32
    { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8,  4 },
    { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
    { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8,  3 },
    { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },

    // Complex: to v2f64
    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8,  4 },
    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8,  4 },
    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },

    // LowerVectorFP_TO_INT
    { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f32, 1 },
    { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
    { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 },
    { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 },
    { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
    { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 },

    // Complex, from v2f32: legal type is v2i32 (no cost) or v2i64 (1 ext).
    { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 2 },
    { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 1 },
    { ISD::FP_TO_SINT, MVT::v2i8,  MVT::v2f32, 1 },
    { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 2 },
    { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 1 },
    { ISD::FP_TO_UINT, MVT::v2i8,  MVT::v2f32, 1 },

    // Complex, from v4f32: legal type is v4i16, 1 narrowing => ~2
    { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
    { ISD::FP_TO_SINT, MVT::v4i8,  MVT::v4f32, 2 },
    { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
    { ISD::FP_TO_UINT, MVT::v4i8,  MVT::v4f32, 2 },

    // Complex, from v2f64: legal type is v2i32, 1 narrowing => ~2.
    { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
    { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 },
    { ISD::FP_TO_SINT, MVT::v2i8,  MVT::v2f64, 2 },
    { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
    { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2 },
    { ISD::FP_TO_UINT, MVT::v2i8,  MVT::v2f64, 2 },
  };

  int Idx = ConvertCostTableLookup<MVT>(
      ConversionTbl, array_lengthof(ConversionTbl), ISD, DstTy.getSimpleVT(),
      SrcTy.getSimpleVT());
  if (Idx != -1)
    return ConversionTbl[Idx].Cost;

  return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
}

unsigned AArch64TTI::getVectorInstrCost(unsigned Opcode, Type *Val,
                                        unsigned Index) const {
  assert(Val->isVectorTy() && "This must be a vector type");

  if (Index != -1U) {
    // Legalize the type.
    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Val);

    // This type is legalized to a scalar type.
    if (!LT.second.isVector())
      return 0;

    // The type may be split. Normalize the index to the new type.
    unsigned Width = LT.second.getVectorNumElements();
    Index = Index % Width;

    // The element at index zero is already inside the vector.
    if (Index == 0)
      return 0;
  }

  // All other insert/extracts cost this much.
  return 2;
}
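
// Worked example of the index normalization above (illustrative): v4i64 is
// not a legal NEON type, so it is split into two v2i64 registers and
// Width == 2. Extracting element 3 normalizes to index 3 % 2 == 1 and costs
// 2, while extracting element 2 normalizes to index 0 -- that element
// already sits in lane 0 of the second register, so the cost is 0.
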
unsigned AArch64TTI::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
    OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
    OperandValueProperties Opd2PropInfo) const {
  // Legalize the type.
  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);

  int ISD = TLI->InstructionOpcodeToISD(Opcode);

  switch (ISD) {
  default:
    return TargetTransformInfo::getArithmeticInstrCost(
        Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo);
  case ISD::ADD:
  case ISD::MUL:
  case ISD::XOR:
  case ISD::OR:
  case ISD::AND:
    // These nodes are marked as 'custom' for combining purposes only.
    // We know that they are legal. See LowerAdd in ISelLowering.
    return 1 * LT.first;
  }
}

unsigned AArch64TTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
  // Address computations in vectorized code with non-consecutive addresses
  // will likely result in more instructions compared to scalar code where
  // the computation can more often be merged into the index mode. The
  // resulting extra micro-ops can significantly decrease throughput.
  unsigned NumVectorInstToHideOverhead = 10;

  if (Ty->isVectorTy() && IsComplex)
    return NumVectorInstToHideOverhead;

  // In many cases the address computation is not merged into the instruction
  // addressing mode.
  return 1;
}

unsigned AArch64TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                        Type *CondTy) const {

  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  // We don't lower vector selects that are wider than the register width
  // well.
  if (ValTy->isVectorTy() && ISD == ISD::SELECT) {
    // We would need this many instructions to hide the cost of the
    // scalarization that happens.
    unsigned AmortizationCost = 20;
    static const TypeConversionCostTblEntry<MVT::SimpleValueType>
    VectorSelectTbl[] = {
      { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 * AmortizationCost },
      { ISD::SELECT, MVT::v8i1,  MVT::v8i32,  8 * AmortizationCost },
      { ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 * AmortizationCost },
      { ISD::SELECT, MVT::v4i1,  MVT::v4i64,  4 * AmortizationCost },
      { ISD::SELECT, MVT::v8i1,  MVT::v8i64,  8 * AmortizationCost },
      { ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost }
    };

    EVT SelCondTy = TLI->getValueType(CondTy);
    EVT SelValTy = TLI->getValueType(ValTy);
    if (SelCondTy.isSimple() && SelValTy.isSimple()) {
      int Idx =
          ConvertCostTableLookup(VectorSelectTbl, ISD, SelCondTy.getSimpleVT(),
                                 SelValTy.getSimpleVT());
      if (Idx != -1)
        return VectorSelectTbl[Idx].Cost;
    }
  }
  return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
}
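
// Worked example for the table above (illustrative): a select with a v16i1
// condition and v16i32 operands is four times wider than a 128-bit NEON
// register and gets scalarized, so it is priced at 16 * AmortizationCost
// = 320. A vectorizer will only choose such a shape if enough surrounding
// work amortizes that cost.
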
unsigned AArch64TTI::getMemoryOpCost(unsigned Opcode, Type *Src,
                                     unsigned Alignment,
                                     unsigned AddressSpace) const {
  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);

  if (Opcode == Instruction::Store && Src->isVectorTy() && Alignment != 16 &&
      Src->getVectorElementType()->isIntegerTy(64)) {
    // Unaligned stores are extremely inefficient. We don't split unaligned
    // v2i64 stores because of the negative impact that has been observed in
    // practice on inlined memcpy code.
    // We make v2i64 stores expensive so that we will only vectorize if there
    // are 6 other instructions getting vectorized.
    unsigned AmortizationCost = 6;

    return LT.first * 2 * AmortizationCost;
  }

  if (Src->isVectorTy() && Src->getVectorElementType()->isIntegerTy(8) &&
      Src->getVectorNumElements() < 8) {
    // We scalarize the loads/stores because there is no v.4b register and we
    // have to promote the elements to v.4h.
    unsigned NumVecElts = Src->getVectorNumElements();
    unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
    // We generate 2 instructions per vector element.
    return NumVectorizableInstsToAmortize * NumVecElts * 2;
  }

  return LT.first;
}

unsigned AArch64TTI::getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys) const {
  unsigned Cost = 0;
  for (auto *I : Tys) {
    if (!I->isVectorTy())
      continue;
    if (I->getScalarSizeInBits() * I->getVectorNumElements() == 128)
      Cost += getMemoryOpCost(Instruction::Store, I, 128, 0) +
              getMemoryOpCost(Instruction::Load, I, 128, 0);
  }
  return Cost;
}

unsigned AArch64TTI::getMaxInterleaveFactor() const {
  if (ST->isCortexA57())
    return 4;
  return 2;
}
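
// Illustrative client-side query (hypothetical sketch; `Ctx` and the pass
// boilerplate are assumed, not part of this file). A pass that requires the
// TargetTransformInfo analysis group can exercise the hooks above like so:
//
//   const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();
//   Type *V4I32 = VectorType::get(Type::getInt32Ty(Ctx), 4);
//   unsigned AddCost = TTI.getArithmeticInstrCost(Instruction::Add, V4I32);
//   // AddCost == 1: v4i32 is legal on NEON, so the ISD::ADD case above
//   // returns 1 * LT.first == 1.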