// LLVM API Documentation (doxygen page banner retained as a comment so the file parses)
//===-- PPCTargetTransformInfo.cpp - PPC specific TTI pass ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements a TargetTransformInfo analysis pass specific to the
/// PPC target machine. It uses the target's detailed information to provide
/// more precise answers to certain TTI queries, while letting the target
/// independent and default TTI implementations handle the rest.
///
//===----------------------------------------------------------------------===//

#include "PPC.h"
#include "PPCTargetMachine.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/CostTable.h"
#include "llvm/Target/TargetLowering.h"
using namespace llvm;

#define DEBUG_TYPE "ppctti"

// Command-line escape hatch: when set, all three getIntImmCost overloads below
// defer to the generic TargetTransformInfo implementation, effectively
// disabling PPC-specific constant hoisting decisions.
static cl::opt<bool> DisablePPCConstHoist("disable-ppc-constant-hoisting",
cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden);

// Declare the pass initialization routine locally as target-specific passes
// don't have a target-wide initialization entry point, and so we rely on the
// pass constructor initialization.
namespace llvm {
void initializePPCTTIPass(PassRegistry &);
}

namespace {

/// PPC implementation of the TargetTransformInfo analysis group. It is both
/// an ImmutablePass (so it can live in the pass registry) and a
/// TargetTransformInfo (so queries can be chained through the TTI stack).
class PPCTTI final : public ImmutablePass, public TargetTransformInfo {
  const TargetMachine *TM;
  const PPCSubtarget *ST;
  const PPCTargetLowering *TLI;

public:
  // Default construction is a programming error; the pass must be created via
  // createPPCTargetTransformInfoPass below so TM/ST/TLI are valid. Note that
  // this constructor leaves TM uninitialized — it is unreachable anyway.
  PPCTTI() : ImmutablePass(ID), ST(nullptr), TLI(nullptr) {
    llvm_unreachable("This pass cannot be directly constructed");
  }

  PPCTTI(const PPCTargetMachine *TM)
      : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
        TLI(TM->getSubtargetImpl()->getTargetLowering()) {
    initializePPCTTIPass(*PassRegistry::getPassRegistry());
  }

  // Push this implementation onto the TTI delegation stack so unimplemented
  // queries fall through to the default implementation.
  void initializePass() override {
    pushTTIStack(this);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    TargetTransformInfo::getAnalysisUsage(AU);
  }

  /// Pass identification.
  static char ID;

  /// Provide necessary pointer adjustments for the two base classes.
  void *getAdjustedAnalysisPointer(const void *ID) override {
    if (ID == &TargetTransformInfo::ID)
      return (TargetTransformInfo*)this;
    return this;
  }

  /// \name Scalar TTI Implementations
  /// @{
  unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override;

  unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
                         Type *Ty) const override;
  unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                         Type *Ty) const override;

  PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override;
  void getUnrollingPreferences(const Function *F, Loop *L,
                               UnrollingPreferences &UP) const override;

  /// @}

  /// \name Vector TTI Implementations
  /// @{

  unsigned getNumberOfRegisters(bool Vector) const override;
  unsigned getRegisterBitWidth(bool Vector) const override;
  unsigned getMaxInterleaveFactor() const override;
  unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind,
                                  OperandValueKind, OperandValueProperties,
                                  OperandValueProperties) const override;
  unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
                          int Index, Type *SubTp) const override;
  unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
                            Type *Src) const override;
  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                              Type *CondTy) const override;
  unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
                              unsigned Index) const override;
  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                           unsigned AddressSpace) const override;

  /// @}
};

} // end anonymous namespace

// Register PPCTTI as a member of the TargetTransformInfo analysis group.
INITIALIZE_AG_PASS(PPCTTI, TargetTransformInfo, "ppctti",
                   "PPC Target Transform Info", true, true, false)
char PPCTTI::ID = 0;

ImmutablePass *
llvm::createPPCTargetTransformInfoPass(const
PPCTargetMachine *TM) {
  return new PPCTTI(TM);
}


//===----------------------------------------------------------------------===//
//
// PPC cost model.
//
//===----------------------------------------------------------------------===//

// Report fast hardware population count when the subtarget has the POPCNTD
// instruction and the type fits in 64 bits; otherwise popcount must be done
// in software.
PPCTTI::PopcntSupportKind PPCTTI::getPopcntSupport(unsigned TyWidth) const {
  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
  if (ST->hasPOPCNTD() && TyWidth <= 64)
    return PSK_FastHardware;
  return PSK_Software;
}

// Cost of materializing an arbitrary integer immediate, used by the constant
// hoisting pass to decide which constants are worth hoisting.
unsigned PPCTTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
  if (DisablePPCConstHoist)
    return TargetTransformInfo::getIntImmCost(Imm, Ty);

  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  if (BitSize == 0)
    return ~0U; // Unsized integer type: report "infinite" cost.

  if (Imm == 0)
    return TCC_Free;

  if (Imm.getBitWidth() <= 64) {
    // A 16-bit signed immediate can be materialized in one instruction.
    if (isInt<16>(Imm.getSExtValue()))
      return TCC_Basic;

    if (isInt<32>(Imm.getSExtValue())) {
      // A constant that can be materialized using lis.
      if ((Imm.getZExtValue() & 0xFFFF) == 0)
        return TCC_Basic;

      // Otherwise a 32-bit constant needs a two-instruction sequence.
      return 2 * TCC_Basic;
    }
  }

  // Wider constants: assume a longer materialization sequence.
  return 4 * TCC_Basic;
}

// Immediate cost when the constant is an operand of an intrinsic call. The
// add/sub with-overflow intrinsics can absorb a 16-bit signed immediate in
// operand position 1 for free; everything else defers to the generic
// materialization cost above.
unsigned PPCTTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
                               const APInt &Imm, Type *Ty) const {
  if (DisablePPCConstHoist)
    return TargetTransformInfo::getIntImmCost(IID, Idx, Imm, Ty);

  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  if (BitSize == 0)
    return ~0U; // Unsized integer type: report "infinite" cost.

  switch (IID) {
  default: return TCC_Free;
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
    if ((Idx == 1) && Imm.getBitWidth() <= 64 && isInt<16>(Imm.getSExtValue()))
      return TCC_Free;
    break;
  }
  return PPCTTI::getIntImmCost(Imm, Ty);
}

// Immediate cost when the constant is an operand of a regular instruction.
// The switch below classifies, per opcode, which immediate forms the PPC
// instruction set can encode directly (and are therefore free):
//   ImmIdx       - operand index at which an immediate may be encoded
//   ShiftedFree  - immediates with the low 16 bits clear are free
//   RunFree      - shifted-mask immediates are free
//   UnsignedFree - 16-bit unsigned immediates are free
//   ZeroFree     - a zero immediate is free
unsigned PPCTTI::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
                               Type *Ty) const {
  if (DisablePPCConstHoist)
    return TargetTransformInfo::getIntImmCost(Opcode, Idx, Imm, Ty);

  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  if (BitSize == 0)
    return ~0U; // Unsized integer type: report "infinite" cost.

  unsigned ImmIdx = ~0U;
  bool ShiftedFree = false, RunFree = false, UnsignedFree = false,
       ZeroFree = false;
  switch (Opcode) {
  default: return TCC_Free;
  case Instruction::GetElementPtr:
    // Always hoist the base address of a GetElementPtr. This prevents the
    // creation of new constants for every base constant that gets constant
    // folded with the offset.
    if (Idx == 0)
      return 2 * TCC_Basic;
    return TCC_Free;
  case Instruction::And:
    RunFree = true; // (for the rotate-and-mask instructions)
    // Fallthrough...
  case Instruction::Add:
  case Instruction::Or:
  case Instruction::Xor:
    ShiftedFree = true;
    // Fallthrough...
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    ImmIdx = 1;
    break;
  case Instruction::ICmp:
    UnsignedFree = true;
    ImmIdx = 1;
    // Fallthrough... (zero comparisons can use record-form instructions)
    // Note: deliberately falls into the Select case to also set ZeroFree.
  case Instruction::Select:
    ZeroFree = true;
    break;
  case Instruction::PHI:
  case Instruction::Call:
  case Instruction::Ret:
  case Instruction::Load:
  case Instruction::Store:
    break;
  }

  if (ZeroFree && Imm == 0)
    return TCC_Free;

  if (Idx == ImmIdx && Imm.getBitWidth() <= 64) {
    // Any 16-bit signed immediate encodes directly.
    if (isInt<16>(Imm.getSExtValue()))
      return TCC_Free;

    if (RunFree) {
      // Shifted masks (a contiguous run of ones, possibly complemented) are
      // free for And — see the rotate-and-mask note in the switch above.
      if (Imm.getBitWidth() <= 32 &&
          (isShiftedMask_32(Imm.getZExtValue()) ||
           isShiftedMask_32(~Imm.getZExtValue())))
        return TCC_Free;

      if (ST->isPPC64() &&
          (isShiftedMask_64(Imm.getZExtValue()) ||
           isShiftedMask_64(~Imm.getZExtValue())))
        return TCC_Free;
    }

    if (UnsignedFree && isUInt<16>(Imm.getZExtValue()))
      return TCC_Free;

    // Immediates with the low 16 bits clear (shifted immediates).
    if (ShiftedFree && (Imm.getZExtValue() & 0xFFFF) == 0)
      return TCC_Free;
  }

  // Not directly encodable: fall back to the materialization cost.
  return PPCTTI::getIntImmCost(Imm, Ty);
}

// Tune loop-unrolling preferences per subtarget.
void PPCTTI::getUnrollingPreferences(const Function *F, Loop *L,
                                     UnrollingPreferences &UP) const {
  if (TM->getSubtarget<PPCSubtarget>(F).getDarwinDirective() == PPC::DIR_A2) {
    // The A2 is in-order with a deep pipeline, and concatenation unrolling
    // helps expose latency-hiding opportunities to the instruction scheduler.
    UP.Partial = UP.Runtime = true;
  }
}

// Number of registers available to the vectorizer: no vector registers
// without Altivec; 64 when VSX is available, otherwise 32.
unsigned PPCTTI::getNumberOfRegisters(bool Vector) const {
  if (Vector && !ST->hasAltivec())
    return 0;
  return ST->hasVSX() ? 64 : 32;
}

// Register width in bits: 128-bit vectors with Altivec (0 without);
// 64-bit scalars on PPC64, 32-bit otherwise.
unsigned PPCTTI::getRegisterBitWidth(bool Vector) const {
  if (Vector) {
    if (ST->hasAltivec()) return 128;
    return 0;
  }

  if (ST->isPPC64())
    return 64;
  return 32;

}

// Interleave (unroll) factor suggested to the vectorizer, chosen per CPU
// directive to hide floating-point latency.
unsigned PPCTTI::getMaxInterleaveFactor() const {
  unsigned Directive = ST->getDarwinDirective();
  // The 440 has no SIMD support, but floating-point instructions
  // have a 5-cycle latency, so unroll by 5x for latency hiding.
  if (Directive == PPC::DIR_440)
    return 5;

  // The A2 has no SIMD support, but floating-point instructions
  // have a 6-cycle latency, so unroll by 6x for latency hiding.
  if (Directive == PPC::DIR_A2)
    return 6;

  // FIXME: For lack of any better information, do no harm...
  if (Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500)
    return 1;

  // For most things, modern systems have two execution units (and
  // out-of-order execution).
  return 2;
}

// Arithmetic instruction cost: PPC currently adds nothing beyond the default
// model; the assert only validates the opcode before delegating.
unsigned PPCTTI::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, OperandValueKind Op1Info,
    OperandValueKind Op2Info, OperandValueProperties Opd1PropInfo,
    OperandValueProperties Opd2PropInfo) const {
  assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");

  // Fallback to the default implementation.
  return TargetTransformInfo::getArithmeticInstrCost(
      Opcode, Ty, Op1Info, Op2Info, Opd1PropInfo, Opd2PropInfo);
}

// Shuffle cost: delegate to the default implementation.
unsigned PPCTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
                                Type *SubTp) const {
  return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
}

// Cast cost: validate the opcode, then delegate to the default model.
unsigned PPCTTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
  assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");

  return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
}

// Compare/select cost: delegate to the default implementation.
unsigned PPCTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                    Type *CondTy) const {
  return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
}

// Cost of inserting/extracting a single vector element.
unsigned PPCTTI::getVectorInstrCost(unsigned Opcode, Type *Val,
                                    unsigned Index) const {
  assert(Val->isVectorTy() && "This must be a vector type");

  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");

  if (ST->hasVSX() && Val->getScalarType()->isDoubleTy()) {
    // Double-precision scalars are already located in index #0.
    if (Index == 0)
      return 0;

    return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
  }

  // Estimated cost of a load-hit-store delay. This was obtained
  // experimentally as a minimum needed to prevent unprofitable
  // vectorization for the paq8p benchmark. It may need to be
  // raised further if other unprofitable cases remain.
  unsigned LHSPenalty = 2;
  if (ISD == ISD::INSERT_VECTOR_ELT)
    LHSPenalty += 7;

  // Vector element insert/extract with Altivec is very expensive,
  // because they require store and reload with the attendant
  // processor stall for load-hit-store. Until VSX is available,
  // these need to be estimated as very costly.
  if (ISD == ISD::EXTRACT_VECTOR_ELT ||
      ISD == ISD::INSERT_VECTOR_ELT)
    return LHSPenalty +
      TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);

  return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
}

// Cost of a load or store of type Src, accounting for PPC's handling of
// unaligned accesses.
unsigned PPCTTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                                 unsigned AddressSpace) const {
  // Legalize the type.
  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
  assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
         "Invalid Opcode");

  unsigned Cost =
    TargetTransformInfo::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);

  // VSX loads/stores support unaligned access.
  if (ST->hasVSX()) {
    if (LT.second == MVT::v2f64 || LT.second == MVT::v2i64)
      return Cost;
  }

  // A 128-bit Altivec vector *load* that is not split by legalization can
  // tolerate misalignment (handled via the permutation sequence noted below);
  // stores do not get this exemption.
  bool UnalignedAltivec =
    Src->isVectorTy() &&
    Src->getPrimitiveSizeInBits() >= LT.second.getSizeInBits() &&
    LT.second.getSizeInBits() == 128 &&
    Opcode == Instruction::Load;

  // PPC in general does not support unaligned loads and stores. They'll need
  // to be decomposed based on the alignment factor. One extra operation is
  // charged per Alignment-sized piece beyond the first.
  unsigned SrcBytes = LT.second.getStoreSize();
  if (SrcBytes && Alignment && Alignment < SrcBytes && !UnalignedAltivec) {
    Cost += LT.first*(SrcBytes/Alignment-1);

    // For a vector type, there is also scalarization overhead (only for
    // stores, loads are expanded using the vector-load + permutation sequence,
    // which is much less expensive).
    if (Src->isVectorTy() && Opcode == Instruction::Store)
      for (int i = 0, e = Src->getVectorNumElements(); i < e; ++i)
        Cost += getVectorInstrCost(Instruction::ExtractElement, Src, i);
  }

  return Cost;
}