//===- InstCombineCalls.cpp -----------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the visitCall and visitInvoke functions.
//
//===----------------------------------------------------------------------===//

#include "InstCombine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "instcombine"

STATISTIC(NumSimplified, "Number of library calls simplified");

/// getPromotedType - Return the specified type promoted as it would be to pass
/// through a va_arg area.
static Type *getPromotedType(Type *Ty) {
  if (IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
    if (ITy->getBitWidth() < 32)
      return Type::getInt32Ty(Ty->getContext());
  }
  return Ty;
}

/// reduceToSingleValueType - Given an aggregate type which ultimately holds a
/// single scalar element, like {{{type}}} or [1 x type], return type.
static Type *reduceToSingleValueType(Type *T) {
  while (!T->isSingleValueType()) {
    if (StructType *STy = dyn_cast<StructType>(T)) {
      if (STy->getNumElements() == 1)
        T = STy->getElementType(0);
      else
        break;
    } else if (ArrayType *ATy = dyn_cast<ArrayType>(T)) {
      if (ATy->getNumElements() == 1)
        T = ATy->getElementType();
      else
        break;
    } else
      break;
  }

  return T;
}

Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
  unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL, AT, MI, DT);
  unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL, AT, MI, DT);
  unsigned MinAlign = std::min(DstAlign, SrcAlign);
  unsigned CopyAlign = MI->getAlignment();

  if (CopyAlign < MinAlign) {
    MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
                                      MinAlign, false));
    return MI;
  }

  // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
  // load/store.
  ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getArgOperand(2));
  if (!MemOpLength) return nullptr;

  // Source and destination pointer types are always "i8*" for intrinsic. See
  // if the size is something we can handle with a single primitive load/store.
  // A single load+store correctly handles overlapping memory in the memmove
  // case.
  uint64_t Size = MemOpLength->getLimitedValue();
  assert(Size && "0-sized memory transferring should be removed already.");

  if (Size > 8 || (Size&(Size-1)))
    return nullptr;  // If not 1/2/4/8 bytes, exit.

  // Use an integer load+store unless we can find something better.
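  // Illustrative example (not part of the original source): a call such as
  //   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 8, i32 4, i1 false)
  // is a candidate here, since the constant length 8 is a power of two no
  // larger than 8; the code below rewrites it as a single integer (or
  // better-typed) load from %src followed by a store to %dst.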
  unsigned SrcAddrSp =
    cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace();
  unsigned DstAddrSp =
    cast<PointerType>(MI->getArgOperand(0)->getType())->getAddressSpace();

  IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
  Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp);
  Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp);

  // Memcpy forces the use of i8* for the source and destination. That means
  // that if you're using memcpy to move one double around, you'll get a cast
  // from double* to i8*. We'd much rather use a double load+store rather than
  // an i64 load+store, here because this improves the odds that the source or
  // dest address will be promotable. See if we can find a better type than the
  // integer datatype.
  Value *StrippedDest = MI->getArgOperand(0)->stripPointerCasts();
  MDNode *CopyMD = nullptr;
  if (StrippedDest != MI->getArgOperand(0)) {
    Type *SrcETy = cast<PointerType>(StrippedDest->getType())
                     ->getElementType();
    if (DL && SrcETy->isSized() && DL->getTypeStoreSize(SrcETy) == Size) {
      // The SrcETy might be something like {{{double}}} or [1 x double]. Rip
      // down through these levels if so.
      SrcETy = reduceToSingleValueType(SrcETy);

      if (SrcETy->isSingleValueType()) {
        NewSrcPtrTy = PointerType::get(SrcETy, SrcAddrSp);
        NewDstPtrTy = PointerType::get(SrcETy, DstAddrSp);

        // If the memcpy has metadata describing the members, see if we can
        // get the TBAA tag describing our copy.
        if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa_struct)) {
          if (M->getNumOperands() == 3 &&
              M->getOperand(0) &&
              isa<ConstantInt>(M->getOperand(0)) &&
              cast<ConstantInt>(M->getOperand(0))->isNullValue() &&
              M->getOperand(1) &&
              isa<ConstantInt>(M->getOperand(1)) &&
              cast<ConstantInt>(M->getOperand(1))->getValue() == Size &&
              M->getOperand(2) &&
              isa<MDNode>(M->getOperand(2)))
            CopyMD = cast<MDNode>(M->getOperand(2));
        }
      }
    }
  }

  // If the memcpy/memmove provides better alignment info than we can
  // infer, use it.
  SrcAlign = std::max(SrcAlign, CopyAlign);
  DstAlign = std::max(DstAlign, CopyAlign);

  Value *Src = Builder->CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy);
  Value *Dest = Builder->CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
  LoadInst *L = Builder->CreateLoad(Src, MI->isVolatile());
  L->setAlignment(SrcAlign);
  if (CopyMD)
    L->setMetadata(LLVMContext::MD_tbaa, CopyMD);
  StoreInst *S = Builder->CreateStore(L, Dest, MI->isVolatile());
  S->setAlignment(DstAlign);
  if (CopyMD)
    S->setMetadata(LLVMContext::MD_tbaa, CopyMD);

  // Set the size of the copy to 0, it will be deleted on the next iteration.
  MI->setArgOperand(2, Constant::getNullValue(MemOpLength->getType()));
  return MI;
}

Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
  unsigned Alignment = getKnownAlignment(MI->getDest(), DL, AT, MI, DT);
  if (MI->getAlignment() < Alignment) {
    MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
                                      Alignment, false));
    return MI;
  }

  // Extract the length and alignment and fill if they are constant.
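  // Illustrative example (not from the original source): for
  //   call void @llvm.memset.p0i8.i64(i8* %p, i8 1, i64 4, i32 4, i1 false)
  // the length (4) and fill byte (1) are both constants, so the code below can
  // replace the call with a single 'store i32 16843009, i32* %p' (0x01010101).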
  ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
  ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
  if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
    return nullptr;
  uint64_t Len = LenC->getLimitedValue();
  Alignment = MI->getAlignment();
  assert(Len && "0-sized memory setting should be removed already.");

  // memset(s,c,n) -> store s, c (for n=1,2,4,8)
  if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
    Type *ITy = IntegerType::get(MI->getContext(), Len*8);  // n=1 -> i8.

    Value *Dest = MI->getDest();
    unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace();
    Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp);
    Dest = Builder->CreateBitCast(Dest, NewDstPtrTy);

    // Alignment 0 is identity for alignment 1 for memset, but not store.
    if (Alignment == 0) Alignment = 1;

    // Extract the fill value and store.
    uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
    StoreInst *S = Builder->CreateStore(ConstantInt::get(ITy, Fill), Dest,
                                        MI->isVolatile());
    S->setAlignment(Alignment);

    // Set the size of the copy to 0, it will be deleted on the next iteration.
    MI->setLength(Constant::getNullValue(LenC->getType()));
    return MI;
  }

  return nullptr;
}

/// visitCallInst - CallInst simplification. This mostly only handles folding
/// of intrinsic instructions. For normal calls, it allows visitCallSite to do
/// the heavy lifting.
///
Instruction *InstCombiner::visitCallInst(CallInst &CI) {
  if (isFreeCall(&CI, TLI))
    return visitFree(CI);

  // If the caller function is nounwind, mark the call as nounwind, even if the
  // callee isn't.
  if (CI.getParent()->getParent()->doesNotThrow() &&
      !CI.doesNotThrow()) {
    CI.setDoesNotThrow();
    return &CI;
  }

  IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
  if (!II) return visitCallSite(&CI);

  // Intrinsics cannot occur in an invoke, so handle them here instead of in
  // visitCallSite.
  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(II)) {
    bool Changed = false;

    // memmove/cpy/set of zero bytes is a noop.
    if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
      if (NumBytes->isNullValue())
        return EraseInstFromFunction(CI);

      if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes))
        if (CI->getZExtValue() == 1) {
          // Replace the instruction with just byte operations. We would
          // transform other cases to loads/stores, but we don't know if
          // alignment is sufficient.
        }
    }

    // No other transformations apply to volatile transfers.
    if (MI->isVolatile())
      return nullptr;

    // If we have a memmove and the source operation is a constant global,
    // then the source and dest pointers can't alias, so we can change this
    // into a call to memcpy.
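    // For example (illustrative only): a memmove whose source is a constant
    // global such as @.str cannot legally overlap its writable destination,
    // so rewriting it as memcpy is safe and exposes the stronger no-overlap
    // guarantee to later passes.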
    if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(MI)) {
      if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
        if (GVSrc->isConstant()) {
          Module *M = CI.getParent()->getParent()->getParent();
          Intrinsic::ID MemCpyID = Intrinsic::memcpy;
          Type *Tys[3] = { CI.getArgOperand(0)->getType(),
                           CI.getArgOperand(1)->getType(),
                           CI.getArgOperand(2)->getType() };
          CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys));
          Changed = true;
        }
    }

    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
      // memmove(x,x,size) -> noop.
      if (MTI->getSource() == MTI->getDest())
        return EraseInstFromFunction(CI);
    }

    // If we can determine a pointer alignment that is bigger than currently
    // set, update the alignment.
    if (isa<MemTransferInst>(MI)) {
      if (Instruction *I = SimplifyMemTransfer(MI))
        return I;
    } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(MI)) {
      if (Instruction *I = SimplifyMemSet(MSI))
        return I;
    }

    if (Changed) return II;
  }

  switch (II->getIntrinsicID()) {
  default: break;
  case Intrinsic::objectsize: {
    uint64_t Size;
    if (getObjectSize(II->getArgOperand(0), Size, DL, TLI))
      return ReplaceInstUsesWith(CI, ConstantInt::get(CI.getType(), Size));
    return nullptr;
  }
  case Intrinsic::bswap: {
    Value *IIOperand = II->getArgOperand(0);
    Value *X = nullptr;

    // bswap(bswap(x)) -> x
    if (match(IIOperand, m_BSwap(m_Value(X))))
      return ReplaceInstUsesWith(CI, X);

    // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
    if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
      unsigned C = X->getType()->getPrimitiveSizeInBits() -
        IIOperand->getType()->getPrimitiveSizeInBits();
      Value *CV = ConstantInt::get(X->getType(), C);
      Value *V = Builder->CreateLShr(X, CV);
      return new TruncInst(V, IIOperand->getType());
    }
    break;
  }

  case Intrinsic::powi:
    if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
      // powi(x, 0) -> 1.0
      if (Power->isZero())
        return ReplaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0));
      // powi(x, 1) -> x
      if (Power->isOne())
        return ReplaceInstUsesWith(CI, II->getArgOperand(0));
      // powi(x, -1) -> 1/x
      if (Power->isAllOnesValue())
        return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0),
                                          II->getArgOperand(0));
    }
    break;
  case Intrinsic::cttz: {
    // If all bits below the first known one are known zero,
    // this value is constant.
    IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType());
    // FIXME: Try to simplify vectors of integers.
    if (!IT) break;
    uint32_t BitWidth = IT->getBitWidth();
    APInt KnownZero(BitWidth, 0);
    APInt KnownOne(BitWidth, 0);
    computeKnownBits(II->getArgOperand(0), KnownZero, KnownOne, 0, II);
    unsigned TrailingZeros = KnownOne.countTrailingZeros();
    APInt Mask(APInt::getLowBitsSet(BitWidth, TrailingZeros));
    if ((Mask & KnownZero) == Mask)
      return ReplaceInstUsesWith(CI, ConstantInt::get(IT,
                                 APInt(BitWidth, TrailingZeros)));

  }
  break;
  case Intrinsic::ctlz: {
    // If all bits above the first known one are known zero,
    // this value is constant.
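    // For instance (illustrative only): if known-bits analysis proves the two
    // highest bits of the operand are zero and the next bit is one, then ctlz
    // must return 2 regardless of the unknown low bits, so the whole call
    // folds to a constant below.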
    IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType());
    // FIXME: Try to simplify vectors of integers.
    if (!IT) break;
    uint32_t BitWidth = IT->getBitWidth();
    APInt KnownZero(BitWidth, 0);
    APInt KnownOne(BitWidth, 0);
    computeKnownBits(II->getArgOperand(0), KnownZero, KnownOne, 0, II);
    unsigned LeadingZeros = KnownOne.countLeadingZeros();
    APInt Mask(APInt::getHighBitsSet(BitWidth, LeadingZeros));
    if ((Mask & KnownZero) == Mask)
      return ReplaceInstUsesWith(CI, ConstantInt::get(IT,
                                 APInt(BitWidth, LeadingZeros)));

  }
  break;
  case Intrinsic::uadd_with_overflow: {
    Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
    IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType());
    uint32_t BitWidth = IT->getBitWidth();
    APInt LHSKnownZero(BitWidth, 0);
    APInt LHSKnownOne(BitWidth, 0);
    computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, II);
    bool LHSKnownNegative = LHSKnownOne[BitWidth - 1];
    bool LHSKnownPositive = LHSKnownZero[BitWidth - 1];

    if (LHSKnownNegative || LHSKnownPositive) {
      APInt RHSKnownZero(BitWidth, 0);
      APInt RHSKnownOne(BitWidth, 0);
      computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, II);
      bool RHSKnownNegative = RHSKnownOne[BitWidth - 1];
      bool RHSKnownPositive = RHSKnownZero[BitWidth - 1];
      if (LHSKnownNegative && RHSKnownNegative) {
        // The sign bit is set in both cases: this MUST overflow.
        // Create a simple add instruction, and insert it into the struct.
        Value *Add = Builder->CreateAdd(LHS, RHS);
        Add->takeName(&CI);
        Constant *V[] = {
          UndefValue::get(LHS->getType()),
          ConstantInt::getTrue(II->getContext())
        };
        StructType *ST = cast<StructType>(II->getType());
        Constant *Struct = ConstantStruct::get(ST, V);
        return InsertValueInst::Create(Struct, Add, 0);
      }

      if (LHSKnownPositive && RHSKnownPositive) {
        // The sign bit is clear in both cases: this CANNOT overflow.
        // Create a simple add instruction, and insert it into the struct.
        Value *Add = Builder->CreateNUWAdd(LHS, RHS);
        Add->takeName(&CI);
        Constant *V[] = {
          UndefValue::get(LHS->getType()),
          ConstantInt::getFalse(II->getContext())
        };
        StructType *ST = cast<StructType>(II->getType());
        Constant *Struct = ConstantStruct::get(ST, V);
        return InsertValueInst::Create(Struct, Add, 0);
      }
    }
  }
  // FALL THROUGH uadd into sadd
  case Intrinsic::sadd_with_overflow:
    // Canonicalize constants into the RHS.
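    // Example (illustrative only): llvm.sadd.with.overflow(i32 7, i32 %x) is
    // rewritten as llvm.sadd.with.overflow(i32 %x, i32 7) so that the
    // constant-RHS folds below apply.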
    if (isa<Constant>(II->getArgOperand(0)) &&
        !isa<Constant>(II->getArgOperand(1))) {
      Value *LHS = II->getArgOperand(0);
      II->setArgOperand(0, II->getArgOperand(1));
      II->setArgOperand(1, LHS);
      return II;
    }

    // X + undef -> undef
    if (isa<UndefValue>(II->getArgOperand(1)))
      return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));

    if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
      // X + 0 -> {X, false}
      if (RHS->isZero()) {
        Constant *V[] = {
          UndefValue::get(II->getArgOperand(0)->getType()),
          ConstantInt::getFalse(II->getContext())
        };
        Constant *Struct =
          ConstantStruct::get(cast<StructType>(II->getType()), V);
        return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
      }
    }

    // We can strength reduce this signed add into a regular add if we
    // can prove that it will never overflow.
    if (II->getIntrinsicID() == Intrinsic::sadd_with_overflow) {
      Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
      if (WillNotOverflowSignedAdd(LHS, RHS, II)) {
        Value *Add = Builder->CreateNSWAdd(LHS, RHS);
        Add->takeName(&CI);
        Constant *V[] = {UndefValue::get(Add->getType()), Builder->getFalse()};
        StructType *ST = cast<StructType>(II->getType());
        Constant *Struct = ConstantStruct::get(ST, V);
        return InsertValueInst::Create(Struct, Add, 0);
      }
    }

    break;
  case Intrinsic::usub_with_overflow:
  case Intrinsic::ssub_with_overflow:
    // undef - X -> undef
    // X - undef -> undef
    if (isa<UndefValue>(II->getArgOperand(0)) ||
        isa<UndefValue>(II->getArgOperand(1)))
      return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));

    if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
      // X - 0 -> {X, false}
      if (RHS->isZero()) {
        Constant *V[] = {
          UndefValue::get(II->getArgOperand(0)->getType()),
          ConstantInt::getFalse(II->getContext())
        };
        Constant *Struct =
          ConstantStruct::get(cast<StructType>(II->getType()), V);
        return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
      }
    }
    break;
  case Intrinsic::umul_with_overflow: {
    Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
    unsigned BitWidth = cast<IntegerType>(LHS->getType())->getBitWidth();

    APInt LHSKnownZero(BitWidth, 0);
    APInt LHSKnownOne(BitWidth, 0);
    computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, II);
    APInt RHSKnownZero(BitWidth, 0);
    APInt RHSKnownOne(BitWidth, 0);
    computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, II);

    // Get the largest possible values for each operand.
    APInt LHSMax = ~LHSKnownZero;
    APInt RHSMax = ~RHSKnownZero;

    // If multiplying the maximum values does not overflow then we can turn
    // this into a plain NUW mul.
    bool Overflow;
    LHSMax.umul_ov(RHSMax, Overflow);
    if (!Overflow) {
      Value *Mul = Builder->CreateNUWMul(LHS, RHS, "umul_with_overflow");
      Constant *V[] = {
        UndefValue::get(LHS->getType()),
        Builder->getFalse()
      };
      Constant *Struct = ConstantStruct::get(cast<StructType>(II->getType()),V);
      return InsertValueInst::Create(Struct, Mul, 0);
    }
  } // FALL THROUGH
  case Intrinsic::smul_with_overflow:
    // Canonicalize constants into the RHS.
    if (isa<Constant>(II->getArgOperand(0)) &&
        !isa<Constant>(II->getArgOperand(1))) {
      Value *LHS = II->getArgOperand(0);
      II->setArgOperand(0, II->getArgOperand(1));
      II->setArgOperand(1, LHS);
      return II;
    }

    // X * undef -> undef
    if (isa<UndefValue>(II->getArgOperand(1)))
      return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));

    if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
      // X*0 -> {0, false}
      if (RHSI->isZero())
        return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType()));

      // X * 1 -> {X, false}
      if (RHSI->equalsInt(1)) {
        Constant *V[] = {
          UndefValue::get(II->getArgOperand(0)->getType()),
          ConstantInt::getFalse(II->getContext())
        };
        Constant *Struct =
          ConstantStruct::get(cast<StructType>(II->getType()), V);
        return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
      }
    }
    break;
  case Intrinsic::ppc_altivec_lvx:
  case Intrinsic::ppc_altivec_lvxl:
    // Turn PPC lvx -> load if the pointer is known aligned.
    if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16,
                                   DL, AT, II, DT) >= 16) {
      Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
                                          PointerType::getUnqual(II->getType()));
      return new LoadInst(Ptr);
    }
    break;
  case Intrinsic::ppc_altivec_stvx:
  case Intrinsic::ppc_altivec_stvxl:
    // Turn stvx -> store if the pointer is known aligned.
    if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16,
                                   DL, AT, II, DT) >= 16) {
      Type *OpPtrTy =
        PointerType::getUnqual(II->getArgOperand(0)->getType());
      Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
      return new StoreInst(II->getArgOperand(0), Ptr);
    }
    break;
  case Intrinsic::x86_sse_storeu_ps:
  case Intrinsic::x86_sse2_storeu_pd:
  case Intrinsic::x86_sse2_storeu_dq:
    // Turn X86 storeu -> store if the pointer is known aligned.
    if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16,
                                   DL, AT, II, DT) >= 16) {
      Type *OpPtrTy =
        PointerType::getUnqual(II->getArgOperand(1)->getType());
      Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), OpPtrTy);
      return new StoreInst(II->getArgOperand(1), Ptr);
    }
    break;

  case Intrinsic::x86_sse_cvtss2si:
  case Intrinsic::x86_sse_cvtss2si64:
  case Intrinsic::x86_sse_cvttss2si:
  case Intrinsic::x86_sse_cvttss2si64:
  case Intrinsic::x86_sse2_cvtsd2si:
  case Intrinsic::x86_sse2_cvtsd2si64:
  case Intrinsic::x86_sse2_cvttsd2si:
  case Intrinsic::x86_sse2_cvttsd2si64: {
    // These intrinsics only demand the 0th element of their input vectors. If
    // we can simplify the input based on that, do so now.
    unsigned VWidth =
      cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements();
    APInt DemandedElts(VWidth, 1);
    APInt UndefElts(VWidth, 0);
    if (Value *V = SimplifyDemandedVectorElts(II->getArgOperand(0),
                                              DemandedElts, UndefElts)) {
      II->setArgOperand(0, V);
      return II;
    }
    break;
  }

  // Constant fold <A x Bi> << Ci.
  // FIXME: We don't handle _dq because it's a shift of an i128, but is
  // represented in the IR as <2 x i64>. A per element shift is wrong.
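  // Illustrative example (not from the original source): with a constant
  // count, e.g.
  //   %r = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 5)
  // the code below produces 'lshr <4 x i32> %v, <i32 5, i32 5, i32 5, i32 5>',
  // and any count of 32 or more folds the result to zeroinitializer.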
  case Intrinsic::x86_sse2_psll_d:
  case Intrinsic::x86_sse2_psll_q:
  case Intrinsic::x86_sse2_psll_w:
  case Intrinsic::x86_sse2_pslli_d:
  case Intrinsic::x86_sse2_pslli_q:
  case Intrinsic::x86_sse2_pslli_w:
  case Intrinsic::x86_avx2_psll_d:
  case Intrinsic::x86_avx2_psll_q:
  case Intrinsic::x86_avx2_psll_w:
  case Intrinsic::x86_avx2_pslli_d:
  case Intrinsic::x86_avx2_pslli_q:
  case Intrinsic::x86_avx2_pslli_w:
  case Intrinsic::x86_sse2_psrl_d:
  case Intrinsic::x86_sse2_psrl_q:
  case Intrinsic::x86_sse2_psrl_w:
  case Intrinsic::x86_sse2_psrli_d:
  case Intrinsic::x86_sse2_psrli_q:
  case Intrinsic::x86_sse2_psrli_w:
  case Intrinsic::x86_avx2_psrl_d:
  case Intrinsic::x86_avx2_psrl_q:
  case Intrinsic::x86_avx2_psrl_w:
  case Intrinsic::x86_avx2_psrli_d:
  case Intrinsic::x86_avx2_psrli_q:
  case Intrinsic::x86_avx2_psrli_w: {
    // Simplify if count is constant. To 0 if >= BitWidth,
    // otherwise to shl/lshr.
    auto CDV = dyn_cast<ConstantDataVector>(II->getArgOperand(1));
    auto CInt = dyn_cast<ConstantInt>(II->getArgOperand(1));
    if (!CDV && !CInt)
      break;
    ConstantInt *Count;
    if (CDV)
      Count = cast<ConstantInt>(CDV->getElementAsConstant(0));
    else
      Count = CInt;

    auto Vec = II->getArgOperand(0);
    auto VT = cast<VectorType>(Vec->getType());
    if (Count->getZExtValue() >
        VT->getElementType()->getPrimitiveSizeInBits() - 1)
      return ReplaceInstUsesWith(
          CI, ConstantAggregateZero::get(Vec->getType()));

    bool isPackedShiftLeft = true;
    switch (II->getIntrinsicID()) {
    default: break;
    case Intrinsic::x86_sse2_psrl_d:
    case Intrinsic::x86_sse2_psrl_q:
    case Intrinsic::x86_sse2_psrl_w:
    case Intrinsic::x86_sse2_psrli_d:
    case Intrinsic::x86_sse2_psrli_q:
    case Intrinsic::x86_sse2_psrli_w:
    case Intrinsic::x86_avx2_psrl_d:
    case Intrinsic::x86_avx2_psrl_q:
    case Intrinsic::x86_avx2_psrl_w:
    case Intrinsic::x86_avx2_psrli_d:
    case Intrinsic::x86_avx2_psrli_q:
    case Intrinsic::x86_avx2_psrli_w: isPackedShiftLeft = false; break;
    }

    unsigned VWidth = VT->getNumElements();
    // Get a constant vector of the same type as the first operand.
    auto VTCI = ConstantInt::get(VT->getElementType(), Count->getZExtValue());
    if (isPackedShiftLeft)
      return BinaryOperator::CreateShl(Vec,
          Builder->CreateVectorSplat(VWidth, VTCI));

    return BinaryOperator::CreateLShr(Vec,
        Builder->CreateVectorSplat(VWidth, VTCI));
  }

  case Intrinsic::x86_sse41_pmovsxbw:
  case Intrinsic::x86_sse41_pmovsxwd:
  case Intrinsic::x86_sse41_pmovsxdq:
  case Intrinsic::x86_sse41_pmovzxbw:
  case Intrinsic::x86_sse41_pmovzxwd:
  case Intrinsic::x86_sse41_pmovzxdq: {
    // pmov{s|z}x ignores the upper half of their input vectors.
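    // Example (illustrative only): pmovzxbw zero-extends the low 8 bytes of a
    // <16 x i8> input into an <8 x i16> result, so only elements 0-7 of the
    // input are demanded; anything feeding the upper half can be simplified
    // away below.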
    unsigned VWidth =
      cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements();
    unsigned LowHalfElts = VWidth / 2;
    APInt InputDemandedElts(APInt::getBitsSet(VWidth, 0, LowHalfElts));
    APInt UndefElts(VWidth, 0);
    if (Value *TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0),
                                                 InputDemandedElts,
                                                 UndefElts)) {
      II->setArgOperand(0, TmpV);
      return II;
    }
    break;
  }

  case Intrinsic::x86_sse4a_insertqi: {
    // insertqi x, y, 64, 0 can just copy y's lower bits and leave the top
    // ones undef
    // TODO: eventually we should lower this intrinsic to IR
    if (auto CIWidth = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
      if (auto CIStart = dyn_cast<ConstantInt>(II->getArgOperand(3))) {
        if (CIWidth->equalsInt(64) && CIStart->isZero()) {
          Value *Vec = II->getArgOperand(1);
          Value *Undef = UndefValue::get(Vec->getType());
          const uint32_t Mask[] = { 0, 2 };
          return ReplaceInstUsesWith(
              CI,
              Builder->CreateShuffleVector(
                  Vec, Undef, ConstantDataVector::get(
                                  II->getContext(), makeArrayRef(Mask))));

        } else if (auto Source =
                       dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
          if (Source->hasOneUse() &&
              Source->getArgOperand(1) == II->getArgOperand(1)) {
            // If the source of the insert has only one use and it's another
            // insert (and they're both inserting from the same vector), try to
            // bundle both together.
            auto CISourceWidth =
                dyn_cast<ConstantInt>(Source->getArgOperand(2));
            auto CISourceStart =
                dyn_cast<ConstantInt>(Source->getArgOperand(3));
            if (CISourceStart && CISourceWidth) {
              unsigned Start = CIStart->getZExtValue();
              unsigned Width = CIWidth->getZExtValue();
              unsigned End = Start + Width;
              unsigned SourceStart = CISourceStart->getZExtValue();
              unsigned SourceWidth = CISourceWidth->getZExtValue();
              unsigned SourceEnd = SourceStart + SourceWidth;
              unsigned NewStart, NewWidth;
              bool ShouldReplace = false;
              if (Start <= SourceStart && SourceStart <= End) {
                NewStart = Start;
                NewWidth = std::max(End, SourceEnd) - NewStart;
                ShouldReplace = true;
              } else if (SourceStart <= Start && Start <= SourceEnd) {
                NewStart = SourceStart;
                NewWidth = std::max(SourceEnd, End) - NewStart;
                ShouldReplace = true;
              }

              if (ShouldReplace) {
                Constant *ConstantWidth = ConstantInt::get(
                    II->getArgOperand(2)->getType(), NewWidth, false);
                Constant *ConstantStart = ConstantInt::get(
                    II->getArgOperand(3)->getType(), NewStart, false);
                Value *Args[4] = { Source->getArgOperand(0),
                                   II->getArgOperand(1), ConstantWidth,
                                   ConstantStart };
                Module *M = CI.getParent()->getParent()->getParent();
                Value *F =
                    Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_insertqi);
                return ReplaceInstUsesWith(CI, Builder->CreateCall(F, Args));
              }
            }
          }
        }
      }
    }
    break;
  }

  case Intrinsic::x86_sse41_pblendvb:
  case Intrinsic::x86_sse41_blendvps:
  case Intrinsic::x86_sse41_blendvpd:
  case Intrinsic::x86_avx_blendv_ps_256:
  case Intrinsic::x86_avx_blendv_pd_256:
  case Intrinsic::x86_avx2_pblendvb: {
    // Convert blendv* to vector selects if the mask is constant.
    // This optimization is convoluted because the intrinsic is defined as
    // getting a vector of floats or doubles for the ps and pd versions.
    // FIXME: That should be changed.
    Value *Mask = II->getArgOperand(2);
    if (auto C = dyn_cast<ConstantDataVector>(Mask)) {
      auto Tyi1 = Builder->getInt1Ty();
      auto SelectorType = cast<VectorType>(Mask->getType());
      auto EltTy = SelectorType->getElementType();
      unsigned Size = SelectorType->getNumElements();
      unsigned BitWidth =
          EltTy->isFloatTy()
              ? 32
              : (EltTy->isDoubleTy() ? 64 : EltTy->getIntegerBitWidth());
      assert((BitWidth == 64 || BitWidth == 32 || BitWidth == 8) &&
             "Wrong arguments for variable blend intrinsic");
      SmallVector<Constant *, 32> Selectors;
      for (unsigned I = 0; I < Size; ++I) {
        // The intrinsics only read the top bit
        uint64_t Selector;
        if (BitWidth == 8)
          Selector = C->getElementAsInteger(I);
        else
          Selector = C->getElementAsAPFloat(I).bitcastToAPInt().getZExtValue();
        Selectors.push_back(ConstantInt::get(Tyi1, Selector >> (BitWidth - 1)));
      }
      auto NewSelector = ConstantVector::get(Selectors);
      return SelectInst::Create(NewSelector, II->getArgOperand(1),
                                II->getArgOperand(0), "blendv");
    } else {
      break;
    }
  }

  case Intrinsic::x86_avx_vpermilvar_ps:
  case Intrinsic::x86_avx_vpermilvar_ps_256:
  case Intrinsic::x86_avx_vpermilvar_pd:
  case Intrinsic::x86_avx_vpermilvar_pd_256: {
    // Convert vpermil* to shufflevector if the mask is constant.
    Value *V = II->getArgOperand(1);
    unsigned Size = cast<VectorType>(V->getType())->getNumElements();
    assert(Size == 8 || Size == 4 || Size == 2);
    uint32_t Indexes[8];
    if (auto C = dyn_cast<ConstantDataVector>(V)) {
      // The intrinsics only read one or two bits, clear the rest.
      for (unsigned I = 0; I < Size; ++I) {
        uint32_t Index = C->getElementAsInteger(I) & 0x3;
        if (II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd ||
            II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd_256)
          Index >>= 1;
        Indexes[I] = Index;
      }
    } else if (isa<ConstantAggregateZero>(V)) {
      for (unsigned I = 0; I < Size; ++I)
        Indexes[I] = 0;
    } else {
      break;
    }
    // The _256 variants are a bit trickier since the mask bits always index
    // into the corresponding 128 half. In order to convert to a generic
    // shuffle, we have to make that explicit.
    if (II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_ps_256 ||
        II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd_256) {
      for (unsigned I = Size / 2; I < Size; ++I)
        Indexes[I] += Size / 2;
    }
    auto NewC =
        ConstantDataVector::get(V->getContext(), makeArrayRef(Indexes, Size));
    auto V1 = II->getArgOperand(0);
    auto V2 = UndefValue::get(V1->getType());
    auto Shuffle = Builder->CreateShuffleVector(V1, V2, NewC);
    return ReplaceInstUsesWith(CI, Shuffle);
  }

  case Intrinsic::ppc_altivec_vperm:
    // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
    // Note that ppc_altivec_vperm has a big-endian bias, so when creating
    // a vectorshuffle for little endian, we must undo the transformation
    // performed on vec_perm in altivec.h. That is, we must complement
    // the permutation mask with respect to 31 and reverse the order of
    // V1 and V2.
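    // Illustrative example (not from the original source): on a little-endian
    // target a constant mask element of 3 is complemented to 28 (31 - 3), and
    // the roles of the two input vectors are swapped, so the equivalent
    // shufflevector reads the byte from the other operand than the mask
    // literally names.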
    if (Constant *Mask = dyn_cast<Constant>(II->getArgOperand(2))) {
      assert(Mask->getType()->getVectorNumElements() == 16 &&
             "Bad type for intrinsic!");

      // Check that all of the elements are integer constants or undefs.
      bool AllEltsOk = true;
      for (unsigned i = 0; i != 16; ++i) {
        Constant *Elt = Mask->getAggregateElement(i);
        if (!Elt || !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) {
          AllEltsOk = false;
          break;
        }
      }

      if (AllEltsOk) {
        // Cast the input vectors to byte vectors.
        Value *Op0 = Builder->CreateBitCast(II->getArgOperand(0),
                                            Mask->getType());
        Value *Op1 = Builder->CreateBitCast(II->getArgOperand(1),
                                            Mask->getType());
        Value *Result = UndefValue::get(Op0->getType());

        // Only extract each element once.
        Value *ExtractedElts[32];
        memset(ExtractedElts, 0, sizeof(ExtractedElts));

        for (unsigned i = 0; i != 16; ++i) {
          if (isa<UndefValue>(Mask->getAggregateElement(i)))
            continue;
          unsigned Idx =
            cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue();
          Idx &= 31;  // Match the hardware behavior.
          if (DL && DL->isLittleEndian())
            Idx = 31 - Idx;

          if (!ExtractedElts[Idx]) {
            Value *Op0ToUse = (DL && DL->isLittleEndian()) ? Op1 : Op0;
            Value *Op1ToUse = (DL && DL->isLittleEndian()) ? Op0 : Op1;
            ExtractedElts[Idx] =
              Builder->CreateExtractElement(Idx < 16 ? Op0ToUse : Op1ToUse,
                                            Builder->getInt32(Idx&15));
          }

          // Insert this value into the result vector.
          Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx],
                                                Builder->getInt32(i));
        }
        return CastInst::Create(Instruction::BitCast, Result, CI.getType());
      }
    }
    break;

  case Intrinsic::arm_neon_vld1:
  case Intrinsic::arm_neon_vld2:
  case Intrinsic::arm_neon_vld3:
  case Intrinsic::arm_neon_vld4:
  case Intrinsic::arm_neon_vld2lane:
  case Intrinsic::arm_neon_vld3lane:
  case Intrinsic::arm_neon_vld4lane:
  case Intrinsic::arm_neon_vst1:
  case Intrinsic::arm_neon_vst2:
  case Intrinsic::arm_neon_vst3:
  case Intrinsic::arm_neon_vst4:
  case Intrinsic::arm_neon_vst2lane:
  case Intrinsic::arm_neon_vst3lane:
  case Intrinsic::arm_neon_vst4lane: {
    unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), DL, AT, II, DT);
    unsigned AlignArg = II->getNumArgOperands() - 1;
    ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg));
    if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) {
      II->setArgOperand(AlignArg,
                        ConstantInt::get(Type::getInt32Ty(II->getContext()),
                                         MemAlign, false));
      return II;
    }
    break;
  }

  case Intrinsic::arm_neon_vmulls:
  case Intrinsic::arm_neon_vmullu:
  case Intrinsic::aarch64_neon_smull:
  case Intrinsic::aarch64_neon_umull: {
    Value *Arg0 = II->getArgOperand(0);
    Value *Arg1 = II->getArgOperand(1);

    // Handle mul by zero first:
    if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1)) {
      return ReplaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType()));
    }

    // Check for constant LHS & RHS - in this case we just simplify.
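    // Illustrative note (not from the original source): vmull-style intrinsics
    // widen both operands before multiplying, so when both operands are
    // constants the fold below extends them to the wider result type
    // (zero-extended for the unsigned variants, sign-extended otherwise) and
    // multiplies them as ordinary constants.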
    bool Zext = (II->getIntrinsicID() == Intrinsic::arm_neon_vmullu ||
                 II->getIntrinsicID() == Intrinsic::aarch64_neon_umull);
    VectorType *NewVT = cast<VectorType>(II->getType());
    if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
      if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
        CV0 = ConstantExpr::getIntegerCast(CV0, NewVT, /*isSigned=*/!Zext);
        CV1 = ConstantExpr::getIntegerCast(CV1, NewVT, /*isSigned=*/!Zext);

        return ReplaceInstUsesWith(CI, ConstantExpr::getMul(CV0, CV1));
      }

      // Couldn't simplify - canonicalize constant to the RHS.
      std::swap(Arg0, Arg1);
    }

    // Handle mul by one:
    if (Constant *CV1 = dyn_cast<Constant>(Arg1))
      if (ConstantInt *Splat =
              dyn_cast_or_null<ConstantInt>(CV1->getSplatValue()))
        if (Splat->isOne())
          return CastInst::CreateIntegerCast(Arg0, II->getType(),
                                             /*isSigned=*/!Zext);

    break;
  }

  case Intrinsic::AMDGPU_rcp: {
    if (const ConstantFP *C = dyn_cast<ConstantFP>(II->getArgOperand(0))) {
      const APFloat &ArgVal = C->getValueAPF();
      APFloat Val(ArgVal.getSemantics(), 1.0);
      APFloat::opStatus Status = Val.divide(ArgVal,
                                            APFloat::rmNearestTiesToEven);
      // Only do this if it was exact and therefore not dependent on the
      // rounding mode.
      if (Status == APFloat::opOK)
        return ReplaceInstUsesWith(CI, ConstantFP::get(II->getContext(), Val));
    }

    break;
  }
  case Intrinsic::stackrestore: {
    // If the save is right next to the restore, remove the restore. This can
    // happen when variable allocas are DCE'd.
    if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
      if (SS->getIntrinsicID() == Intrinsic::stacksave) {
        BasicBlock::iterator BI = SS;
        if (&*++BI == II)
          return EraseInstFromFunction(CI);
      }
    }

    // Scan down this block to see if there is another stack restore in the
    // same block without an intervening call/alloca.
    BasicBlock::iterator BI = II;
    TerminatorInst *TI = II->getParent()->getTerminator();
    bool CannotRemove = false;
    for (++BI; &*BI != TI; ++BI) {
      if (isa<AllocaInst>(BI)) {
        CannotRemove = true;
        break;
      }
      if (CallInst *BCI = dyn_cast<CallInst>(BI)) {
        if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BCI)) {
          // If there is a stackrestore below this one, remove this one.
          if (II->getIntrinsicID() == Intrinsic::stackrestore)
            return EraseInstFromFunction(CI);
          // Otherwise, ignore the intrinsic.
        } else {
          // If we found a non-intrinsic call, we can't remove the stack
          // restore.
          CannotRemove = true;
          break;
        }
      }
    }

    // If the stack restore is in a return, resume, or unwind block and if there
    // are no allocas or calls between the restore and the return, nuke the
    // restore.
    if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI)))
      return EraseInstFromFunction(CI);
    break;
  }
  case Intrinsic::assume: {
    // Canonicalize assume(a && b) -> assume(a); assume(b);
    // Note: New assumption intrinsics created here are registered by
    // the InstCombineIRInserter object.
    Value *IIOperand = II->getArgOperand(0), *A, *B,
          *AssumeIntrinsic = II->getCalledValue();
    if (match(IIOperand, m_And(m_Value(A), m_Value(B)))) {
      Builder->CreateCall(AssumeIntrinsic, A, II->getName());
      Builder->CreateCall(AssumeIntrinsic, B, II->getName());
      return EraseInstFromFunction(*II);
    }
    // assume(!(a || b)) -> assume(!a); assume(!b);
    if (match(IIOperand, m_Not(m_Or(m_Value(A), m_Value(B))))) {
      Builder->CreateCall(AssumeIntrinsic, Builder->CreateNot(A),
                          II->getName());
      Builder->CreateCall(AssumeIntrinsic, Builder->CreateNot(B),
                          II->getName());
      return EraseInstFromFunction(*II);
    }
    break;
  }
  }

  return visitCallSite(II);
}

// InvokeInst simplification
//
Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
  return visitCallSite(&II);
}

/// isSafeToEliminateVarargsCast - If this cast does not affect the value
/// passed through the varargs area, we can eliminate the use of the cast.
static bool isSafeToEliminateVarargsCast(const CallSite CS,
                                         const CastInst * const CI,
                                         const DataLayout * const DL,
                                         const int ix) {
  if (!CI->isLosslessCast())
    return false;

  // The size of ByVal or InAlloca arguments is derived from the type, so we
  // can't change to a type with a different size. If the size were
  // passed explicitly we could avoid this check.
  if (!CS.isByValOrInAllocaArgument(ix))
    return true;

  Type* SrcTy =
    cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
  Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
  if (!SrcTy->isSized() || !DstTy->isSized())
    return false;
  if (!DL || DL->getTypeAllocSize(SrcTy) != DL->getTypeAllocSize(DstTy))
    return false;
  return true;
}

// Try to fold some different type of calls here.
// Currently we're only working with the checking functions, memcpy_chk,
// mempcpy_chk, memmove_chk, memset_chk, strcpy_chk, stpcpy_chk, strncpy_chk,
// strcat_chk and strncat_chk.
Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const DataLayout *DL) {
  if (!CI->getCalledFunction()) return nullptr;

  if (Value *With = Simplifier->optimizeCall(CI)) {
    ++NumSimplified;
    return CI->use_empty() ? CI : ReplaceInstUsesWith(*CI, With);
  }

  return nullptr;
}

static IntrinsicInst *FindInitTrampolineFromAlloca(Value *TrampMem) {
  // Strip off at most one level of pointer casts, looking for an alloca. This
  // is good enough in practice and simpler than handling any number of casts.
  Value *Underlying = TrampMem->stripPointerCasts();
  if (Underlying != TrampMem &&
      (!Underlying->hasOneUse() || Underlying->user_back() != TrampMem))
    return nullptr;
  if (!isa<AllocaInst>(Underlying))
    return nullptr;

  IntrinsicInst *InitTrampoline = nullptr;
  for (User *U : TrampMem->users()) {
    IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
    if (!II)
      return nullptr;
    if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
      if (InitTrampoline)
        // More than one init_trampoline writes to this value. Give up.
        return nullptr;
      InitTrampoline = II;
      continue;
    }
    if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
      // Allow any number of calls to adjust.trampoline.
      continue;
    return nullptr;
  }

  // No call to init.trampoline found.
  if (!InitTrampoline)
    return nullptr;

  // Check that the alloca is being used in the expected way.
  if (InitTrampoline->getOperand(0) != TrampMem)
    return nullptr;

  return InitTrampoline;
}

static IntrinsicInst *FindInitTrampolineFromBB(IntrinsicInst *AdjustTramp,
                                               Value *TrampMem) {
  // Visit all the previous instructions in the basic block, and try to find an
  // init.trampoline which has a direct path to the adjust.trampoline.
  for (BasicBlock::iterator I = AdjustTramp,
       E = AdjustTramp->getParent()->begin(); I != E; ) {
    Instruction *Inst = --I;
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
      if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
          II->getOperand(0) == TrampMem)
        return II;
    if (Inst->mayWriteToMemory())
      return nullptr;
  }
  return nullptr;
}

// Given a call to llvm.adjust.trampoline, find and return the corresponding
// call to llvm.init.trampoline if the call to the trampoline can be optimized
// to a direct call to a function. Otherwise return NULL.
//
static IntrinsicInst *FindInitTrampoline(Value *Callee) {
  Callee = Callee->stripPointerCasts();
  IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee);
  if (!AdjustTramp ||
      AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
    return nullptr;

  Value *TrampMem = AdjustTramp->getOperand(0);

  if (IntrinsicInst *IT = FindInitTrampolineFromAlloca(TrampMem))
    return IT;
  if (IntrinsicInst *IT = FindInitTrampolineFromBB(AdjustTramp, TrampMem))
    return IT;
  return nullptr;
}

// visitCallSite - Improvements for call and invoke instructions.
//
Instruction *InstCombiner::visitCallSite(CallSite CS) {
  if (isAllocLikeFn(CS.getInstruction(), TLI))
    return visitAllocSite(*CS.getInstruction());

  bool Changed = false;

  // If the callee is a pointer to a function, attempt to move any casts to the
  // arguments of the call/invoke.
  Value *Callee = CS.getCalledValue();
  if (!isa<Function>(Callee) && transformConstExprCastCall(CS))
    return nullptr;

  if (Function *CalleeF = dyn_cast<Function>(Callee))
    // If the call and callee calling conventions don't match, this call must
    // be unreachable, as the call is undefined.
    if (CalleeF->getCallingConv() != CS.getCallingConv() &&
        // Only do this for calls to a function with a body. A prototype may
        // not actually end up matching the implementation's calling conv for a
        // variety of reasons (e.g. it may be written in assembly).
        !CalleeF->isDeclaration()) {
      Instruction *OldCall = CS.getInstruction();
      new StoreInst(ConstantInt::getTrue(Callee->getContext()),
                    UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
                    OldCall);
      // If OldCall does not return void then replaceAllUsesWith undef.
      // This allows ValueHandlers and custom metadata to adjust itself.
      if (!OldCall->getType()->isVoidTy())
        ReplaceInstUsesWith(*OldCall, UndefValue::get(OldCall->getType()));
      if (isa<CallInst>(OldCall))
        return EraseInstFromFunction(*OldCall);

      // We cannot remove an invoke, because it would change the CFG, just
      // change the callee to a null pointer.
      cast<InvokeInst>(OldCall)->setCalledFunction(
                                     Constant::getNullValue(CalleeF->getType()));
      return nullptr;
    }

  if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
    // If CS does not return void then replaceAllUsesWith undef.
    // This allows ValueHandlers and custom metadata to adjust itself.
    if (!CS.getInstruction()->getType()->isVoidTy())
      ReplaceInstUsesWith(*CS.getInstruction(),
                          UndefValue::get(CS.getInstruction()->getType()));

    if (isa<InvokeInst>(CS.getInstruction())) {
      // Can't remove an invoke because we cannot change the CFG.
      return nullptr;
    }

    // This instruction is not reachable, just remove it. We insert a store to
    // undef so that we know that this code is not reachable, despite the fact
    // that we can't modify the CFG here.
    new StoreInst(ConstantInt::getTrue(Callee->getContext()),
                  UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
                  CS.getInstruction());

    return EraseInstFromFunction(*CS.getInstruction());
  }

  if (IntrinsicInst *II = FindInitTrampoline(Callee))
    return transformCallThroughTrampoline(CS, II);

  PointerType *PTy = cast<PointerType>(Callee->getType());
  FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
  if (FTy->isVarArg()) {
    int ix = FTy->getNumParams();
    // See if we can optimize any arguments passed through the varargs area of
    // the call.
    for (CallSite::arg_iterator I = CS.arg_begin() + FTy->getNumParams(),
           E = CS.arg_end(); I != E; ++I, ++ix) {
      CastInst *CI = dyn_cast<CastInst>(*I);
      if (CI && isSafeToEliminateVarargsCast(CS, CI, DL, ix)) {
        *I = CI->getOperand(0);
        Changed = true;
      }
    }
  }

  if (isa<InlineAsm>(Callee) && !CS.doesNotThrow()) {
    // Inline asm calls cannot throw - mark them 'nounwind'.
    CS.setDoesNotThrow();
    Changed = true;
  }

  // Try to optimize the call if possible, we require DataLayout for most of
  // this. None of these calls are seen as possibly dead so go ahead and
  // delete the instruction now.
  if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) {
    Instruction *I = tryOptimizeCall(CI, DL);
    // If we changed something return the result, etc. Otherwise let
    // the fallthrough check.
    if (I) return EraseInstFromFunction(*I);
  }

  return Changed ? CS.getInstruction() : nullptr;
}

// transformConstExprCastCall - If the callee is a constexpr cast of a function,
// attempt to move the cast to the arguments of the call/invoke.
//
bool InstCombiner::transformConstExprCastCall(CallSite CS) {
  Function *Callee =
    dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
  if (!Callee)
    return false;
  Instruction *Caller = CS.getInstruction();
  const AttributeSet &CallerPAL = CS.getAttributes();

  // Okay, this is a cast from a function to a different type. Unless doing so
  // would cause a type conversion of one of our arguments, change this call to
  // be a direct call with arguments casted to the appropriate types.
  //
  FunctionType *FT = Callee->getFunctionType();
  Type *OldRetTy = Caller->getType();
  Type *NewRetTy = FT->getReturnType();

  // Check to see if we are changing the return type...
  if (OldRetTy != NewRetTy) {

    if (NewRetTy->isStructTy())
      return false; // TODO: Handle multiple return values.

    if (!CastInst::isBitCastable(NewRetTy, OldRetTy)) {
      if (Callee->isDeclaration())
        return false;   // Cannot transform this return value.

      if (!Caller->use_empty() &&
          // void -> non-void is handled specially
          !NewRetTy->isVoidTy())
        return false;   // Cannot transform this return value.
    }

    if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
      AttrBuilder RAttrs(CallerPAL, AttributeSet::ReturnIndex);
      if (RAttrs.
          hasAttributes(AttributeFuncs::
                        typeIncompatible(NewRetTy, AttributeSet::ReturnIndex),
                        AttributeSet::ReturnIndex))
        return false;   // Attribute not compatible with transformed value.
    }

    // If the callsite is an invoke instruction, and the return value is used by
    // a PHI node in a successor, we cannot change the return type of the call
    // because there is no place to put the cast instruction (without breaking
    // the critical edge). Bail out in this case.
    if (!Caller->use_empty())
      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller))
        for (User *U : II->users())
          if (PHINode *PN = dyn_cast<PHINode>(U))
            if (PN->getParent() == II->getNormalDest() ||
                PN->getParent() == II->getUnwindDest())
              return false;
  }

  unsigned NumActualArgs = CS.arg_size();
  unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);

  CallSite::arg_iterator AI = CS.arg_begin();
  for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
    Type *ParamTy = FT->getParamType(i);
    Type *ActTy = (*AI)->getType();

    if (!CastInst::isBitCastable(ActTy, ParamTy))
      return false;   // Cannot transform this parameter value.

    if (AttrBuilder(CallerPAL.getParamAttributes(i + 1), i + 1).
          hasAttributes(AttributeFuncs::
                        typeIncompatible(ParamTy, i + 1), i + 1))
      return false;   // Attribute not compatible with transformed value.

    if (CS.isInAllocaArgument(i))
      return false;   // Cannot transform to and from inalloca.

    // If the parameter is passed as a byval argument, then we have to have a
    // sized type and the sized type has to have the same size as the old type.
    if (ParamTy != ActTy &&
        CallerPAL.getParamAttributes(i + 1).hasAttribute(i + 1,
                                                         Attribute::ByVal)) {
      PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
      if (!ParamPTy || !ParamPTy->getElementType()->isSized() || !DL)
        return false;

      Type *CurElTy = ActTy->getPointerElementType();
      if (DL->getTypeAllocSize(CurElTy) !=
          DL->getTypeAllocSize(ParamPTy->getElementType()))
        return false;
    }
  }

  if (Callee->isDeclaration()) {
    // Do not delete arguments unless we have a function body.
    if (FT->getNumParams() < NumActualArgs && !FT->isVarArg())
      return false;

    // If the callee is just a declaration, don't change the varargsness of the
    // call. We don't want to introduce a varargs call where one doesn't
    // already exist.
    PointerType *APTy = cast<PointerType>(CS.getCalledValue()->getType());
    if (FT->isVarArg()!=cast<FunctionType>(APTy->getElementType())->isVarArg())
      return false;

    // If both the callee and the cast type are varargs, we still have to make
    // sure the number of fixed parameters are the same or we have the same
    // ABI issues as if we introduce a varargs call.
    if (FT->isVarArg() &&
        cast<FunctionType>(APTy->getElementType())->isVarArg() &&
        FT->getNumParams() !=
        cast<FunctionType>(APTy->getElementType())->getNumParams())
      return false;
  }

  if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
      !CallerPAL.isEmpty())
    // In this case we have more arguments than the new function type, but we
    // won't be dropping them. Check that these extra arguments have attributes
    // that are compatible with being a vararg call argument.
    for (unsigned i = CallerPAL.getNumSlots(); i; --i) {
      unsigned Index = CallerPAL.getSlotIndex(i - 1);
      if (Index <= FT->getNumParams())
        break;

      // Check if it has an attribute that's incompatible with varargs.
      AttributeSet PAttrs = CallerPAL.getSlotAttributes(i - 1);
      if (PAttrs.hasAttribute(Index, Attribute::StructRet))
        return false;
    }


  // Okay, we decided that this is a safe thing to do: go ahead and start
  // inserting cast instructions as necessary.
  std::vector<Value*> Args;
  Args.reserve(NumActualArgs);
  SmallVector<AttributeSet, 8> attrVec;
  attrVec.reserve(NumCommonArgs);

  // Get any return attributes.
  AttrBuilder RAttrs(CallerPAL, AttributeSet::ReturnIndex);

  // If the return value is not being used, the type may not be compatible
  // with the existing attributes. Wipe out any problematic attributes.
  RAttrs.
    removeAttributes(AttributeFuncs::
                     typeIncompatible(NewRetTy, AttributeSet::ReturnIndex),
                     AttributeSet::ReturnIndex);

  // Add the new return attributes.
  if (RAttrs.hasAttributes())
    attrVec.push_back(AttributeSet::get(Caller->getContext(),
                                        AttributeSet::ReturnIndex, RAttrs));

  AI = CS.arg_begin();
  for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
    Type *ParamTy = FT->getParamType(i);

    if ((*AI)->getType() == ParamTy) {
      Args.push_back(*AI);
    } else {
      Args.push_back(Builder->CreateBitCast(*AI, ParamTy));
    }

    // Add any parameter attributes.
    AttrBuilder PAttrs(CallerPAL.getParamAttributes(i + 1), i + 1);
    if (PAttrs.hasAttributes())
      attrVec.push_back(AttributeSet::get(Caller->getContext(), i + 1,
                                          PAttrs));
  }

  // If the function takes more arguments than the call was taking, add them
  // now.
  for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i)
    Args.push_back(Constant::getNullValue(FT->getParamType(i)));

  // If we are removing arguments to the function, emit an obnoxious warning.
  if (FT->getNumParams() < NumActualArgs) {
    // TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722
    if (FT->isVarArg()) {
      // Add all of the arguments in their promoted form to the arg list.
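      // Example (illustrative only): an i8 or i16 argument passed through the
      // variadic portion is widened to i32 by getPromotedType() above,
      // mirroring the C default argument promotions applied to va_arg values.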
      for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
        Type *PTy = getPromotedType((*AI)->getType());
        if (PTy != (*AI)->getType()) {
          // Must promote to pass through va_arg area!
          Instruction::CastOps opcode =
            CastInst::getCastOpcode(*AI, false, PTy, false);
          Args.push_back(Builder->CreateCast(opcode, *AI, PTy));
        } else {
          Args.push_back(*AI);
        }

        // Add any parameter attributes.
        AttrBuilder PAttrs(CallerPAL.getParamAttributes(i + 1), i + 1);
        if (PAttrs.hasAttributes())
          attrVec.push_back(AttributeSet::get(FT->getContext(), i + 1,
                                              PAttrs));
      }
    }
  }

  AttributeSet FnAttrs = CallerPAL.getFnAttributes();
  if (CallerPAL.hasAttributes(AttributeSet::FunctionIndex))
    attrVec.push_back(AttributeSet::get(Callee->getContext(), FnAttrs));

  if (NewRetTy->isVoidTy())
    Caller->setName("");   // Void type should not have a name.

  const AttributeSet &NewCallerPAL = AttributeSet::get(Callee->getContext(),
                                                       attrVec);

  Instruction *NC;
  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
    NC = Builder->CreateInvoke(Callee, II->getNormalDest(),
                               II->getUnwindDest(), Args);
    NC->takeName(II);
    cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv());
    cast<InvokeInst>(NC)->setAttributes(NewCallerPAL);
  } else {
    CallInst *CI = cast<CallInst>(Caller);
    NC = Builder->CreateCall(Callee, Args);
    NC->takeName(CI);
    if (CI->isTailCall())
      cast<CallInst>(NC)->setTailCall();
    cast<CallInst>(NC)->setCallingConv(CI->getCallingConv());
    cast<CallInst>(NC)->setAttributes(NewCallerPAL);
  }

  // Insert a cast of the return type as necessary.
  Value *NV = NC;
  if (OldRetTy != NV->getType() && !Caller->use_empty()) {
    if (!NV->getType()->isVoidTy()) {
      NV = NC = CastInst::Create(CastInst::BitCast, NC, OldRetTy);
      NC->setDebugLoc(Caller->getDebugLoc());

      // If this is an invoke instruction, we should insert it after the first
      // non-phi instruction in the normal successor block.
      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
        BasicBlock::iterator I = II->getNormalDest()->getFirstInsertionPt();
        InsertNewInstBefore(NC, *I);
      } else {
        // Otherwise, it's a call, just insert the cast right after the call.
        InsertNewInstBefore(NC, *Caller);
      }
      Worklist.AddUsersToWorkList(*Caller);
    } else {
      NV = UndefValue::get(Caller->getType());
    }
  }

  if (!Caller->use_empty())
    ReplaceInstUsesWith(*Caller, NV);
  else if (Caller->hasValueHandle())
    ValueHandleBase::ValueIsRAUWd(Caller, NV);

  EraseInstFromFunction(*Caller);
  return true;
}

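For orientation, the following standalone sketch (not part of this file) shows how the kind of call site rewritten above can be constructed with the C++ API of this era. The function name buildBitcastCallExample and the chosen types are illustrative assumptions, and IRBuilder overloads vary between releases.

// Illustrative sketch only; not part of InstCombineCalls.cpp.  It builds a
// call through 'bitcast (i32 (i32)* @callee to i8* (i8*)*)', the pattern that
// transformConstExprCastCall folds into a direct call with casted arguments
// and a casted return value.
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"

static void buildBitcastCallExample(llvm::Module &M) {
  using namespace llvm;
  LLVMContext &Ctx = M.getContext();
  IRBuilder<> Builder(Ctx);
  Type *I32 = Type::getInt32Ty(Ctx);
  Type *I8Ptr = Type::getInt8PtrTy(Ctx);

  // The real callee: i32 @callee(i32).
  Type *CalleeParams[] = { I32 };
  Function *Callee = Function::Create(
      FunctionType::get(I32, CalleeParams, false),
      Function::ExternalLinkage, "callee", &M);

  // A caller that reaches it through a constant bitcast to 'i8* (i8*)*'.
  Function *Caller = Function::Create(FunctionType::get(I32, false),
                                      Function::ExternalLinkage, "caller", &M);
  Builder.SetInsertPoint(BasicBlock::Create(Ctx, "entry", Caller));

  Type *BogusParams[] = { I8Ptr };
  FunctionType *BogusTy = FunctionType::get(I8Ptr, BogusParams, false);
  Constant *CastedCallee =
      ConstantExpr::getBitCast(Callee, PointerType::getUnqual(BogusTy));

  // InstCombine can turn this into a direct call to @callee plus the bitcasts
  // needed for the argument and the returned value.
  Value *Arg = Constant::getNullValue(I8Ptr);
  Value *Ret = Builder.CreateCall(CastedCallee, Arg);
  Builder.CreateRet(Builder.CreatePtrToInt(Ret, I32));
}

Running instcombine over a module containing such a caller exercises exactly the path above: the constant cast is stripped from the callee and re-expressed as casts on the argument and the result.
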
// transformCallThroughTrampoline - Turn a call to a function created by
// init_trampoline / adjust_trampoline intrinsic pair into a direct call to the
// underlying function.
//
Instruction *
InstCombiner::transformCallThroughTrampoline(CallSite CS,
                                             IntrinsicInst *Tramp) {
  Value *Callee = CS.getCalledValue();
  PointerType *PTy = cast<PointerType>(Callee->getType());
  FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
  const AttributeSet &Attrs = CS.getAttributes();

  // If the call already has the 'nest' attribute somewhere then give up -
  // otherwise 'nest' would occur twice after splicing in the chain.
  if (Attrs.hasAttrSomewhere(Attribute::Nest))
    return nullptr;

  assert(Tramp &&
         "transformCallThroughTrampoline called with incorrect CallSite.");

  Function *NestF =
    cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts());
  PointerType *NestFPTy = cast<PointerType>(NestF->getType());
  FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType());

  const AttributeSet &NestAttrs = NestF->getAttributes();
  if (!NestAttrs.isEmpty()) {
    unsigned NestIdx = 1;
    Type *NestTy = nullptr;
    AttributeSet NestAttr;

    // Look for a parameter marked with the 'nest' attribute.
    for (FunctionType::param_iterator I = NestFTy->param_begin(),
           E = NestFTy->param_end(); I != E; ++NestIdx, ++I)
      if (NestAttrs.hasAttribute(NestIdx, Attribute::Nest)) {
        // Record the parameter type and any other attributes.
        NestTy = *I;
        NestAttr = NestAttrs.getParamAttributes(NestIdx);
        break;
      }

    if (NestTy) {
      Instruction *Caller = CS.getInstruction();
      std::vector<Value*> NewArgs;
      NewArgs.reserve(CS.arg_size() + 1);

      SmallVector<AttributeSet, 8> NewAttrs;
      NewAttrs.reserve(Attrs.getNumSlots() + 1);

      // Insert the nest argument into the call argument list, which may
      // mean appending it.  Likewise for attributes.

      // Add any result attributes.
      if (Attrs.hasAttributes(AttributeSet::ReturnIndex))
        NewAttrs.push_back(AttributeSet::get(Caller->getContext(),
                                             Attrs.getRetAttributes()));

      {
        unsigned Idx = 1;
        CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
        do {
          if (Idx == NestIdx) {
            // Add the chain argument and attributes.
            Value *NestVal = Tramp->getArgOperand(2);
            if (NestVal->getType() != NestTy)
              NestVal = Builder->CreateBitCast(NestVal, NestTy, "nest");
            NewArgs.push_back(NestVal);
            NewAttrs.push_back(AttributeSet::get(Caller->getContext(),
                                                 NestAttr));
          }

          if (I == E)
            break;

          // Add the original argument and attributes.
          NewArgs.push_back(*I);
          AttributeSet Attr = Attrs.getParamAttributes(Idx);
          if (Attr.hasAttributes(Idx)) {
            AttrBuilder B(Attr, Idx);
            NewAttrs.push_back(AttributeSet::get(Caller->getContext(),
                                                 Idx + (Idx >= NestIdx), B));
          }

          ++Idx, ++I;
        } while (1);
      }

      // Add any function attributes.
      if (Attrs.hasAttributes(AttributeSet::FunctionIndex))
        NewAttrs.push_back(AttributeSet::get(FTy->getContext(),
                                             Attrs.getFnAttributes()));

      // The trampoline may have been bitcast to a bogus type (FTy).
      // Handle this by synthesizing a new function type, equal to FTy
      // with the chain parameter inserted.
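      // For example (hypothetical, added for illustration): if FTy is
      // 'i32 (i8*)' and the underlying function's nest parameter is its
      // first parameter with type %frame*, the synthesized type becomes
      // 'i32 (%frame*, i8*)'.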
      std::vector<Type*> NewTypes;
      NewTypes.reserve(FTy->getNumParams() + 1);

      // Insert the chain's type into the list of parameter types, which may
      // mean appending it.
      {
        unsigned Idx = 1;
        FunctionType::param_iterator I = FTy->param_begin(),
          E = FTy->param_end();

        do {
          if (Idx == NestIdx)
            // Add the chain's type.
            NewTypes.push_back(NestTy);

          if (I == E)
            break;

          // Add the original type.
          NewTypes.push_back(*I);

          ++Idx, ++I;
        } while (1);
      }

      // Replace the trampoline call with a direct call.  Let the generic
      // code sort out any function type mismatches.
      FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes,
                                               FTy->isVarArg());
      Constant *NewCallee =
        NestF->getType() == PointerType::getUnqual(NewFTy) ?
        NestF : ConstantExpr::getBitCast(NestF,
                                         PointerType::getUnqual(NewFTy));
      const AttributeSet &NewPAL =
          AttributeSet::get(FTy->getContext(), NewAttrs);

      Instruction *NewCaller;
      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
        NewCaller = InvokeInst::Create(NewCallee,
                                       II->getNormalDest(), II->getUnwindDest(),
                                       NewArgs);
        cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
        cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
      } else {
        NewCaller = CallInst::Create(NewCallee, NewArgs);
        if (cast<CallInst>(Caller)->isTailCall())
          cast<CallInst>(NewCaller)->setTailCall();
        cast<CallInst>(NewCaller)->
          setCallingConv(cast<CallInst>(Caller)->getCallingConv());
        cast<CallInst>(NewCaller)->setAttributes(NewPAL);
      }

      return NewCaller;
    }
  }

  // Replace the trampoline call with a direct call.  Since there is no 'nest'
  // parameter, there is no need to adjust the argument list.  Let the generic
  // code sort out any function type mismatches.
  Constant *NewCallee =
      NestF->getType() == PTy ? NestF : ConstantExpr::getBitCast(NestF, PTy);
  CS.setCalledFunction(NewCallee);
  return CS.getInstruction();
}
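For context, here is a hedged sketch (not part of this file) of how a trampoline call site is typically produced before this combiner sees it. The helper name emitTrampolineCall is hypothetical, and TrampMem and NestVal are assumed to be valid i8* values pointing at writable trampoline storage and at the chain object, respectively.

// Illustrative sketch only; not part of InstCombineCalls.cpp.  Emits the
// llvm.init.trampoline / llvm.adjust.trampoline pair whose adjusted pointer
// is later called indirectly.
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"

static llvm::Value *emitTrampolineCall(llvm::IRBuilder<> &Builder,
                                       llvm::Module &M, llvm::Function *NestF,
                                       llvm::Value *TrampMem,
                                       llvm::Value *NestVal) {
  using namespace llvm;
  Value *Init = Intrinsic::getDeclaration(&M, Intrinsic::init_trampoline);
  Value *Adjust = Intrinsic::getDeclaration(&M, Intrinsic::adjust_trampoline);

  // Initialize the trampoline with the target function and the chain value.
  Value *FnPtr = Builder.CreateBitCast(NestF, Builder.getInt8PtrTy());
  Builder.CreateCall3(Init, TrampMem, FnPtr, NestVal);

  // The returned i8* is what callers bitcast to a function pointer and call.
  return Builder.CreateCall(Adjust, TrampMem);
}

A call made through the pointer returned here is exactly what transformCallThroughTrampoline folds back into a direct call to NestF, with the chain value spliced in as the 'nest' argument.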