LLVM API Documentation
//===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the visit functions for load, store and alloca.
//
//===----------------------------------------------------------------------===//

#include "InstCombine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

#define DEBUG_TYPE "instcombine"

STATISTIC(NumDeadStore,    "Number of dead stores eliminated");
STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");

/// pointsToConstantGlobal - Return true if V (possibly indirectly) points to
/// some part of a constant global variable.  This intentionally only accepts
/// constant expressions because we can't rewrite arbitrary instructions.
static bool pointsToConstantGlobal(Value *V) {
  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
    return GV->isConstant();

  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
    if (CE->getOpcode() == Instruction::BitCast ||
        CE->getOpcode() == Instruction::AddrSpaceCast ||
        CE->getOpcode() == Instruction::GetElementPtr)
      return pointsToConstantGlobal(CE->getOperand(0));
  }
  return false;
}

/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
/// pointer to an alloca.  Ignore any reads of the pointer; return false if we
/// see any stores or other unknown uses.  If we see pointer arithmetic, keep
/// track of whether it moves the pointer (with IsOffset) but otherwise traverse
/// the uses.  If we see a memcpy/memmove that targets an unoffset pointer to
/// the alloca, and if the source pointer is a pointer to a constant global, we
/// can optimize this.
static bool
isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
                               SmallVectorImpl<Instruction *> &ToDelete) {
  // We track lifetime intrinsics as we encounter them.  If we decide to go
  // ahead and replace the value with the global, this lets the caller quickly
  // eliminate the markers.

  SmallVector<std::pair<Value *, bool>, 35> ValuesToInspect;
  ValuesToInspect.push_back(std::make_pair(V, false));
  while (!ValuesToInspect.empty()) {
    auto ValuePair = ValuesToInspect.pop_back_val();
    const bool IsOffset = ValuePair.second;
    for (auto &U : ValuePair.first->uses()) {
      Instruction *I = cast<Instruction>(U.getUser());

      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
        // Ignore non-volatile loads; they are always ok.
        if (!LI->isSimple()) return false;
        continue;
      }

      if (isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I)) {
        // If uses of the bitcast are ok, we are ok.
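        // (Casts do not move the pointer, so the current IsOffset flag is
        // propagated unchanged while we keep walking the cast's uses.)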
        ValuesToInspect.push_back(std::make_pair(I, IsOffset));
        continue;
      }
      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
        // If the GEP has all zero indices, it doesn't offset the pointer;
        // otherwise it does.
        ValuesToInspect.push_back(
            std::make_pair(I, IsOffset || !GEP->hasAllZeroIndices()));
        continue;
      }

      if (CallSite CS = I) {
        // If this is the function being called then we treat it like a load and
        // ignore it.
        if (CS.isCallee(&U))
          continue;

        // Inalloca arguments are clobbered by the call.
        unsigned ArgNo = CS.getArgumentNo(&U);
        if (CS.isInAllocaArgument(ArgNo))
          return false;

        // If this is a readonly/readnone call site, then we know it is just a
        // load (but one that potentially returns the value itself), so we can
        // ignore it if we know that the value isn't captured.
        if (CS.onlyReadsMemory() &&
            (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo)))
          continue;

        // If this is being passed as a byval argument, the caller is making a
        // copy, so it is only a read of the alloca.
        if (CS.isByValArgument(ArgNo))
          continue;
      }

      // Lifetime intrinsics can be handled by the caller.
      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
        if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
            II->getIntrinsicID() == Intrinsic::lifetime_end) {
          assert(II->use_empty() && "Lifetime markers have no result to use!");
          ToDelete.push_back(II);
          continue;
        }
      }

      // If this isn't our memcpy/memmove, reject it as something we can't
      // handle.
      MemTransferInst *MI = dyn_cast<MemTransferInst>(I);
      if (!MI)
        return false;

      // If the transfer is using the alloca as a source of the transfer, then
      // ignore it since it is a load (unless the transfer is volatile).
      if (U.getOperandNo() == 1) {
        if (MI->isVolatile()) return false;
        continue;
      }

      // If we already have seen a copy, reject the second one.
      if (TheCopy) return false;

      // If the pointer has been offset from the start of the alloca, we can't
      // safely handle this.
      if (IsOffset) return false;

      // If the memintrinsic isn't using the alloca as the dest, reject it.
      if (U.getOperandNo() != 0) return false;

      // If the source of the memcpy/move is not a constant global, reject it.
      if (!pointsToConstantGlobal(MI->getSource()))
        return false;

      // Otherwise, the transform is safe.  Remember the copy instruction.
      TheCopy = MI;
    }
  }
  return true;
}

/// isOnlyCopiedFromConstantGlobal - Return the copy if the specified alloca is
/// only modified by a copy from a constant global.  If we can prove this, we
/// can replace any uses of the alloca with uses of the global directly.
static MemTransferInst *
isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
                               SmallVectorImpl<Instruction *> &ToDelete) {
  MemTransferInst *TheCopy = nullptr;
  if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete))
    return TheCopy;
  return nullptr;
}

Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
  // Ensure that the alloca array size argument has type intptr_t, so that
  // any casting is exposed early.
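  // Illustrative example (assuming a 64-bit DataLayout): "%a = alloca i32, i16 %n"
  // is rewritten as roughly "%1 = zext i16 %n to i64" followed by
  // "%a = alloca i32, i64 %1", so later combines only ever see an
  // intptr_t-sized array size.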
  if (DL) {
    Type *IntPtrTy = DL->getIntPtrType(AI.getType());
    if (AI.getArraySize()->getType() != IntPtrTy) {
      Value *V = Builder->CreateIntCast(AI.getArraySize(),
                                        IntPtrTy, false);
      AI.setOperand(0, V);
      return &AI;
    }
  }

  // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
  if (AI.isArrayAllocation()) {  // Check C != 1
    if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
      Type *NewTy =
        ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
      AllocaInst *New = Builder->CreateAlloca(NewTy, nullptr, AI.getName());
      New->setAlignment(AI.getAlignment());

      // Scan to the end of the allocation instructions, to skip over a block of
      // allocas if possible... also skip interleaved debug info
      //
      BasicBlock::iterator It = New;
      while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It)) ++It;

      // Now that It is pointing to the first non-allocation-inst in the block,
      // insert our getelementptr instruction...
      //
      Type *IdxTy = DL
                  ? DL->getIntPtrType(AI.getType())
                  : Type::getInt64Ty(AI.getContext());
      Value *NullIdx = Constant::getNullValue(IdxTy);
      Value *Idx[2] = { NullIdx, NullIdx };
      Instruction *GEP =
        GetElementPtrInst::CreateInBounds(New, Idx, New->getName() + ".sub");
      InsertNewInstBefore(GEP, *It);

      // Now make everything use the getelementptr instead of the original
      // allocation.
      return ReplaceInstUsesWith(AI, GEP);
    } else if (isa<UndefValue>(AI.getArraySize())) {
      return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
    }
  }

  if (DL && AI.getAllocatedType()->isSized()) {
    // If the alignment is 0 (unspecified), assign it the preferred alignment.
    if (AI.getAlignment() == 0)
      AI.setAlignment(DL->getPrefTypeAlignment(AI.getAllocatedType()));

    // Move all allocas of zero-byte objects to the entry block and merge them
    // together.  Note that we only do this for allocas, because malloc should
    // allocate and return a unique pointer, even for a zero-byte allocation.
    if (DL->getTypeAllocSize(AI.getAllocatedType()) == 0) {
      // For a zero-sized alloca there is no point in doing an array allocation.
      // This is helpful if the array size is a complicated expression not used
      // elsewhere.
      if (AI.isArrayAllocation()) {
        AI.setOperand(0, ConstantInt::get(AI.getArraySize()->getType(), 1));
        return &AI;
      }

      // Get the first instruction in the entry block.
      BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock();
      Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg();
      if (FirstInst != &AI) {
        // If the entry block doesn't start with a zero-size alloca then move
        // this one to the start of the entry block.  There is no problem with
        // dominance as the array size was forced to a constant earlier already.
        AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
        if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
            DL->getTypeAllocSize(EntryAI->getAllocatedType()) != 0) {
          AI.moveBefore(FirstInst);
          return &AI;
        }

        // If the alignment of the entry block alloca is 0 (unspecified),
        // assign it the preferred alignment.
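        // (Materializing the preferred alignment here keeps the max() below
        // from accidentally lowering the entry alloca's effective alignment.)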
        if (EntryAI->getAlignment() == 0)
          EntryAI->setAlignment(
            DL->getPrefTypeAlignment(EntryAI->getAllocatedType()));
        // Replace this zero-sized alloca with the one at the start of the entry
        // block after ensuring that the address will be aligned enough for both
        // types.
        unsigned MaxAlign = std::max(EntryAI->getAlignment(),
                                     AI.getAlignment());
        EntryAI->setAlignment(MaxAlign);
        if (AI.getType() != EntryAI->getType())
          return new BitCastInst(EntryAI, AI.getType());
        return ReplaceInstUsesWith(AI, EntryAI);
      }
    }
  }

  if (AI.getAlignment()) {
    // Check to see if this allocation is only modified by a memcpy/memmove from
    // a constant global whose alignment is equal to or exceeds that of the
    // allocation.  If this is the case, we can change all users to use
    // the constant global instead.  This is commonly produced by the CFE by
    // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
    // is only subsequently read.
    SmallVector<Instruction *, 4> ToDelete;
    if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
      unsigned SourceAlign = getOrEnforceKnownAlignment(Copy->getSource(),
                                                        AI.getAlignment(),
                                                        DL, AT, &AI, DT);
      if (AI.getAlignment() <= SourceAlign) {
        DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
        DEBUG(dbgs() << "  memcpy = " << *Copy << '\n');
        for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
          EraseInstFromFunction(*ToDelete[i]);
        Constant *TheSrc = cast<Constant>(Copy->getSource());
        Constant *Cast
          = ConstantExpr::getPointerBitCastOrAddrSpaceCast(TheSrc, AI.getType());
        Instruction *NewI = ReplaceInstUsesWith(AI, Cast);
        EraseInstFromFunction(*Copy);
        ++NumGlobalCopies;
        return NewI;
      }
    }
  }

  // At last, use the generic allocation site handler to aggressively remove
  // unused allocas.
  return visitAllocSite(AI);
}


/// InstCombineLoadCast - Fold 'load (cast P)' -> 'cast (load P)' when possible.
static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
                                        const DataLayout *DL) {
  User *CI = cast<User>(LI.getOperand(0));
  Value *CastOp = CI->getOperand(0);

  PointerType *DestTy = cast<PointerType>(CI->getType());
  Type *DestPTy = DestTy->getElementType();
  if (PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) {

    // If the address spaces don't match, don't eliminate the cast.
    if (DestTy->getAddressSpace() != SrcTy->getAddressSpace())
      return nullptr;

    Type *SrcPTy = SrcTy->getElementType();

    if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() ||
        DestPTy->isVectorTy()) {
      // If the source is an array, the code below will not succeed.  Check to
      // see if a trivial 'gep P, 0, 0' will help matters.  Only do this for
      // constants.
      if (ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy))
        if (Constant *CSrc = dyn_cast<Constant>(CastOp))
          if (ASrcTy->getNumElements() != 0) {
            Type *IdxTy = DL
                        ? DL->getIntPtrType(SrcTy)
                        : Type::getInt64Ty(SrcTy->getContext());
            Value *Idx = Constant::getNullValue(IdxTy);
            Value *Idxs[2] = { Idx, Idx };
            CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs);
            SrcTy = cast<PointerType>(CastOp->getType());
            SrcPTy = SrcTy->getElementType();
          }

      if (IC.getDataLayout() &&
          (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() ||
           SrcPTy->isVectorTy()) &&
          // Do not allow turning this into a load of an integer, which is then
          // cast to a pointer; this pessimizes pointer analysis a lot.
          (SrcPTy->isPtrOrPtrVectorTy() ==
           LI.getType()->isPtrOrPtrVectorTy()) &&
          IC.getDataLayout()->getTypeSizeInBits(SrcPTy) ==
          IC.getDataLayout()->getTypeSizeInBits(DestPTy)) {

        // Okay, we are casting from one integer or pointer type to another of
        // the same size.  Instead of casting the pointer before the load, cast
        // the result of the loaded value.
        LoadInst *NewLoad =
          IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName());
        NewLoad->setAlignment(LI.getAlignment());
        NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope());
        // Now cast the result of the load.
        PointerType *OldTy = dyn_cast<PointerType>(NewLoad->getType());
        PointerType *NewTy = dyn_cast<PointerType>(LI.getType());
        if (OldTy && NewTy &&
            OldTy->getAddressSpace() != NewTy->getAddressSpace()) {
          return new AddrSpaceCastInst(NewLoad, LI.getType());
        }

        return new BitCastInst(NewLoad, LI.getType());
      }
    }
  }
  return nullptr;
}

Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
  Value *Op = LI.getOperand(0);

  // Attempt to improve the alignment.
  if (DL) {
    unsigned KnownAlign =
      getOrEnforceKnownAlignment(Op, DL->getPrefTypeAlignment(LI.getType()),
                                 DL, AT, &LI, DT);
    unsigned LoadAlign = LI.getAlignment();
    unsigned EffectiveLoadAlign = LoadAlign != 0 ? LoadAlign :
      DL->getABITypeAlignment(LI.getType());

    if (KnownAlign > EffectiveLoadAlign)
      LI.setAlignment(KnownAlign);
    else if (LoadAlign == 0)
      LI.setAlignment(EffectiveLoadAlign);
  }

  // load (cast X) --> cast (load X) iff safe.
  if (isa<CastInst>(Op))
    if (Instruction *Res = InstCombineLoadCast(*this, LI, DL))
      return Res;

  // None of the following transforms are legal for volatile/atomic loads.
  // FIXME: Some of it is okay for atomic loads; needs refactoring.
  if (!LI.isSimple()) return nullptr;

  // Do really simple store-to-load forwarding and load CSE, to catch cases
  // where there are several consecutive memory accesses to the same location,
  // separated by a few arithmetic operations.
  BasicBlock::iterator BBI = &LI;
  if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI, 6))
    return ReplaceInstUsesWith(LI, AvailableVal);

  // load(gep null, ...) -> unreachable
  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
    const Value *GEPI0 = GEPI->getOperand(0);
    // TODO: Consider a target hook for valid address spaces for this xform.
    if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0) {
      // Insert a new store to null instruction before the load to indicate
      // that this code is not reachable.
      // We do this instead of inserting
      // an unreachable instruction directly because we cannot modify the
      // CFG.
      new StoreInst(UndefValue::get(LI.getType()),
                    Constant::getNullValue(Op->getType()), &LI);
      return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
    }
  }

  // load null/undef -> unreachable
  // TODO: Consider a target hook for valid address spaces for this xform.
  if (isa<UndefValue>(Op) ||
      (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) {
    // Insert a new store to null instruction before the load to indicate that
    // this code is not reachable.  We do this instead of inserting an
    // unreachable instruction directly because we cannot modify the CFG.
    new StoreInst(UndefValue::get(LI.getType()),
                  Constant::getNullValue(Op->getType()), &LI);
    return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
  }

  // Instcombine load (constantexpr_cast global) -> cast (load global)
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op))
    if (CE->isCast())
      if (Instruction *Res = InstCombineLoadCast(*this, LI, DL))
        return Res;

  if (Op->hasOneUse()) {
    // Change select and PHI nodes to select values instead of addresses: this
    // helps alias analysis out a lot, allows many other simplifications, and
    // exposes redundancy in the code.
    //
    // Note that we cannot do the transformation unless we know that the
    // introduced loads cannot trap!  Something like this is valid as long as
    // the condition is always false: load (select bool %C, int* null, int* %G),
    // but it would not be valid if we transformed it to load from null
    // unconditionally.
    //
    if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
      // load (select (Cond, &V1, &V2))  --> select(Cond, load &V1, load &V2).
      unsigned Align = LI.getAlignment();
      if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align, DL) &&
          isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align, DL)) {
        LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1),
                                           SI->getOperand(1)->getName()+".val");
        LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2),
                                           SI->getOperand(2)->getName()+".val");
        V1->setAlignment(Align);
        V2->setAlignment(Align);
        return SelectInst::Create(SI->getCondition(), V1, V2);
      }

      // load (select (cond, null, P)) -> load P
      if (Constant *C = dyn_cast<Constant>(SI->getOperand(1)))
        if (C->isNullValue()) {
          LI.setOperand(0, SI->getOperand(2));
          return &LI;
        }

      // load (select (cond, P, null)) -> load P
      if (Constant *C = dyn_cast<Constant>(SI->getOperand(2)))
        if (C->isNullValue()) {
          LI.setOperand(0, SI->getOperand(1));
          return &LI;
        }
    }
  }
  return nullptr;
}

/// InstCombineStoreToCast - Fold store V, (cast P) -> store (cast V), P
/// when possible.  This makes it generally easy to do alias analysis and/or
/// SROA/mem2reg of the memory object.
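/// Illustrative example (on a 32-bit DataLayout): "store i32 %v, i32* %q" where
/// "%q = bitcast i8** %p to i32*" becomes "%v.c = inttoptr i32 %v to i8*"
/// followed by "store i8* %v.c, i8** %p".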
static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
  User *CI = cast<User>(SI.getOperand(1));
  Value *CastOp = CI->getOperand(0);

  Type *DestPTy = CI->getType()->getPointerElementType();
  PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType());
  if (!SrcTy) return nullptr;

  Type *SrcPTy = SrcTy->getElementType();

  if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy())
    return nullptr;

  /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep"
  /// to its first element.  This allows us to handle things like:
  ///   store i32 xxx, (bitcast {foo*, float}* %P to i32*)
  /// on 32-bit hosts.
  SmallVector<Value*, 4> NewGEPIndices;

  // If the source points to an array or struct, the code below will not
  // succeed as-is.  Check to see if a trivial 'gep P, 0, 0, ...' down to the
  // first scalar field will help matters.
  if (SrcPTy->isArrayTy() || SrcPTy->isStructTy()) {
    // Index through pointer.
    Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext()));
    NewGEPIndices.push_back(Zero);

    while (1) {
      if (StructType *STy = dyn_cast<StructType>(SrcPTy)) {
        if (!STy->getNumElements()) /* Struct can be empty {} */
          break;
        NewGEPIndices.push_back(Zero);
        SrcPTy = STy->getElementType(0);
      } else if (ArrayType *ATy = dyn_cast<ArrayType>(SrcPTy)) {
        NewGEPIndices.push_back(Zero);
        SrcPTy = ATy->getElementType();
      } else {
        break;
      }
    }

    SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace());
  }

  if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy())
    return nullptr;

  // If the pointers point into different address spaces, don't do the
  // transformation.
  if (SrcTy->getAddressSpace() != CI->getType()->getPointerAddressSpace())
    return nullptr;

  // If the pointers point to values of different sizes, don't do the
  // transformation.
  if (!IC.getDataLayout() ||
      IC.getDataLayout()->getTypeSizeInBits(SrcPTy) !=
      IC.getDataLayout()->getTypeSizeInBits(DestPTy))
    return nullptr;

  // If the pointers point to pointers to different address spaces, don't do
  // the transformation.  It is not safe to introduce an addrspacecast
  // instruction in this case since, depending on the target, addrspacecast
  // may not be a no-op cast.
  if (SrcPTy->isPointerTy() && DestPTy->isPointerTy() &&
      SrcPTy->getPointerAddressSpace() != DestPTy->getPointerAddressSpace())
    return nullptr;

  // Okay, we are casting from one integer or pointer type to another of
  // the same size.  Instead of casting the pointer before
  // the store, cast the value to be stored.
  Value *NewCast;
  Instruction::CastOps opcode = Instruction::BitCast;
  Type* CastSrcTy = DestPTy;
  Type* CastDstTy = SrcPTy;
  if (CastDstTy->isPointerTy()) {
    if (CastSrcTy->isIntegerTy())
      opcode = Instruction::IntToPtr;
  } else if (CastDstTy->isIntegerTy()) {
    if (CastSrcTy->isPointerTy())
      opcode = Instruction::PtrToInt;
  }

  // If the pointer is to an aggregate and this is a store to the first field,
  // emit a GEP to index into that first field.
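  // (For the {foo*, float} example above, NewGEPIndices is {0, 0}, so the GEP
  // yields a foo** and the i32 value is stored into the first field via
  // inttoptr.)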
  if (!NewGEPIndices.empty())
    CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices);

  Value *SIOp0 = SI.getOperand(0);
  NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy,
                                   SIOp0->getName()+".c");
  SI.setOperand(0, NewCast);
  SI.setOperand(1, CastOp);
  return &SI;
}

/// equivalentAddressValues - Test if A and B will obviously have the same
/// value.  This includes recognizing that %t0 and %t1 will have the same
/// value in code like this:
///   %t0 = getelementptr @a, 0, 3
///   store i32 0, i32* %t0
///   %t1 = getelementptr @a, 0, 3
///   %t2 = load i32* %t1
///
static bool equivalentAddressValues(Value *A, Value *B) {
  // Test if the values are trivially equivalent.
  if (A == B) return true;

  // Test if the values come from identical arithmetic instructions.
  // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
  // it's only used to compare two uses within the same basic block, which
  // means that they'll always either have the same value or one of them
  // will have an undefined value.
  if (isa<BinaryOperator>(A) ||
      isa<CastInst>(A) ||
      isa<PHINode>(A) ||
      isa<GetElementPtrInst>(A))
    if (Instruction *BI = dyn_cast<Instruction>(B))
      if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
        return true;

  // Otherwise they may not be equivalent.
  return false;
}

Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
  Value *Val = SI.getOperand(0);
  Value *Ptr = SI.getOperand(1);

  // Attempt to improve the alignment.
  if (DL) {
    unsigned KnownAlign =
      getOrEnforceKnownAlignment(Ptr, DL->getPrefTypeAlignment(Val->getType()),
                                 DL, AT, &SI, DT);
    unsigned StoreAlign = SI.getAlignment();
    unsigned EffectiveStoreAlign = StoreAlign != 0 ? StoreAlign :
      DL->getABITypeAlignment(Val->getType());

    if (KnownAlign > EffectiveStoreAlign)
      SI.setAlignment(KnownAlign);
    else if (StoreAlign == 0)
      SI.setAlignment(EffectiveStoreAlign);
  }

  // Don't hack volatile/atomic stores.
  // FIXME: Some bits are legal for atomic stores; needs refactoring.
  if (!SI.isSimple()) return nullptr;

  // If the RHS is an alloca with a single use, zapify the store, making the
  // alloca dead.
  if (Ptr->hasOneUse()) {
    if (isa<AllocaInst>(Ptr))
      return EraseInstFromFunction(SI);
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
      if (isa<AllocaInst>(GEP->getOperand(0))) {
        if (GEP->getOperand(0)->hasOneUse())
          return EraseInstFromFunction(SI);
      }
    }
  }

  // Do really simple DSE, to catch cases where there are several consecutive
  // stores to the same location, separated by a few arithmetic operations.
  // This situation often occurs with bitfield accesses.
  BasicBlock::iterator BBI = &SI;
  for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
       --ScanInsts) {
    --BBI;
    // Don't count debug info directives, lest they affect codegen,
    // and skip pointer-to-pointer bitcasts, which are NOPs.
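    // (Both are re-counted by bumping ScanInsts below, so they don't consume
    // the six-instruction scan budget.)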
    if (isa<DbgInfoIntrinsic>(BBI) ||
        (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
      ScanInsts++;
      continue;
    }

    if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
      // Prev store isn't volatile, and stores to the same location?
      if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1),
                                                        SI.getOperand(1))) {
        ++NumDeadStore;
        ++BBI;
        EraseInstFromFunction(*PrevSI);
        continue;
      }
      break;
    }

    // If this is a load, we have to stop.  However, if the loaded value is from
    // the pointer we're loading and is producing the pointer we're storing,
    // then *this* store is dead (X = load P; store X -> P).
    if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
      if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
          LI->isSimple())
        return EraseInstFromFunction(SI);

      // Otherwise, this is a load from some other location.  Stores before it
      // may not be dead.
      break;
    }

    // Don't skip over loads or things that can modify memory.
    if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
      break;
  }

  // store X, null    -> turns into 'unreachable' in SimplifyCFG
  if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) {
    if (!isa<UndefValue>(Val)) {
      SI.setOperand(0, UndefValue::get(Val->getType()));
      if (Instruction *U = dyn_cast<Instruction>(Val))
        Worklist.Add(U);  // Dropped a use.
    }
    return nullptr;  // Do not modify these!
  }

  // store undef, Ptr -> noop
  if (isa<UndefValue>(Val))
    return EraseInstFromFunction(SI);

  // If the pointer destination is a cast, see if we can fold the cast into the
  // source instead.
  if (isa<CastInst>(Ptr))
    if (Instruction *Res = InstCombineStoreToCast(*this, SI))
      return Res;
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
    if (CE->isCast())
      if (Instruction *Res = InstCombineStoreToCast(*this, SI))
        return Res;


  // If this store is the last instruction in the basic block (possibly
  // excepting debug info instructions), and if the block ends with an
  // unconditional branch, try to move it to the successor block.
  BBI = &SI;
  do {
    ++BBI;
  } while (isa<DbgInfoIntrinsic>(BBI) ||
           (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy()));
  if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
    if (BI->isUnconditional())
      if (SimplifyStoreAtEndOfBlock(SI))
        return nullptr;  // xform done!

  return nullptr;
}

/// SimplifyStoreAtEndOfBlock - Turn things like:
///   if () { *P = v1; } else { *P = v2 }
/// into a phi node with a store in the successor.
///
/// Simplify things like:
///   *P = v1; if () { *P = v2; }
/// into a phi node with a store in the successor.
///
bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
  BasicBlock *StoreBB = SI.getParent();

  // Check to see if the successor block has exactly two incoming edges.  If
  // so, see if the other predecessor contains a store to the same location.
  // If so, insert a PHI node (if needed) and move the stores down.
  BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);

  // Determine whether Dest has exactly two predecessors and, if so, compute
  // the other predecessor.
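  // (After the checks below, OtherBB is the unique predecessor of DestBB other
  // than StoreBB; any other predecessor arrangement makes us bail out.)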
  pred_iterator PI = pred_begin(DestBB);
  BasicBlock *P = *PI;
  BasicBlock *OtherBB = nullptr;

  if (P != StoreBB)
    OtherBB = P;

  if (++PI == pred_end(DestBB))
    return false;

  P = *PI;
  if (P != StoreBB) {
    if (OtherBB)
      return false;
    OtherBB = P;
  }
  if (++PI != pred_end(DestBB))
    return false;

  // Bail out if all the relevant blocks aren't distinct (this can happen,
  // for example, if SI is in an infinite loop).
  if (StoreBB == DestBB || OtherBB == DestBB)
    return false;

  // Verify that the other block ends in a branch and is not otherwise empty.
  BasicBlock::iterator BBI = OtherBB->getTerminator();
  BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
  if (!OtherBr || BBI == OtherBB->begin())
    return false;

  // If the other block ends in an unconditional branch, check for the 'if then
  // else' case.  There is an instruction before the branch.
  StoreInst *OtherStore = nullptr;
  if (OtherBr->isUnconditional()) {
    --BBI;
    // Skip over debugging info.
    while (isa<DbgInfoIntrinsic>(BBI) ||
           (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
      if (BBI == OtherBB->begin())
        return false;
      --BBI;
    }
    // If this isn't a store, isn't a store to the same location, or is not the
    // right kind of store, bail out.
    OtherStore = dyn_cast<StoreInst>(BBI);
    if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
        !SI.isSameOperationAs(OtherStore))
      return false;
  } else {
    // Otherwise, the other block ended with a conditional branch. If one of the
    // destinations is StoreBB, then we have the if/then case.
    if (OtherBr->getSuccessor(0) != StoreBB &&
        OtherBr->getSuccessor(1) != StoreBB)
      return false;

    // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
    // if/then triangle.  See if there is a store to the same ptr as SI that
    // lives in OtherBB.
    for (;; --BBI) {
      // Check to see if we find the matching store.
      if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
        if (OtherStore->getOperand(1) != SI.getOperand(1) ||
            !SI.isSameOperationAs(OtherStore))
          return false;
        break;
      }
      // If we find something that may be using or overwriting the stored
      // value, or if we run out of instructions, we can't do the xform.
      if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() ||
          BBI == OtherBB->begin())
        return false;
    }

    // In order to eliminate the store in OtherBr, we have to
    // make sure nothing reads or overwrites the stored value in
    // StoreBB.
    for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
      // FIXME: This should really be AA driven.
      if (I->mayReadFromMemory() || I->mayWriteToMemory())
        return false;
    }
  }

  // Insert a PHI node now if we need it.
  Value *MergedVal = OtherStore->getOperand(0);
  if (MergedVal != SI.getOperand(0)) {
    PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge");
    PN->addIncoming(SI.getOperand(0), SI.getParent());
    PN->addIncoming(OtherStore->getOperand(0), OtherBB);
    MergedVal = InsertNewInstBefore(PN, DestBB->front());
  }

  // Advance to a place where it is safe to insert the new store and
  // insert it.
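  // (getFirstInsertionPt() skips PHI nodes and any landingpad, so the merged
  // store lands after the block's header instructions.)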
  BBI = DestBB->getFirstInsertionPt();
  StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1),
                                   SI.isVolatile(),
                                   SI.getAlignment(),
                                   SI.getOrdering(),
                                   SI.getSynchScope());
  InsertNewInstBefore(NewSI, *BBI);
  NewSI->setDebugLoc(OtherStore->getDebugLoc());

  // If the two stores had AA tags, merge them.
  AAMDNodes AATags;
  SI.getAAMetadata(AATags);
  if (AATags) {
    OtherStore->getAAMetadata(AATags, /* Merge = */ true);
    NewSI->setAAMetadata(AATags);
  }

  // Nuke the old stores.
  EraseInstFromFunction(SI);
  EraseInstFromFunction(*OtherStore);
  return true;
}