LLVM API Documentation
00001 //===- LoadCombine.cpp - Combine Adjacent Loads ---------------------------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 /// \file 00010 /// This transformation combines adjacent loads. 00011 /// 00012 //===----------------------------------------------------------------------===// 00013 00014 #include "llvm/Transforms/Scalar.h" 00015 00016 #include "llvm/ADT/DenseMap.h" 00017 #include "llvm/ADT/Statistic.h" 00018 #include "llvm/Analysis/TargetFolder.h" 00019 #include "llvm/Pass.h" 00020 #include "llvm/IR/DataLayout.h" 00021 #include "llvm/IR/Function.h" 00022 #include "llvm/IR/Instructions.h" 00023 #include "llvm/IR/IRBuilder.h" 00024 #include "llvm/Support/Debug.h" 00025 #include "llvm/Support/MathExtras.h" 00026 #include "llvm/Support/raw_ostream.h" 00027 00028 using namespace llvm; 00029 00030 #define DEBUG_TYPE "load-combine" 00031 00032 STATISTIC(NumLoadsAnalyzed, "Number of loads analyzed for combining"); 00033 STATISTIC(NumLoadsCombined, "Number of loads combined"); 00034 00035 namespace { 00036 struct PointerOffsetPair { 00037 Value *Pointer; 00038 uint64_t Offset; 00039 }; 00040 00041 struct LoadPOPPair { 00042 LoadPOPPair(LoadInst *L, PointerOffsetPair P, unsigned O) 00043 : Load(L), POP(P), InsertOrder(O) {} 00044 LoadPOPPair() {} 00045 LoadInst *Load; 00046 PointerOffsetPair POP; 00047 /// \brief The new load needs to be created before the first load in IR order. 00048 unsigned InsertOrder; 00049 }; 00050 00051 class LoadCombine : public BasicBlockPass { 00052 LLVMContext *C; 00053 const DataLayout *DL; 00054 00055 public: 00056 LoadCombine() 00057 : BasicBlockPass(ID), 00058 C(nullptr), DL(nullptr) { 00059 initializeSROAPass(*PassRegistry::getPassRegistry()); 00060 } 00061 00062 using llvm::Pass::doInitialization; 00063 bool doInitialization(Function &) override; 00064 bool runOnBasicBlock(BasicBlock &BB) override; 00065 void getAnalysisUsage(AnalysisUsage &AU) const override; 00066 00067 const char *getPassName() const override { return "LoadCombine"; } 00068 static char ID; 00069 00070 typedef IRBuilder<true, TargetFolder> BuilderTy; 00071 00072 private: 00073 BuilderTy *Builder; 00074 00075 PointerOffsetPair getPointerOffsetPair(LoadInst &); 00076 bool combineLoads(DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> &); 00077 bool aggregateLoads(SmallVectorImpl<LoadPOPPair> &); 00078 bool combineLoads(SmallVectorImpl<LoadPOPPair> &); 00079 }; 00080 } 00081 00082 bool LoadCombine::doInitialization(Function &F) { 00083 DEBUG(dbgs() << "LoadCombine function: " << F.getName() << "\n"); 00084 C = &F.getContext(); 00085 DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); 00086 if (!DLP) { 00087 DEBUG(dbgs() << " Skipping LoadCombine -- no target data!\n"); 00088 return false; 00089 } 00090 DL = &DLP->getDataLayout(); 00091 return true; 00092 } 00093 00094 PointerOffsetPair LoadCombine::getPointerOffsetPair(LoadInst &LI) { 00095 PointerOffsetPair POP; 00096 POP.Pointer = LI.getPointerOperand(); 00097 POP.Offset = 0; 00098 while (isa<BitCastInst>(POP.Pointer) || isa<GetElementPtrInst>(POP.Pointer)) { 00099 if (auto *GEP = dyn_cast<GetElementPtrInst>(POP.Pointer)) { 00100 unsigned BitWidth = DL->getPointerTypeSizeInBits(GEP->getType()); 00101 APInt Offset(BitWidth, 0); 00102 if (GEP->accumulateConstantOffset(*DL, Offset)) 00103 POP.Offset += Offset.getZExtValue(); 00104 else 00105 // Can't handle GEPs with variable indices. 00106 return POP; 00107 POP.Pointer = GEP->getPointerOperand(); 00108 } else if (auto *BC = dyn_cast<BitCastInst>(POP.Pointer)) 00109 POP.Pointer = BC->getOperand(0); 00110 } 00111 return POP; 00112 } 00113 00114 bool LoadCombine::combineLoads( 00115 DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> &LoadMap) { 00116 bool Combined = false; 00117 for (auto &Loads : LoadMap) { 00118 if (Loads.second.size() < 2) 00119 continue; 00120 std::sort(Loads.second.begin(), Loads.second.end(), 00121 [](const LoadPOPPair &A, const LoadPOPPair &B) { 00122 return A.POP.Offset < B.POP.Offset; 00123 }); 00124 if (aggregateLoads(Loads.second)) 00125 Combined = true; 00126 } 00127 return Combined; 00128 } 00129 00130 /// \brief Try to aggregate loads from a sorted list of loads to be combined. 00131 /// 00132 /// It is guaranteed that no writes occur between any of the loads. All loads 00133 /// have the same base pointer. There are at least two loads. 00134 bool LoadCombine::aggregateLoads(SmallVectorImpl<LoadPOPPair> &Loads) { 00135 assert(Loads.size() >= 2 && "Insufficient loads!"); 00136 LoadInst *BaseLoad = nullptr; 00137 SmallVector<LoadPOPPair, 8> AggregateLoads; 00138 bool Combined = false; 00139 uint64_t PrevOffset = -1ull; 00140 uint64_t PrevSize = 0; 00141 for (auto &L : Loads) { 00142 if (PrevOffset == -1ull) { 00143 BaseLoad = L.Load; 00144 PrevOffset = L.POP.Offset; 00145 PrevSize = DL->getTypeStoreSize(L.Load->getType()); 00146 AggregateLoads.push_back(L); 00147 continue; 00148 } 00149 if (L.Load->getAlignment() > BaseLoad->getAlignment()) 00150 continue; 00151 if (L.POP.Offset > PrevOffset + PrevSize) { 00152 // No other load will be combinable 00153 if (combineLoads(AggregateLoads)) 00154 Combined = true; 00155 AggregateLoads.clear(); 00156 PrevOffset = -1; 00157 continue; 00158 } 00159 if (L.POP.Offset != PrevOffset + PrevSize) 00160 // This load is offset less than the size of the last load. 00161 // FIXME: We may want to handle this case. 00162 continue; 00163 PrevOffset = L.POP.Offset; 00164 PrevSize = DL->getTypeStoreSize(L.Load->getType()); 00165 AggregateLoads.push_back(L); 00166 } 00167 if (combineLoads(AggregateLoads)) 00168 Combined = true; 00169 return Combined; 00170 } 00171 00172 /// \brief Given a list of combinable load. Combine the maximum number of them. 00173 bool LoadCombine::combineLoads(SmallVectorImpl<LoadPOPPair> &Loads) { 00174 // Remove loads from the end while the size is not a power of 2. 00175 unsigned TotalSize = 0; 00176 for (const auto &L : Loads) 00177 TotalSize += L.Load->getType()->getPrimitiveSizeInBits(); 00178 while (TotalSize != 0 && !isPowerOf2_32(TotalSize)) 00179 TotalSize -= Loads.pop_back_val().Load->getType()->getPrimitiveSizeInBits(); 00180 if (Loads.size() < 2) 00181 return false; 00182 00183 DEBUG({ 00184 dbgs() << "***** Combining Loads ******\n"; 00185 for (const auto &L : Loads) { 00186 dbgs() << L.POP.Offset << ": " << *L.Load << "\n"; 00187 } 00188 }); 00189 00190 // Find first load. This is where we put the new load. 00191 LoadPOPPair FirstLP; 00192 FirstLP.InsertOrder = -1u; 00193 for (const auto &L : Loads) 00194 if (L.InsertOrder < FirstLP.InsertOrder) 00195 FirstLP = L; 00196 00197 unsigned AddressSpace = 00198 FirstLP.POP.Pointer->getType()->getPointerAddressSpace(); 00199 00200 Builder->SetInsertPoint(FirstLP.Load); 00201 Value *Ptr = Builder->CreateConstGEP1_64( 00202 Builder->CreatePointerCast(Loads[0].POP.Pointer, 00203 Builder->getInt8PtrTy(AddressSpace)), 00204 Loads[0].POP.Offset); 00205 LoadInst *NewLoad = new LoadInst( 00206 Builder->CreatePointerCast( 00207 Ptr, PointerType::get(IntegerType::get(Ptr->getContext(), TotalSize), 00208 Ptr->getType()->getPointerAddressSpace())), 00209 Twine(Loads[0].Load->getName()) + ".combined", false, 00210 Loads[0].Load->getAlignment(), FirstLP.Load); 00211 00212 for (const auto &L : Loads) { 00213 Builder->SetInsertPoint(L.Load); 00214 Value *V = Builder->CreateExtractInteger( 00215 *DL, NewLoad, cast<IntegerType>(L.Load->getType()), 00216 L.POP.Offset - Loads[0].POP.Offset, "combine.extract"); 00217 L.Load->replaceAllUsesWith(V); 00218 } 00219 00220 NumLoadsCombined = NumLoadsCombined + Loads.size(); 00221 return true; 00222 } 00223 00224 bool LoadCombine::runOnBasicBlock(BasicBlock &BB) { 00225 if (skipOptnoneFunction(BB) || !DL) 00226 return false; 00227 00228 IRBuilder<true, TargetFolder> 00229 TheBuilder(BB.getContext(), TargetFolder(DL)); 00230 Builder = &TheBuilder; 00231 00232 DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> LoadMap; 00233 00234 bool Combined = false; 00235 unsigned Index = 0; 00236 for (auto &I : BB) { 00237 if (I.mayWriteToMemory() || I.mayThrow()) { 00238 if (combineLoads(LoadMap)) 00239 Combined = true; 00240 LoadMap.clear(); 00241 continue; 00242 } 00243 LoadInst *LI = dyn_cast<LoadInst>(&I); 00244 if (!LI) 00245 continue; 00246 ++NumLoadsAnalyzed; 00247 if (!LI->isSimple() || !LI->getType()->isIntegerTy()) 00248 continue; 00249 auto POP = getPointerOffsetPair(*LI); 00250 if (!POP.Pointer) 00251 continue; 00252 LoadMap[POP.Pointer].push_back(LoadPOPPair(LI, POP, Index++)); 00253 } 00254 if (combineLoads(LoadMap)) 00255 Combined = true; 00256 return Combined; 00257 } 00258 00259 void LoadCombine::getAnalysisUsage(AnalysisUsage &AU) const { 00260 AU.setPreservesCFG(); 00261 } 00262 00263 char LoadCombine::ID = 0; 00264 00265 BasicBlockPass *llvm::createLoadCombinePass() { 00266 return new LoadCombine(); 00267 } 00268 00269 INITIALIZE_PASS(LoadCombine, "load-combine", "Combine Adjacent Loads", false, 00270 false)