//===-- SeparateConstOffsetFromGEP.cpp - ------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Loop unrolling may create many similar GEPs for array accesses.
// e.g., a 2-level loop
//
// float a[32][32]; // global variable
//
// for (int i = 0; i < 2; ++i) {
//   for (int j = 0; j < 2; ++j) {
//     ...
//     ... = a[x + i][y + j];
//     ...
//   }
// }
//
// will probably be unrolled to:
//
// gep %a, 0, %x, %y; load
// gep %a, 0, %x, %y + 1; load
// gep %a, 0, %x + 1, %y; load
// gep %a, 0, %x + 1, %y + 1; load
//
// LLVM's GVN does not use partial redundancy elimination yet, and is thus
// unable to reuse (gep %a, 0, %x, %y). As a result, this misoptimization incurs
// a significant slowdown on targets with limited addressing modes. For
// instance, because the PTX target does not support the reg+reg addressing
// mode, the NVPTX backend emits PTX code that literally computes the pointer
// address of each GEP, wasting tons of registers. It emits the following PTX
// for the first load and similar PTX for the other loads.
//
//   mov.u32         %r1, %x;
//   mov.u32         %r2, %y;
//   mul.wide.u32    %rl2, %r1, 128;
//   mov.u64         %rl3, a;
//   add.s64         %rl4, %rl3, %rl2;
//   mul.wide.u32    %rl5, %r2, 4;
//   add.s64         %rl6, %rl4, %rl5;
//   ld.global.f32   %f1, [%rl6];
//
// To reduce the register pressure, the optimization implemented in this file
// merges the common part of a group of GEPs, so we can compute each pointer
// address by adding a simple offset to the common part, saving many registers.
//
// It works by splitting each GEP into a variadic base and a constant offset.
// The variadic base can be computed once and reused by multiple GEPs, and the
// constant offsets can be nicely folded into the reg+immediate addressing mode
// (supported by most targets) without using any extra register.
//
// For instance, we transform the four GEPs and four loads in the above example
// into:
//
//   base = gep a, 0, x, y
//   load base
//   load base + 1 * sizeof(float)
//   load base + 32 * sizeof(float)
//   load base + 33 * sizeof(float)
//
// Given the transformed IR, a backend that supports the reg+immediate
// addressing mode can easily fold the pointer arithmetic into the loads. For
// example, the NVPTX backend can easily fold the pointer arithmetic into the
// ld.global.f32 instructions, and the resultant PTX uses far fewer registers.
//
//   mov.u32         %r1, %tid.x;
//   mov.u32         %r2, %tid.y;
//   mul.wide.u32    %rl2, %r1, 128;
//   mov.u64         %rl3, a;
//   add.s64         %rl4, %rl3, %rl2;
//   mul.wide.u32    %rl5, %r2, 4;
//   add.s64         %rl6, %rl4, %rl5;
//   ld.global.f32   %f1, [%rl6];     // so far the same as unoptimized PTX
//   ld.global.f32   %f2, [%rl6+4];   // much better
//   ld.global.f32   %f3, [%rl6+128]; // much better
//   ld.global.f32   %f4, [%rl6+132]; // much better
//
//===----------------------------------------------------------------------===//
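
// The pass is registered as "separate-const-offset-from-gep" and is meant to
// be followed by some CSE pass so that the shared variadic bases are actually
// reused. A minimal sketch of how a client might schedule it; the helper name
// and the pass-manager plumbing below are illustrative and not part of this
// file (only createSeparateConstOffsetFromGEPPass is defined here):
//
//   #include "llvm/IR/LegacyPassManager.h"
//   #include "llvm/Transforms/Scalar.h"
//
//   static void addGEPSplittingPasses(llvm::legacy::PassManagerBase &PM) {
//     PM.add(llvm::createSeparateConstOffsetFromGEPPass());
//     PM.add(llvm::createGVNPass()); // reuse the common variadic bases
//   }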

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"

using namespace llvm;

static cl::opt<bool> DisableSeparateConstOffsetFromGEP(
    "disable-separate-const-offset-from-gep", cl::init(false),
    cl::desc("Do not separate the constant offset from a GEP instruction"),
    cl::Hidden);

namespace {

/// \brief A helper class for separating a constant offset from a GEP index.
///
/// In real programs, a GEP index may be more complicated than a simple
/// addition of something and a constant integer which can be trivially split.
/// For example, to split ((a << 3) | 5) + b, we need to search deeper for the
/// constant offset, so that we can separate the index to (a << 3) + b and 5.
///
/// Therefore, this class looks into the expression that computes a given GEP
/// index, and tries to find a constant integer that can be hoisted to the
/// outermost level of the expression as an addition. Not every constant in an
/// expression can jump out. e.g., we cannot transform (b * (a + 5)) to
/// (b * a + 5); nor can we transform (3 * (a + 5)) to (3 * a + 5). However, in
/// the latter case, -instcombine has probably already optimized (3 * (a + 5))
/// to (3 * a + 15).
class ConstantOffsetExtractor {
public:
  /// Extracts a constant offset from the given GEP index. It outputs the
  /// numeric value of the extracted constant offset (0 if failed), and a
  /// new index representing the remainder (equal to the original index minus
  /// the constant offset).
  /// \p Idx    The given GEP index
  /// \p NewIdx The new index to replace (output)
  /// \p DL     The data layout of the module
  /// \p GEP    The given GEP
  static int64_t Extract(Value *Idx, Value *&NewIdx, const DataLayout *DL,
                         GetElementPtrInst *GEP);
  /// Looks for a constant offset without extracting it. The meaning of the
  /// arguments and the return value are the same as Extract.
  static int64_t Find(Value *Idx, const DataLayout *DL, GetElementPtrInst *GEP);

private:
  ConstantOffsetExtractor(const DataLayout *Layout, Instruction *InsertionPt)
      : DL(Layout), IP(InsertionPt) {}
  /// Searches the expression that computes V for a non-zero constant C s.t.
  /// V can be reassociated into the form V' + C. If the search is successful,
  /// returns C and updates UserChain as a def-use chain from C to V;
  /// otherwise, UserChain is empty.
  ///
  /// \p V            The given expression
  /// \p SignExtended Whether V will be sign-extended in the computation of the
  ///                 GEP index
  /// \p ZeroExtended Whether V will be zero-extended in the computation of the
  ///                 GEP index
  /// \p NonNegative  Whether V is guaranteed to be non-negative. For example,
  ///                 an index of an inbounds GEP is guaranteed to be
  ///                 non-negative. Leveraging this, we can better split
  ///                 inbounds GEPs.
  APInt find(Value *V, bool SignExtended, bool ZeroExtended, bool NonNegative);
  /// A helper function to look into both operands of a binary operator.
  APInt findInEitherOperand(BinaryOperator *BO, bool SignExtended,
                            bool ZeroExtended);
  /// After finding the constant offset C from the GEP index I, we build a new
  /// index I' s.t. I' + C = I. This function builds and returns the new
  /// index I' according to UserChain produced by function "find".
  ///
  /// The building conceptually takes two steps:
  /// 1) iteratively distribute s/zext towards the leaves of the expression
  ///    tree that computes I
  /// 2) reassociate the expression tree to the form I' + C.
  ///
  /// For example, to extract the 5 from sext(a + (b + 5)), we first distribute
  /// sext to a, b and 5 so that we have
  ///   sext(a) + (sext(b) + 5).
  /// Then, we reassociate it to
  ///   (sext(a) + sext(b)) + 5.
  /// Given this form, we know I' is sext(a) + sext(b).
  Value *rebuildWithoutConstOffset();
  /// After the first step of rebuilding the GEP index without the constant
  /// offset, distribute s/zext to the operands of all operators in UserChain.
  /// e.g., zext(sext(a + (b + 5))) (assuming no overflow) =>
  ///       zext(sext(a)) + (zext(sext(b)) + zext(sext(5))).
  ///
  /// The function also updates UserChain to point to new subexpressions after
  /// distributing s/zext. e.g., the old UserChain of the above example is
  ///   5 -> b + 5 -> a + (b + 5) -> sext(...) -> zext(sext(...)),
  /// and the new UserChain is
  ///   zext(sext(5)) -> zext(sext(b)) + zext(sext(5)) ->
  ///     zext(sext(a)) + (zext(sext(b)) + zext(sext(5)))
  ///
  /// \p ChainIndex The index to UserChain. ChainIndex is initially
  ///               UserChain.size() - 1, and is decremented during
  ///               the recursion.
  Value *distributeExtsAndCloneChain(unsigned ChainIndex);
  /// Reassociates the GEP index to the form I' + C and returns I'.
  Value *removeConstOffset(unsigned ChainIndex);
  /// A helper function to apply ExtInsts, a list of s/zext, to value V.
  /// e.g., if ExtInsts = [sext i32 to i64, zext i16 to i32], this function
  /// returns "sext i32 (zext i16 V to i32) to i64".
  Value *applyExts(Value *V);

  /// Returns true if LHS and RHS have no bits in common, i.e., LHS & RHS == 0.
  bool NoCommonBits(Value *LHS, Value *RHS) const;
  /// Computes which bits are known to be one or zero.
  /// \p KnownOne  Mask of all bits that are known to be one.
  /// \p KnownZero Mask of all bits that are known to be zero.
  void ComputeKnownBits(Value *V, APInt &KnownOne, APInt &KnownZero) const;
  /// A helper function that returns whether we can trace into the operands
  /// of binary operator BO for a constant offset.
  ///
  /// \p SignExtended Whether BO is surrounded by sext
  /// \p ZeroExtended Whether BO is surrounded by zext
  /// \p NonNegative  Whether BO is known to be non-negative, e.g., an index
  ///                 of an inbounds GEP.
  bool CanTraceInto(bool SignExtended, bool ZeroExtended, BinaryOperator *BO,
                    bool NonNegative);

  /// The path from the constant offset to the old GEP index. e.g., if the GEP
  /// index is "a * b + (c + 5)", after running function find, UserChain[0]
  /// will be the constant 5, UserChain[1] will be the subexpression "c + 5",
  /// and UserChain[2] will be the entire expression "a * b + (c + 5)".
  ///
  /// This path helps to rebuild the new GEP index.
  SmallVector<User *, 8> UserChain;
  /// A data structure used in rebuildWithoutConstOffset. Contains all
  /// sext/zext instructions along UserChain.
  SmallVector<CastInst *, 16> ExtInsts;
  /// The data layout of the module. Used in ComputeKnownBits.
  const DataLayout *DL;
  /// Insertion position of cloned instructions.
  Instruction *IP;
};

/// \brief A pass that tries to split every GEP in the function into a variadic
/// base and a constant offset. It is a FunctionPass because searching for the
/// constant offset may inspect other basic blocks.
class SeparateConstOffsetFromGEP : public FunctionPass {
public:
  static char ID;
  SeparateConstOffsetFromGEP() : FunctionPass(ID) {
    initializeSeparateConstOffsetFromGEPPass(*PassRegistry::getPassRegistry());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<DataLayoutPass>();
    AU.addRequired<TargetTransformInfo>();
  }

  bool doInitialization(Module &M) override {
    DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
    if (DLP == nullptr)
      report_fatal_error("data layout missing");
    DL = &DLP->getDataLayout();
    return false;
  }

  bool runOnFunction(Function &F) override;

private:
  /// Tries to split the given GEP into a variadic base and a constant offset,
  /// and returns true if the splitting succeeds.
  bool splitGEP(GetElementPtrInst *GEP);
  /// Finds the constant offset within each index, and accumulates them. This
  /// function only inspects the GEP without changing it. The output
  /// NeedsExtraction indicates whether we can extract a non-zero constant
  /// offset from any index.
  int64_t accumulateByteOffset(GetElementPtrInst *GEP, bool &NeedsExtraction);
  /// Canonicalizes array indices to pointer-size integers. This helps to
  /// simplify the logic of splitting a GEP. For example, if a + b is a
  /// pointer-size integer, we have
  ///   gep base, a + b = gep (gep base, a), b
  /// However, this equality may not hold if the size of a + b is smaller than
  /// the pointer size, because LLVM conceptually sign-extends GEP indices to
  /// pointer size before computing the address
  /// (http://llvm.org/docs/LangRef.html#id181).
  ///
  /// This canonicalization is very likely already done in clang and
  /// instcombine, so the IR will probably remain unchanged.
  ///
  /// Returns true if the module changes.
  ///
  /// Verified in @i32_add in split-gep.ll
  bool canonicalizeArrayIndicesToPointerSize(GetElementPtrInst *GEP);

  const DataLayout *DL;
};
} // anonymous namespace

char SeparateConstOffsetFromGEP::ID = 0;
INITIALIZE_PASS_BEGIN(
    SeparateConstOffsetFromGEP, "separate-const-offset-from-gep",
    "Split GEPs to a variadic base and a constant offset for better CSE", false,
    false)
INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
INITIALIZE_PASS_DEPENDENCY(DataLayoutPass)
INITIALIZE_PASS_END(
    SeparateConstOffsetFromGEP, "separate-const-offset-from-gep",
    "Split GEPs to a variadic base and a constant offset for better CSE", false,
    false)

FunctionPass *llvm::createSeparateConstOffsetFromGEPPass() {
  return new SeparateConstOffsetFromGEP();
}

bool ConstantOffsetExtractor::CanTraceInto(bool SignExtended,
                                           bool ZeroExtended,
                                           BinaryOperator *BO,
                                           bool NonNegative) {
  // We only consider ADD, SUB and OR, because a non-zero constant found in
  // expressions composed of these operations can be easily hoisted as a
  // constant offset by reassociation.
  if (BO->getOpcode() != Instruction::Add &&
      BO->getOpcode() != Instruction::Sub &&
      BO->getOpcode() != Instruction::Or) {
    return false;
  }

  Value *LHS = BO->getOperand(0), *RHS = BO->getOperand(1);
  // Do not trace into "or" unless it is equivalent to "add". If LHS and RHS
  // don't have common bits, (LHS | RHS) is equivalent to (LHS + RHS).
  if (BO->getOpcode() == Instruction::Or && !NoCommonBits(LHS, RHS))
    return false;

  // In addition, tracing into BO requires that its surrounding s/zext (if
  // any) is distributable to both operands.
  //
  // Suppose BO = A op B.
  //  SignExtended | ZeroExtended | Distributable?
  // --------------+--------------+----------------------------------
  //        0      |      0       | true because no s/zext exists
  //        0      |      1       | zext(BO) == zext(A) op zext(B)
  //        1      |      0       | sext(BO) == sext(A) op sext(B)
  //        1      |      1       | zext(sext(BO)) ==
  //               |              |   zext(sext(A)) op zext(sext(B))
  if (BO->getOpcode() == Instruction::Add && !ZeroExtended && NonNegative) {
    // If a + b >= 0 and (a >= 0 or b >= 0), then
    //   sext(a + b) = sext(a) + sext(b)
    // even if the addition is not marked nsw.
    //
    // Leveraging this invariant, we can trace into an sext'ed inbounds GEP
    // index if the constant offset is non-negative.
    //
    // Verified in @sext_add in split-gep.ll.
    if (ConstantInt *ConstLHS = dyn_cast<ConstantInt>(LHS)) {
      if (!ConstLHS->isNegative())
        return true;
    }
    if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(RHS)) {
      if (!ConstRHS->isNegative())
        return true;
    }
  }

  // sext (add/sub nsw A, B) == add/sub nsw (sext A), (sext B)
  // zext (add/sub nuw A, B) == add/sub nuw (zext A), (zext B)
  if (BO->getOpcode() == Instruction::Add ||
      BO->getOpcode() == Instruction::Sub) {
    if (SignExtended && !BO->hasNoSignedWrap())
      return false;
    if (ZeroExtended && !BO->hasNoUnsignedWrap())
      return false;
  }

  return true;
}

APInt ConstantOffsetExtractor::findInEitherOperand(BinaryOperator *BO,
                                                   bool SignExtended,
                                                   bool ZeroExtended) {
  // BO being non-negative does not shed light on whether its operands are
  // non-negative. Clear the NonNegative flag here.
  APInt ConstantOffset = find(BO->getOperand(0), SignExtended, ZeroExtended,
                              /* NonNegative */ false);
  // If we found a constant offset in the left operand, stop and return that.
  // This shortcut might cause us to miss opportunities of combining the
  // constant offsets in both operands, e.g., (a + 4) + (b + 5) => (a + b) + 9.
  // However, such cases are probably already handled by -instcombine,
  // given this pass runs after the standard optimizations.
  if (ConstantOffset != 0) return ConstantOffset;
  ConstantOffset = find(BO->getOperand(1), SignExtended, ZeroExtended,
                        /* NonNegative */ false);
  // If BO is a sub operator, negate the constant offset found in the right
  // operand.
  if (BO->getOpcode() == Instruction::Sub)
    ConstantOffset = -ConstantOffset;
  return ConstantOffset;
}

APInt ConstantOffsetExtractor::find(Value *V, bool SignExtended,
                                    bool ZeroExtended, bool NonNegative) {
  // TODO(jingyue): We could trace into integer/pointer casts, such as
  // inttoptr, ptrtoint, bitcast, and addrspacecast. We choose to handle only
  // integers because it gives good enough results for our benchmarks.
  unsigned BitWidth = cast<IntegerType>(V->getType())->getBitWidth();

  // We cannot do much with Values that are not a User, such as an Argument.
  User *U = dyn_cast<User>(V);
  if (U == nullptr) return APInt(BitWidth, 0);

  APInt ConstantOffset(BitWidth, 0);
  if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
    // Hooray, we found it!
    ConstantOffset = CI->getValue();
  } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V)) {
    // Trace into subexpressions for more hoisting opportunities.
    if (CanTraceInto(SignExtended, ZeroExtended, BO, NonNegative)) {
      ConstantOffset = findInEitherOperand(BO, SignExtended, ZeroExtended);
    }
  } else if (isa<SExtInst>(V)) {
    ConstantOffset = find(U->getOperand(0), /* SignExtended */ true,
                          ZeroExtended, NonNegative).sext(BitWidth);
  } else if (isa<ZExtInst>(V)) {
    // As an optimization, we can clear the SignExtended flag because
    // sext(zext(a)) = zext(a). Verified in @sext_zext in split-gep.ll.
    //
    // Clear the NonNegative flag, because zext(a) >= 0 does not imply a >= 0.
    ConstantOffset =
        find(U->getOperand(0), /* SignExtended */ false,
             /* ZeroExtended */ true, /* NonNegative */ false).zext(BitWidth);
  }

  // If we found a non-zero constant offset, add it to the path for
  // rebuildWithoutConstOffset. Zero is a valid constant offset, but doesn't
  // help this optimization.
  if (ConstantOffset != 0)
    UserChain.push_back(U);
  return ConstantOffset;
}

Value *ConstantOffsetExtractor::applyExts(Value *V) {
  Value *Current = V;
  // ExtInsts is built in the use-def order. Therefore, we apply them to V
  // in the reverse order.
  for (auto I = ExtInsts.rbegin(), E = ExtInsts.rend(); I != E; ++I) {
    if (Constant *C = dyn_cast<Constant>(Current)) {
      // If Current is a constant, apply s/zext using ConstantExpr::getCast.
      // ConstantExpr::getCast emits a ConstantInt if C is a ConstantInt.
      Current = ConstantExpr::getCast((*I)->getOpcode(), C, (*I)->getType());
    } else {
      Instruction *Ext = (*I)->clone();
      Ext->setOperand(0, Current);
      Ext->insertBefore(IP);
      Current = Ext;
    }
  }
  return Current;
}

Value *ConstantOffsetExtractor::rebuildWithoutConstOffset() {
  distributeExtsAndCloneChain(UserChain.size() - 1);
  // Remove all nullptrs (used to be s/zext) from UserChain.
  unsigned NewSize = 0;
  for (auto I = UserChain.begin(), E = UserChain.end(); I != E; ++I) {
    if (*I != nullptr) {
      UserChain[NewSize] = *I;
      NewSize++;
    }
  }
  UserChain.resize(NewSize);
  return removeConstOffset(UserChain.size() - 1);
}

Value *
ConstantOffsetExtractor::distributeExtsAndCloneChain(unsigned ChainIndex) {
  User *U = UserChain[ChainIndex];
  if (ChainIndex == 0) {
    assert(isa<ConstantInt>(U));
    // If U is a ConstantInt, applyExts will return a ConstantInt as well.
    return UserChain[ChainIndex] = cast<ConstantInt>(applyExts(U));
  }

  if (CastInst *Cast = dyn_cast<CastInst>(U)) {
    assert((isa<SExtInst>(Cast) || isa<ZExtInst>(Cast)) &&
           "We only traced into two types of CastInst: sext and zext");
    ExtInsts.push_back(Cast);
    UserChain[ChainIndex] = nullptr;
    return distributeExtsAndCloneChain(ChainIndex - 1);
  }

  // Function find only traces into BinaryOperator and CastInst.
  BinaryOperator *BO = cast<BinaryOperator>(U);
  // OpNo = which operand of BO is UserChain[ChainIndex - 1]
  unsigned OpNo = (BO->getOperand(0) == UserChain[ChainIndex - 1] ? 0 : 1);
  Value *TheOther = applyExts(BO->getOperand(1 - OpNo));
  Value *NextInChain = distributeExtsAndCloneChain(ChainIndex - 1);

  BinaryOperator *NewBO = nullptr;
  if (OpNo == 0) {
    NewBO = BinaryOperator::Create(BO->getOpcode(), NextInChain, TheOther,
                                   BO->getName(), IP);
  } else {
    NewBO = BinaryOperator::Create(BO->getOpcode(), TheOther, NextInChain,
                                   BO->getName(), IP);
  }
  return UserChain[ChainIndex] = NewBO;
}

Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) {
  if (ChainIndex == 0) {
    assert(isa<ConstantInt>(UserChain[ChainIndex]));
    return ConstantInt::getNullValue(UserChain[ChainIndex]->getType());
  }

  BinaryOperator *BO = cast<BinaryOperator>(UserChain[ChainIndex]);
  unsigned OpNo = (BO->getOperand(0) == UserChain[ChainIndex - 1] ? 0 : 1);
  assert(BO->getOperand(OpNo) == UserChain[ChainIndex - 1]);
  Value *NextInChain = removeConstOffset(ChainIndex - 1);
  Value *TheOther = BO->getOperand(1 - OpNo);

  // If NextInChain is 0 and not the LHS of a sub, we can simplify the
  // sub-expression to be just TheOther.
  if (ConstantInt *CI = dyn_cast<ConstantInt>(NextInChain)) {
    if (CI->isZero() && !(BO->getOpcode() == Instruction::Sub && OpNo == 0))
      return TheOther;
  }

  if (BO->getOpcode() == Instruction::Or) {
    // Rebuild "or" as "add", because "or" may be invalid for the new
    // expression.
    //
    // For instance, given
    //   a | (b + 5) where a and b + 5 have no common bits,
    // we can extract 5 as the constant offset.
    //
    // However, reusing the "or" in the new index would give us
    //   (a | b) + 5
    // which does not equal a | (b + 5).
    //
    // Replacing the "or" with "add" is fine, because
    //   a | (b + 5) = a + (b + 5) = (a + b) + 5
    return BinaryOperator::CreateAdd(BO->getOperand(0), BO->getOperand(1),
                                     BO->getName(), IP);
  }

  // We can reuse BO in this case, because the new expression shares the same
  // instruction type and BO is used at most once.
  assert(BO->getNumUses() <= 1 &&
         "distributeExtsAndCloneChain clones each BinaryOperator in "
         "UserChain, so no one should be used more than once");
  BO->setOperand(OpNo, NextInChain);
  BO->setHasNoSignedWrap(false);
  BO->setHasNoUnsignedWrap(false);
  // Make sure it appears after all instructions we've inserted so far.
  BO->moveBefore(IP);
  return BO;
}

int64_t ConstantOffsetExtractor::Extract(Value *Idx, Value *&NewIdx,
                                         const DataLayout *DL,
                                         GetElementPtrInst *GEP) {
  ConstantOffsetExtractor Extractor(DL, GEP);
  // Find a non-zero constant offset first.
  APInt ConstantOffset =
      Extractor.find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
                     GEP->isInBounds());
  if (ConstantOffset != 0) {
    // Separates the constant offset from the GEP index.
    NewIdx = Extractor.rebuildWithoutConstOffset();
  }
  return ConstantOffset.getSExtValue();
}

int64_t ConstantOffsetExtractor::Find(Value *Idx, const DataLayout *DL,
                                      GetElementPtrInst *GEP) {
  // If Idx is an index of an inbounds GEP, Idx is guaranteed to be
  // non-negative.
  return ConstantOffsetExtractor(DL, GEP)
      .find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
            GEP->isInBounds())
      .getSExtValue();
}

void ConstantOffsetExtractor::ComputeKnownBits(Value *V, APInt &KnownOne,
                                               APInt &KnownZero) const {
  IntegerType *IT = cast<IntegerType>(V->getType());
  KnownOne = APInt(IT->getBitWidth(), 0);
  KnownZero = APInt(IT->getBitWidth(), 0);
  llvm::computeKnownBits(V, KnownZero, KnownOne, DL, 0);
}

bool ConstantOffsetExtractor::NoCommonBits(Value *LHS, Value *RHS) const {
  assert(LHS->getType() == RHS->getType() &&
         "LHS and RHS should have the same type");
  APInt LHSKnownOne, LHSKnownZero, RHSKnownOne, RHSKnownZero;
  ComputeKnownBits(LHS, LHSKnownOne, LHSKnownZero);
  ComputeKnownBits(RHS, RHSKnownOne, RHSKnownZero);
  return (LHSKnownZero | RHSKnownZero).isAllOnesValue();
}

bool SeparateConstOffsetFromGEP::canonicalizeArrayIndicesToPointerSize(
    GetElementPtrInst *GEP) {
  bool Changed = false;
  Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
  gep_type_iterator GTI = gep_type_begin(*GEP);
  for (User::op_iterator I = GEP->op_begin() + 1, E = GEP->op_end();
       I != E; ++I, ++GTI) {
    // Skip struct member indices, which must be i32.
    if (isa<SequentialType>(*GTI)) {
      if ((*I)->getType() != IntPtrTy) {
        *I = CastInst::CreateIntegerCast(*I, IntPtrTy, true, "idxprom", GEP);
        Changed = true;
      }
    }
  }
  return Changed;
}

int64_t
SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,
                                                 bool &NeedsExtraction) {
  NeedsExtraction = false;
  int64_t AccumulativeByteOffset = 0;
  gep_type_iterator GTI = gep_type_begin(*GEP);
  for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
    if (isa<SequentialType>(*GTI)) {
      // Tries to extract a constant offset from this GEP index.
      int64_t ConstantOffset =
          ConstantOffsetExtractor::Find(GEP->getOperand(I), DL, GEP);
      if (ConstantOffset != 0) {
        NeedsExtraction = true;
        // A GEP may have multiple indices. We accumulate the extracted
        // constant offset to a byte offset, and later offset the remainder of
        // the original GEP with this byte offset.
        AccumulativeByteOffset +=
            ConstantOffset * DL->getTypeAllocSize(GTI.getIndexedType());
      }
    }
  }
  return AccumulativeByteOffset;
}

bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
  // Skip vector GEPs.
  if (GEP->getType()->isVectorTy())
    return false;

  // The backend can already nicely handle the case where all indices are
  // constant.
  if (GEP->hasAllConstantIndices())
    return false;

  bool Changed = canonicalizeArrayIndicesToPointerSize(GEP);

  bool NeedsExtraction;
  int64_t AccumulativeByteOffset = accumulateByteOffset(GEP, NeedsExtraction);

  if (!NeedsExtraction)
    return Changed;
  // Before really splitting the GEP, check whether the backend supports the
  // addressing mode we are about to produce. If not, this splitting probably
  // won't be beneficial.
  TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();
  if (!TTI.isLegalAddressingMode(GEP->getType()->getElementType(),
                                 /*BaseGV=*/nullptr, AccumulativeByteOffset,
                                 /*HasBaseReg=*/true, /*Scale=*/0)) {
    return Changed;
  }

  // Remove the constant offset in each GEP index. The resultant GEP computes
  // the variadic base.
  gep_type_iterator GTI = gep_type_begin(*GEP);
  for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
    if (isa<SequentialType>(*GTI)) {
      Value *NewIdx = nullptr;
      // Tries to extract a constant offset from this GEP index.
      int64_t ConstantOffset =
          ConstantOffsetExtractor::Extract(GEP->getOperand(I), NewIdx, DL, GEP);
      if (ConstantOffset != 0) {
        assert(NewIdx != nullptr &&
               "ConstantOffset != 0 implies NewIdx is set");
        GEP->setOperand(I, NewIdx);
      }
    }
  }
  // Clear the inbounds attribute because the new index may be out of bounds.
  // e.g.,
  //
  //   b = add i64 a, 5
  //   addr = gep inbounds float* p, i64 b
  //
  // is transformed to:
  //
  //   addr2 = gep float* p, i64 a
  //   addr = gep float* addr2, i64 5
  //
  // If a is -4, although the old index b is in bounds, the new index a is
  // out of bounds. http://llvm.org/docs/LangRef.html#id181 says "if the
  // inbounds keyword is not present, the offsets are added to the base
  // address with silently-wrapping two's complement arithmetic".
  // Therefore, the final code will be semantically equivalent.
  //
  // TODO(jingyue): do some range analysis to keep as many inbounds as
  // possible. GEPs with inbounds are more friendly to alias analysis.
  GEP->setIsInBounds(false);

  // Offsets the base with the accumulative byte offset.
  //
  //   %gep                        ; the base
  //   ... %gep ...
  //
  // => add the offset
  //
  //   %gep2                       ; clone of %gep
  //   %new.gep = gep %gep2, <offset / sizeof(*%gep)>
  //   %gep                        ; will be removed
  //   ... %gep ...
  //
  // => replace all uses of %gep with %new.gep and remove %gep
  //
  //   %gep2                       ; clone of %gep
  //   %new.gep = gep %gep2, <offset / sizeof(*%gep)>
  //   ... %new.gep ...
  //
  // If AccumulativeByteOffset is not a multiple of sizeof(*%gep), we emit an
  // uglygep (http://llvm.org/docs/GetElementPtr.html#what-s-an-uglygep):
  // bitcast %gep2 to i8*, add the offset, and bitcast the result back to the
  // type of %gep.
  //
  //   %gep2                       ; clone of %gep
  //   %0 = bitcast %gep2 to i8*
  //   %uglygep = gep %0, <offset>
  //   %new.gep = bitcast %uglygep to <type of %gep>
  //   ... %new.gep ...
  Instruction *NewGEP = GEP->clone();
  NewGEP->insertBefore(GEP);

  uint64_t ElementTypeSizeOfGEP =
      DL->getTypeAllocSize(GEP->getType()->getElementType());
  Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
  if (AccumulativeByteOffset % ElementTypeSizeOfGEP == 0) {
    // Very likely. As long as %gep is naturally aligned, the byte offset we
    // extracted should be a multiple of sizeof(*%gep).
    // Per the ANSI C standard, signed / unsigned = unsigned. Therefore, we
    // cast ElementTypeSizeOfGEP to signed.
    int64_t Index =
        AccumulativeByteOffset / static_cast<int64_t>(ElementTypeSizeOfGEP);
    NewGEP = GetElementPtrInst::Create(
        NewGEP, ConstantInt::get(IntPtrTy, Index, true), GEP->getName(), GEP);
  } else {
    // Unlikely but possible. For example,
    //
    //   #pragma pack(1)
    //   struct S {
    //     int a[3];
    //     int64 b[8];
    //   };
    //   #pragma pack()
    //
    // Suppose the gep before extraction is &s[i + 1].b[j + 3]. After
    // extraction, it becomes &s[i].b[j] and AccumulativeByteOffset is
    // sizeof(S) + 3 * sizeof(int64) = 100, which is not a multiple of
    // sizeof(int64). (This arithmetic is double-checked in the standalone
    // sketch at the end of this file.)
    //
    // Emit an uglygep in this case.
    Type *I8PtrTy = Type::getInt8PtrTy(GEP->getContext(),
                                       GEP->getPointerAddressSpace());
    NewGEP = new BitCastInst(NewGEP, I8PtrTy, "", GEP);
    NewGEP = GetElementPtrInst::Create(
        NewGEP, ConstantInt::get(IntPtrTy, AccumulativeByteOffset, true),
        "uglygep", GEP);
    if (GEP->getType() != I8PtrTy)
      NewGEP = new BitCastInst(NewGEP, GEP->getType(), GEP->getName(), GEP);
  }

  GEP->replaceAllUsesWith(NewGEP);
  GEP->eraseFromParent();

  return true;
}

bool SeparateConstOffsetFromGEP::runOnFunction(Function &F) {
  if (DisableSeparateConstOffsetFromGEP)
    return false;

  bool Changed = false;
  for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B) {
    for (BasicBlock::iterator I = B->begin(), IE = B->end(); I != IE; ) {
      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I++)) {
        Changed |= splitGEP(GEP);
      }
      // No need to split GEP ConstantExprs because all their indices are
      // already constant.
    }
  }
  return Changed;
}
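
// A standalone sketch double-checking the byte arithmetic in the packed-struct
// example inside splitGEP above. The struct layout and the indices are taken
// from that comment; the <cstdint> typedefs and static_asserts are just one
// way to check the numbers and are not part of the pass:
//
//   #include <cstdint>
//
//   #pragma pack(push, 1)
//   struct S {
//     int32_t a[3];
//     int64_t b[8];
//   };
//   #pragma pack(pop)
//
//   // 12 bytes for a[3] plus 64 bytes for b[8].
//   static_assert(sizeof(S) == 76, "packed layout");
//   // Byte distance from &s[i].b[j] to &s[i + 1].b[j + 3]:
//   static_assert(sizeof(S) + 3 * sizeof(int64_t) == 100,
//                 "100 is not a multiple of sizeof(int64_t), hence an uglygep");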