LLVM API Documentation
00001 //===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This file contains an optimization for div and rem on architectures that 00011 // execute short instructions significantly faster than longer instructions. 00012 // For example, on Intel Atom 32-bit divides are slow enough that during 00013 // runtime it is profitable to check the value of the operands, and if they are 00014 // positive and less than 256 use an unsigned 8-bit divide. 00015 // 00016 //===----------------------------------------------------------------------===// 00017 00018 #include "llvm/Transforms/Utils/BypassSlowDivision.h" 00019 #include "llvm/ADT/DenseMap.h" 00020 #include "llvm/IR/Function.h" 00021 #include "llvm/IR/IRBuilder.h" 00022 #include "llvm/IR/Instructions.h" 00023 00024 using namespace llvm; 00025 00026 #define DEBUG_TYPE "bypass-slow-division" 00027 00028 namespace { 00029 struct DivOpInfo { 00030 bool SignedOp; 00031 Value *Dividend; 00032 Value *Divisor; 00033 00034 DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor) 00035 : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {} 00036 }; 00037 00038 struct DivPhiNodes { 00039 PHINode *Quotient; 00040 PHINode *Remainder; 00041 00042 DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder) 00043 : Quotient(InQuotient), Remainder(InRemainder) {} 00044 }; 00045 } 00046 00047 namespace llvm { 00048 template<> 00049 struct DenseMapInfo<DivOpInfo> { 00050 static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) { 00051 return Val1.SignedOp == Val2.SignedOp && 00052 Val1.Dividend == Val2.Dividend && 00053 Val1.Divisor == Val2.Divisor; 00054 } 00055 00056 static DivOpInfo getEmptyKey() { 00057 return DivOpInfo(false, nullptr, nullptr); 00058 } 00059 00060 static DivOpInfo getTombstoneKey() { 00061 return DivOpInfo(true, nullptr, nullptr); 00062 } 00063 00064 static unsigned getHashValue(const DivOpInfo &Val) { 00065 return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^ 00066 reinterpret_cast<uintptr_t>(Val.Divisor)) ^ 00067 (unsigned)Val.SignedOp; 00068 } 00069 }; 00070 00071 typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy; 00072 } 00073 00074 // insertFastDiv - Substitutes the div/rem instruction with code that checks the 00075 // value of the operands and uses a shorter-faster div/rem instruction when 00076 // possible and the longer-slower div/rem instruction otherwise. 00077 static bool insertFastDiv(Function &F, 00078 Function::iterator &I, 00079 BasicBlock::iterator &J, 00080 IntegerType *BypassType, 00081 bool UseDivOp, 00082 bool UseSignedOp, 00083 DivCacheTy &PerBBDivCache) { 00084 // Get instruction operands 00085 Instruction *Instr = J; 00086 Value *Dividend = Instr->getOperand(0); 00087 Value *Divisor = Instr->getOperand(1); 00088 00089 if (isa<ConstantInt>(Divisor) || 00090 (isa<ConstantInt>(Dividend) && isa<ConstantInt>(Divisor))) { 00091 // Operations with immediate values should have 00092 // been solved and replaced during compile time. 00093 return false; 00094 } 00095 00096 // Basic Block is split before divide 00097 BasicBlock *MainBB = I; 00098 BasicBlock *SuccessorBB = I->splitBasicBlock(J); 00099 ++I; //advance iterator I to successorBB 00100 00101 // Add new basic block for slow divide operation 00102 BasicBlock *SlowBB = BasicBlock::Create(F.getContext(), "", 00103 MainBB->getParent(), SuccessorBB); 00104 SlowBB->moveBefore(SuccessorBB); 00105 IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin()); 00106 Value *SlowQuotientV; 00107 Value *SlowRemainderV; 00108 if (UseSignedOp) { 00109 SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor); 00110 SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor); 00111 } else { 00112 SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor); 00113 SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor); 00114 } 00115 SlowBuilder.CreateBr(SuccessorBB); 00116 00117 // Add new basic block for fast divide operation 00118 BasicBlock *FastBB = BasicBlock::Create(F.getContext(), "", 00119 MainBB->getParent(), SuccessorBB); 00120 FastBB->moveBefore(SlowBB); 00121 IRBuilder<> FastBuilder(FastBB, FastBB->begin()); 00122 Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor, 00123 BypassType); 00124 Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend, 00125 BypassType); 00126 00127 // udiv/urem because optimization only handles positive numbers 00128 Value *ShortQuotientV = FastBuilder.CreateExactUDiv(ShortDividendV, 00129 ShortDivisorV); 00130 Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV, 00131 ShortDivisorV); 00132 Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt, 00133 ShortQuotientV, 00134 Dividend->getType()); 00135 Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt, 00136 ShortRemainderV, 00137 Dividend->getType()); 00138 FastBuilder.CreateBr(SuccessorBB); 00139 00140 // Phi nodes for result of div and rem 00141 IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin()); 00142 PHINode *QuoPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2); 00143 QuoPhi->addIncoming(SlowQuotientV, SlowBB); 00144 QuoPhi->addIncoming(FastQuotientV, FastBB); 00145 PHINode *RemPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2); 00146 RemPhi->addIncoming(SlowRemainderV, SlowBB); 00147 RemPhi->addIncoming(FastRemainderV, FastBB); 00148 00149 // Replace Instr with appropriate phi node 00150 if (UseDivOp) 00151 Instr->replaceAllUsesWith(QuoPhi); 00152 else 00153 Instr->replaceAllUsesWith(RemPhi); 00154 Instr->eraseFromParent(); 00155 00156 // Combine operands into a single value with OR for value testing below 00157 MainBB->getInstList().back().eraseFromParent(); 00158 IRBuilder<> MainBuilder(MainBB, MainBB->end()); 00159 Value *OrV = MainBuilder.CreateOr(Dividend, Divisor); 00160 00161 // BitMask is inverted to check if the operands are 00162 // larger than the bypass type 00163 uint64_t BitMask = ~BypassType->getBitMask(); 00164 Value *AndV = MainBuilder.CreateAnd(OrV, BitMask); 00165 00166 // Compare operand values and branch 00167 Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0); 00168 Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV); 00169 MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB); 00170 00171 // point iterator J at first instruction of successorBB 00172 J = I->begin(); 00173 00174 // Cache phi nodes to be used later in place of other instances 00175 // of div or rem with the same sign, dividend, and divisor 00176 DivOpInfo Key(UseSignedOp, Dividend, Divisor); 00177 DivPhiNodes Value(QuoPhi, RemPhi); 00178 PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value)); 00179 return true; 00180 } 00181 00182 // reuseOrInsertFastDiv - Reuses previously computed dividend or remainder if 00183 // operands and operation are identical. Otherwise call insertFastDiv to perform 00184 // the optimization and cache the resulting dividend and remainder. 00185 static bool reuseOrInsertFastDiv(Function &F, 00186 Function::iterator &I, 00187 BasicBlock::iterator &J, 00188 IntegerType *BypassType, 00189 bool UseDivOp, 00190 bool UseSignedOp, 00191 DivCacheTy &PerBBDivCache) { 00192 // Get instruction operands 00193 Instruction *Instr = J; 00194 DivOpInfo Key(UseSignedOp, Instr->getOperand(0), Instr->getOperand(1)); 00195 DivCacheTy::iterator CacheI = PerBBDivCache.find(Key); 00196 00197 if (CacheI == PerBBDivCache.end()) { 00198 // If previous instance does not exist, insert fast div 00199 return insertFastDiv(F, I, J, BypassType, UseDivOp, UseSignedOp, 00200 PerBBDivCache); 00201 } 00202 00203 // Replace operation value with previously generated phi node 00204 DivPhiNodes &Value = CacheI->second; 00205 if (UseDivOp) { 00206 // Replace all uses of div instruction with quotient phi node 00207 J->replaceAllUsesWith(Value.Quotient); 00208 } else { 00209 // Replace all uses of rem instruction with remainder phi node 00210 J->replaceAllUsesWith(Value.Remainder); 00211 } 00212 00213 // Advance to next operation 00214 ++J; 00215 00216 // Remove redundant operation 00217 Instr->eraseFromParent(); 00218 return true; 00219 } 00220 00221 // bypassSlowDivision - This optimization identifies DIV instructions that can 00222 // be profitably bypassed and carried out with a shorter, faster divide. 00223 bool llvm::bypassSlowDivision(Function &F, 00224 Function::iterator &I, 00225 const DenseMap<unsigned int, unsigned int> &BypassWidths) { 00226 DivCacheTy DivCache; 00227 00228 bool MadeChange = false; 00229 for (BasicBlock::iterator J = I->begin(); J != I->end(); J++) { 00230 00231 // Get instruction details 00232 unsigned Opcode = J->getOpcode(); 00233 bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv; 00234 bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem; 00235 bool UseSignedOp = Opcode == Instruction::SDiv || 00236 Opcode == Instruction::SRem; 00237 00238 // Only optimize div or rem ops 00239 if (!UseDivOp && !UseRemOp) 00240 continue; 00241 00242 // Skip division on vector types, only optimize integer instructions 00243 if (!J->getType()->isIntegerTy()) 00244 continue; 00245 00246 // Get bitwidth of div/rem instruction 00247 IntegerType *T = cast<IntegerType>(J->getType()); 00248 unsigned int bitwidth = T->getBitWidth(); 00249 00250 // Continue if bitwidth is not bypassed 00251 DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth); 00252 if (BI == BypassWidths.end()) 00253 continue; 00254 00255 // Get type for div/rem instruction with bypass bitwidth 00256 IntegerType *BT = IntegerType::get(J->getContext(), BI->second); 00257 00258 MadeChange |= reuseOrInsertFastDiv(F, I, J, BT, UseDivOp, 00259 UseSignedOp, DivCache); 00260 } 00261 00262 return MadeChange; 00263 }