LLVM API Documentation
00001 //===---- MachineCombiner.cpp - Instcombining on SSA form machine code ----===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // The machine combiner pass uses machine trace metrics to ensure the combined 00011 // instructions does not lengthen the critical path or the resource depth. 00012 //===----------------------------------------------------------------------===// 00013 #define DEBUG_TYPE "machine-combiner" 00014 00015 #include "llvm/ADT/Statistic.h" 00016 #include "llvm/ADT/DenseMap.h" 00017 #include "llvm/CodeGen/MachineDominators.h" 00018 #include "llvm/CodeGen/MachineFunction.h" 00019 #include "llvm/CodeGen/MachineFunctionPass.h" 00020 #include "llvm/CodeGen/MachineInstrBuilder.h" 00021 #include "llvm/CodeGen/MachineLoopInfo.h" 00022 #include "llvm/CodeGen/MachineRegisterInfo.h" 00023 #include "llvm/CodeGen/MachineTraceMetrics.h" 00024 #include "llvm/CodeGen/Passes.h" 00025 #include "llvm/CodeGen/TargetSchedule.h" 00026 #include "llvm/Support/CommandLine.h" 00027 #include "llvm/Support/Debug.h" 00028 #include "llvm/Support/raw_ostream.h" 00029 #include "llvm/Target/TargetInstrInfo.h" 00030 #include "llvm/Target/TargetRegisterInfo.h" 00031 #include "llvm/Target/TargetSubtargetInfo.h" 00032 00033 using namespace llvm; 00034 00035 STATISTIC(NumInstCombined, "Number of machineinst combined"); 00036 00037 namespace { 00038 class MachineCombiner : public MachineFunctionPass { 00039 const TargetInstrInfo *TII; 00040 const TargetRegisterInfo *TRI; 00041 MCSchedModel SchedModel; 00042 MachineRegisterInfo *MRI; 00043 MachineTraceMetrics *Traces; 00044 MachineTraceMetrics::Ensemble *MinInstr; 00045 00046 TargetSchedModel TSchedModel; 00047 00048 /// OptSize - True if optimizing for code size. 00049 bool OptSize; 00050 00051 public: 00052 static char ID; 00053 MachineCombiner() : MachineFunctionPass(ID) { 00054 initializeMachineCombinerPass(*PassRegistry::getPassRegistry()); 00055 } 00056 void getAnalysisUsage(AnalysisUsage &AU) const override; 00057 bool runOnMachineFunction(MachineFunction &MF) override; 00058 const char *getPassName() const override { return "Machine InstCombiner"; } 00059 00060 private: 00061 bool doSubstitute(unsigned NewSize, unsigned OldSize); 00062 bool combineInstructions(MachineBasicBlock *); 00063 MachineInstr *getOperandDef(const MachineOperand &MO); 00064 unsigned getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs, 00065 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, 00066 MachineTraceMetrics::Trace BlockTrace); 00067 unsigned getLatency(MachineInstr *Root, MachineInstr *NewRoot, 00068 MachineTraceMetrics::Trace BlockTrace); 00069 bool 00070 preservesCriticalPathLen(MachineBasicBlock *MBB, MachineInstr *Root, 00071 MachineTraceMetrics::Trace BlockTrace, 00072 SmallVectorImpl<MachineInstr *> &InsInstrs, 00073 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg); 00074 bool preservesResourceLen(MachineBasicBlock *MBB, 00075 MachineTraceMetrics::Trace BlockTrace, 00076 SmallVectorImpl<MachineInstr *> &InsInstrs, 00077 SmallVectorImpl<MachineInstr *> &DelInstrs); 00078 void instr2instrSC(SmallVectorImpl<MachineInstr *> &Instrs, 00079 SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC); 00080 }; 00081 } 00082 00083 char MachineCombiner::ID = 0; 00084 char &llvm::MachineCombinerID = MachineCombiner::ID; 00085 00086 INITIALIZE_PASS_BEGIN(MachineCombiner, "machine-combiner", 00087 "Machine InstCombiner", false, false) 00088 INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics) 00089 INITIALIZE_PASS_END(MachineCombiner, "machine-combiner", "Machine InstCombiner", 00090 false, false) 00091 00092 void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const { 00093 AU.setPreservesCFG(); 00094 AU.addPreserved<MachineDominatorTree>(); 00095 AU.addPreserved<MachineLoopInfo>(); 00096 AU.addRequired<MachineTraceMetrics>(); 00097 AU.addPreserved<MachineTraceMetrics>(); 00098 MachineFunctionPass::getAnalysisUsage(AU); 00099 } 00100 00101 MachineInstr *MachineCombiner::getOperandDef(const MachineOperand &MO) { 00102 MachineInstr *DefInstr = nullptr; 00103 // We need a virtual register definition. 00104 if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) 00105 DefInstr = MRI->getUniqueVRegDef(MO.getReg()); 00106 // PHI's have no depth etc. 00107 if (DefInstr && DefInstr->isPHI()) 00108 DefInstr = nullptr; 00109 return DefInstr; 00110 } 00111 00112 /// getDepth - Computes depth of instructions in vector \InsInstr. 00113 /// 00114 /// \param InsInstrs is a vector of machine instructions 00115 /// \param InstrIdxForVirtReg is a dense map of virtual register to index 00116 /// of defining machine instruction in \p InsInstrs 00117 /// \param BlockTrace is a trace of machine instructions 00118 /// 00119 /// \returns Depth of last instruction in \InsInstrs ("NewRoot") 00120 unsigned 00121 MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs, 00122 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, 00123 MachineTraceMetrics::Trace BlockTrace) { 00124 00125 SmallVector<unsigned, 16> InstrDepth; 00126 assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n"); 00127 00128 // Foreach instruction in in the new sequence compute the depth based on the 00129 // operands. Use the trace information when possible. For new operands which 00130 // are tracked in the InstrIdxForVirtReg map depth is looked up in InstrDepth 00131 for (auto *InstrPtr : InsInstrs) { // for each Use 00132 unsigned IDepth = 0; 00133 DEBUG(dbgs() << "NEW INSTR "; InstrPtr->dump(); dbgs() << "\n";); 00134 for (unsigned i = 0, e = InstrPtr->getNumOperands(); i != e; ++i) { 00135 const MachineOperand &MO = InstrPtr->getOperand(i); 00136 // Check for virtual register operand. 00137 if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))) 00138 continue; 00139 if (!MO.isUse()) 00140 continue; 00141 unsigned DepthOp = 0; 00142 unsigned LatencyOp = 0; 00143 DenseMap<unsigned, unsigned>::iterator II = 00144 InstrIdxForVirtReg.find(MO.getReg()); 00145 if (II != InstrIdxForVirtReg.end()) { 00146 // Operand is new virtual register not in trace 00147 assert(II->second < InstrDepth.size() && "Bad Index"); 00148 MachineInstr *DefInstr = InsInstrs[II->second]; 00149 assert(DefInstr && 00150 "There must be a definition for a new virtual register"); 00151 DepthOp = InstrDepth[II->second]; 00152 LatencyOp = TSchedModel.computeOperandLatency( 00153 DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()), 00154 InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg())); 00155 } else { 00156 MachineInstr *DefInstr = getOperandDef(MO); 00157 if (DefInstr) { 00158 DepthOp = BlockTrace.getInstrCycles(DefInstr).Depth; 00159 LatencyOp = TSchedModel.computeOperandLatency( 00160 DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()), 00161 InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg())); 00162 } 00163 } 00164 IDepth = std::max(IDepth, DepthOp + LatencyOp); 00165 } 00166 InstrDepth.push_back(IDepth); 00167 } 00168 unsigned NewRootIdx = InsInstrs.size() - 1; 00169 return InstrDepth[NewRootIdx]; 00170 } 00171 00172 /// getLatency - Computes instruction latency as max of latency of defined 00173 /// operands 00174 /// 00175 /// \param Root is a machine instruction that could be replaced by NewRoot. 00176 /// It is used to compute a more accurate latency information for NewRoot in 00177 /// case there is a dependent instruction in the same trace (\p BlockTrace) 00178 /// \param NewRoot is the instruction for which the latency is computed 00179 /// \param BlockTrace is a trace of machine instructions 00180 /// 00181 /// \returns Latency of \p NewRoot 00182 unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot, 00183 MachineTraceMetrics::Trace BlockTrace) { 00184 00185 assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n"); 00186 00187 // Check each definition in NewRoot and compute the latency 00188 unsigned NewRootLatency = 0; 00189 00190 for (unsigned i = 0, e = NewRoot->getNumOperands(); i != e; ++i) { 00191 const MachineOperand &MO = NewRoot->getOperand(i); 00192 // Check for virtual register operand. 00193 if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))) 00194 continue; 00195 if (!MO.isDef()) 00196 continue; 00197 // Get the first instruction that uses MO 00198 MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(MO.getReg()); 00199 RI++; 00200 MachineInstr *UseMO = RI->getParent(); 00201 unsigned LatencyOp = 0; 00202 if (UseMO && BlockTrace.isDepInTrace(Root, UseMO)) { 00203 LatencyOp = TSchedModel.computeOperandLatency( 00204 NewRoot, NewRoot->findRegisterDefOperandIdx(MO.getReg()), UseMO, 00205 UseMO->findRegisterUseOperandIdx(MO.getReg())); 00206 } else { 00207 LatencyOp = TSchedModel.computeInstrLatency(NewRoot->getOpcode()); 00208 } 00209 NewRootLatency = std::max(NewRootLatency, LatencyOp); 00210 } 00211 return NewRootLatency; 00212 } 00213 00214 /// preservesCriticalPathlen - True when the new instruction sequence does not 00215 /// lengthen the critical path. The DAGCombine code sequence ends in MI 00216 /// (Machine Instruction) Root. The new code sequence ends in MI NewRoot. A 00217 /// necessary condition for the new sequence to replace the old sequence is that 00218 /// is cannot lengthen the critical path. This is decided by the formula 00219 /// (NewRootDepth + NewRootLatency) <= (RootDepth + RootLatency + RootSlack)). 00220 /// The slack is the number of cycles Root can be delayed before the critical 00221 /// patch becomes longer. 00222 bool MachineCombiner::preservesCriticalPathLen( 00223 MachineBasicBlock *MBB, MachineInstr *Root, 00224 MachineTraceMetrics::Trace BlockTrace, 00225 SmallVectorImpl<MachineInstr *> &InsInstrs, 00226 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) { 00227 00228 assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n"); 00229 // NewRoot is the last instruction in the \p InsInstrs vector 00230 // Get depth and latency of NewRoot 00231 unsigned NewRootIdx = InsInstrs.size() - 1; 00232 MachineInstr *NewRoot = InsInstrs[NewRootIdx]; 00233 unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace); 00234 unsigned NewRootLatency = getLatency(Root, NewRoot, BlockTrace); 00235 00236 // Get depth, latency and slack of Root 00237 unsigned RootDepth = BlockTrace.getInstrCycles(Root).Depth; 00238 unsigned RootLatency = TSchedModel.computeInstrLatency(Root); 00239 unsigned RootSlack = BlockTrace.getInstrSlack(Root); 00240 00241 DEBUG(dbgs() << "DEPENDENCE DATA FOR " << Root << "\n"; 00242 dbgs() << " NewRootDepth: " << NewRootDepth 00243 << " NewRootLatency: " << NewRootLatency << "\n"; 00244 dbgs() << " RootDepth: " << RootDepth << " RootLatency: " << RootLatency 00245 << " RootSlack: " << RootSlack << "\n"; 00246 dbgs() << " NewRootDepth + NewRootLatency " 00247 << NewRootDepth + NewRootLatency << "\n"; 00248 dbgs() << " RootDepth + RootLatency + RootSlack " 00249 << RootDepth + RootLatency + RootSlack << "\n";); 00250 00251 /// True when the new sequence does not lenghten the critical path. 00252 return ((NewRootDepth + NewRootLatency) <= 00253 (RootDepth + RootLatency + RootSlack)); 00254 } 00255 00256 /// helper routine to convert instructions into SC 00257 void MachineCombiner::instr2instrSC( 00258 SmallVectorImpl<MachineInstr *> &Instrs, 00259 SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC) { 00260 for (auto *InstrPtr : Instrs) { 00261 unsigned Opc = InstrPtr->getOpcode(); 00262 unsigned Idx = TII->get(Opc).getSchedClass(); 00263 const MCSchedClassDesc *SC = SchedModel.getSchedClassDesc(Idx); 00264 InstrsSC.push_back(SC); 00265 } 00266 } 00267 /// preservesResourceLen - True when the new instructions do not increase 00268 /// resource length 00269 bool MachineCombiner::preservesResourceLen( 00270 MachineBasicBlock *MBB, MachineTraceMetrics::Trace BlockTrace, 00271 SmallVectorImpl<MachineInstr *> &InsInstrs, 00272 SmallVectorImpl<MachineInstr *> &DelInstrs) { 00273 00274 // Compute current resource length 00275 00276 //ArrayRef<const MachineBasicBlock *> MBBarr(MBB); 00277 SmallVector <const MachineBasicBlock *, 1> MBBarr; 00278 MBBarr.push_back(MBB); 00279 unsigned ResLenBeforeCombine = BlockTrace.getResourceLength(MBBarr); 00280 00281 // Deal with SC rather than Instructions. 00282 SmallVector<const MCSchedClassDesc *, 16> InsInstrsSC; 00283 SmallVector<const MCSchedClassDesc *, 16> DelInstrsSC; 00284 00285 instr2instrSC(InsInstrs, InsInstrsSC); 00286 instr2instrSC(DelInstrs, DelInstrsSC); 00287 00288 ArrayRef<const MCSchedClassDesc *> MSCInsArr = makeArrayRef(InsInstrsSC); 00289 ArrayRef<const MCSchedClassDesc *> MSCDelArr = makeArrayRef(DelInstrsSC); 00290 00291 // Compute new resource length 00292 unsigned ResLenAfterCombine = 00293 BlockTrace.getResourceLength(MBBarr, MSCInsArr, MSCDelArr); 00294 00295 DEBUG(dbgs() << "RESOURCE DATA: \n"; 00296 dbgs() << " resource len before: " << ResLenBeforeCombine 00297 << " after: " << ResLenAfterCombine << "\n";); 00298 00299 return ResLenAfterCombine <= ResLenBeforeCombine; 00300 } 00301 00302 /// \returns true when new instruction sequence should be generated 00303 /// independent if it lenghtens critical path or not 00304 bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize) { 00305 if (OptSize && (NewSize < OldSize)) 00306 return true; 00307 if (!TSchedModel.hasInstrSchedModel()) 00308 return true; 00309 return false; 00310 } 00311 00312 /// combineInstructions - substitute a slow code sequence with a faster one by 00313 /// evaluating instruction combining pattern. 00314 /// The prototype of such a pattern is MUl + ADD -> MADD. Performs instruction 00315 /// combining based on machine trace metrics. Only combine a sequence of 00316 /// instructions when this neither lengthens the critical path nor increases 00317 /// resource pressure. When optimizing for codesize always combine when the new 00318 /// sequence is shorter. 00319 bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { 00320 bool Changed = false; 00321 DEBUG(dbgs() << "Combining MBB " << MBB->getName() << "\n"); 00322 00323 auto BlockIter = MBB->begin(); 00324 00325 while (BlockIter != MBB->end()) { 00326 auto &MI = *BlockIter++; 00327 00328 DEBUG(dbgs() << "INSTR "; MI.dump(); dbgs() << "\n";); 00329 SmallVector<MachineCombinerPattern::MC_PATTERN, 16> Pattern; 00330 // The motivating example is: 00331 // 00332 // MUL Other MUL_op1 MUL_op2 Other 00333 // \ / \ | / 00334 // ADD/SUB => MADD/MSUB 00335 // (=Root) (=NewRoot) 00336 00337 // The DAGCombine code always replaced MUL + ADD/SUB by MADD. While this is 00338 // usually beneficial for code size it unfortunately can hurt performance 00339 // when the ADD is on the critical path, but the MUL is not. With the 00340 // substitution the MUL becomes part of the critical path (in form of the 00341 // MADD) and can lengthen it on architectures where the MADD latency is 00342 // longer than the ADD latency. 00343 // 00344 // For each instruction we check if it can be the root of a combiner 00345 // pattern. Then for each pattern the new code sequence in form of MI is 00346 // generated and evaluated. When the efficiency criteria (don't lengthen 00347 // critical path, don't use more resources) is met the new sequence gets 00348 // hooked up into the basic block before the old sequence is removed. 00349 // 00350 // The algorithm does not try to evaluate all patterns and pick the best. 00351 // This is only an artificial restriction though. In practice there is 00352 // mostly one pattern and hasPattern() can order patterns based on an 00353 // internal cost heuristic. 00354 00355 if (TII->hasPattern(MI, Pattern)) { 00356 for (auto P : Pattern) { 00357 SmallVector<MachineInstr *, 16> InsInstrs; 00358 SmallVector<MachineInstr *, 16> DelInstrs; 00359 DenseMap<unsigned, unsigned> InstrIdxForVirtReg; 00360 if (!MinInstr) 00361 MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount); 00362 MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB); 00363 Traces->verifyAnalysis(); 00364 TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs, 00365 InstrIdxForVirtReg); 00366 // Found pattern, but did not generate alternative sequence. 00367 // This can happen e.g. when an immediate could not be materialized 00368 // in a single instruction. 00369 if (!InsInstrs.size()) 00370 continue; 00371 // Substitute when we optimize for codesize and the new sequence has 00372 // fewer instructions OR 00373 // the new sequence neither lenghten the critical path nor increases 00374 // resource pressure. 00375 if (doSubstitute(InsInstrs.size(), DelInstrs.size()) || 00376 (preservesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs, 00377 InstrIdxForVirtReg) && 00378 preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs))) { 00379 for (auto *InstrPtr : InsInstrs) 00380 MBB->insert((MachineBasicBlock::iterator) & MI, 00381 (MachineInstr *)InstrPtr); 00382 for (auto *InstrPtr : DelInstrs) 00383 InstrPtr->eraseFromParentAndMarkDBGValuesForRemoval(); 00384 00385 Changed = true; 00386 ++NumInstCombined; 00387 00388 Traces->invalidate(MBB); 00389 Traces->verifyAnalysis(); 00390 // Eagerly stop after the first pattern fired 00391 break; 00392 } else { 00393 // Cleanup instructions of the alternative code sequence. There is no 00394 // use for them. 00395 for (auto *InstrPtr : InsInstrs) { 00396 MachineFunction *MF = MBB->getParent(); 00397 MF->DeleteMachineInstr((MachineInstr *)InstrPtr); 00398 } 00399 } 00400 InstrIdxForVirtReg.clear(); 00401 } 00402 } 00403 } 00404 00405 return Changed; 00406 } 00407 00408 bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) { 00409 const TargetSubtargetInfo &STI = 00410 MF.getTarget().getSubtarget<TargetSubtargetInfo>(); 00411 TII = STI.getInstrInfo(); 00412 TRI = STI.getRegisterInfo(); 00413 SchedModel = STI.getSchedModel(); 00414 TSchedModel.init(SchedModel, &STI, TII); 00415 MRI = &MF.getRegInfo(); 00416 Traces = &getAnalysis<MachineTraceMetrics>(); 00417 MinInstr = 0; 00418 00419 OptSize = MF.getFunction()->getAttributes().hasAttribute( 00420 AttributeSet::FunctionIndex, Attribute::OptimizeForSize); 00421 00422 DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n'); 00423 if (!TII->useMachineCombiner()) { 00424 DEBUG(dbgs() << " Skipping pass: Target does not support machine combiner\n"); 00425 return false; 00426 } 00427 00428 bool Changed = false; 00429 00430 // Try to combine instructions. 00431 for (auto &MBB : MF) 00432 Changed |= combineInstructions(&MBB); 00433 00434 return Changed; 00435 }