LLVM API Documentation

MachineCombiner.cpp
Go to the documentation of this file.
00001 //===---- MachineCombiner.cpp - Instcombining on SSA form machine code ----===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // The machine combiner pass uses machine trace metrics to ensure the combined
00011 // instructions does not lengthen the critical path or the resource depth.
00012 //===----------------------------------------------------------------------===//
00013 #define DEBUG_TYPE "machine-combiner"
00014 
00015 #include "llvm/ADT/Statistic.h"
00016 #include "llvm/ADT/DenseMap.h"
00017 #include "llvm/CodeGen/MachineDominators.h"
00018 #include "llvm/CodeGen/MachineFunction.h"
00019 #include "llvm/CodeGen/MachineFunctionPass.h"
00020 #include "llvm/CodeGen/MachineInstrBuilder.h"
00021 #include "llvm/CodeGen/MachineLoopInfo.h"
00022 #include "llvm/CodeGen/MachineRegisterInfo.h"
00023 #include "llvm/CodeGen/MachineTraceMetrics.h"
00024 #include "llvm/CodeGen/Passes.h"
00025 #include "llvm/CodeGen/TargetSchedule.h"
00026 #include "llvm/Support/CommandLine.h"
00027 #include "llvm/Support/Debug.h"
00028 #include "llvm/Support/raw_ostream.h"
00029 #include "llvm/Target/TargetInstrInfo.h"
00030 #include "llvm/Target/TargetRegisterInfo.h"
00031 #include "llvm/Target/TargetSubtargetInfo.h"
00032 
00033 using namespace llvm;
00034 
00035 STATISTIC(NumInstCombined, "Number of machineinst combined");
00036 
00037 namespace {
00038 class MachineCombiner : public MachineFunctionPass {
00039   const TargetInstrInfo *TII;
00040   const TargetRegisterInfo *TRI;
00041   MCSchedModel SchedModel;
00042   MachineRegisterInfo *MRI;
00043   MachineTraceMetrics *Traces;
00044   MachineTraceMetrics::Ensemble *MinInstr;
00045 
00046   TargetSchedModel TSchedModel;
00047 
00048   /// OptSize - True if optimizing for code size.
00049   bool OptSize;
00050 
00051 public:
00052   static char ID;
00053   MachineCombiner() : MachineFunctionPass(ID) {
00054     initializeMachineCombinerPass(*PassRegistry::getPassRegistry());
00055   }
00056   void getAnalysisUsage(AnalysisUsage &AU) const override;
00057   bool runOnMachineFunction(MachineFunction &MF) override;
00058   const char *getPassName() const override { return "Machine InstCombiner"; }
00059 
00060 private:
00061   bool doSubstitute(unsigned NewSize, unsigned OldSize);
00062   bool combineInstructions(MachineBasicBlock *);
00063   MachineInstr *getOperandDef(const MachineOperand &MO);
00064   unsigned getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
00065                     DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
00066                     MachineTraceMetrics::Trace BlockTrace);
00067   unsigned getLatency(MachineInstr *Root, MachineInstr *NewRoot,
00068                       MachineTraceMetrics::Trace BlockTrace);
00069   bool
00070   preservesCriticalPathLen(MachineBasicBlock *MBB, MachineInstr *Root,
00071                            MachineTraceMetrics::Trace BlockTrace,
00072                            SmallVectorImpl<MachineInstr *> &InsInstrs,
00073                            DenseMap<unsigned, unsigned> &InstrIdxForVirtReg);
00074   bool preservesResourceLen(MachineBasicBlock *MBB,
00075                             MachineTraceMetrics::Trace BlockTrace,
00076                             SmallVectorImpl<MachineInstr *> &InsInstrs,
00077                             SmallVectorImpl<MachineInstr *> &DelInstrs);
00078   void instr2instrSC(SmallVectorImpl<MachineInstr *> &Instrs,
00079                      SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC);
00080 };
00081 }
00082 
00083 char MachineCombiner::ID = 0;
00084 char &llvm::MachineCombinerID = MachineCombiner::ID;
00085 
00086 INITIALIZE_PASS_BEGIN(MachineCombiner, "machine-combiner",
00087                       "Machine InstCombiner", false, false)
00088 INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
00089 INITIALIZE_PASS_END(MachineCombiner, "machine-combiner", "Machine InstCombiner",
00090                     false, false)
00091 
00092 void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
00093   AU.setPreservesCFG();
00094   AU.addPreserved<MachineDominatorTree>();
00095   AU.addPreserved<MachineLoopInfo>();
00096   AU.addRequired<MachineTraceMetrics>();
00097   AU.addPreserved<MachineTraceMetrics>();
00098   MachineFunctionPass::getAnalysisUsage(AU);
00099 }
00100 
00101 MachineInstr *MachineCombiner::getOperandDef(const MachineOperand &MO) {
00102   MachineInstr *DefInstr = nullptr;
00103   // We need a virtual register definition.
00104   if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
00105     DefInstr = MRI->getUniqueVRegDef(MO.getReg());
00106   // PHI's have no depth etc.
00107   if (DefInstr && DefInstr->isPHI())
00108     DefInstr = nullptr;
00109   return DefInstr;
00110 }
00111 
00112 /// getDepth - Computes depth of instructions in vector \InsInstr.
00113 ///
00114 /// \param InsInstrs is a vector of machine instructions
00115 /// \param InstrIdxForVirtReg is a dense map of virtual register to index
00116 /// of defining machine instruction in \p InsInstrs
00117 /// \param BlockTrace is a trace of machine instructions
00118 ///
00119 /// \returns Depth of last instruction in \InsInstrs ("NewRoot")
00120 unsigned
00121 MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
00122                           DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
00123                           MachineTraceMetrics::Trace BlockTrace) {
00124 
00125   SmallVector<unsigned, 16> InstrDepth;
00126   assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n");
00127 
00128   // Foreach instruction in in the new sequence compute the depth based on the
00129   // operands. Use the trace information when possible. For new operands which
00130   // are tracked in the InstrIdxForVirtReg map depth is looked up in InstrDepth
00131   for (auto *InstrPtr : InsInstrs) { // for each Use
00132     unsigned IDepth = 0;
00133     DEBUG(dbgs() << "NEW INSTR "; InstrPtr->dump(); dbgs() << "\n";);
00134     for (unsigned i = 0, e = InstrPtr->getNumOperands(); i != e; ++i) {
00135       const MachineOperand &MO = InstrPtr->getOperand(i);
00136       // Check for virtual register operand.
00137       if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())))
00138         continue;
00139       if (!MO.isUse())
00140         continue;
00141       unsigned DepthOp = 0;
00142       unsigned LatencyOp = 0;
00143       DenseMap<unsigned, unsigned>::iterator II =
00144           InstrIdxForVirtReg.find(MO.getReg());
00145       if (II != InstrIdxForVirtReg.end()) {
00146         // Operand is new virtual register not in trace
00147         assert(II->second < InstrDepth.size() && "Bad Index");
00148         MachineInstr *DefInstr = InsInstrs[II->second];
00149         assert(DefInstr &&
00150                "There must be a definition for a new virtual register");
00151         DepthOp = InstrDepth[II->second];
00152         LatencyOp = TSchedModel.computeOperandLatency(
00153             DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()),
00154             InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg()));
00155       } else {
00156         MachineInstr *DefInstr = getOperandDef(MO);
00157         if (DefInstr) {
00158           DepthOp = BlockTrace.getInstrCycles(DefInstr).Depth;
00159           LatencyOp = TSchedModel.computeOperandLatency(
00160               DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()),
00161               InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg()));
00162         }
00163       }
00164       IDepth = std::max(IDepth, DepthOp + LatencyOp);
00165     }
00166     InstrDepth.push_back(IDepth);
00167   }
00168   unsigned NewRootIdx = InsInstrs.size() - 1;
00169   return InstrDepth[NewRootIdx];
00170 }
00171 
00172 /// getLatency - Computes instruction latency as max of latency of defined
00173 /// operands
00174 ///
00175 /// \param Root is a machine instruction that could be replaced by NewRoot.
00176 /// It is used to compute a more accurate latency information for NewRoot in
00177 /// case there is a dependent instruction in the same trace (\p BlockTrace)
00178 /// \param NewRoot is the instruction for which the latency is computed
00179 /// \param BlockTrace is a trace of machine instructions
00180 ///
00181 /// \returns Latency of \p NewRoot
00182 unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot,
00183                                      MachineTraceMetrics::Trace BlockTrace) {
00184 
00185   assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n");
00186 
00187   // Check each definition in NewRoot and compute the latency
00188   unsigned NewRootLatency = 0;
00189 
00190   for (unsigned i = 0, e = NewRoot->getNumOperands(); i != e; ++i) {
00191     const MachineOperand &MO = NewRoot->getOperand(i);
00192     // Check for virtual register operand.
00193     if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())))
00194       continue;
00195     if (!MO.isDef())
00196       continue;
00197     // Get the first instruction that uses MO
00198     MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(MO.getReg());
00199     RI++;
00200     MachineInstr *UseMO = RI->getParent();
00201     unsigned LatencyOp = 0;
00202     if (UseMO && BlockTrace.isDepInTrace(Root, UseMO)) {
00203       LatencyOp = TSchedModel.computeOperandLatency(
00204           NewRoot, NewRoot->findRegisterDefOperandIdx(MO.getReg()), UseMO,
00205           UseMO->findRegisterUseOperandIdx(MO.getReg()));
00206     } else {
00207       LatencyOp = TSchedModel.computeInstrLatency(NewRoot->getOpcode());
00208     }
00209     NewRootLatency = std::max(NewRootLatency, LatencyOp);
00210   }
00211   return NewRootLatency;
00212 }
00213 
00214 /// preservesCriticalPathlen - True when the new instruction sequence does not
00215 /// lengthen the critical path. The DAGCombine code sequence ends in MI
00216 /// (Machine Instruction) Root. The new code sequence ends in MI NewRoot. A
00217 /// necessary condition for the new sequence to replace the old sequence is that
00218 /// is cannot lengthen the critical path. This is decided by the formula
00219 /// (NewRootDepth + NewRootLatency) <=  (RootDepth + RootLatency + RootSlack)).
00220 /// The slack is the number of cycles Root can be delayed before the critical
00221 /// patch becomes longer.
00222 bool MachineCombiner::preservesCriticalPathLen(
00223     MachineBasicBlock *MBB, MachineInstr *Root,
00224     MachineTraceMetrics::Trace BlockTrace,
00225     SmallVectorImpl<MachineInstr *> &InsInstrs,
00226     DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) {
00227 
00228   assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n");
00229   // NewRoot is the last instruction in the \p InsInstrs vector
00230   // Get depth and latency of NewRoot
00231   unsigned NewRootIdx = InsInstrs.size() - 1;
00232   MachineInstr *NewRoot = InsInstrs[NewRootIdx];
00233   unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace);
00234   unsigned NewRootLatency = getLatency(Root, NewRoot, BlockTrace);
00235 
00236   // Get depth, latency and slack of Root
00237   unsigned RootDepth = BlockTrace.getInstrCycles(Root).Depth;
00238   unsigned RootLatency = TSchedModel.computeInstrLatency(Root);
00239   unsigned RootSlack = BlockTrace.getInstrSlack(Root);
00240 
00241   DEBUG(dbgs() << "DEPENDENCE DATA FOR " << Root << "\n";
00242         dbgs() << " NewRootDepth: " << NewRootDepth
00243                << " NewRootLatency: " << NewRootLatency << "\n";
00244         dbgs() << " RootDepth: " << RootDepth << " RootLatency: " << RootLatency
00245                << " RootSlack: " << RootSlack << "\n";
00246         dbgs() << " NewRootDepth + NewRootLatency "
00247                << NewRootDepth + NewRootLatency << "\n";
00248         dbgs() << " RootDepth + RootLatency + RootSlack "
00249                << RootDepth + RootLatency + RootSlack << "\n";);
00250 
00251   /// True when the new sequence does not lenghten the critical path.
00252   return ((NewRootDepth + NewRootLatency) <=
00253           (RootDepth + RootLatency + RootSlack));
00254 }
00255 
00256 /// helper routine to convert instructions into SC
00257 void MachineCombiner::instr2instrSC(
00258     SmallVectorImpl<MachineInstr *> &Instrs,
00259     SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC) {
00260   for (auto *InstrPtr : Instrs) {
00261     unsigned Opc = InstrPtr->getOpcode();
00262     unsigned Idx = TII->get(Opc).getSchedClass();
00263     const MCSchedClassDesc *SC = SchedModel.getSchedClassDesc(Idx);
00264     InstrsSC.push_back(SC);
00265   }
00266 }
00267 /// preservesResourceLen - True when the new instructions do not increase
00268 /// resource length
00269 bool MachineCombiner::preservesResourceLen(
00270     MachineBasicBlock *MBB, MachineTraceMetrics::Trace BlockTrace,
00271     SmallVectorImpl<MachineInstr *> &InsInstrs,
00272     SmallVectorImpl<MachineInstr *> &DelInstrs) {
00273 
00274   // Compute current resource length
00275 
00276   //ArrayRef<const MachineBasicBlock *> MBBarr(MBB);
00277   SmallVector <const MachineBasicBlock *, 1> MBBarr;
00278   MBBarr.push_back(MBB);
00279   unsigned ResLenBeforeCombine = BlockTrace.getResourceLength(MBBarr);
00280 
00281   // Deal with SC rather than Instructions.
00282   SmallVector<const MCSchedClassDesc *, 16> InsInstrsSC;
00283   SmallVector<const MCSchedClassDesc *, 16> DelInstrsSC;
00284 
00285   instr2instrSC(InsInstrs, InsInstrsSC);
00286   instr2instrSC(DelInstrs, DelInstrsSC);
00287 
00288   ArrayRef<const MCSchedClassDesc *> MSCInsArr = makeArrayRef(InsInstrsSC);
00289   ArrayRef<const MCSchedClassDesc *> MSCDelArr = makeArrayRef(DelInstrsSC);
00290 
00291   // Compute new resource length
00292   unsigned ResLenAfterCombine =
00293       BlockTrace.getResourceLength(MBBarr, MSCInsArr, MSCDelArr);
00294 
00295   DEBUG(dbgs() << "RESOURCE DATA: \n";
00296         dbgs() << " resource len before: " << ResLenBeforeCombine
00297                << " after: " << ResLenAfterCombine << "\n";);
00298 
00299   return ResLenAfterCombine <= ResLenBeforeCombine;
00300 }
00301 
00302 /// \returns true when new instruction sequence should be generated
00303 /// independent if it lenghtens critical path or not
00304 bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize) {
00305   if (OptSize && (NewSize < OldSize))
00306     return true;
00307   if (!TSchedModel.hasInstrSchedModel())
00308     return true;
00309   return false;
00310 }
00311 
00312 /// combineInstructions - substitute a slow code sequence with a faster one by
00313 /// evaluating instruction combining pattern.
00314 /// The prototype of such a pattern is MUl + ADD -> MADD. Performs instruction
00315 /// combining based on machine trace metrics. Only combine a sequence of
00316 /// instructions  when this neither lengthens the critical path nor increases
00317 /// resource pressure. When optimizing for codesize always combine when the new
00318 /// sequence is shorter.
00319 bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
00320   bool Changed = false;
00321   DEBUG(dbgs() << "Combining MBB " << MBB->getName() << "\n");
00322 
00323   auto BlockIter = MBB->begin();
00324 
00325   while (BlockIter != MBB->end()) {
00326     auto &MI = *BlockIter++;
00327 
00328     DEBUG(dbgs() << "INSTR "; MI.dump(); dbgs() << "\n";);
00329     SmallVector<MachineCombinerPattern::MC_PATTERN, 16> Pattern;
00330     // The motivating example is:
00331     //
00332     //     MUL  Other        MUL_op1 MUL_op2  Other
00333     //      \    /               \      |    /
00334     //      ADD/SUB      =>        MADD/MSUB
00335     //      (=Root)                (=NewRoot)
00336 
00337     // The DAGCombine code always replaced MUL + ADD/SUB by MADD. While this is
00338     // usually beneficial for code size it unfortunately can hurt performance
00339     // when the ADD is on the critical path, but the MUL is not. With the
00340     // substitution the MUL becomes part of the critical path (in form of the
00341     // MADD) and can lengthen it on architectures where the MADD latency is
00342     // longer than the ADD latency.
00343     //
00344     // For each instruction we check if it can be the root of a combiner
00345     // pattern. Then for each pattern the new code sequence in form of MI is
00346     // generated and evaluated. When the efficiency criteria (don't lengthen
00347     // critical path, don't use more resources) is met the new sequence gets
00348     // hooked up into the basic block before the old sequence is removed.
00349     //
00350     // The algorithm does not try to evaluate all patterns and pick the best.
00351     // This is only an artificial restriction though. In practice there is
00352     // mostly one pattern and hasPattern() can order patterns based on an
00353     // internal cost heuristic.
00354 
00355     if (TII->hasPattern(MI, Pattern)) {
00356       for (auto P : Pattern) {
00357         SmallVector<MachineInstr *, 16> InsInstrs;
00358         SmallVector<MachineInstr *, 16> DelInstrs;
00359         DenseMap<unsigned, unsigned> InstrIdxForVirtReg;
00360         if (!MinInstr)
00361           MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
00362         MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB);
00363         Traces->verifyAnalysis();
00364         TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs,
00365                                         InstrIdxForVirtReg);
00366         // Found pattern, but did not generate alternative sequence.
00367         // This can happen e.g. when an immediate could not be materialized
00368         // in a single instruction.
00369         if (!InsInstrs.size())
00370           continue;
00371         // Substitute when we optimize for codesize and the new sequence has
00372         // fewer instructions OR
00373         // the new sequence neither lenghten the critical path nor increases
00374         // resource pressure.
00375         if (doSubstitute(InsInstrs.size(), DelInstrs.size()) ||
00376             (preservesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs,
00377                                       InstrIdxForVirtReg) &&
00378              preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs))) {
00379           for (auto *InstrPtr : InsInstrs)
00380             MBB->insert((MachineBasicBlock::iterator) & MI,
00381                         (MachineInstr *)InstrPtr);
00382           for (auto *InstrPtr : DelInstrs)
00383             InstrPtr->eraseFromParentAndMarkDBGValuesForRemoval();
00384 
00385           Changed = true;
00386           ++NumInstCombined;
00387 
00388           Traces->invalidate(MBB);
00389           Traces->verifyAnalysis();
00390           // Eagerly stop after the first pattern fired
00391           break;
00392         } else {
00393           // Cleanup instructions of the alternative code sequence. There is no
00394           // use for them.
00395           for (auto *InstrPtr : InsInstrs) {
00396             MachineFunction *MF = MBB->getParent();
00397             MF->DeleteMachineInstr((MachineInstr *)InstrPtr);
00398           }
00399         }
00400         InstrIdxForVirtReg.clear();
00401       }
00402     }
00403   }
00404 
00405   return Changed;
00406 }
00407 
00408 bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) {
00409   const TargetSubtargetInfo &STI =
00410       MF.getTarget().getSubtarget<TargetSubtargetInfo>();
00411   TII = STI.getInstrInfo();
00412   TRI = STI.getRegisterInfo();
00413   SchedModel = STI.getSchedModel();
00414   TSchedModel.init(SchedModel, &STI, TII);
00415   MRI = &MF.getRegInfo();
00416   Traces = &getAnalysis<MachineTraceMetrics>();
00417   MinInstr = 0;
00418 
00419   OptSize = MF.getFunction()->getAttributes().hasAttribute(
00420       AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
00421 
00422   DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n');
00423   if (!TII->useMachineCombiner()) {
00424     DEBUG(dbgs() << "  Skipping pass: Target does not support machine combiner\n");
00425     return false;
00426   }
00427 
00428   bool Changed = false;
00429 
00430   // Try to combine instructions.
00431   for (auto &MBB : MF)
00432     Changed |= combineInstructions(&MBB);
00433 
00434   return Changed;
00435 }