MachineTraceMetrics.cpp

//===- lib/CodeGen/MachineTraceMetrics.cpp ----------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SparseSet.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"

using namespace llvm;

#define DEBUG_TYPE "machine-trace-metrics"

char MachineTraceMetrics::ID = 0;
char &llvm::MachineTraceMetricsID = MachineTraceMetrics::ID;

INITIALIZE_PASS_BEGIN(MachineTraceMetrics,
                  "machine-trace-metrics", "Machine Trace Metrics", false, true)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_END(MachineTraceMetrics,
                  "machine-trace-metrics", "Machine Trace Metrics", false, true)

MachineTraceMetrics::MachineTraceMetrics()
  : MachineFunctionPass(ID), MF(nullptr), TII(nullptr), TRI(nullptr),
    MRI(nullptr), Loops(nullptr) {
  std::fill(std::begin(Ensembles), std::end(Ensembles), nullptr);
}

void MachineTraceMetrics::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesAll();
  AU.addRequired<MachineBranchProbabilityInfo>();
  AU.addRequired<MachineLoopInfo>();
  MachineFunctionPass::getAnalysisUsage(AU);
}
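
// Note: the sketch below is illustrative only and not part of this file.
// A client pass (MyPass is a placeholder name) would typically request the
// analysis from its own getAnalysisUsage():
//
//   void MyPass::getAnalysisUsage(AnalysisUsage &AU) const {
//     AU.addRequired<MachineTraceMetrics>();
//     AU.addPreserved<MachineTraceMetrics>();
//     MachineFunctionPass::getAnalysisUsage(AU);
//   }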

bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) {
  MF = &Func;
  TII = MF->getSubtarget().getInstrInfo();
  TRI = MF->getSubtarget().getRegisterInfo();
  MRI = &MF->getRegInfo();
  Loops = &getAnalysis<MachineLoopInfo>();
  const TargetSubtargetInfo &ST =
    MF->getTarget().getSubtarget<TargetSubtargetInfo>();
  SchedModel.init(ST.getSchedModel(), &ST, TII);
  BlockInfo.resize(MF->getNumBlockIDs());
  ProcResourceCycles.resize(MF->getNumBlockIDs() *
                            SchedModel.getNumProcResourceKinds());
  return false;
}

void MachineTraceMetrics::releaseMemory() {
  MF = nullptr;
  BlockInfo.clear();
  for (unsigned i = 0; i != TS_NumStrategies; ++i) {
    delete Ensembles[i];
    Ensembles[i] = nullptr;
  }
}

//===----------------------------------------------------------------------===//
//                          Fixed block information
//===----------------------------------------------------------------------===//
//
// The number of instructions in a basic block and the CPU resources used by
// those instructions don't depend on any given trace strategy.

/// Compute the resource usage in basic block MBB.
const MachineTraceMetrics::FixedBlockInfo*
MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) {
  assert(MBB && "No basic block");
  FixedBlockInfo *FBI = &BlockInfo[MBB->getNumber()];
  if (FBI->hasResources())
    return FBI;

  // Compute resource usage in the block.
  FBI->HasCalls = false;
  unsigned InstrCount = 0;

  // Add up per-processor resource cycles as well.
  unsigned PRKinds = SchedModel.getNumProcResourceKinds();
  SmallVector<unsigned, 32> PRCycles(PRKinds);

  for (const auto &MI : *MBB) {
    if (MI.isTransient())
      continue;
    ++InstrCount;
    if (MI.isCall())
      FBI->HasCalls = true;

    // Count processor resources used.
    if (!SchedModel.hasInstrSchedModel())
      continue;
    const MCSchedClassDesc *SC = SchedModel.resolveSchedClass(&MI);
    if (!SC->isValid())
      continue;

    for (TargetSchedModel::ProcResIter
         PI = SchedModel.getWriteProcResBegin(SC),
         PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
      assert(PI->ProcResourceIdx < PRKinds && "Bad processor resource kind");
      PRCycles[PI->ProcResourceIdx] += PI->Cycles;
    }
  }
  FBI->InstrCount = InstrCount;

  // Scale the resource cycles so they are comparable.
  unsigned PROffset = MBB->getNumber() * PRKinds;
  for (unsigned K = 0; K != PRKinds; ++K)
    ProcResourceCycles[PROffset + K] =
      PRCycles[K] * SchedModel.getResourceFactor(K);

  return FBI;
}
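
// Note on the scaling above (an informal sketch, not a specification): each
// per-resource count is multiplied by SchedModel.getResourceFactor(K) so that
// usage of resources with different numbers of units can be compared
// directly, and getCycles() converts such scaled numbers back into a cycle
// count. With invented numbers: 2 cycles on a wide resource with factor 1
// stay at 2 scaled units, while 2 cycles on a narrower resource with factor 2
// become 4 scaled units, so the narrower resource correctly shows up as the
// more contended one.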

ArrayRef<unsigned>
MachineTraceMetrics::getProcResourceCycles(unsigned MBBNum) const {
  assert(BlockInfo[MBBNum].hasResources() &&
         "getResources() must be called before getProcResourceCycles()");
  unsigned PRKinds = SchedModel.getNumProcResourceKinds();
  assert((MBBNum+1) * PRKinds <= ProcResourceCycles.size());
  return makeArrayRef(ProcResourceCycles.data() + MBBNum * PRKinds, PRKinds);
}


//===----------------------------------------------------------------------===//
//                         Ensemble utility functions
//===----------------------------------------------------------------------===//

MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *ct)
  : MTM(*ct) {
  BlockInfo.resize(MTM.BlockInfo.size());
  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
  ProcResourceDepths.resize(MTM.BlockInfo.size() * PRKinds);
  ProcResourceHeights.resize(MTM.BlockInfo.size() * PRKinds);
}

// Virtual destructor serves as an anchor.
MachineTraceMetrics::Ensemble::~Ensemble() {}

const MachineLoop*
MachineTraceMetrics::Ensemble::getLoopFor(const MachineBasicBlock *MBB) const {
  return MTM.Loops->getLoopFor(MBB);
}

// Update resource-related information in the TraceBlockInfo for MBB.
// Only update resources related to the trace above MBB.
void MachineTraceMetrics::Ensemble::
computeDepthResources(const MachineBasicBlock *MBB) {
  TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
  unsigned PROffset = MBB->getNumber() * PRKinds;

  // Compute resources from trace above. The top block is simple.
  if (!TBI->Pred) {
    TBI->InstrDepth = 0;
    TBI->Head = MBB->getNumber();
    std::fill(ProcResourceDepths.begin() + PROffset,
              ProcResourceDepths.begin() + PROffset + PRKinds, 0);
    return;
  }

  // Compute from the block above. A post-order traversal ensures the
  // predecessor is always computed first.
  unsigned PredNum = TBI->Pred->getNumber();
  TraceBlockInfo *PredTBI = &BlockInfo[PredNum];
  assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet");
  const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred);
  TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount;
  TBI->Head = PredTBI->Head;

  // Compute per-resource depths.
  ArrayRef<unsigned> PredPRDepths = getProcResourceDepths(PredNum);
  ArrayRef<unsigned> PredPRCycles = MTM.getProcResourceCycles(PredNum);
  for (unsigned K = 0; K != PRKinds; ++K)
    ProcResourceDepths[PROffset + K] = PredPRDepths[K] + PredPRCycles[K];
}

// Update resource-related information in the TraceBlockInfo for MBB.
// Only update resources related to the trace below MBB.
void MachineTraceMetrics::Ensemble::
computeHeightResources(const MachineBasicBlock *MBB) {
  TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
  unsigned PROffset = MBB->getNumber() * PRKinds;

  // Compute resources for the current block.
  TBI->InstrHeight = MTM.getResources(MBB)->InstrCount;
  ArrayRef<unsigned> PRCycles = MTM.getProcResourceCycles(MBB->getNumber());

  // The trace tail is done.
  if (!TBI->Succ) {
    TBI->Tail = MBB->getNumber();
    std::copy(PRCycles.begin(), PRCycles.end(),
              ProcResourceHeights.begin() + PROffset);
    return;
  }

  // Compute from the block below. A post-order traversal ensures the
  // successor is always computed first.
  unsigned SuccNum = TBI->Succ->getNumber();
  TraceBlockInfo *SuccTBI = &BlockInfo[SuccNum];
  assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet");
  TBI->InstrHeight += SuccTBI->InstrHeight;
  TBI->Tail = SuccTBI->Tail;

  // Compute per-resource heights.
  ArrayRef<unsigned> SuccPRHeights = getProcResourceHeights(SuccNum);
  for (unsigned K = 0; K != PRKinds; ++K)
    ProcResourceHeights[PROffset + K] = SuccPRHeights[K] + PRCycles[K];
}

// Check if depth resources for MBB are valid and return the TBI.
// Return NULL if the resources have been invalidated.
const MachineTraceMetrics::TraceBlockInfo*
MachineTraceMetrics::Ensemble::
getDepthResources(const MachineBasicBlock *MBB) const {
  const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
  return TBI->hasValidDepth() ? TBI : nullptr;
}

// Check if height resources for MBB are valid and return the TBI.
// Return NULL if the resources have been invalidated.
const MachineTraceMetrics::TraceBlockInfo*
MachineTraceMetrics::Ensemble::
getHeightResources(const MachineBasicBlock *MBB) const {
  const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
  return TBI->hasValidHeight() ? TBI : nullptr;
}

/// Get an array of processor resource depths for MBB. Indexed by processor
/// resource kind, this array contains the scaled processor resources consumed
/// by all blocks preceding MBB in its trace. It does not include instructions
/// in MBB.
///
/// Compare TraceBlockInfo::InstrDepth.
ArrayRef<unsigned>
MachineTraceMetrics::Ensemble::
getProcResourceDepths(unsigned MBBNum) const {
  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
  assert((MBBNum+1) * PRKinds <= ProcResourceDepths.size());
  return makeArrayRef(ProcResourceDepths.data() + MBBNum * PRKinds, PRKinds);
}

/// Get an array of processor resource heights for MBB. Indexed by processor
/// resource kind, this array contains the scaled processor resources consumed
/// by this block and all blocks following it in its trace.
///
/// Compare TraceBlockInfo::InstrHeight.
ArrayRef<unsigned>
MachineTraceMetrics::Ensemble::
getProcResourceHeights(unsigned MBBNum) const {
  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
  assert((MBBNum+1) * PRKinds <= ProcResourceHeights.size());
  return makeArrayRef(ProcResourceHeights.data() + MBBNum * PRKinds, PRKinds);
}

//===----------------------------------------------------------------------===//
//                         Trace Selection Strategies
//===----------------------------------------------------------------------===//
//
// A trace selection strategy is implemented as a sub-class of Ensemble. The
// trace through a block B is computed by two DFS traversals of the CFG
// starting from B. One upwards, and one downwards. During the upwards DFS,
// pickTracePred() is called on the post-ordered blocks. During the downwards
// DFS, pickTraceSucc() is called on the post-ordered blocks as well.
//

// We never allow traces that leave loops, but we do allow traces to enter
// nested loops. We also never allow traces to contain back-edges.
//
// This means that a loop header can never appear above the center block of a
// trace, except as the trace head. Below the center block, loop exiting edges
// are banned.
//
// Return true if an edge from the From loop to the To loop is leaving a loop.
// Either of To and From can be null.
static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To) {
  return From && !From->contains(To);
}

// MinInstrCountEnsemble - Pick the trace that executes the least number of
// instructions.
namespace {
class MinInstrCountEnsemble : public MachineTraceMetrics::Ensemble {
  const char *getName() const override { return "MinInstr"; }
  const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) override;
  const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) override;

public:
  MinInstrCountEnsemble(MachineTraceMetrics *mtm)
    : MachineTraceMetrics::Ensemble(mtm) {}
};
}

// Select the preferred predecessor for MBB.
const MachineBasicBlock*
MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) {
  if (MBB->pred_empty())
    return nullptr;
  const MachineLoop *CurLoop = getLoopFor(MBB);
  // Don't leave loops, and never follow back-edges.
  if (CurLoop && MBB == CurLoop->getHeader())
    return nullptr;
  unsigned CurCount = MTM.getResources(MBB)->InstrCount;
  const MachineBasicBlock *Best = nullptr;
  unsigned BestDepth = 0;
  for (MachineBasicBlock::const_pred_iterator
       I = MBB->pred_begin(), E = MBB->pred_end(); I != E; ++I) {
    const MachineBasicBlock *Pred = *I;
    const MachineTraceMetrics::TraceBlockInfo *PredTBI =
      getDepthResources(Pred);
    // Ignore cycles that aren't natural loops.
    if (!PredTBI)
      continue;
    // Pick the predecessor that would give this block the smallest InstrDepth.
    unsigned Depth = PredTBI->InstrDepth + CurCount;
    if (!Best || Depth < BestDepth)
      Best = Pred, BestDepth = Depth;
  }
  return Best;
}

// Select the preferred successor for MBB.
const MachineBasicBlock*
MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) {
  if (MBB->succ_empty())
    return nullptr;
  const MachineLoop *CurLoop = getLoopFor(MBB);
  const MachineBasicBlock *Best = nullptr;
  unsigned BestHeight = 0;
  for (MachineBasicBlock::const_succ_iterator
       I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) {
    const MachineBasicBlock *Succ = *I;
    // Don't consider back-edges.
    if (CurLoop && Succ == CurLoop->getHeader())
      continue;
    // Don't consider successors exiting CurLoop.
    if (isExitingLoop(CurLoop, getLoopFor(Succ)))
      continue;
    const MachineTraceMetrics::TraceBlockInfo *SuccTBI =
      getHeightResources(Succ);
    // Ignore cycles that aren't natural loops.
    if (!SuccTBI)
      continue;
    // Pick the successor that would give this block the smallest InstrHeight.
    unsigned Height = SuccTBI->InstrHeight;
    if (!Best || Height < BestHeight)
      Best = Succ, BestHeight = Height;
  }
  return Best;
}

// Get an Ensemble sub-class for the requested trace strategy.
MachineTraceMetrics::Ensemble *
MachineTraceMetrics::getEnsemble(MachineTraceMetrics::Strategy strategy) {
  assert(strategy < TS_NumStrategies && "Invalid trace strategy enum");
  Ensemble *&E = Ensembles[strategy];
  if (E)
    return E;

  // Allocate new Ensemble on demand.
  switch (strategy) {
  case TS_MinInstrCount: return (E = new MinInstrCountEnsemble(this));
  default: llvm_unreachable("Invalid trace strategy enum");
  }
}
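
// A typical client uses the analysis roughly as follows (a hedged sketch;
// MBB and MI are placeholders, not names defined in this file):
//
//   MachineTraceMetrics &MTM = getAnalysis<MachineTraceMetrics>();
//   MachineTraceMetrics::Ensemble *MinInstr =
//       MTM.getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
//   MachineTraceMetrics::Trace Trace = MinInstr->getTrace(MBB);
//   unsigned Crit  = Trace.getCriticalPath();
//   unsigned Slack = Trace.getInstrSlack(MI); // MI must be in MBB.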

void MachineTraceMetrics::invalidate(const MachineBasicBlock *MBB) {
  DEBUG(dbgs() << "Invalidate traces through BB#" << MBB->getNumber() << '\n');
  BlockInfo[MBB->getNumber()].invalidate();
  for (unsigned i = 0; i != TS_NumStrategies; ++i)
    if (Ensembles[i])
      Ensembles[i]->invalidate(MBB);
}

void MachineTraceMetrics::verifyAnalysis() const {
  if (!MF)
    return;
#ifndef NDEBUG
  assert(BlockInfo.size() == MF->getNumBlockIDs() && "Outdated BlockInfo size");
  for (unsigned i = 0; i != TS_NumStrategies; ++i)
    if (Ensembles[i])
      Ensembles[i]->verify();
#endif
}

//===----------------------------------------------------------------------===//
//                               Trace building
//===----------------------------------------------------------------------===//
//
// Traces are built by two CFG traversals. To avoid recomputing too much, use a
// set abstraction that confines the search to the current loop, and doesn't
// revisit blocks.

namespace {
struct LoopBounds {
  MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> Blocks;
  SmallPtrSet<const MachineBasicBlock*, 8> Visited;
  const MachineLoopInfo *Loops;
  bool Downward;
  LoopBounds(MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> blocks,
             const MachineLoopInfo *loops)
    : Blocks(blocks), Loops(loops), Downward(false) {}
};
}

// Specialize po_iterator_storage in order to prune the post-order traversal so
// it is limited to the current loop and doesn't traverse the loop back edges.
namespace llvm {
template<>
class po_iterator_storage<LoopBounds, true> {
  LoopBounds &LB;
public:
  po_iterator_storage(LoopBounds &lb) : LB(lb) {}
  void finishPostorder(const MachineBasicBlock*) {}

  bool insertEdge(const MachineBasicBlock *From, const MachineBasicBlock *To) {
    // Skip already visited To blocks.
    MachineTraceMetrics::TraceBlockInfo &TBI = LB.Blocks[To->getNumber()];
    if (LB.Downward ? TBI.hasValidHeight() : TBI.hasValidDepth())
      return false;
    // From is null once when To is the trace center block.
    if (From) {
      if (const MachineLoop *FromLoop = LB.Loops->getLoopFor(From)) {
        // Don't follow backedges, don't leave FromLoop when going upwards.
        if ((LB.Downward ? To : From) == FromLoop->getHeader())
          return false;
        // Don't leave FromLoop.
        if (isExitingLoop(FromLoop, LB.Loops->getLoopFor(To)))
          return false;
      }
    }
    // To is a new block. Mark the block as visited in case the CFG has cycles
    // that MachineLoopInfo didn't recognize as a natural loop.
    return LB.Visited.insert(To);
  }
};
}

/// Compute the trace through MBB.
void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
  DEBUG(dbgs() << "Computing " << getName() << " trace through BB#"
               << MBB->getNumber() << '\n');
  // Set up loop bounds for the backwards post-order traversal.
  LoopBounds Bounds(BlockInfo, MTM.Loops);

  // Run an upwards post-order search for the trace start.
  Bounds.Downward = false;
  Bounds.Visited.clear();
  typedef ipo_ext_iterator<const MachineBasicBlock*, LoopBounds> UpwardPO;
  for (UpwardPO I = ipo_ext_begin(MBB, Bounds), E = ipo_ext_end(MBB, Bounds);
       I != E; ++I) {
    DEBUG(dbgs() << "  pred for BB#" << I->getNumber() << ": ");
    TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
    // All the predecessors have been visited, pick the preferred one.
    TBI.Pred = pickTracePred(*I);
    DEBUG({
      if (TBI.Pred)
        dbgs() << "BB#" << TBI.Pred->getNumber() << '\n';
      else
        dbgs() << "null\n";
    });
    // The trace leading to I is now known, compute the depth resources.
    computeDepthResources(*I);
  }

  // Run a downwards post-order search for the trace end.
  Bounds.Downward = true;
  Bounds.Visited.clear();
  typedef po_ext_iterator<const MachineBasicBlock*, LoopBounds> DownwardPO;
  for (DownwardPO I = po_ext_begin(MBB, Bounds), E = po_ext_end(MBB, Bounds);
       I != E; ++I) {
    DEBUG(dbgs() << "  succ for BB#" << I->getNumber() << ": ");
    TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
    // All the successors have been visited, pick the preferred one.
    TBI.Succ = pickTraceSucc(*I);
    DEBUG({
      if (TBI.Succ)
        dbgs() << "BB#" << TBI.Succ->getNumber() << '\n';
      else
        dbgs() << "null\n";
    });
    // The trace leaving I is now known, compute the height resources.
    computeHeightResources(*I);
  }
}

/// Invalidate traces through BadMBB.
void
MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) {
  SmallVector<const MachineBasicBlock*, 16> WorkList;
  TraceBlockInfo &BadTBI = BlockInfo[BadMBB->getNumber()];

  // Invalidate height resources of blocks above MBB.
  if (BadTBI.hasValidHeight()) {
    BadTBI.invalidateHeight();
    WorkList.push_back(BadMBB);
    do {
      const MachineBasicBlock *MBB = WorkList.pop_back_val();
      DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName()
            << " height.\n");
      // Find any MBB predecessors that have MBB as their preferred successor.
      // They are the only ones that need to be invalidated.
      for (MachineBasicBlock::const_pred_iterator
           I = MBB->pred_begin(), E = MBB->pred_end(); I != E; ++I) {
        TraceBlockInfo &TBI = BlockInfo[(*I)->getNumber()];
        if (!TBI.hasValidHeight())
          continue;
        if (TBI.Succ == MBB) {
          TBI.invalidateHeight();
          WorkList.push_back(*I);
          continue;
        }
        // Verify that TBI.Succ is actually a *I successor.
        assert((!TBI.Succ || (*I)->isSuccessor(TBI.Succ)) && "CFG changed");
      }
    } while (!WorkList.empty());
  }

  // Invalidate depth resources of blocks below MBB.
  if (BadTBI.hasValidDepth()) {
    BadTBI.invalidateDepth();
    WorkList.push_back(BadMBB);
    do {
      const MachineBasicBlock *MBB = WorkList.pop_back_val();
      DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName()
            << " depth.\n");
      // Find any MBB successors that have MBB as their preferred predecessor.
      // They are the only ones that need to be invalidated.
      for (MachineBasicBlock::const_succ_iterator
           I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) {
        TraceBlockInfo &TBI = BlockInfo[(*I)->getNumber()];
        if (!TBI.hasValidDepth())
          continue;
        if (TBI.Pred == MBB) {
          TBI.invalidateDepth();
          WorkList.push_back(*I);
          continue;
        }
        // Verify that TBI.Pred is actually a *I predecessor.
        assert((!TBI.Pred || (*I)->isPredecessor(TBI.Pred)) && "CFG changed");
      }
    } while (!WorkList.empty());
  }

  // Clear any per-instruction data. We only have to do this for BadMBB itself
  // because the instructions in that block may change. Other blocks may be
  // invalidated, but their instructions will stay the same, so there is no
  // need to erase the Cycle entries. They will be overwritten when we
  // recompute.
  for (const auto &I : *BadMBB)
    Cycles.erase(&I);
}

void MachineTraceMetrics::Ensemble::verify() const {
#ifndef NDEBUG
  assert(BlockInfo.size() == MTM.MF->getNumBlockIDs() &&
         "Outdated BlockInfo size");
  for (unsigned Num = 0, e = BlockInfo.size(); Num != e; ++Num) {
    const TraceBlockInfo &TBI = BlockInfo[Num];
    if (TBI.hasValidDepth() && TBI.Pred) {
      const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
      assert(MBB->isPredecessor(TBI.Pred) && "CFG doesn't match trace");
      assert(BlockInfo[TBI.Pred->getNumber()].hasValidDepth() &&
             "Trace is broken, depth should have been invalidated.");
      const MachineLoop *Loop = getLoopFor(MBB);
      assert(!(Loop && MBB == Loop->getHeader()) && "Trace contains backedge");
    }
    if (TBI.hasValidHeight() && TBI.Succ) {
      const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
      assert(MBB->isSuccessor(TBI.Succ) && "CFG doesn't match trace");
      assert(BlockInfo[TBI.Succ->getNumber()].hasValidHeight() &&
             "Trace is broken, height should have been invalidated.");
      const MachineLoop *Loop = getLoopFor(MBB);
      const MachineLoop *SuccLoop = getLoopFor(TBI.Succ);
      assert(!(Loop && Loop == SuccLoop && TBI.Succ == Loop->getHeader()) &&
             "Trace contains backedge");
    }
  }
#endif
}

//===----------------------------------------------------------------------===//
//                             Data Dependencies
//===----------------------------------------------------------------------===//
//
// Compute the depth and height of each instruction based on data dependencies
// and instruction latencies. These cycle numbers assume that the CPU can issue
// an infinite number of instructions per cycle as long as their dependencies
// are ready.

// A data dependency is represented as a defining MI and operand numbers on the
// defining and using MI.
namespace {
struct DataDep {
  const MachineInstr *DefMI;
  unsigned DefOp;
  unsigned UseOp;

  DataDep(const MachineInstr *DefMI, unsigned DefOp, unsigned UseOp)
    : DefMI(DefMI), DefOp(DefOp), UseOp(UseOp) {}

  /// Create a DataDep from an SSA form virtual register.
  DataDep(const MachineRegisterInfo *MRI, unsigned VirtReg, unsigned UseOp)
    : UseOp(UseOp) {
    assert(TargetRegisterInfo::isVirtualRegister(VirtReg));
    MachineRegisterInfo::def_iterator DefI = MRI->def_begin(VirtReg);
    assert(!DefI.atEnd() && "Register has no defs");
    DefMI = DefI->getParent();
    DefOp = DefI.getOperandNo();
    assert((++DefI).atEnd() && "Register has multiple defs");
  }
};
}

// Get the input data dependencies that must be ready before UseMI can issue.
// Return true if UseMI has any physreg operands.
static bool getDataDeps(const MachineInstr *UseMI,
                        SmallVectorImpl<DataDep> &Deps,
                        const MachineRegisterInfo *MRI) {
  bool HasPhysRegs = false;
  for (ConstMIOperands MO(UseMI); MO.isValid(); ++MO) {
    if (!MO->isReg())
      continue;
    unsigned Reg = MO->getReg();
    if (!Reg)
      continue;
    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
      HasPhysRegs = true;
      continue;
    }
    // Collect virtual register reads.
    if (MO->readsReg())
      Deps.push_back(DataDep(MRI, Reg, MO.getOperandNo()));
  }
  return HasPhysRegs;
}

// Get the input data dependencies of a PHI instruction, using Pred as the
// preferred predecessor.
// This will add at most one dependency to Deps.
static void getPHIDeps(const MachineInstr *UseMI,
                       SmallVectorImpl<DataDep> &Deps,
                       const MachineBasicBlock *Pred,
                       const MachineRegisterInfo *MRI) {
  // No predecessor at the beginning of a trace. Ignore dependencies.
  if (!Pred)
    return;
  assert(UseMI->isPHI() && UseMI->getNumOperands() % 2 && "Bad PHI");
  for (unsigned i = 1; i != UseMI->getNumOperands(); i += 2) {
    if (UseMI->getOperand(i + 1).getMBB() == Pred) {
      unsigned Reg = UseMI->getOperand(i).getReg();
      Deps.push_back(DataDep(MRI, Reg, i));
      return;
    }
  }
}
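
// For reference, PHI operands come in (value, predecessor block) pairs after
// the def, so operand i holds the value that is live when the trace enters
// from the block in operand i+1. A sketch with invented register and block
// numbers:
//
//   %vreg5<def> = PHI %vreg3, <BB#1>, %vreg4, <BB#2>
//
// With Pred = BB#2, the single dependency added above is on the def of
// %vreg4.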

// Keep track of physreg data dependencies by recording each live register unit.
// Associate each regunit with an instruction operand. Depending on the
// direction instructions are scanned, it could be the operand that defined the
// regunit, or the highest operand to read the regunit.
namespace {
struct LiveRegUnit {
  unsigned RegUnit;
  unsigned Cycle;
  const MachineInstr *MI;
  unsigned Op;

  unsigned getSparseSetIndex() const { return RegUnit; }

  LiveRegUnit(unsigned RU) : RegUnit(RU), Cycle(0), MI(nullptr), Op(0) {}
};
}

// Identify physreg dependencies for UseMI, and update the live regunit
// tracking set when scanning instructions downwards.
static void updatePhysDepsDownwards(const MachineInstr *UseMI,
                                    SmallVectorImpl<DataDep> &Deps,
                                    SparseSet<LiveRegUnit> &RegUnits,
                                    const TargetRegisterInfo *TRI) {
  SmallVector<unsigned, 8> Kills;
  SmallVector<unsigned, 8> LiveDefOps;

  for (ConstMIOperands MO(UseMI); MO.isValid(); ++MO) {
    if (!MO->isReg())
      continue;
    unsigned Reg = MO->getReg();
    if (!TargetRegisterInfo::isPhysicalRegister(Reg))
      continue;
    // Track live defs and kills for updating RegUnits.
    if (MO->isDef()) {
      if (MO->isDead())
        Kills.push_back(Reg);
      else
        LiveDefOps.push_back(MO.getOperandNo());
    } else if (MO->isKill())
      Kills.push_back(Reg);
    // Identify dependencies.
    if (!MO->readsReg())
      continue;
    for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
      SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units);
      if (I == RegUnits.end())
        continue;
      Deps.push_back(DataDep(I->MI, I->Op, MO.getOperandNo()));
      break;
    }
  }

  // Update RegUnits to reflect live registers after UseMI.
  // First kills.
  for (unsigned i = 0, e = Kills.size(); i != e; ++i)
    for (MCRegUnitIterator Units(Kills[i], TRI); Units.isValid(); ++Units)
      RegUnits.erase(*Units);

  // Second, live defs.
  for (unsigned i = 0, e = LiveDefOps.size(); i != e; ++i) {
    unsigned DefOp = LiveDefOps[i];
    for (MCRegUnitIterator Units(UseMI->getOperand(DefOp).getReg(), TRI);
         Units.isValid(); ++Units) {
      LiveRegUnit &LRU = RegUnits[*Units];
      LRU.MI = UseMI;
      LRU.Op = DefOp;
    }
  }
}

/// The length of the critical path through a trace is the maximum of two path
/// lengths:
///
/// 1. The maximum height+depth over all instructions in the trace center block.
///
/// 2. The longest cross-block dependency chain. For small blocks, it is
///    possible that the critical path through the trace doesn't include any
///    instructions in the block.
///
/// This function computes the second number from the live-in list of the
/// center block.
unsigned MachineTraceMetrics::Ensemble::
computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) {
  assert(TBI.HasValidInstrDepths && "Missing depth info");
  assert(TBI.HasValidInstrHeights && "Missing height info");
  unsigned MaxLen = 0;
  for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) {
    const LiveInReg &LIR = TBI.LiveIns[i];
    if (!TargetRegisterInfo::isVirtualRegister(LIR.Reg))
      continue;
    const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
    // Ignore dependencies outside the current trace.
    const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()];
    if (!DefTBI.isUsefulDominator(TBI))
      continue;
    unsigned Len = LIR.Height + Cycles[DefMI].Depth;
    MaxLen = std::max(MaxLen, Len);
  }
  return MaxLen;
}
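
// A worked example with invented numbers: if a live-in virtual register is
// defined at depth 3 somewhere above the center block, and that live-in's
// recorded height below the center block is 4, the cross-block chain
// contributes 3 + 4 = 7 cycles to the critical path, even when no
// instruction in the center block itself lies on that chain.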

/// Compute instruction depths for all instructions above or in MBB in its
/// trace. This assumes that the trace through MBB has already been computed.
void MachineTraceMetrics::Ensemble::
computeInstrDepths(const MachineBasicBlock *MBB) {
  // The top of the trace may already be computed, and HasValidInstrDepths
  // implies Head->HasValidInstrDepths, so we only need to start from the first
  // block in the trace that needs to be recomputed.
  SmallVector<const MachineBasicBlock*, 8> Stack;
  do {
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    assert(TBI.hasValidDepth() && "Incomplete trace");
    if (TBI.HasValidInstrDepths)
      break;
    Stack.push_back(MBB);
    MBB = TBI.Pred;
  } while (MBB);

  // FIXME: If MBB is non-null at this point, it is the last pre-computed block
  // in the trace. We should track any live-out physregs that were defined in
  // the trace. This is quite rare in SSA form, typically created by CSE
  // hoisting a compare.
  SparseSet<LiveRegUnit> RegUnits;
  RegUnits.setUniverse(MTM.TRI->getNumRegUnits());

  // Go through trace blocks in top-down order, stopping after the center block.
  SmallVector<DataDep, 8> Deps;
  while (!Stack.empty()) {
    MBB = Stack.pop_back_val();
    DEBUG(dbgs() << "\nDepths for BB#" << MBB->getNumber() << ":\n");
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    TBI.HasValidInstrDepths = true;
    TBI.CriticalPath = 0;

    // Print out resource depths here as well.
    DEBUG({
      dbgs() << format("%7u Instructions\n", TBI.InstrDepth);
      ArrayRef<unsigned> PRDepths = getProcResourceDepths(MBB->getNumber());
      for (unsigned K = 0; K != PRDepths.size(); ++K)
        if (PRDepths[K]) {
          unsigned Factor = MTM.SchedModel.getResourceFactor(K);
          dbgs() << format("%6uc @ ", MTM.getCycles(PRDepths[K]))
                 << MTM.SchedModel.getProcResource(K)->Name << " ("
                 << PRDepths[K]/Factor << " ops x" << Factor << ")\n";
        }
    });

    // Also compute the critical path length through MBB when possible.
    if (TBI.HasValidInstrHeights)
      TBI.CriticalPath = computeCrossBlockCriticalPath(TBI);

    for (const auto &UseMI : *MBB) {
      // Collect all data dependencies.
      Deps.clear();
      if (UseMI.isPHI())
        getPHIDeps(&UseMI, Deps, TBI.Pred, MTM.MRI);
      else if (getDataDeps(&UseMI, Deps, MTM.MRI))
        updatePhysDepsDownwards(&UseMI, Deps, RegUnits, MTM.TRI);

      // Filter and process dependencies, computing the earliest issue cycle.
      unsigned Cycle = 0;
      for (unsigned i = 0, e = Deps.size(); i != e; ++i) {
        const DataDep &Dep = Deps[i];
        const TraceBlockInfo &DepTBI =
          BlockInfo[Dep.DefMI->getParent()->getNumber()];
        // Ignore dependencies from outside the current trace.
        if (!DepTBI.isUsefulDominator(TBI))
          continue;
        assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency");
        unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth;
        // Add latency if DefMI is a real instruction. Transients get latency 0.
        if (!Dep.DefMI->isTransient())
          DepCycle += MTM.SchedModel
            .computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI, Dep.UseOp);
        Cycle = std::max(Cycle, DepCycle);
      }
      // Remember the instruction depth.
      InstrCycles &MICycles = Cycles[&UseMI];
      MICycles.Depth = Cycle;

      if (!TBI.HasValidInstrHeights) {
        DEBUG(dbgs() << Cycle << '\t' << UseMI);
        continue;
      }
      // Update critical path length.
      TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height);
      DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << UseMI);
    }
  }
}
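
// In effect, each instruction's depth is the maximum over its in-trace data
// dependencies of (depth of the def + operand latency). With invented
// numbers: defs at depths 2 and 5 with operand latencies 3 and 1 give a
// depth of max(2 + 3, 5 + 1) = 6 cycles.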

// Identify physreg dependencies for MI when scanning instructions upwards.
// Return the issue height of MI after considering any live regunits.
// Height is the issue height computed from virtual register dependencies alone.
static unsigned updatePhysDepsUpwards(const MachineInstr *MI, unsigned Height,
                                      SparseSet<LiveRegUnit> &RegUnits,
                                      const TargetSchedModel &SchedModel,
                                      const TargetInstrInfo *TII,
                                      const TargetRegisterInfo *TRI) {
  SmallVector<unsigned, 8> ReadOps;
  for (ConstMIOperands MO(MI); MO.isValid(); ++MO) {
    if (!MO->isReg())
      continue;
    unsigned Reg = MO->getReg();
    if (!TargetRegisterInfo::isPhysicalRegister(Reg))
      continue;
    if (MO->readsReg())
      ReadOps.push_back(MO.getOperandNo());
    if (!MO->isDef())
      continue;
    // This is a def of Reg. Remove corresponding entries from RegUnits, and
    // update MI Height to consider the physreg dependencies.
    for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
      SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units);
      if (I == RegUnits.end())
        continue;
      unsigned DepHeight = I->Cycle;
      if (!MI->isTransient()) {
        // We may not know the UseMI of this dependency, if it came from the
        // live-in list. SchedModel can handle a NULL UseMI.
        DepHeight += SchedModel
          .computeOperandLatency(MI, MO.getOperandNo(), I->MI, I->Op);
      }
      Height = std::max(Height, DepHeight);
      // This regunit is dead above MI.
      RegUnits.erase(I);
    }
  }

  // Now we know the height of MI. Update any regunits read.
  for (unsigned i = 0, e = ReadOps.size(); i != e; ++i) {
    unsigned Reg = MI->getOperand(ReadOps[i]).getReg();
    for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
      LiveRegUnit &LRU = RegUnits[*Units];
      // Set the height to the highest reader of the unit.
      if (LRU.Cycle <= Height && LRU.MI != MI) {
        LRU.Cycle = Height;
        LRU.MI = MI;
        LRU.Op = ReadOps[i];
      }
    }
  }

  return Height;
}


typedef DenseMap<const MachineInstr *, unsigned> MIHeightMap;

// Push the height of DefMI upwards if required to match UseMI.
// Return true if this is the first time DefMI was seen.
static bool pushDepHeight(const DataDep &Dep,
                          const MachineInstr *UseMI, unsigned UseHeight,
                          MIHeightMap &Heights,
                          const TargetSchedModel &SchedModel,
                          const TargetInstrInfo *TII) {
  // Adjust height by Dep.DefMI latency.
  if (!Dep.DefMI->isTransient())
    UseHeight += SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp,
                                                  UseMI, Dep.UseOp);

  // Update Heights[DefMI] to be the maximum height seen.
  MIHeightMap::iterator I;
  bool New;
  std::tie(I, New) = Heights.insert(std::make_pair(Dep.DefMI, UseHeight));
  if (New)
    return true;

  // DefMI has been pushed before. Give it the max height.
  if (I->second < UseHeight)
    I->second = UseHeight;
  return false;
}

/// Assuming that the virtual register defined by DefMI:DefOp was used by
/// Trace.back(), add it to the live-in lists of all the blocks in Trace. Stop
/// when reaching the block that contains DefMI.
void MachineTraceMetrics::Ensemble::
addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
           ArrayRef<const MachineBasicBlock*> Trace) {
  assert(!Trace.empty() && "Trace should contain at least one block");
  unsigned Reg = DefMI->getOperand(DefOp).getReg();
  assert(TargetRegisterInfo::isVirtualRegister(Reg));
  const MachineBasicBlock *DefMBB = DefMI->getParent();

  // Reg is live-in to all blocks in Trace that follow DefMBB.
  for (unsigned i = Trace.size(); i; --i) {
    const MachineBasicBlock *MBB = Trace[i-1];
    if (MBB == DefMBB)
      return;
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    // Just add the register. The height will be updated later.
    TBI.LiveIns.push_back(Reg);
  }
}

/// Compute instruction heights in the trace through MBB. This updates MBB and
/// the blocks below it in the trace. It is assumed that the trace has already
/// been computed.
void MachineTraceMetrics::Ensemble::
computeInstrHeights(const MachineBasicBlock *MBB) {
  // The bottom of the trace may already be computed.
  // Find the blocks that need updating.
  SmallVector<const MachineBasicBlock*, 8> Stack;
  do {
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    assert(TBI.hasValidHeight() && "Incomplete trace");
    if (TBI.HasValidInstrHeights)
      break;
    Stack.push_back(MBB);
    TBI.LiveIns.clear();
    MBB = TBI.Succ;
  } while (MBB);

  // As we move upwards in the trace, keep track of instructions that are
  // required by deeper trace instructions. Map MI -> height required so far.
  MIHeightMap Heights;

  // For physregs, the def isn't known when we see the use.
  // Instead, keep track of the highest use of each regunit.
  SparseSet<LiveRegUnit> RegUnits;
  RegUnits.setUniverse(MTM.TRI->getNumRegUnits());

  // If the bottom of the trace was already precomputed, initialize heights
  // from its live-in list.
  // MBB is the highest precomputed block in the trace.
  if (MBB) {
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) {
      LiveInReg LI = TBI.LiveIns[i];
      if (TargetRegisterInfo::isVirtualRegister(LI.Reg)) {
        // For virtual registers, the def latency is included.
        unsigned &Height = Heights[MTM.MRI->getVRegDef(LI.Reg)];
        if (Height < LI.Height)
          Height = LI.Height;
      } else {
        // For register units, the def latency is not included because we don't
        // know the def yet.
        RegUnits[LI.Reg].Cycle = LI.Height;
      }
    }
  }

  // Go through the trace blocks in bottom-up order.
  SmallVector<DataDep, 8> Deps;
  for (;!Stack.empty(); Stack.pop_back()) {
    MBB = Stack.back();
    DEBUG(dbgs() << "Heights for BB#" << MBB->getNumber() << ":\n");
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    TBI.HasValidInstrHeights = true;
    TBI.CriticalPath = 0;

    DEBUG({
      dbgs() << format("%7u Instructions\n", TBI.InstrHeight);
      ArrayRef<unsigned> PRHeights = getProcResourceHeights(MBB->getNumber());
      for (unsigned K = 0; K != PRHeights.size(); ++K)
        if (PRHeights[K]) {
          unsigned Factor = MTM.SchedModel.getResourceFactor(K);
          dbgs() << format("%6uc @ ", MTM.getCycles(PRHeights[K]))
                 << MTM.SchedModel.getProcResource(K)->Name << " ("
                 << PRHeights[K]/Factor << " ops x" << Factor << ")\n";
        }
    });

    // Get dependencies from PHIs in the trace successor.
    const MachineBasicBlock *Succ = TBI.Succ;
    // If MBB is the last block in the trace, and it has a back-edge to the
    // loop header, get loop-carried dependencies from PHIs in the header. For
    // that purpose, pretend that all the loop header PHIs have height 0.
    if (!Succ)
      if (const MachineLoop *Loop = getLoopFor(MBB))
        if (MBB->isSuccessor(Loop->getHeader()))
          Succ = Loop->getHeader();

    if (Succ) {
      for (const auto &PHI : *Succ) {
        if (!PHI.isPHI())
          break;
        Deps.clear();
        getPHIDeps(&PHI, Deps, MBB, MTM.MRI);
        if (!Deps.empty()) {
          // Loop header PHI heights are all 0.
          unsigned Height = TBI.Succ ? Cycles.lookup(&PHI).Height : 0;
          DEBUG(dbgs() << "pred\t" << Height << '\t' << PHI);
          if (pushDepHeight(Deps.front(), &PHI, Height,
                            Heights, MTM.SchedModel, MTM.TII))
            addLiveIns(Deps.front().DefMI, Deps.front().DefOp, Stack);
        }
      }
    }

    // Go through the block backwards.
    for (MachineBasicBlock::const_iterator BI = MBB->end(), BB = MBB->begin();
         BI != BB;) {
      const MachineInstr *MI = --BI;

      // Find the MI height as determined by virtual register uses in the
      // trace below.
      unsigned Cycle = 0;
      MIHeightMap::iterator HeightI = Heights.find(MI);
      if (HeightI != Heights.end()) {
        Cycle = HeightI->second;
        // We won't be seeing any more MI uses.
        Heights.erase(HeightI);
      }

      // Don't process PHI deps. They depend on the specific predecessor, and
      // we'll get them when visiting the predecessor.
      Deps.clear();
      bool HasPhysRegs = !MI->isPHI() && getDataDeps(MI, Deps, MTM.MRI);

      // There may also be regunit dependencies to include in the height.
      if (HasPhysRegs)
        Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits,
                                      MTM.SchedModel, MTM.TII, MTM.TRI);

      // Update the required height of any virtual registers read by MI.
      for (unsigned i = 0, e = Deps.size(); i != e; ++i)
        if (pushDepHeight(Deps[i], MI, Cycle, Heights, MTM.SchedModel, MTM.TII))
          addLiveIns(Deps[i].DefMI, Deps[i].DefOp, Stack);

      InstrCycles &MICycles = Cycles[MI];
      MICycles.Height = Cycle;
      if (!TBI.HasValidInstrDepths) {
        DEBUG(dbgs() << Cycle << '\t' << *MI);
        continue;
      }
      // Update critical path length.
      TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Depth);
      DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << *MI);
    }

    // Update virtual live-in heights. They were added by addLiveIns() with a 0
    // height because the final height isn't known until now.
    DEBUG(dbgs() << "BB#" << MBB->getNumber() << " Live-ins:");
    for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) {
      LiveInReg &LIR = TBI.LiveIns[i];
      const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
      LIR.Height = Heights.lookup(DefMI);
      DEBUG(dbgs() << ' ' << PrintReg(LIR.Reg) << '@' << LIR.Height);
    }

    // Transfer the live regunits to the live-in list.
    for (SparseSet<LiveRegUnit>::const_iterator
         RI = RegUnits.begin(), RE = RegUnits.end(); RI != RE; ++RI) {
      TBI.LiveIns.push_back(LiveInReg(RI->RegUnit, RI->Cycle));
      DEBUG(dbgs() << ' ' << PrintRegUnit(RI->RegUnit, MTM.TRI)
                   << '@' << RI->Cycle);
    }
    DEBUG(dbgs() << '\n');

    if (!TBI.HasValidInstrDepths)
      continue;
    // Add live-ins to the critical path length.
    TBI.CriticalPath = std::max(TBI.CriticalPath,
                                computeCrossBlockCriticalPath(TBI));
    DEBUG(dbgs() << "Critical path: " << TBI.CriticalPath << '\n');
  }
}

MachineTraceMetrics::Trace
MachineTraceMetrics::Ensemble::getTrace(const MachineBasicBlock *MBB) {
  // FIXME: Check cache tags, recompute as needed.
  computeTrace(MBB);
  computeInstrDepths(MBB);
  computeInstrHeights(MBB);
  return Trace(*this, BlockInfo[MBB->getNumber()]);
}

unsigned
MachineTraceMetrics::Trace::getInstrSlack(const MachineInstr *MI) const {
  assert(MI && "Not an instruction.");
  assert(getBlockNum() == unsigned(MI->getParent()->getNumber()) &&
         "MI must be in the trace center block");
  InstrCycles Cyc = getInstrCycles(MI);
  return getCriticalPath() - (Cyc.Depth + Cyc.Height);
}
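
// For example, with a critical path of 10 cycles (invented number), an
// instruction with depth 3 and height 5 has a slack of 10 - (3 + 5) = 2
// cycles; instructions on the critical path have zero slack.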

unsigned
MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr *PHI) const {
  const MachineBasicBlock *MBB = TE.MTM.MF->getBlockNumbered(getBlockNum());
  SmallVector<DataDep, 1> Deps;
  getPHIDeps(PHI, Deps, MBB, TE.MTM.MRI);
  assert(Deps.size() == 1 && "PHI doesn't have MBB as a predecessor");
  DataDep &Dep = Deps.front();
  unsigned DepCycle = getInstrCycles(Dep.DefMI).Depth;
  // Add latency if DefMI is a real instruction. Transients get latency 0.
  if (!Dep.DefMI->isTransient())
    DepCycle += TE.MTM.SchedModel
      .computeOperandLatency(Dep.DefMI, Dep.DefOp, PHI, Dep.UseOp);
  return DepCycle;
}

/// When Bottom is set, include the instructions in the current block in the
/// estimate.
unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
  // Find the limiting processor resource.
  // Numbers have been pre-scaled to be comparable.
  unsigned PRMax = 0;
  ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
  if (Bottom) {
    ArrayRef<unsigned> PRCycles = TE.MTM.getProcResourceCycles(getBlockNum());
    for (unsigned K = 0; K != PRDepths.size(); ++K)
      PRMax = std::max(PRMax, PRDepths[K] + PRCycles[K]);
  } else {
    for (unsigned K = 0; K != PRDepths.size(); ++K)
      PRMax = std::max(PRMax, PRDepths[K]);
  }
  // Convert to cycle count.
  PRMax = TE.MTM.getCycles(PRMax);

  // All instructions before the current block,
  unsigned Instrs = TBI.InstrDepth;
  // plus the instructions in the current block when Bottom is set.
  if (Bottom)
    Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount;
  if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
    Instrs /= IW;
  // Assume issue width 1 without a schedule model.
  return std::max(Instrs, PRMax);
}
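
// For example (invented numbers): with 12 instructions above the block, an
// issue width of 4, and a pre-scaled per-resource maximum that converts to
// 5 cycles, the returned depth is max(12 / 4, 5) = 5 cycles.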

unsigned MachineTraceMetrics::Trace::getResourceLength(
    ArrayRef<const MachineBasicBlock *> Extrablocks,
    ArrayRef<const MCSchedClassDesc *> ExtraInstrs,
    ArrayRef<const MCSchedClassDesc *> RemoveInstrs) const {
  // Add up resources above and below the center block.
  ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
  ArrayRef<unsigned> PRHeights = TE.getProcResourceHeights(getBlockNum());
  unsigned PRMax = 0;

  // Capture computing cycles from extra instructions
  auto extraCycles = [this](ArrayRef<const MCSchedClassDesc *> Instrs,
                            unsigned ResourceIdx)
                         ->unsigned {
    unsigned Cycles = 0;
    for (unsigned I = 0; I != Instrs.size(); ++I) {
      const MCSchedClassDesc *SC = Instrs[I];
      if (!SC->isValid())
        continue;
      for (TargetSchedModel::ProcResIter
               PI = TE.MTM.SchedModel.getWriteProcResBegin(SC),
               PE = TE.MTM.SchedModel.getWriteProcResEnd(SC);
           PI != PE; ++PI) {
        if (PI->ProcResourceIdx != ResourceIdx)
          continue;
        Cycles +=
            (PI->Cycles * TE.MTM.SchedModel.getResourceFactor(ResourceIdx));
      }
    }
    return Cycles;
  };

  for (unsigned K = 0; K != PRDepths.size(); ++K) {
    unsigned PRCycles = PRDepths[K] + PRHeights[K];
    for (unsigned I = 0; I != Extrablocks.size(); ++I)
      PRCycles += TE.MTM.getProcResourceCycles(Extrablocks[I]->getNumber())[K];
    PRCycles += extraCycles(ExtraInstrs, K);
    PRCycles -= extraCycles(RemoveInstrs, K);
    PRMax = std::max(PRMax, PRCycles);
  }
  // Convert to cycle count.
  PRMax = TE.MTM.getCycles(PRMax);

  // Instrs: #instructions in current trace outside current block.
  unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight;
  // Add instruction count from the extra blocks.
  for (unsigned i = 0, e = Extrablocks.size(); i != e; ++i)
    Instrs += TE.MTM.getResources(Extrablocks[i])->InstrCount;
  Instrs += ExtraInstrs.size();
  Instrs -= RemoveInstrs.size();
  if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
    Instrs /= IW;
  // Assume issue width 1 without a schedule model.
  return std::max(Instrs, PRMax);
}

bool MachineTraceMetrics::Trace::isDepInTrace(const MachineInstr *DefMI,
                                              const MachineInstr *UseMI) const {
  if (DefMI->getParent() == UseMI->getParent())
    return true;

  const TraceBlockInfo &DepTBI = TE.BlockInfo[DefMI->getParent()->getNumber()];
  const TraceBlockInfo &TBI = TE.BlockInfo[UseMI->getParent()->getNumber()];

  return DepTBI.isUsefulDominator(TBI);
}

void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const {
  OS << getName() << " ensemble:\n";
  for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) {
    OS << "  BB#" << i << '\t';
    BlockInfo[i].print(OS);
    OS << '\n';
  }
}

void MachineTraceMetrics::TraceBlockInfo::print(raw_ostream &OS) const {
  if (hasValidDepth()) {
    OS << "depth=" << InstrDepth;
    if (Pred)
      OS << " pred=BB#" << Pred->getNumber();
    else
      OS << " pred=null";
    OS << " head=BB#" << Head;
    if (HasValidInstrDepths)
      OS << " +instrs";
  } else
    OS << "depth invalid";
  OS << ", ";
  if (hasValidHeight()) {
    OS << "height=" << InstrHeight;
    if (Succ)
      OS << " succ=BB#" << Succ->getNumber();
    else
      OS << " succ=null";
    OS << " tail=BB#" << Tail;
    if (HasValidInstrHeights)
      OS << " +instrs";
  } else
    OS << "height invalid";
  if (HasValidInstrDepths && HasValidInstrHeights)
    OS << ", crit=" << CriticalPath;
}

void MachineTraceMetrics::Trace::print(raw_ostream &OS) const {
  unsigned MBBNum = &TBI - &TE.BlockInfo[0];

  OS << TE.getName() << " trace BB#" << TBI.Head << " --> BB#" << MBBNum
     << " --> BB#" << TBI.Tail << ':';
  if (TBI.hasValidHeight() && TBI.hasValidDepth())
    OS << ' ' << getInstrCount() << " instrs.";
  if (TBI.HasValidInstrDepths && TBI.HasValidInstrHeights)
    OS << ' ' << TBI.CriticalPath << " cycles.";

  const MachineTraceMetrics::TraceBlockInfo *Block = &TBI;
  OS << "\nBB#" << MBBNum;
  while (Block->hasValidDepth() && Block->Pred) {
    unsigned Num = Block->Pred->getNumber();
    OS << " <- BB#" << Num;
    Block = &TE.BlockInfo[Num];
  }

  Block = &TBI;
  OS << "\n    ";
  while (Block->hasValidHeight() && Block->Succ) {
    unsigned Num = Block->Succ->getNumber();
    OS << " -> BB#" << Num;
    Block = &TE.BlockInfo[Num];
  }
  OS << '\n';
}