LLVM API Documentation

MachineScheduler.cpp
00001 //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // MachineScheduler schedules machine instructions after phi elimination. It
00011 // preserves LiveIntervals so it can be invoked before register allocation.
00012 //
00013 //===----------------------------------------------------------------------===//
00014 
00015 #include "llvm/CodeGen/MachineScheduler.h"
00016 #include "llvm/ADT/PriorityQueue.h"
00017 #include "llvm/Analysis/AliasAnalysis.h"
00018 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
00019 #include "llvm/CodeGen/MachineDominators.h"
00020 #include "llvm/CodeGen/MachineLoopInfo.h"
00021 #include "llvm/CodeGen/MachineRegisterInfo.h"
00022 #include "llvm/CodeGen/Passes.h"
00023 #include "llvm/CodeGen/RegisterClassInfo.h"
00024 #include "llvm/CodeGen/ScheduleDFS.h"
00025 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
00026 #include "llvm/Support/CommandLine.h"
00027 #include "llvm/Support/Debug.h"
00028 #include "llvm/Support/ErrorHandling.h"
00029 #include "llvm/Support/GraphWriter.h"
00030 #include "llvm/Support/raw_ostream.h"
00031 #include "llvm/Target/TargetInstrInfo.h"
00032 #include <queue>
00033 
00034 using namespace llvm;
00035 
00036 #define DEBUG_TYPE "misched"
00037 
00038 namespace llvm {
00039 cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
00040                            cl::desc("Force top-down list scheduling"));
00041 cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
00042                             cl::desc("Force bottom-up list scheduling"));
00043 cl::opt<bool>
00044 DumpCriticalPathLength("misched-dcpl", cl::Hidden,
00045                        cl::desc("Print critical path length to stdout"));
00046 }
00047 
00048 #ifndef NDEBUG
00049 static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
00050   cl::desc("Pop up a window to show MISched dags after they are processed"));
00051 
00052 static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
00053   cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
00054 
00055 static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
00056   cl::desc("Only schedule this function"));
00057 static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
00058   cl::desc("Only schedule this MBB#"));
00059 #else
00060 static bool ViewMISchedDAGs = false;
00061 #endif // NDEBUG
00062 
00063 static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
00064   cl::desc("Enable register pressure scheduling."), cl::init(true));
00065 
00066 static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
00067   cl::desc("Enable cyclic critical path analysis."), cl::init(true));
00068 
00069 static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
00070   cl::desc("Enable load clustering."), cl::init(true));
00071 
00072 // Experimental heuristics
00073 static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
00074   cl::desc("Enable scheduling for macro fusion."), cl::init(true));
00075 
00076 static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
00077   cl::desc("Verify machine instrs before and after machine scheduling"));
00078 
00079 // DAG subtrees must have at least this many nodes.
00080 static const unsigned MinSubtreeSize = 8;
00081 
00082 // Pin the vtables to this file.
00083 void MachineSchedStrategy::anchor() {}
00084 void ScheduleDAGMutation::anchor() {}
00085 
00086 //===----------------------------------------------------------------------===//
00087 // Machine Instruction Scheduling Pass and Registry
00088 //===----------------------------------------------------------------------===//
00089 
00090 MachineSchedContext::MachineSchedContext():
00091     MF(nullptr), MLI(nullptr), MDT(nullptr), PassConfig(nullptr), AA(nullptr), LIS(nullptr) {
00092   RegClassInfo = new RegisterClassInfo();
00093 }
00094 
00095 MachineSchedContext::~MachineSchedContext() {
00096   delete RegClassInfo;
00097 }
00098 
00099 namespace {
00100 /// Base class for a machine scheduler class that can run at any point.
00101 class MachineSchedulerBase : public MachineSchedContext,
00102                              public MachineFunctionPass {
00103 public:
00104   MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}
00105 
00106   void print(raw_ostream &O, const Module* = nullptr) const override;
00107 
00108 protected:
00109   void scheduleRegions(ScheduleDAGInstrs &Scheduler);
00110 };
00111 
00112 /// MachineScheduler runs after coalescing and before register allocation.
00113 class MachineScheduler : public MachineSchedulerBase {
00114 public:
00115   MachineScheduler();
00116 
00117   void getAnalysisUsage(AnalysisUsage &AU) const override;
00118 
00119   bool runOnMachineFunction(MachineFunction&) override;
00120 
00121   static char ID; // Class identification, replacement for typeinfo
00122 
00123 protected:
00124   ScheduleDAGInstrs *createMachineScheduler();
00125 };
00126 
00127 /// PostMachineScheduler runs after register allocation, shortly before code emission.
00128 class PostMachineScheduler : public MachineSchedulerBase {
00129 public:
00130   PostMachineScheduler();
00131 
00132   void getAnalysisUsage(AnalysisUsage &AU) const override;
00133 
00134   bool runOnMachineFunction(MachineFunction&) override;
00135 
00136   static char ID; // Class identification, replacement for typeinfo
00137 
00138 protected:
00139   ScheduleDAGInstrs *createPostMachineScheduler();
00140 };
00141 } // namespace
00142 
00143 char MachineScheduler::ID = 0;
00144 
00145 char &llvm::MachineSchedulerID = MachineScheduler::ID;
00146 
00147 INITIALIZE_PASS_BEGIN(MachineScheduler, "misched",
00148                       "Machine Instruction Scheduler", false, false)
00149 INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
00150 INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
00151 INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
00152 INITIALIZE_PASS_END(MachineScheduler, "misched",
00153                     "Machine Instruction Scheduler", false, false)
00154 
00155 MachineScheduler::MachineScheduler()
00156 : MachineSchedulerBase(ID) {
00157   initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
00158 }
00159 
00160 void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
00161   AU.setPreservesCFG();
00162   AU.addRequiredID(MachineDominatorsID);
00163   AU.addRequired<MachineLoopInfo>();
00164   AU.addRequired<AliasAnalysis>();
00165   AU.addRequired<TargetPassConfig>();
00166   AU.addRequired<SlotIndexes>();
00167   AU.addPreserved<SlotIndexes>();
00168   AU.addRequired<LiveIntervals>();
00169   AU.addPreserved<LiveIntervals>();
00170   MachineFunctionPass::getAnalysisUsage(AU);
00171 }
00172 
00173 char PostMachineScheduler::ID = 0;
00174 
00175 char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;
00176 
00177 INITIALIZE_PASS(PostMachineScheduler, "postmisched",
00178                 "PostRA Machine Instruction Scheduler", false, false)
00179 
00180 PostMachineScheduler::PostMachineScheduler()
00181 : MachineSchedulerBase(ID) {
00182   initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry());
00183 }
00184 
00185 void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
00186   AU.setPreservesCFG();
00187   AU.addRequiredID(MachineDominatorsID);
00188   AU.addRequired<MachineLoopInfo>();
00189   AU.addRequired<TargetPassConfig>();
00190   MachineFunctionPass::getAnalysisUsage(AU);
00191 }
00192 
00193 MachinePassRegistry MachineSchedRegistry::Registry;
00194 
00195 /// A dummy default scheduler factory indicates whether the scheduler
00196 /// is overridden on the command line.
00197 static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
00198   return nullptr;
00199 }
00200 
00201 /// MachineSchedOpt allows command line selection of the scheduler.
00202 static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
00203                RegisterPassParser<MachineSchedRegistry> >
00204 MachineSchedOpt("misched",
00205                 cl::init(&useDefaultMachineSched), cl::Hidden,
00206                 cl::desc("Machine instruction scheduler to use"));
00207 
00208 static MachineSchedRegistry
00209 DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
00210                      useDefaultMachineSched);
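// Illustrative sketch (not part of the original file): an alternative scheduler
// can be made selectable with -misched=<name> by defining another
// MachineSchedRegistry entry. The entry name "example-sched" and the factory
// below are hypothetical; the construction mirrors the GenericScheduler-based
// factory defined later in this file.
static ScheduleDAGInstrs *createExampleSched(MachineSchedContext *C) {
  // Reuse the standard live-interval-preserving DAG with the generic strategy.
  return new ScheduleDAGMILive(C, make_unique<GenericScheduler>(C));
}
static MachineSchedRegistry
ExampleSchedRegistry("example-sched", "Hypothetical example scheduler.",
                     createExampleSched);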
00211 
00212 /// Forward declare the standard machine scheduler. This will be used as the
00213 /// default scheduler if the target does not set a default.
00214 static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C);
00215 static ScheduleDAGInstrs *createGenericSchedPostRA(MachineSchedContext *C);
00216 
00217 /// Decrement this iterator until reaching the top or a non-debug instr.
00218 static MachineBasicBlock::const_iterator
00219 priorNonDebug(MachineBasicBlock::const_iterator I,
00220               MachineBasicBlock::const_iterator Beg) {
00221   assert(I != Beg && "reached the top of the region, cannot decrement");
00222   while (--I != Beg) {
00223     if (!I->isDebugValue())
00224       break;
00225   }
00226   return I;
00227 }
00228 
00229 /// Non-const version.
00230 static MachineBasicBlock::iterator
00231 priorNonDebug(MachineBasicBlock::iterator I,
00232               MachineBasicBlock::const_iterator Beg) {
00233   return const_cast<MachineInstr*>(
00234     &*priorNonDebug(MachineBasicBlock::const_iterator(I), Beg));
00235 }
00236 
00237 /// If this iterator is a debug value, increment until reaching the End or a
00238 /// non-debug instruction.
00239 static MachineBasicBlock::const_iterator
00240 nextIfDebug(MachineBasicBlock::const_iterator I,
00241             MachineBasicBlock::const_iterator End) {
00242   for(; I != End; ++I) {
00243     if (!I->isDebugValue())
00244       break;
00245   }
00246   return I;
00247 }
00248 
00249 /// Non-const version.
00250 static MachineBasicBlock::iterator
00251 nextIfDebug(MachineBasicBlock::iterator I,
00252             MachineBasicBlock::const_iterator End) {
00253   // Cast the return value to nonconst MachineInstr, then cast to an
00254   // instr_iterator, which does not check for null, finally return a
00255   // bundle_iterator.
00256   return MachineBasicBlock::instr_iterator(
00257     const_cast<MachineInstr*>(
00258       &*nextIfDebug(MachineBasicBlock::const_iterator(I), End)));
00259 }
00260 
00261 /// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
00262 ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
00263   // Select the scheduler, or set the default.
00264   MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
00265   if (Ctor != useDefaultMachineSched)
00266     return Ctor(this);
00267 
00268   // Get the default scheduler set by the target for this function.
00269   ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
00270   if (Scheduler)
00271     return Scheduler;
00272 
00273   // Default to GenericScheduler.
00274   return createGenericSchedLive(this);
00275 }
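// Illustrative sketch (not part of the original file): a target supplies its
// default scheduler by overriding the TargetPassConfig hook queried above.
// "MyTargetPassConfig" and "MyTargetSchedStrategy" are hypothetical names.
ScheduleDAGInstrs *
MyTargetPassConfig::createMachineScheduler(MachineSchedContext *C) const {
  // Plug a custom strategy into the standard live-interval-preserving DAG.
  // Targets typically also attach DAG mutations here (e.g. load clustering).
  return new ScheduleDAGMILive(C, make_unique<MyTargetSchedStrategy>(C));
}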
00276 
00277 /// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
00278 /// the caller. We don't have a command line option to override the postRA
00279 /// scheduler. The Target must configure it.
00280 ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
00281   // Get the postRA scheduler set by the target for this function.
00282   ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
00283   if (Scheduler)
00284     return Scheduler;
00285 
00286   // Default to GenericScheduler.
00287   return createGenericSchedPostRA(this);
00288 }
00289 
00290 /// Top-level MachineScheduler pass driver.
00291 ///
00292 /// Visit blocks in function order. Divide each block into scheduling regions
00293 /// and visit them bottom-up. Visiting regions bottom-up is not required, but is
00294 /// consistent with the DAG builder, which traverses the interior of the
00295 /// scheduling regions bottom-up.
00296 ///
00297 /// This design avoids exposing scheduling boundaries to the DAG builder,
00298 /// simplifying the DAG builder's support for "special" target instructions.
00299 /// At the same time the design allows target schedulers to operate across
00300 /// scheduling boundaries, for example to bundle the boundary instructions
00301 /// without reordering them. This creates complexity, because the target
00302 /// scheduler must update the RegionBegin and RegionEnd positions cached by
00303 /// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
00304 /// design would be to split blocks at scheduling boundaries, but LLVM has a
00305 /// general bias against block splitting purely for implementation simplicity.
00306 bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
00307   DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
00308 
00309   // Initialize the context of the pass.
00310   MF = &mf;
00311   MLI = &getAnalysis<MachineLoopInfo>();
00312   MDT = &getAnalysis<MachineDominatorTree>();
00313   PassConfig = &getAnalysis<TargetPassConfig>();
00314   AA = &getAnalysis<AliasAnalysis>();
00315 
00316   LIS = &getAnalysis<LiveIntervals>();
00317 
00318   if (VerifyScheduling) {
00319     DEBUG(LIS->dump());
00320     MF->verify(this, "Before machine scheduling.");
00321   }
00322   RegClassInfo->runOnMachineFunction(*MF);
00323 
00324   // Instantiate the selected scheduler for this target, function, and
00325   // optimization level.
00326   std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
00327   scheduleRegions(*Scheduler);
00328 
00329   DEBUG(LIS->dump());
00330   if (VerifyScheduling)
00331     MF->verify(this, "After machine scheduling.");
00332   return true;
00333 }
00334 
00335 bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
00336   if (skipOptnoneFunction(*mf.getFunction()))
00337     return false;
00338 
00339   const TargetSubtargetInfo &ST =
00340     mf.getTarget().getSubtarget<TargetSubtargetInfo>();
00341   if (!ST.enablePostMachineScheduler()) {
00342     DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
00343     return false;
00344   }
00345   DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
00346 
00347   // Initialize the context of the pass.
00348   MF = &mf;
00349   PassConfig = &getAnalysis<TargetPassConfig>();
00350 
00351   if (VerifyScheduling)
00352     MF->verify(this, "Before post machine scheduling.");
00353 
00354   // Instantiate the selected scheduler for this target, function, and
00355   // optimization level.
00356   std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
00357   scheduleRegions(*Scheduler);
00358 
00359   if (VerifyScheduling)
00360     MF->verify(this, "After post machine scheduling.");
00361   return true;
00362 }
00363 
00364 /// Return true if the given instruction should not be included in a scheduling
00365 /// region.
00366 ///
00367 /// MachineScheduler does not currently support scheduling across calls. To
00368 /// handle calls, the DAG builder needs to be modified to create register
00369 /// anti/output dependencies on the registers clobbered by the call's regmask
00370 /// operand. In PreRA scheduling, the stack pointer adjustment already prevents
00371 /// scheduling across calls. In PostRA scheduling, we need isCall() to enforce
00372 /// the boundary, but there would be no benefit to postRA scheduling across
00373 /// calls this late anyway.
00374 static bool isSchedBoundary(MachineBasicBlock::iterator MI,
00375                             MachineBasicBlock *MBB,
00376                             MachineFunction *MF,
00377                             const TargetInstrInfo *TII,
00378                             bool IsPostRA) {
00379   return MI->isCall() || TII->isSchedulingBoundary(MI, MBB, *MF);
00380 }
00381 
00382 /// Main driver for both MachineScheduler and PostMachineScheduler.
00383 void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) {
00384   const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
00385   bool IsPostRA = Scheduler.isPostRA();
00386 
00387   // Visit all machine basic blocks.
00388   //
00389   // TODO: Visit blocks in global postorder or postorder within the bottom-up
00390   // loop tree. Then we can optionally compute global RegPressure.
00391   for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
00392        MBB != MBBEnd; ++MBB) {
00393 
00394     Scheduler.startBlock(MBB);
00395 
00396 #ifndef NDEBUG
00397     if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
00398       continue;
00399     if (SchedOnlyBlock.getNumOccurrences()
00400         && (int)SchedOnlyBlock != MBB->getNumber())
00401       continue;
00402 #endif
00403 
00404     // Break the block into scheduling regions [I, RegionEnd), and schedule each
00405     // region as soon as it is discovered. RegionEnd points to the scheduling
00406     // boundary at the bottom of the region. The DAG does not include RegionEnd,
00407     // but the region does (i.e. the next RegionEnd is above the previous
00408     // RegionBegin). If the current block has no terminator then RegionEnd ==
00409     // MBB->end() for the bottom region.
00410     //
00411     // The Scheduler may insert instructions during either schedule() or
00412     // exitRegion(), even for empty regions. So the local iterators 'I' and
00413     // 'RegionEnd' are invalid across these calls.
00414     //
00415     // MBB::size() uses instr_iterator to count. Here we need a bundle to count
00416     // as a single instruction.
00417     unsigned RemainingInstrs = std::distance(MBB->begin(), MBB->end());
00418     for(MachineBasicBlock::iterator RegionEnd = MBB->end();
00419         RegionEnd != MBB->begin(); RegionEnd = Scheduler.begin()) {
00420 
00421       // Avoid decrementing RegionEnd for blocks with no terminator.
00422       if (RegionEnd != MBB->end() ||
00423           isSchedBoundary(std::prev(RegionEnd), MBB, MF, TII, IsPostRA)) {
00424         --RegionEnd;
00425         // Count the boundary instruction.
00426         --RemainingInstrs;
00427       }
00428 
00429       // The next region starts above the previous region. Look backward in the
00430       // instruction stream until we find the nearest boundary.
00431       unsigned NumRegionInstrs = 0;
00432       MachineBasicBlock::iterator I = RegionEnd;
00433       for(;I != MBB->begin(); --I, --RemainingInstrs, ++NumRegionInstrs) {
00434         if (isSchedBoundary(std::prev(I), MBB, MF, TII, IsPostRA))
00435           break;
00436       }
00437       // Notify the scheduler of the region, even if we may skip scheduling
00438       // it. Perhaps it still needs to be bundled.
00439       Scheduler.enterRegion(MBB, I, RegionEnd, NumRegionInstrs);
00440 
00441       // Skip empty scheduling regions (0 or 1 schedulable instructions).
00442       if (I == RegionEnd || I == std::prev(RegionEnd)) {
00443         // Close the current region. Bundle the terminator if needed.
00444         // This invalidates 'RegionEnd' and 'I'.
00445         Scheduler.exitRegion();
00446         continue;
00447       }
00448       DEBUG(dbgs() << "********** " << ((Scheduler.isPostRA()) ? "PostRA " : "")
00449             << "MI Scheduling **********\n");
00450       DEBUG(dbgs() << MF->getName()
00451             << ":BB#" << MBB->getNumber() << " " << MBB->getName()
00452             << "\n  From: " << *I << "    To: ";
00453             if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
00454             else dbgs() << "End";
00455             dbgs() << " RegionInstrs: " << NumRegionInstrs
00456             << " Remaining: " << RemainingInstrs << "\n");
00457       if (DumpCriticalPathLength) {
00458         errs() << MF->getName();
00459         errs() << ":BB# " << MBB->getNumber();
00460         errs() << " " << MBB->getName() << " \n";
00461       }
00462 
00463       // Schedule a region: possibly reorder instructions.
00464       // This invalidates 'RegionEnd' and 'I'.
00465       Scheduler.schedule();
00466 
00467       // Close the current region.
00468       Scheduler.exitRegion();
00469 
00470       // Scheduling has invalidated the current iterator 'I'. Ask the
00471       // scheduler for the top of its scheduled region.
00472       RegionEnd = Scheduler.begin();
00473     }
00474     assert(RemainingInstrs == 0 && "Instruction count mismatch!");
00475     Scheduler.finishBlock();
00476     if (Scheduler.isPostRA()) {
00477       // FIXME: Ideally, no further passes should rely on kill flags. However,
00478       // thumb2 size reduction is currently an exception.
00479       Scheduler.fixupKills(MBB);
00480     }
00481   }
00482   Scheduler.finalizeSchedule();
00483 }
00484 
00485 void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
00486   // unimplemented
00487 }
00488 
00489 LLVM_DUMP_METHOD
00490 void ReadyQueue::dump() {
00491   dbgs() << Name << ": ";
00492   for (unsigned i = 0, e = Queue.size(); i < e; ++i)
00493     dbgs() << Queue[i]->NodeNum << " ";
00494   dbgs() << "\n";
00495 }
00496 
00497 //===----------------------------------------------------------------------===//
00498 // ScheduleDAGMI - Basic machine instruction scheduling. This is
00499 // independent of PreRA/PostRA scheduling and involves no extra book-keeping for
00500 // virtual registers.
00501 //===----------------------------------------------------------------------===//
00502 
00503 // Provide a vtable anchor.
00504 ScheduleDAGMI::~ScheduleDAGMI() {
00505 }
00506 
00507 bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
00508   return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
00509 }
00510 
00511 bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
00512   if (SuccSU != &ExitSU) {
00513     // Do not use WillCreateCycle, it assumes SD scheduling.
00514     // If Pred is reachable from Succ, then the edge creates a cycle.
00515     if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
00516       return false;
00517     Topo.AddPred(SuccSU, PredDep.getSUnit());
00518   }
00519   SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
00520   // Return true regardless of whether a new edge needed to be inserted.
00521   return true;
00522 }
00523 
00524 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
00525 /// NumPredsLeft reaches zero, release the successor node.
00526 ///
00527 /// FIXME: Adjust SuccSU height based on MinLatency.
00528 void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
00529   SUnit *SuccSU = SuccEdge->getSUnit();
00530 
00531   if (SuccEdge->isWeak()) {
00532     --SuccSU->WeakPredsLeft;
00533     if (SuccEdge->isCluster())
00534       NextClusterSucc = SuccSU;
00535     return;
00536   }
00537 #ifndef NDEBUG
00538   if (SuccSU->NumPredsLeft == 0) {
00539     dbgs() << "*** Scheduling failed! ***\n";
00540     SuccSU->dump(this);
00541     dbgs() << " has been released too many times!\n";
00542     llvm_unreachable(nullptr);
00543   }
00544 #endif
00545   // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
00546   // CurrCycle may have advanced since then.
00547   if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
00548     SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
00549 
00550   --SuccSU->NumPredsLeft;
00551   if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
00552     SchedImpl->releaseTopNode(SuccSU);
00553 }
00554 
00555 /// releaseSuccessors - Call releaseSucc on each of SU's successors.
00556 void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
00557   for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
00558        I != E; ++I) {
00559     releaseSucc(SU, &*I);
00560   }
00561 }
00562 
00563 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
00564 /// NumSuccsLeft reaches zero, release the predecessor node.
00565 ///
00566 /// FIXME: Adjust PredSU height based on MinLatency.
00567 void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
00568   SUnit *PredSU = PredEdge->getSUnit();
00569 
00570   if (PredEdge->isWeak()) {
00571     --PredSU->WeakSuccsLeft;
00572     if (PredEdge->isCluster())
00573       NextClusterPred = PredSU;
00574     return;
00575   }
00576 #ifndef NDEBUG
00577   if (PredSU->NumSuccsLeft == 0) {
00578     dbgs() << "*** Scheduling failed! ***\n";
00579     PredSU->dump(this);
00580     dbgs() << " has been released too many times!\n";
00581     llvm_unreachable(nullptr);
00582   }
00583 #endif
00584   // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
00585   // CurrCycle may have advanced since then.
00586   if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
00587     PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
00588 
00589   --PredSU->NumSuccsLeft;
00590   if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
00591     SchedImpl->releaseBottomNode(PredSU);
00592 }
00593 
00594 /// releasePredecessors - Call releasePred on each of SU's predecessors.
00595 void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
00596   for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
00597        I != E; ++I) {
00598     releasePred(SU, &*I);
00599   }
00600 }
00601 
00602 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
00603 /// crossing a scheduling boundary. [begin, end) includes all instructions in
00604 /// the region, including the boundary itself and single-instruction regions
00605 /// that don't get scheduled.
00606 void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
00607                                      MachineBasicBlock::iterator begin,
00608                                      MachineBasicBlock::iterator end,
00609                                      unsigned regioninstrs)
00610 {
00611   ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
00612 
00613   SchedImpl->initPolicy(begin, end, regioninstrs);
00614 }
00615 
00616 /// This is normally called from the main scheduler loop but may also be invoked
00617 /// by the scheduling strategy to perform additional code motion.
00618 void ScheduleDAGMI::moveInstruction(
00619   MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {
00620   // Advance RegionBegin if the first instruction moves down.
00621   if (&*RegionBegin == MI)
00622     ++RegionBegin;
00623 
00624   // Update the instruction stream.
00625   BB->splice(InsertPos, BB, MI);
00626 
00627   // Update LiveIntervals
00628   if (LIS)
00629     LIS->handleMove(MI, /*UpdateFlags=*/true);
00630 
00631   // Recede RegionBegin if an instruction moves above the first.
00632   if (RegionBegin == InsertPos)
00633     RegionBegin = MI;
00634 }
00635 
00636 bool ScheduleDAGMI::checkSchedLimit() {
00637 #ifndef NDEBUG
00638   if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
00639     CurrentTop = CurrentBottom;
00640     return false;
00641   }
00642   ++NumInstrsScheduled;
00643 #endif
00644   return true;
00645 }
00646 
00647 /// Per-region scheduling driver, called back from
00648 /// MachineScheduler::runOnMachineFunction. This is a simplified driver that
00649 /// does not consider liveness or register pressure. It is useful for PostRA
00650 /// scheduling and potentially other custom schedulers.
00651 void ScheduleDAGMI::schedule() {
00652   // Build the DAG.
00653   buildSchedGraph(AA);
00654 
00655   Topo.InitDAGTopologicalSorting();
00656 
00657   postprocessDAG();
00658 
00659   SmallVector<SUnit*, 8> TopRoots, BotRoots;
00660   findRootsAndBiasEdges(TopRoots, BotRoots);
00661 
00662   // Initialize the strategy before modifying the DAG.
00663   // This may initialize a DFSResult to be used for queue priority.
00664   SchedImpl->initialize(this);
00665 
00666   DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
00667           SUnits[su].dumpAll(this));
00668   if (ViewMISchedDAGs) viewGraph();
00669 
00670   // Initialize ready queues now that the DAG and priority data are finalized.
00671   initQueues(TopRoots, BotRoots);
00672 
00673   bool IsTopNode = false;
00674   while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
00675     assert(!SU->isScheduled && "Node already scheduled");
00676     if (!checkSchedLimit())
00677       break;
00678 
00679     MachineInstr *MI = SU->getInstr();
00680     if (IsTopNode) {
00681       assert(SU->isTopReady() && "node still has unscheduled dependencies");
00682       if (&*CurrentTop == MI)
00683         CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
00684       else
00685         moveInstruction(MI, CurrentTop);
00686     }
00687     else {
00688       assert(SU->isBottomReady() && "node still has unscheduled dependencies");
00689       MachineBasicBlock::iterator priorII =
00690         priorNonDebug(CurrentBottom, CurrentTop);
00691       if (&*priorII == MI)
00692         CurrentBottom = priorII;
00693       else {
00694         if (&*CurrentTop == MI)
00695           CurrentTop = nextIfDebug(++CurrentTop, priorII);
00696         moveInstruction(MI, CurrentBottom);
00697         CurrentBottom = MI;
00698       }
00699     }
00700     // Notify the scheduling strategy before updating the DAG.
00701     // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
00702     // runs, it can then use the accurate ReadyCycle time to determine whether
00703     // newly released nodes can move to the readyQ.
00704     SchedImpl->schedNode(SU, IsTopNode);
00705 
00706     updateQueues(SU, IsTopNode);
00707   }
00708   assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
00709 
00710   placeDebugValues();
00711 
00712   DEBUG({
00713       unsigned BBNum = begin()->getParent()->getNumber();
00714       dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
00715       dumpSchedule();
00716       dbgs() << '\n';
00717     });
00718 }
00719 
00720 /// Apply each ScheduleDAGMutation step in order.
00721 void ScheduleDAGMI::postprocessDAG() {
00722   for (unsigned i = 0, e = Mutations.size(); i < e; ++i) {
00723     Mutations[i]->apply(this);
00724   }
00725 }
00726 
00727 void ScheduleDAGMI::
00728 findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
00729                       SmallVectorImpl<SUnit*> &BotRoots) {
00730   for (std::vector<SUnit>::iterator
00731          I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
00732     SUnit *SU = &(*I);
00733     assert(!SU->isBoundaryNode() && "Boundary node should not be in SUnits");
00734 
00735     // Order predecessors so DFSResult follows the critical path.
00736     SU->biasCriticalPath();
00737 
00738     // A SUnit is ready to top schedule if it has no predecessors.
00739     if (!I->NumPredsLeft)
00740       TopRoots.push_back(SU);
00741     // A SUnit is ready to bottom schedule if it has no successors.
00742     if (!I->NumSuccsLeft)
00743       BotRoots.push_back(SU);
00744   }
00745   ExitSU.biasCriticalPath();
00746 }
00747 
00748 /// Identify DAG roots and setup scheduler queues.
00749 void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
00750                                ArrayRef<SUnit*> BotRoots) {
00751   NextClusterSucc = nullptr;
00752   NextClusterPred = nullptr;
00753 
00754   // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
00755   //
00756   // Nodes with unreleased weak edges can still be roots.
00757   // Release top roots in forward order.
00758   for (SmallVectorImpl<SUnit*>::const_iterator
00759          I = TopRoots.begin(), E = TopRoots.end(); I != E; ++I) {
00760     SchedImpl->releaseTopNode(*I);
00761   }
00762   // Release bottom roots in reverse order so the higher priority nodes appear
00763   // first. This is more natural and slightly more efficient.
00764   for (SmallVectorImpl<SUnit*>::const_reverse_iterator
00765          I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
00766     SchedImpl->releaseBottomNode(*I);
00767   }
00768 
00769   releaseSuccessors(&EntrySU);
00770   releasePredecessors(&ExitSU);
00771 
00772   SchedImpl->registerRoots();
00773 
00774   // Advance past initial DebugValues.
00775   CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
00776   CurrentBottom = RegionEnd;
00777 }
00778 
00779 /// Update scheduler queues after scheduling an instruction.
00780 void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
00781   // Release dependent instructions for scheduling.
00782   if (IsTopNode)
00783     releaseSuccessors(SU);
00784   else
00785     releasePredecessors(SU);
00786 
00787   SU->isScheduled = true;
00788 }
00789 
00790 /// Reinsert any remaining debug_values, just like the PostRA scheduler.
00791 void ScheduleDAGMI::placeDebugValues() {
00792   // If first instruction was a DBG_VALUE then put it back.
00793   if (FirstDbgValue) {
00794     BB->splice(RegionBegin, BB, FirstDbgValue);
00795     RegionBegin = FirstDbgValue;
00796   }
00797 
00798   for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
00799          DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
00800     std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
00801     MachineInstr *DbgValue = P.first;
00802     MachineBasicBlock::iterator OrigPrevMI = P.second;
00803     if (&*RegionBegin == DbgValue)
00804       ++RegionBegin;
00805     BB->splice(++OrigPrevMI, BB, DbgValue);
00806     if (OrigPrevMI == std::prev(RegionEnd))
00807       RegionEnd = DbgValue;
00808   }
00809   DbgValues.clear();
00810   FirstDbgValue = nullptr;
00811 }
00812 
00813 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
00814 void ScheduleDAGMI::dumpSchedule() const {
00815   for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
00816     if (SUnit *SU = getSUnit(&(*MI)))
00817       SU->dump(this);
00818     else
00819       dbgs() << "Missing SUnit\n";
00820   }
00821 }
00822 #endif
00823 
00824 //===----------------------------------------------------------------------===//
00825 // ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
00826 // preservation.
00827 //===----------------------------------------------------------------------===//
00828 
00829 ScheduleDAGMILive::~ScheduleDAGMILive() {
00830   delete DFSResult;
00831 }
00832 
00833 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
00834 /// crossing a scheduling boundary. [begin, end) includes all instructions in
00835 /// the region, including the boundary itself and single-instruction regions
00836 /// that don't get scheduled.
00837 void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
00838                                 MachineBasicBlock::iterator begin,
00839                                 MachineBasicBlock::iterator end,
00840                                 unsigned regioninstrs)
00841 {
00842   // ScheduleDAGMI initializes SchedImpl's per-region policy.
00843   ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
00844 
00845   // For convenience remember the end of the liveness region.
00846   LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
00847 
00848   SUPressureDiffs.clear();
00849 
00850   ShouldTrackPressure = SchedImpl->shouldTrackPressure();
00851 }
00852 
00853 // Setup the register pressure trackers for the top scheduled and bottom
00854 // scheduled regions.
00855 void ScheduleDAGMILive::initRegPressure() {
00856   TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin);
00857   BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
00858 
00859   // Close the RPTracker to finalize live ins.
00860   RPTracker.closeRegion();
00861 
00862   DEBUG(RPTracker.dump());
00863 
00864   // Initialize the live ins and live outs.
00865   TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
00866   BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
00867 
00868   // Close one end of the tracker so we can call
00869   // getMaxUpward/DownwardPressureDelta before advancing across any
00870   // instructions. This converts currently live regs into live ins/outs.
00871   TopRPTracker.closeTop();
00872   BotRPTracker.closeBottom();
00873 
00874   BotRPTracker.initLiveThru(RPTracker);
00875   if (!BotRPTracker.getLiveThru().empty()) {
00876     TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());
00877     DEBUG(dbgs() << "Live Thru: ";
00878           dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
00879   }
00880 
00881   // For each live out vreg reduce the pressure change associated with other
00882   // uses of the same vreg below the live-out reaching def.
00883   updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);
00884 
00885   // Account for liveness generated by the region boundary.
00886   if (LiveRegionEnd != RegionEnd) {
00887     SmallVector<unsigned, 8> LiveUses;
00888     BotRPTracker.recede(&LiveUses);
00889     updatePressureDiffs(LiveUses);
00890   }
00891 
00892   assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");
00893 
00894   // Cache the list of excess pressure sets in this region. This will also track
00895   // the max pressure in the scheduled code for these sets.
00896   RegionCriticalPSets.clear();
00897   const std::vector<unsigned> &RegionPressure =
00898     RPTracker.getPressure().MaxSetPressure;
00899   for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
00900     unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
00901     if (RegionPressure[i] > Limit) {
00902       DEBUG(dbgs() << TRI->getRegPressureSetName(i)
00903             << " Limit " << Limit
00904             << " Actual " << RegionPressure[i] << "\n");
00905       RegionCriticalPSets.push_back(PressureChange(i));
00906     }
00907   }
00908   DEBUG(dbgs() << "Excess PSets: ";
00909         for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
00910           dbgs() << TRI->getRegPressureSetName(
00911             RegionCriticalPSets[i].getPSet()) << " ";
00912         dbgs() << "\n");
00913 }
00914 
00915 void ScheduleDAGMILive::
00916 updateScheduledPressure(const SUnit *SU,
00917                         const std::vector<unsigned> &NewMaxPressure) {
00918   const PressureDiff &PDiff = getPressureDiff(SU);
00919   unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
00920   for (PressureDiff::const_iterator I = PDiff.begin(), E = PDiff.end();
00921        I != E; ++I) {
00922     if (!I->isValid())
00923       break;
00924     unsigned ID = I->getPSet();
00925     while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
00926       ++CritIdx;
00927     if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
00928       if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
00929           && NewMaxPressure[ID] <= INT16_MAX)
00930         RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
00931     }
00932     unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
00933     if (NewMaxPressure[ID] >= Limit - 2) {
00934       DEBUG(dbgs() << "  " << TRI->getRegPressureSetName(ID) << ": "
00935             << NewMaxPressure[ID] << " > " << Limit << "(+ "
00936             << BotRPTracker.getLiveThru()[ID] << " livethru)\n");
00937     }
00938   }
00939 }
00940 
00941 /// Update the PressureDiff array for liveness after scheduling this
00942 /// instruction.
00943 void ScheduleDAGMILive::updatePressureDiffs(ArrayRef<unsigned> LiveUses) {
00944   for (unsigned LUIdx = 0, LUEnd = LiveUses.size(); LUIdx != LUEnd; ++LUIdx) {
00945     /// FIXME: Currently assuming single-use physregs.
00946     unsigned Reg = LiveUses[LUIdx];
00947     DEBUG(dbgs() << "  LiveReg: " << PrintVRegOrUnit(Reg, TRI) << "\n");
00948     if (!TRI->isVirtualRegister(Reg))
00949       continue;
00950 
00951     // This may be called before CurrentBottom has been initialized. However,
00952     // BotRPTracker must have a valid position. We want the value live into the
00953     // instruction or live out of the block, so ask for the previous
00954     // instruction's live-out.
00955     const LiveInterval &LI = LIS->getInterval(Reg);
00956     VNInfo *VNI;
00957     MachineBasicBlock::const_iterator I =
00958       nextIfDebug(BotRPTracker.getPos(), BB->end());
00959     if (I == BB->end())
00960       VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
00961     else {
00962       LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(I));
00963       VNI = LRQ.valueIn();
00964     }
00965     // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
00966     assert(VNI && "No live value at use.");
00967     for (VReg2UseMap::iterator
00968            UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) {
00969       SUnit *SU = UI->SU;
00970       DEBUG(dbgs() << "  UpdateRegP: SU(" << SU->NodeNum << ") "
00971             << *SU->getInstr());
00972       // If this use comes before the reaching def, it cannot be a last use, so
00973       // decrease its pressure change.
00974       if (!SU->isScheduled && SU != &ExitSU) {
00975         LiveQueryResult LRQ
00976           = LI.Query(LIS->getInstructionIndex(SU->getInstr()));
00977         if (LRQ.valueIn() == VNI)
00978           getPressureDiff(SU).addPressureChange(Reg, true, &MRI);
00979       }
00980     }
00981   }
00982 }
00983 
00984 /// schedule - Called back from MachineScheduler::runOnMachineFunction
00985 /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
00986 /// only includes instructions that have DAG nodes, not scheduling boundaries.
00987 ///
00988 /// This is a skeletal driver, with all the functionality pushed into helpers,
00989 /// so that it can be easily extended by experimental schedulers. Generally,
00990 /// implementing MachineSchedStrategy should be sufficient to implement a new
00991 /// scheduling algorithm. However, if a scheduler further subclasses
00992 /// ScheduleDAGMILive then it will want to override this virtual method in order
00993 /// to update any specialized state.
00994 void ScheduleDAGMILive::schedule() {
00995   buildDAGWithRegPressure();
00996 
00997   Topo.InitDAGTopologicalSorting();
00998 
00999   postprocessDAG();
01000 
01001   SmallVector<SUnit*, 8> TopRoots, BotRoots;
01002   findRootsAndBiasEdges(TopRoots, BotRoots);
01003 
01004   // Initialize the strategy before modifying the DAG.
01005   // This may initialize a DFSResult to be used for queue priority.
01006   SchedImpl->initialize(this);
01007 
01008   DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
01009           SUnits[su].dumpAll(this));
01010   if (ViewMISchedDAGs) viewGraph();
01011 
01012   // Initialize ready queues now that the DAG and priority data are finalized.
01013   initQueues(TopRoots, BotRoots);
01014 
01015   if (ShouldTrackPressure) {
01016     assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
01017     TopRPTracker.setPos(CurrentTop);
01018   }
01019 
01020   bool IsTopNode = false;
01021   while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
01022     assert(!SU->isScheduled && "Node already scheduled");
01023     if (!checkSchedLimit())
01024       break;
01025 
01026     scheduleMI(SU, IsTopNode);
01027 
01028     updateQueues(SU, IsTopNode);
01029 
01030     if (DFSResult) {
01031       unsigned SubtreeID = DFSResult->getSubtreeID(SU);
01032       if (!ScheduledTrees.test(SubtreeID)) {
01033         ScheduledTrees.set(SubtreeID);
01034         DFSResult->scheduleTree(SubtreeID);
01035         SchedImpl->scheduleTree(SubtreeID);
01036       }
01037     }
01038 
01039     // Notify the scheduling strategy after updating the DAG.
01040     SchedImpl->schedNode(SU, IsTopNode);
01041   }
01042   assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
01043 
01044   placeDebugValues();
01045 
01046   DEBUG({
01047       unsigned BBNum = begin()->getParent()->getNumber();
01048       dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
01049       dumpSchedule();
01050       dbgs() << '\n';
01051     });
01052 }
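// Illustrative sketch (not part of the original file): as the comment above
// schedule() notes, implementing MachineSchedStrategy is usually enough to
// define a new scheduling algorithm. A minimal bottom-up strategy that picks
// nodes in plain ready-list order might look like this; the class name is
// hypothetical, and all heuristics and pressure tracking are deliberately
// omitted.
namespace {
class TrivialBottomUpStrategy : public MachineSchedStrategy {
  std::vector<SUnit*> ReadyQ; // bottom-up ready list
public:
  void initialize(ScheduleDAGMI *DAG) override { ReadyQ.clear(); }
  SUnit *pickNode(bool &IsTopNode) override {
    if (ReadyQ.empty())
      return nullptr;               // nothing left; scheduling is done
    IsTopNode = false;              // always schedule from the bottom
    SUnit *SU = ReadyQ.back();
    ReadyQ.pop_back();
    return SU;
  }
  void schedNode(SUnit *SU, bool IsTopNode) override {}
  void releaseTopNode(SUnit *SU) override {}
  void releaseBottomNode(SUnit *SU) override { ReadyQ.push_back(SU); }
};
} // namespace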
01053 
01054 /// Build the DAG and setup three register pressure trackers.
01055 void ScheduleDAGMILive::buildDAGWithRegPressure() {
01056   if (!ShouldTrackPressure) {
01057     RPTracker.reset();
01058     RegionCriticalPSets.clear();
01059     buildSchedGraph(AA);
01060     return;
01061   }
01062 
01063   // Initialize the register pressure tracker used by buildSchedGraph.
01064   RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
01065                  /*TrackUntiedDefs=*/true);
01066 
01067   // Account for liveness generated by the region boundary.
01068   if (LiveRegionEnd != RegionEnd)
01069     RPTracker.recede();
01070 
01071   // Build the DAG, and compute current register pressure.
01072   buildSchedGraph(AA, &RPTracker, &SUPressureDiffs);
01073 
01074   // Initialize top/bottom trackers after computing region pressure.
01075   initRegPressure();
01076 }
01077 
01078 void ScheduleDAGMILive::computeDFSResult() {
01079   if (!DFSResult)
01080     DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
01081   DFSResult->clear();
01082   ScheduledTrees.clear();
01083   DFSResult->resize(SUnits.size());
01084   DFSResult->compute(SUnits);
01085   ScheduledTrees.resize(DFSResult->getNumSubtrees());
01086 }
01087 
01088 /// Compute the max cyclic critical path through the DAG. The scheduling DAG
01089 /// only provides the critical path for single block loops. To handle loops that
01090 /// span blocks, we could use the vreg path latencies provided by
01091 /// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
01092 /// available for use in the scheduler.
01093 ///
01094 /// The cyclic path estimation identifies a def-use pair that crosses the back
01095 /// edge and considers the depth and height of the nodes. For example, consider
01096 /// the following instruction sequence where each instruction has unit latency
01097 /// and defines an eponymous virtual register:
01098 ///
01099 /// a->b(a,c)->c(b)->d(c)->exit
01100 ///
01101 /// The cyclic critical path is two cycles: b->c->b
01102 /// The acyclic critical path is four cycles: a->b->c->d->exit
01103 /// LiveOutHeight = height(c) = len(c->d->exit) = 2
01104 /// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
01105 /// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
01106 /// LiveInDepth = depth(b) = len(a->b) = 1
01107 ///
01108 /// LiveOutDepth - LiveInDepth = 3 - 1 = 2
01109 /// LiveInHeight - LiveOutHeight = 4 - 2 = 2
01110 /// CyclicCriticalPath = min(2, 2) = 2
01111 ///
01112 /// This could be relevant to PostRA scheduling, but is currently implemented
01113 /// assuming LiveIntervals.
01114 unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
01115   // This only applies to single block loops.
01116   if (!BB->isSuccessor(BB))
01117     return 0;
01118 
01119   unsigned MaxCyclicLatency = 0;
01120   // Visit each live out vreg def to find def/use pairs that cross iterations.
01121   ArrayRef<unsigned> LiveOuts = RPTracker.getPressure().LiveOutRegs;
01122   for (ArrayRef<unsigned>::iterator RI = LiveOuts.begin(), RE = LiveOuts.end();
01123        RI != RE; ++RI) {
01124     unsigned Reg = *RI;
01125     if (!TRI->isVirtualRegister(Reg))
01126       continue;
01127     const LiveInterval &LI = LIS->getInterval(Reg);
01128     const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
01129     if (!DefVNI)
01130       continue;
01131 
01132     MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
01133     const SUnit *DefSU = getSUnit(DefMI);
01134     if (!DefSU)
01135       continue;
01136 
01137     unsigned LiveOutHeight = DefSU->getHeight();
01138     unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
01139     // Visit all local users of the vreg def.
01140     for (VReg2UseMap::iterator
01141            UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) {
01142       if (UI->SU == &ExitSU)
01143         continue;
01144 
01145       // Only consider uses of the phi.
01146       LiveQueryResult LRQ =
01147         LI.Query(LIS->getInstructionIndex(UI->SU->getInstr()));
01148       if (!LRQ.valueIn()->isPHIDef())
01149         continue;
01150 
01151       // Assume that a path spanning two iterations is a cycle, which could
01152       // overestimate in strange cases. This allows cyclic latency to be
01153       // estimated as the minimum slack of the vreg's depth or height.
01154       unsigned CyclicLatency = 0;
01155       if (LiveOutDepth > UI->SU->getDepth())
01156         CyclicLatency = LiveOutDepth - UI->SU->getDepth();
01157 
01158       unsigned LiveInHeight = UI->SU->getHeight() + DefSU->Latency;
01159       if (LiveInHeight > LiveOutHeight) {
01160         if (LiveInHeight - LiveOutHeight < CyclicLatency)
01161           CyclicLatency = LiveInHeight - LiveOutHeight;
01162       }
01163       else
01164         CyclicLatency = 0;
01165 
01166       DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
01167             << UI->SU->NodeNum << ") = " << CyclicLatency << "c\n");
01168       if (CyclicLatency > MaxCyclicLatency)
01169         MaxCyclicLatency = CyclicLatency;
01170     }
01171   }
01172   DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
01173   return MaxCyclicLatency;
01174 }
01175 
01176 /// Move an instruction and update register pressure.
01177 void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
01178   // Move the instruction to its new location in the instruction stream.
01179   MachineInstr *MI = SU->getInstr();
01180 
01181   if (IsTopNode) {
01182     assert(SU->isTopReady() && "node still has unscheduled dependencies");
01183     if (&*CurrentTop == MI)
01184       CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
01185     else {
01186       moveInstruction(MI, CurrentTop);
01187       TopRPTracker.setPos(MI);
01188     }
01189 
01190     if (ShouldTrackPressure) {
01191       // Update top scheduled pressure.
01192       TopRPTracker.advance();
01193       assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
01194       updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
01195     }
01196   }
01197   else {
01198     assert(SU->isBottomReady() && "node still has unscheduled dependencies");
01199     MachineBasicBlock::iterator priorII =
01200       priorNonDebug(CurrentBottom, CurrentTop);
01201     if (&*priorII == MI)
01202       CurrentBottom = priorII;
01203     else {
01204       if (&*CurrentTop == MI) {
01205         CurrentTop = nextIfDebug(++CurrentTop, priorII);
01206         TopRPTracker.setPos(CurrentTop);
01207       }
01208       moveInstruction(MI, CurrentBottom);
01209       CurrentBottom = MI;
01210     }
01211     if (ShouldTrackPressure) {
01212       // Update bottom scheduled pressure.
01213       SmallVector<unsigned, 8> LiveUses;
01214       BotRPTracker.recede(&LiveUses);
01215       assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
01216       updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
01217       updatePressureDiffs(LiveUses);
01218     }
01219   }
01220 }
01221 
01222 //===----------------------------------------------------------------------===//
01223 // LoadClusterMutation - DAG post-processing to cluster loads.
01224 //===----------------------------------------------------------------------===//
01225 
01226 namespace {
01227 /// \brief Post-process the DAG to create cluster edges between neighboring
01228 /// loads.
01229 class LoadClusterMutation : public ScheduleDAGMutation {
01230   struct LoadInfo {
01231     SUnit *SU;
01232     unsigned BaseReg;
01233     unsigned Offset;
01234     LoadInfo(SUnit *su, unsigned reg, unsigned ofs)
01235       : SU(su), BaseReg(reg), Offset(ofs) {}
01236 
01237     bool operator<(const LoadInfo &RHS) const {
01238       return std::tie(BaseReg, Offset) < std::tie(RHS.BaseReg, RHS.Offset);
01239     }
01240   };
01241 
01242   const TargetInstrInfo *TII;
01243   const TargetRegisterInfo *TRI;
01244 public:
01245   LoadClusterMutation(const TargetInstrInfo *tii,
01246                       const TargetRegisterInfo *tri)
01247     : TII(tii), TRI(tri) {}
01248 
01249   void apply(ScheduleDAGMI *DAG) override;
01250 protected:
01251   void clusterNeighboringLoads(ArrayRef<SUnit*> Loads, ScheduleDAGMI *DAG);
01252 };
01253 } // anonymous
01254 
01255 void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads,
01256                                                   ScheduleDAGMI *DAG) {
01257   SmallVector<LoadClusterMutation::LoadInfo,32> LoadRecords;
01258   for (unsigned Idx = 0, End = Loads.size(); Idx != End; ++Idx) {
01259     SUnit *SU = Loads[Idx];
01260     unsigned BaseReg;
01261     unsigned Offset;
01262     if (TII->getLdStBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI))
01263       LoadRecords.push_back(LoadInfo(SU, BaseReg, Offset));
01264   }
01265   if (LoadRecords.size() < 2)
01266     return;
01267   std::sort(LoadRecords.begin(), LoadRecords.end());
01268   unsigned ClusterLength = 1;
01269   for (unsigned Idx = 0, End = LoadRecords.size(); Idx < (End - 1); ++Idx) {
01270     if (LoadRecords[Idx].BaseReg != LoadRecords[Idx+1].BaseReg) {
01271       ClusterLength = 1;
01272       continue;
01273     }
01274 
01275     SUnit *SUa = LoadRecords[Idx].SU;
01276     SUnit *SUb = LoadRecords[Idx+1].SU;
01277     if (TII->shouldClusterLoads(SUa->getInstr(), SUb->getInstr(), ClusterLength)
01278         && DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
01279 
01280       DEBUG(dbgs() << "Cluster loads SU(" << SUa->NodeNum << ") - SU("
01281             << SUb->NodeNum << ")\n");
01282       // Copy successor edges from SUa to SUb. Interleaving computation
01283       // dependent on SUa can prevent load combining due to register reuse.
01284       // Predecessor edges do not need to be copied from SUb to SUa since nearby
01285       // loads should have effectively the same inputs.
01286       for (SUnit::const_succ_iterator
01287              SI = SUa->Succs.begin(), SE = SUa->Succs.end(); SI != SE; ++SI) {
01288         if (SI->getSUnit() == SUb)
01289           continue;
01290         DEBUG(dbgs() << "  Copy Succ SU(" << SI->getSUnit()->NodeNum << ")\n");
01291         DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial));
01292       }
01293       ++ClusterLength;
01294     }
01295     else
01296       ClusterLength = 1;
01297   }
01298 }
01299 
01300 /// \brief Callback from DAG postProcessing to create cluster edges for loads.
01301 void LoadClusterMutation::apply(ScheduleDAGMI *DAG) {
01302   // Map DAG NodeNum to store chain ID.
01303   DenseMap<unsigned, unsigned> StoreChainIDs;
01304   // Map each store chain to a set of dependent loads.
01305   SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
01306   for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
01307     SUnit *SU = &DAG->SUnits[Idx];
01308     if (!SU->getInstr()->mayLoad())
01309       continue;
01310     unsigned ChainPredID = DAG->SUnits.size();
01311     for (SUnit::const_pred_iterator
01312            PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
01313       if (PI->isCtrl()) {
01314         ChainPredID = PI->getSUnit()->NodeNum;
01315         break;
01316       }
01317     }
01318     // Check if this chain-like pred has been seen
01319     // before. ChainPredID==MaxNodeID for loads at the top of the schedule.
01320     unsigned NumChains = StoreChainDependents.size();
01321     std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
01322       StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
01323     if (Result.second)
01324       StoreChainDependents.resize(NumChains + 1);
01325     StoreChainDependents[Result.first->second].push_back(SU);
01326   }
01327   // Iterate over the store chains.
01328   for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx)
01329     clusterNeighboringLoads(StoreChainDependents[Idx], DAG);
01330 }
01331 
01332 //===----------------------------------------------------------------------===//
01333 // MacroFusion - DAG post-processing to encourage fusion of macro ops.
01334 //===----------------------------------------------------------------------===//
01335 
01336 namespace {
01337 /// \brief Post-process the DAG to create cluster edges between instructions
01338 /// that may be fused by the processor into a single operation.
01339 class MacroFusion : public ScheduleDAGMutation {
01340   const TargetInstrInfo *TII;
01341 public:
01342   MacroFusion(const TargetInstrInfo *tii): TII(tii) {}
01343 
01344   void apply(ScheduleDAGMI *DAG) override;
01345 };
01346 } // anonymous
01347 
01348 /// \brief Callback from DAG postProcessing to create cluster edges to encourage
01349 /// fused operations.
01350 void MacroFusion::apply(ScheduleDAGMI *DAG) {
01351   // For now, assume targets can only fuse with the branch.
01352   MachineInstr *Branch = DAG->ExitSU.getInstr();
01353   if (!Branch)
01354     return;
01355 
01356   for (unsigned Idx = DAG->SUnits.size(); Idx > 0;) {
01357     SUnit *SU = &DAG->SUnits[--Idx];
01358     if (!TII->shouldScheduleAdjacent(SU->getInstr(), Branch))
01359       continue;
01360 
01361     // Create a single weak edge from SU to ExitSU. The only effect is to cause
01362     // bottom-up scheduling to heavily prioritize the clustered SU.  There is no
01363     // need to copy predecessor edges from ExitSU to SU, since top-down
01364     // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling
01365     // of SU, we could create an artificial edge from the deepest root, but it
01366     // hasn't been needed yet.
01367     bool Success = DAG->addEdge(&DAG->ExitSU, SDep(SU, SDep::Cluster));
01368     (void)Success;
01369     assert(Success && "No DAG nodes should be reachable from ExitSU");
01370 
01371     DEBUG(dbgs() << "Macro Fuse SU(" << SU->NodeNum << ")\n");
01372     break;
01373   }
01374 }
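
// For example, a subtarget that fuses compare-and-branch pairs would return
// true from shouldScheduleAdjacent() for the compare feeding this block's
// terminator; the weak cluster edge added above then makes bottom-up
// scheduling pick that compare immediately after the branch, keeping the
// fusible pair adjacent in the final instruction order.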
01375 
01376 //===----------------------------------------------------------------------===//
01377 // CopyConstrain - DAG post-processing to encourage copy elimination.
01378 //===----------------------------------------------------------------------===//
01379 
01380 namespace {
01381 /// \brief Post-process the DAG to create weak edges from all uses of a copy to
01382 /// the one use that defines the copy's source vreg, most likely an induction
01383 /// variable increment.
01384 class CopyConstrain : public ScheduleDAGMutation {
01385   // Transient state.
01386   SlotIndex RegionBeginIdx;
01387   // RegionEndIdx is the slot index of the last non-debug instruction in the
01388   // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
01389   SlotIndex RegionEndIdx;
01390 public:
01391   CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
01392 
01393   void apply(ScheduleDAGMI *DAG) override;
01394 
01395 protected:
01396   void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
01397 };
01398 } // anonymous
01399 
01400 /// constrainLocalCopy handles two possibilities:
01401 /// 1) Local src:
01402 /// I0:     = dst
01403 /// I1: src = ...
01404 /// I2:     = dst
01405 /// I3: dst = src (copy)
01406 /// (create pred->succ edges I0->I1, I2->I1)
01407 ///
01408 /// 2) Local copy:
01409 /// I0: dst = src (copy)
01410 /// I1:     = dst
01411 /// I2: src = ...
01412 /// I3:     = dst
01413 /// (create pred->succ edges I1->I2, I3->I2)
01414 ///
01415 /// Although the MachineScheduler is currently constrained to single blocks,
01416 /// this algorithm should handle extended blocks. An EBB is a set of
01417 /// contiguously numbered blocks such that the previous block in the EBB is
01418 /// always the single predecessor.
01419 void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
01420   LiveIntervals *LIS = DAG->getLIS();
01421   MachineInstr *Copy = CopySU->getInstr();
01422 
01423   // Check for pure vreg copies.
01424   unsigned SrcReg = Copy->getOperand(1).getReg();
01425   if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
01426     return;
01427 
01428   unsigned DstReg = Copy->getOperand(0).getReg();
01429   if (!TargetRegisterInfo::isVirtualRegister(DstReg))
01430     return;
01431 
01432   // Check if either the dest or source is local. If it's live across a back
01433   // edge, it's not local. Note that if both vregs are live across the back
01434   // edge, we cannot successfully constrain the copy without cyclic scheduling.
01435   unsigned LocalReg = DstReg;
01436   unsigned GlobalReg = SrcReg;
01437   LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
01438   if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
01439     LocalReg = SrcReg;
01440     GlobalReg = DstReg;
01441     LocalLI = &LIS->getInterval(LocalReg);
01442     if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
01443       return;
01444   }
01445   LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
01446 
01447   // Find the global segment after the start of the local LI.
01448   LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
01449   // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
01450   // local live range. We could create edges from other global uses to the local
01451   // start, but the coalescer should have already eliminated these cases, so
01452   // don't bother dealing with it.
01453   if (GlobalSegment == GlobalLI->end())
01454     return;
01455 
01456   // If GlobalSegment is killed at the LocalLI->start, the call to find()
01457   // returned the next global segment. But if GlobalSegment overlaps with
01458   // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
01459   // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
01460   if (GlobalSegment->contains(LocalLI->beginIndex()))
01461     ++GlobalSegment;
01462 
01463   if (GlobalSegment == GlobalLI->end())
01464     return;
01465 
01466   // Check if GlobalLI contains a hole in the vicinity of LocalLI.
01467   if (GlobalSegment != GlobalLI->begin()) {
01468     // Two address defs have no hole.
01469     if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
01470                                GlobalSegment->start)) {
01471       return;
01472     }
01473     // If the prior global segment may be defined by the same two-address
01474   // instruction that also defines LocalLI, then we cannot make a hole here.
01475     if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
01476                                LocalLI->beginIndex())) {
01477       return;
01478     }
01479     // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
01480     // it would be a disconnected component in the live range.
01481     assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
01482            "Disconnected LRG within the scheduling region.");
01483   }
01484   MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
01485   if (!GlobalDef)
01486     return;
01487 
01488   SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
01489   if (!GlobalSU)
01490     return;
01491 
01492   // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
01493   // constraining the uses of the last local def to precede GlobalDef.
01494   SmallVector<SUnit*,8> LocalUses;
01495   const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
01496   MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
01497   SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
01498   for (SUnit::const_succ_iterator
01499          I = LastLocalSU->Succs.begin(), E = LastLocalSU->Succs.end();
01500        I != E; ++I) {
01501     if (I->getKind() != SDep::Data || I->getReg() != LocalReg)
01502       continue;
01503     if (I->getSUnit() == GlobalSU)
01504       continue;
01505     if (!DAG->canAddEdge(GlobalSU, I->getSUnit()))
01506       return;
01507     LocalUses.push_back(I->getSUnit());
01508   }
01509   // Open the top of the GlobalLI hole by constraining any earlier global uses
01510   // to precede the start of LocalLI.
01511   SmallVector<SUnit*,8> GlobalUses;
01512   MachineInstr *FirstLocalDef =
01513     LIS->getInstructionFromIndex(LocalLI->beginIndex());
01514   SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
01515   for (SUnit::const_pred_iterator
01516          I = GlobalSU->Preds.begin(), E = GlobalSU->Preds.end(); I != E; ++I) {
01517     if (I->getKind() != SDep::Anti || I->getReg() != GlobalReg)
01518       continue;
01519     if (I->getSUnit() == FirstLocalSU)
01520       continue;
01521     if (!DAG->canAddEdge(FirstLocalSU, I->getSUnit()))
01522       return;
01523     GlobalUses.push_back(I->getSUnit());
01524   }
01525   DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
01526   // Add the weak edges.
01527   for (SmallVectorImpl<SUnit*>::const_iterator
01528          I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {
01529     DEBUG(dbgs() << "  Local use SU(" << (*I)->NodeNum << ") -> SU("
01530           << GlobalSU->NodeNum << ")\n");
01531     DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));
01532   }
01533   for (SmallVectorImpl<SUnit*>::const_iterator
01534          I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {
01535     DEBUG(dbgs() << "  Global use SU(" << (*I)->NodeNum << ") -> SU("
01536           << FirstLocalSU->NodeNum << ")\n");
01537     DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));
01538   }
01539 }
01540 
01541 /// \brief Callback from DAG postProcessing to create weak edges to encourage
01542 /// copy elimination.
01543 void CopyConstrain::apply(ScheduleDAGMI *DAG) {
01544   assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
01545 
01546   MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
01547   if (FirstPos == DAG->end())
01548     return;
01549   RegionBeginIdx = DAG->getLIS()->getInstructionIndex(&*FirstPos);
01550   RegionEndIdx = DAG->getLIS()->getInstructionIndex(
01551     &*priorNonDebug(DAG->end(), DAG->begin()));
01552 
01553   for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
01554     SUnit *SU = &DAG->SUnits[Idx];
01555     if (!SU->getInstr()->isCopy())
01556       continue;
01557 
01558     constrainLocalCopy(SU, static_cast<ScheduleDAGMILive*>(DAG));
01559   }
01560 }
01561 
01562 //===----------------------------------------------------------------------===//
01563 // MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
01564 // and possibly other custom schedulers.
01565 //===----------------------------------------------------------------------===//
01566 
01567 static const unsigned InvalidCycle = ~0U;
01568 
01569 SchedBoundary::~SchedBoundary() { delete HazardRec; }
01570 
01571 void SchedBoundary::reset() {
01572   // A new HazardRec is created for each DAG and owned by SchedBoundary.
01573   // Destroying and reconstructing it is very expensive though. So keep
01574   // invalid, placeholder HazardRecs.
01575   if (HazardRec && HazardRec->isEnabled()) {
01576     delete HazardRec;
01577     HazardRec = nullptr;
01578   }
01579   Available.clear();
01580   Pending.clear();
01581   CheckPending = false;
01582   NextSUs.clear();
01583   CurrCycle = 0;
01584   CurrMOps = 0;
01585   MinReadyCycle = UINT_MAX;
01586   ExpectedLatency = 0;
01587   DependentLatency = 0;
01588   RetiredMOps = 0;
01589   MaxExecutedResCount = 0;
01590   ZoneCritResIdx = 0;
01591   IsResourceLimited = false;
01592   ReservedCycles.clear();
01593 #ifndef NDEBUG
01594   // Track the maximum number of stall cycles that could arise either from the
01595   // latency of a DAG edge or the number of cycles that a processor resource is
01596   // reserved (SchedBoundary::ReservedCycles).
01597   MaxObservedStall = 0;
01598 #endif
01599   // Reserve a zero-count for invalid CritResIdx.
01600   ExecutedResCounts.resize(1);
01601   assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
01602 }
01603 
01604 void SchedRemainder::
01605 init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
01606   reset();
01607   if (!SchedModel->hasInstrSchedModel())
01608     return;
01609   RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
01610   for (std::vector<SUnit>::iterator
01611          I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) {
01612     const MCSchedClassDesc *SC = DAG->getSchedClass(&*I);
01613     RemIssueCount += SchedModel->getNumMicroOps(I->getInstr(), SC)
01614       * SchedModel->getMicroOpFactor();
01615     for (TargetSchedModel::ProcResIter
01616            PI = SchedModel->getWriteProcResBegin(SC),
01617            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
01618       unsigned PIdx = PI->ProcResourceIdx;
01619       unsigned Factor = SchedModel->getResourceFactor(PIdx);
01620       RemainingCounts[PIdx] += (Factor * PI->Cycles);
01621     }
01622   }
01623 }
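
// RemIssueCount and RemainingCounts are kept in "scaled" units: each count is
// multiplied by a per-resource factor so that work on resources of different
// widths, and micro-op issue work, can be compared directly. A hypothetical
// numeric sketch (values chosen only for illustration, with factors derived
// roughly as a common multiple divided by the number of units):
//
//   // IssueWidth = 4 micro-ops/cycle, 2 ALU units, scaling LCM = 4.
//   unsigned MicroOpFactor = 4 / 4;  // 1 scaled unit per micro-op slot
//   unsigned ALUFactor     = 4 / 2;  // 2 scaled units per ALU-cycle
//   // Per cycle, both resources then provide 4 scaled units of capacity, so
//   // an instruction with 2 micro-ops that busies an ALU for 1 cycle adds
//   // 2 * 1 = 2 units of issue work and 1 * 2 = 2 units of ALU work: both
//   // limits are consumed at the same rate.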
01624 
01625 void SchedBoundary::
01626 init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
01627   reset();
01628   DAG = dag;
01629   SchedModel = smodel;
01630   Rem = rem;
01631   if (SchedModel->hasInstrSchedModel()) {
01632     ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
01633     ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle);
01634   }
01635 }
01636 
01637 /// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
01638 /// these "soft stalls" differently than the hard stall cycles based on CPU
01639 /// resources and computed by checkHazard(). A fully in-order model
01640 /// (MicroOpBufferSize==0) will not make use of this since instructions are not
01641 /// available for scheduling until they are ready. However, a weaker in-order
01642 /// model may use this for heuristics. For example, if a processor has in-order
01643 /// behavior when reading certain resources, this may come into play.
01644 unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
01645   if (!SU->isUnbuffered)
01646     return 0;
01647 
01648   unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
01649   if (ReadyCycle > CurrCycle)
01650     return ReadyCycle - CurrCycle;
01651   return 0;
01652 }
01653 
01654 /// Compute the next cycle at which the given processor resource can be
01655 /// scheduled.
01656 unsigned SchedBoundary::
01657 getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
01658   unsigned NextUnreserved = ReservedCycles[PIdx];
01659   // If this resource has never been used, always return cycle zero.
01660   if (NextUnreserved == InvalidCycle)
01661     return 0;
01662   // For bottom-up scheduling add the cycles needed for the current operation.
01663   if (!isTop())
01664     NextUnreserved += Cycles;
01665   return NextUnreserved;
01666 }
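
// For example (illustrative numbers): a resource whose ReservedCycles entry is
// still InvalidCycle has never been used, so cycle 0 is returned; bottom-up,
// with ReservedCycles[PIdx] == 5 and an operation that occupies the resource
// for Cycles == 3, the next cycle at which it can be placed is 5 + 3 = 8.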
01667 
01668 /// Does this SU have a hazard within the current instruction group.
01669 ///
01670 /// The scheduler supports two modes of hazard recognition. The first is the
01671 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
01672 /// supports highly complicated in-order reservation tables
01673 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
01674 ///
01675 /// The second is a streamlined mechanism that checks for hazards based on
01676 /// simple counters that the scheduler itself maintains. It explicitly checks
01677 /// for instruction dispatch limitations, including the number of micro-ops that
01678 /// can dispatch per cycle.
01679 ///
01680 /// TODO: Also check whether the SU must start a new group.
01681 bool SchedBoundary::checkHazard(SUnit *SU) {
01682   if (HazardRec->isEnabled()
01683       && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
01684     return true;
01685   }
01686   unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
01687   if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
01688     DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") uops="
01689           << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
01690     return true;
01691   }
01692   if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
01693     const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
01694     for (TargetSchedModel::ProcResIter
01695            PI = SchedModel->getWriteProcResBegin(SC),
01696            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
01697       unsigned NRCycle = getNextResourceCycle(PI->ProcResourceIdx, PI->Cycles);
01698       if (NRCycle > CurrCycle) {
01699 #ifndef NDEBUG
01700         MaxObservedStall = std::max(PI->Cycles, MaxObservedStall);
01701 #endif
01702         DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") "
01703               << SchedModel->getResourceName(PI->ProcResourceIdx)
01704               << "=" << NRCycle << "c\n");
01705         return true;
01706       }
01707     }
01708   }
01709   return false;
01710 }
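
// A standalone sketch of the micro-op group check above (illustrative only,
// not part of this file):
//
//   bool wouldExceedGroup(unsigned CurrMOps, unsigned UOps,
//                         unsigned IssueWidth) {
//     // Mirrors the CurrMOps test in checkHazard(): an instruction may not
//     // join the current group if that would push the group past IssueWidth,
//     // but it may always start a fresh group (CurrMOps == 0).
//     return CurrMOps > 0 && CurrMOps + UOps > IssueWidth;
//   }
//   // wouldExceedGroup(3, 2, 4) == true  -> hazard, defer to the next cycle
//   // wouldExceedGroup(0, 6, 4) == false -> may begin a new group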
01711 
01712 // Find the unscheduled node in ReadySUs with the highest latency.
01713 unsigned SchedBoundary::
01714 findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
01715   SUnit *LateSU = nullptr;
01716   unsigned RemLatency = 0;
01717   for (ArrayRef<SUnit*>::iterator I = ReadySUs.begin(), E = ReadySUs.end();
01718        I != E; ++I) {
01719     unsigned L = getUnscheduledLatency(*I);
01720     if (L > RemLatency) {
01721       RemLatency = L;
01722       LateSU = *I;
01723     }
01724   }
01725   if (LateSU) {
01726     DEBUG(dbgs() << Available.getName() << " RemLatency SU("
01727           << LateSU->NodeNum << ") " << RemLatency << "c\n");
01728   }
01729   return RemLatency;
01730 }
01731 
01732 // Count resources in this zone and the remaining unscheduled
01733 // instruction. Return the max count, scaled. Set OtherCritIdx to the critical
01734 // resource index, or zero if the zone is issue limited.
01735 unsigned SchedBoundary::
01736 getOtherResourceCount(unsigned &OtherCritIdx) {
01737   OtherCritIdx = 0;
01738   if (!SchedModel->hasInstrSchedModel())
01739     return 0;
01740 
01741   unsigned OtherCritCount = Rem->RemIssueCount
01742     + (RetiredMOps * SchedModel->getMicroOpFactor());
01743   DEBUG(dbgs() << "  " << Available.getName() << " + Remain MOps: "
01744         << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
01745   for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
01746        PIdx != PEnd; ++PIdx) {
01747     unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
01748     if (OtherCount > OtherCritCount) {
01749       OtherCritCount = OtherCount;
01750       OtherCritIdx = PIdx;
01751     }
01752   }
01753   if (OtherCritIdx) {
01754     DEBUG(dbgs() << "  " << Available.getName() << " + Remain CritRes: "
01755           << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
01756           << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
01757   }
01758   return OtherCritCount;
01759 }
01760 
01761 void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
01762   assert(SU->getInstr() && "Scheduled SUnit must have instr");
01763 
01764 #ifndef NDEBUG
01765   // ReadyCycle has been bumped up to CurrCycle when this node was
01766   // scheduled, but CurrCycle may have been eagerly advanced immediately after
01767   // scheduling, so may now be greater than ReadyCycle.
01768   if (ReadyCycle > CurrCycle)
01769     MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
01770 #endif
01771 
01772   if (ReadyCycle < MinReadyCycle)
01773     MinReadyCycle = ReadyCycle;
01774 
01775   // Check for interlocks first. For the purpose of other heuristics, an
01776   // instruction that cannot issue appears as if it's not in the ReadyQueue.
01777   bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
01778   if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU))
01779     Pending.push(SU);
01780   else
01781     Available.push(SU);
01782 
01783   // Record this node as an immediate dependent of the scheduled node.
01784   NextSUs.insert(SU);
01785 }
01786 
01787 void SchedBoundary::releaseTopNode(SUnit *SU) {
01788   if (SU->isScheduled)
01789     return;
01790 
01791   releaseNode(SU, SU->TopReadyCycle);
01792 }
01793 
01794 void SchedBoundary::releaseBottomNode(SUnit *SU) {
01795   if (SU->isScheduled)
01796     return;
01797 
01798   releaseNode(SU, SU->BotReadyCycle);
01799 }
01800 
01801 /// Move the boundary of scheduled code by one cycle.
01802 void SchedBoundary::bumpCycle(unsigned NextCycle) {
01803   if (SchedModel->getMicroOpBufferSize() == 0) {
01804     assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
01805     if (MinReadyCycle > NextCycle)
01806       NextCycle = MinReadyCycle;
01807   }
01808   // Update the current micro-ops, which will issue in the next cycle.
01809   unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
01810   CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
01811 
01812   // Decrement DependentLatency based on the next cycle.
01813   if ((NextCycle - CurrCycle) > DependentLatency)
01814     DependentLatency = 0;
01815   else
01816     DependentLatency -= (NextCycle - CurrCycle);
01817 
01818   if (!HazardRec->isEnabled()) {
01819     // Bypass HazardRec virtual calls.
01820     CurrCycle = NextCycle;
01821   }
01822   else {
01823     // Bypass getHazardType calls in case of long latency.
01824     for (; CurrCycle != NextCycle; ++CurrCycle) {
01825       if (isTop())
01826         HazardRec->AdvanceCycle();
01827       else
01828         HazardRec->RecedeCycle();
01829     }
01830   }
01831   CheckPending = true;
01832   unsigned LFactor = SchedModel->getLatencyFactor();
01833   IsResourceLimited =
01834     (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
01835     > (int)LFactor;
01836 
01837   DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n');
01838 }
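
// A numeric sketch of the bookkeeping above (illustrative values): with
// IssueWidth == 4, CurrCycle == 10, CurrMOps == 3 and a bump to NextCycle ==
// 12, DecMOps = 4 * (12 - 10) = 8 issue slots elapse, so CurrMOps (3 <= 8)
// resets to 0; DependentLatency likewise shrinks by the 2 elapsed cycles,
// saturating at 0.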
01839 
01840 void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
01841   ExecutedResCounts[PIdx] += Count;
01842   if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
01843     MaxExecutedResCount = ExecutedResCounts[PIdx];
01844 }
01845 
01846 /// Add the given processor resource to this scheduled zone.
01847 ///
01848 /// \param Cycles indicates the number of consecutive (non-pipelined) cycles
01849 /// during which this resource is consumed.
01850 ///
01851 /// \return the next cycle at which the instruction may execute without
01852 /// oversubscribing resources.
01853 unsigned SchedBoundary::
01854 countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
01855   unsigned Factor = SchedModel->getResourceFactor(PIdx);
01856   unsigned Count = Factor * Cycles;
01857   DEBUG(dbgs() << "  " << SchedModel->getResourceName(PIdx)
01858         << " +" << Cycles << "x" << Factor << "u\n");
01859 
01860   // Update Executed resources counts.
01861   incExecutedResources(PIdx, Count);
01862   assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
01863   Rem->RemainingCounts[PIdx] -= Count;
01864 
01865   // Check if this resource exceeds the current critical resource. If so, it
01866   // becomes the critical resource.
01867   if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
01868     ZoneCritResIdx = PIdx;
01869     DEBUG(dbgs() << "  *** Critical resource "
01870           << SchedModel->getResourceName(PIdx) << ": "
01871           << getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n");
01872   }
01873   // For reserved resources, record the highest cycle using the resource.
01874   unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles);
01875   if (NextAvailable > CurrCycle) {
01876     DEBUG(dbgs() << "  Resource conflict: "
01877           << SchedModel->getProcResource(PIdx)->Name << " reserved until @"
01878           << NextAvailable << "\n");
01879   }
01880   return NextAvailable;
01881 }
01882 
01883 /// Move the boundary of scheduled code by one SUnit.
01884 void SchedBoundary::bumpNode(SUnit *SU) {
01885   // Update the reservation table.
01886   if (HazardRec->isEnabled()) {
01887     if (!isTop() && SU->isCall) {
01888       // Calls are scheduled with their preceding instructions. For bottom-up
01889       // scheduling, clear the pipeline state before emitting.
01890       HazardRec->Reset();
01891     }
01892     HazardRec->EmitInstruction(SU);
01893   }
01894   // checkHazard should prevent scheduling multiple instructions per cycle that
01895   // exceed the issue width.
01896   const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
01897   unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
01898   assert(
01899       (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
01900       "Cannot schedule this instruction's MicroOps in the current cycle.");
01901 
01902   unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
01903   DEBUG(dbgs() << "  Ready @" << ReadyCycle << "c\n");
01904 
01905   unsigned NextCycle = CurrCycle;
01906   switch (SchedModel->getMicroOpBufferSize()) {
01907   case 0:
01908     assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
01909     break;
01910   case 1:
01911     if (ReadyCycle > NextCycle) {
01912       NextCycle = ReadyCycle;
01913       DEBUG(dbgs() << "  *** Stall until: " << ReadyCycle << "\n");
01914     }
01915     break;
01916   default:
01917     // We don't currently model the OOO reorder buffer, so consider all
01918     // scheduled MOps to be "retired". We do loosely model in-order resource
01919     // latency. If this instruction uses an in-order resource, account for any
01920     // likely stall cycles.
01921     if (SU->isUnbuffered && ReadyCycle > NextCycle)
01922       NextCycle = ReadyCycle;
01923     break;
01924   }
01925   RetiredMOps += IncMOps;
01926 
01927   // Update resource counts and critical resource.
01928   if (SchedModel->hasInstrSchedModel()) {
01929     unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
01930     assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
01931     Rem->RemIssueCount -= DecRemIssue;
01932     if (ZoneCritResIdx) {
01933       // Scale scheduled micro-ops for comparing with the critical resource.
01934       unsigned ScaledMOps =
01935         RetiredMOps * SchedModel->getMicroOpFactor();
01936 
01937       // If scaled micro-ops are now more than the previous critical resource by
01938       // a full cycle, then micro-ops issue becomes critical.
01939       if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
01940           >= (int)SchedModel->getLatencyFactor()) {
01941         ZoneCritResIdx = 0;
01942         DEBUG(dbgs() << "  *** Critical resource NumMicroOps: "
01943               << ScaledMOps / SchedModel->getLatencyFactor() << "c\n");
01944       }
01945     }
01946     for (TargetSchedModel::ProcResIter
01947            PI = SchedModel->getWriteProcResBegin(SC),
01948            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
01949       unsigned RCycle =
01950         countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle);
01951       if (RCycle > NextCycle)
01952         NextCycle = RCycle;
01953     }
01954     if (SU->hasReservedResource) {
01955       // For reserved resources, record the highest cycle using the resource.
01956       // For top-down scheduling, this is the cycle in which we schedule this
01957       // instruction plus the number of cycles the operation reserves the
01958       // resource. For bottom-up, it is simply the instruction's cycle.
01959       for (TargetSchedModel::ProcResIter
01960              PI = SchedModel->getWriteProcResBegin(SC),
01961              PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
01962         unsigned PIdx = PI->ProcResourceIdx;
01963         if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
01964           if (isTop()) {
01965             ReservedCycles[PIdx] =
01966               std::max(getNextResourceCycle(PIdx, 0), NextCycle + PI->Cycles);
01967           }
01968           else
01969             ReservedCycles[PIdx] = NextCycle;
01970         }
01971       }
01972     }
01973   }
01974   // Update ExpectedLatency and DependentLatency.
01975   unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
01976   unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
01977   if (SU->getDepth() > TopLatency) {
01978     TopLatency = SU->getDepth();
01979     DEBUG(dbgs() << "  " << Available.getName()
01980           << " TopLatency SU(" << SU->NodeNum << ") " << TopLatency << "c\n");
01981   }
01982   if (SU->getHeight() > BotLatency) {
01983     BotLatency = SU->getHeight();
01984     DEBUG(dbgs() << "  " << Available.getName()
01985           << " BotLatency SU(" << SU->NodeNum << ") " << BotLatency << "c\n");
01986   }
01987   // If we stall for any reason, bump the cycle.
01988   if (NextCycle > CurrCycle) {
01989     bumpCycle(NextCycle);
01990   }
01991   else {
01992     // After updating ZoneCritResIdx and ExpectedLatency, check if we're
01993     // resource limited. If a stall occurred, bumpCycle does this.
01994     unsigned LFactor = SchedModel->getLatencyFactor();
01995     IsResourceLimited =
01996       (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
01997       > (int)LFactor;
01998   }
01999   // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
02000   // resets CurrMOps. Loop to handle instructions with more MOps than issue in
02001   // one cycle.  Since we commonly reach the max MOps here, opportunistically
02002   // bump the cycle to avoid uselessly checking everything in the readyQ.
02003   CurrMOps += IncMOps;
02004   while (CurrMOps >= SchedModel->getIssueWidth()) {
02005     DEBUG(dbgs() << "  *** Max MOps " << CurrMOps
02006           << " at cycle " << CurrCycle << '\n');
02007     bumpCycle(++NextCycle);
02008   }
02009   DEBUG(dumpScheduledState());
02010 }
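
// A sketch of the reserved-resource bookkeeping above (illustrative values):
// an instruction placed at NextCycle == 8 that occupies a BufferSize == 0
// resource for PI->Cycles == 3 records, top-down, ReservedCycles[PIdx] =
// 8 + 3 = 11 (the first cycle at which the resource is free again); bottom-up,
// only the instruction's own cycle (8) is recorded, and getNextResourceCycle()
// adds the operation's cycles when the next candidate is checked against it.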
02011 
02012 /// Release pending ready nodes into the available queue. This makes them
02013 /// visible to heuristics.
02014 void SchedBoundary::releasePending() {
02015   // If the available queue is empty, it is safe to reset MinReadyCycle.
02016   if (Available.empty())
02017     MinReadyCycle = UINT_MAX;
02018 
02019   // Check to see if any of the pending instructions are ready to issue.  If
02020   // so, add them to the available queue.
02021   bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
02022   for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
02023     SUnit *SU = *(Pending.begin()+i);
02024     unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
02025 
02026     if (ReadyCycle < MinReadyCycle)
02027       MinReadyCycle = ReadyCycle;
02028 
02029     if (!IsBuffered && ReadyCycle > CurrCycle)
02030       continue;
02031 
02032     if (checkHazard(SU))
02033       continue;
02034 
02035     Available.push(SU);
02036     Pending.remove(Pending.begin()+i);
02037     --i; --e;
02038   }
02039   DEBUG(if (!Pending.empty()) Pending.dump());
02040   CheckPending = false;
02041 }
02042 
02043 /// Remove SU from the ready set for this boundary.
02044 void SchedBoundary::removeReady(SUnit *SU) {
02045   if (Available.isInQueue(SU))
02046     Available.remove(Available.find(SU));
02047   else {
02048     assert(Pending.isInQueue(SU) && "bad ready count");
02049     Pending.remove(Pending.find(SU));
02050   }
02051 }
02052 
02053 /// If this queue only has one ready candidate, return it. As a side effect,
02054 /// defer any nodes that now hit a hazard, and advance the cycle until at least
02055 /// one node is ready. If multiple instructions are ready, return NULL.
02056 SUnit *SchedBoundary::pickOnlyChoice() {
02057   if (CheckPending)
02058     releasePending();
02059 
02060   if (CurrMOps > 0) {
02061     // Defer any ready instrs that now have a hazard.
02062     for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
02063       if (checkHazard(*I)) {
02064         Pending.push(*I);
02065         I = Available.remove(I);
02066         continue;
02067       }
02068       ++I;
02069     }
02070   }
02071   for (unsigned i = 0; Available.empty(); ++i) {
02072 //  FIXME: Re-enable assert once PR20057 is resolved.
02073 //    assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
02074 //           "permanent hazard");
02075     (void)i;
02076     bumpCycle(CurrCycle + 1);
02077     releasePending();
02078   }
02079   if (Available.size() == 1)
02080     return *Available.begin();
02081   return nullptr;
02082 }
02083 
02084 #ifndef NDEBUG
02085 // This is useful information to dump after bumpNode.
02086 // Note that the Queue contents are more useful before pickNodeFromQueue.
02087 void SchedBoundary::dumpScheduledState() {
02088   unsigned ResFactor;
02089   unsigned ResCount;
02090   if (ZoneCritResIdx) {
02091     ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
02092     ResCount = getResourceCount(ZoneCritResIdx);
02093   }
02094   else {
02095     ResFactor = SchedModel->getMicroOpFactor();
02096     ResCount = RetiredMOps * SchedModel->getMicroOpFactor();
02097   }
02098   unsigned LFactor = SchedModel->getLatencyFactor();
02099   dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
02100          << "  Retired: " << RetiredMOps;
02101   dbgs() << "\n  Executed: " << getExecutedCount() / LFactor << "c";
02102   dbgs() << "\n  Critical: " << ResCount / LFactor << "c, "
02103          << ResCount / ResFactor << " "
02104          << SchedModel->getResourceName(ZoneCritResIdx)
02105          << "\n  ExpectedLatency: " << ExpectedLatency << "c\n"
02106          << (IsResourceLimited ? "  - Resource" : "  - Latency")
02107          << " limited.\n";
02108 }
02109 #endif
02110 
02111 //===----------------------------------------------------------------------===//
02112 // GenericScheduler - Generic implementation of MachineSchedStrategy.
02113 //===----------------------------------------------------------------------===//
02114 
02115 void GenericSchedulerBase::SchedCandidate::
02116 initResourceDelta(const ScheduleDAGMI *DAG,
02117                   const TargetSchedModel *SchedModel) {
02118   if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
02119     return;
02120 
02121   const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
02122   for (TargetSchedModel::ProcResIter
02123          PI = SchedModel->getWriteProcResBegin(SC),
02124          PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
02125     if (PI->ProcResourceIdx == Policy.ReduceResIdx)
02126       ResDelta.CritResources += PI->Cycles;
02127     if (PI->ProcResourceIdx == Policy.DemandResIdx)
02128       ResDelta.DemandedResources += PI->Cycles;
02129   }
02130 }
02131 
02132 /// Set the CandPolicy for a scheduling zone given the current resources and
02133 /// latencies inside and outside the zone.
02134 void GenericSchedulerBase::setPolicy(CandPolicy &Policy,
02135                                      bool IsPostRA,
02136                                      SchedBoundary &CurrZone,
02137                                      SchedBoundary *OtherZone) {
02138   // Apply preemptive heuristics based on the total latency and resources
02139   // inside and outside this zone. Potential stalls should be considered before
02140   // following this policy.
02141 
02142   // Compute remaining latency. We need this both to determine whether the
02143   // overall schedule has become latency-limited and whether the instructions
02144   // outside this zone are resource or latency limited.
02145   //
02146   // The "dependent" latency is updated incrementally during scheduling as the
02147   // max height/depth of scheduled nodes minus the cycles since it was
02148   // scheduled:
02149   //   DLat = max (N.depth - (CurrCycle - N.ReadyCycle)) for N in Zone
02150   //
02151   // The "independent" latency is the max ready queue depth:
02152   //   ILat = max N.depth for N in Available|Pending
02153   //
02154   // RemainingLatency is the greater of independent and dependent latency.
02155   unsigned RemLatency = CurrZone.getDependentLatency();
02156   RemLatency = std::max(RemLatency,
02157                         CurrZone.findMaxLatency(CurrZone.Available.elements()));
02158   RemLatency = std::max(RemLatency,
02159                         CurrZone.findMaxLatency(CurrZone.Pending.elements()));
02160 
02161   // Compute the critical resource outside the zone.
02162   unsigned OtherCritIdx = 0;
02163   unsigned OtherCount =
02164     OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
02165 
02166   bool OtherResLimited = false;
02167   if (SchedModel->hasInstrSchedModel()) {
02168     unsigned LFactor = SchedModel->getLatencyFactor();
02169     OtherResLimited = (int)(OtherCount - (RemLatency * LFactor)) > (int)LFactor;
02170   }
02171   // Schedule aggressively for latency in PostRA mode. We don't check for
02172   // acyclic latency during PostRA, and highly out-of-order processors will
02173   // skip PostRA scheduling.
02174   if (!OtherResLimited) {
02175     if (IsPostRA || (RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath)) {
02176       Policy.ReduceLatency |= true;
02177       DEBUG(dbgs() << "  " << CurrZone.Available.getName()
02178             << " RemainingLatency " << RemLatency << " + "
02179             << CurrZone.getCurrCycle() << "c > CritPath "
02180             << Rem.CriticalPath << "\n");
02181     }
02182   }
02183   // If the same resource is limiting inside and outside the zone, do nothing.
02184   if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
02185     return;
02186 
02187   DEBUG(
02188     if (CurrZone.isResourceLimited()) {
02189       dbgs() << "  " << CurrZone.Available.getName() << " ResourceLimited: "
02190              << SchedModel->getResourceName(CurrZone.getZoneCritResIdx())
02191              << "\n";
02192     }
02193     if (OtherResLimited)
02194       dbgs() << "  RemainingLimit: "
02195              << SchedModel->getResourceName(OtherCritIdx) << "\n";
02196     if (!CurrZone.isResourceLimited() && !OtherResLimited)
02197       dbgs() << "  Latency limited both directions.\n");
02198 
02199   if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
02200     Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
02201 
02202   if (OtherResLimited)
02203     Policy.DemandResIdx = OtherCritIdx;
02204 }
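
// A numeric sketch of the two comparisons above (illustrative values): with
// LatencyFactor == 2, RemLatency == 12c, CurrCycle == 20c and CriticalPath ==
// 30c, 12 + 20 > 30 and ReduceLatency is set for this zone; and with
// OtherCount == 50 scaled units, 50 - (12 * 2) = 26 > 2, so the other zone is
// treated as resource limited and DemandResIdx is set to its critical
// resource.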
02205 
02206 #ifndef NDEBUG
02207 const char *GenericSchedulerBase::getReasonStr(
02208   GenericSchedulerBase::CandReason Reason) {
02209   switch (Reason) {
02210   case NoCand:         return "NOCAND    ";
02211   case PhysRegCopy:    return "PREG-COPY ";
02212   case RegExcess:      return "REG-EXCESS";
02213   case RegCritical:    return "REG-CRIT  ";
02214   case Stall:          return "STALL     ";
02215   case Cluster:        return "CLUSTER   ";
02216   case Weak:           return "WEAK      ";
02217   case RegMax:         return "REG-MAX   ";
02218   case ResourceReduce: return "RES-REDUCE";
02219   case ResourceDemand: return "RES-DEMAND";
02220   case TopDepthReduce: return "TOP-DEPTH ";
02221   case TopPathReduce:  return "TOP-PATH  ";
02222   case BotHeightReduce:return "BOT-HEIGHT";
02223   case BotPathReduce:  return "BOT-PATH  ";
02224   case NextDefUse:     return "DEF-USE   ";
02225   case NodeOrder:      return "ORDER     ";
02226   };
02227   llvm_unreachable("Unknown reason!");
02228 }
02229 
02230 void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
02231   PressureChange P;
02232   unsigned ResIdx = 0;
02233   unsigned Latency = 0;
02234   switch (Cand.Reason) {
02235   default:
02236     break;
02237   case RegExcess:
02238     P = Cand.RPDelta.Excess;
02239     break;
02240   case RegCritical:
02241     P = Cand.RPDelta.CriticalMax;
02242     break;
02243   case RegMax:
02244     P = Cand.RPDelta.CurrentMax;
02245     break;
02246   case ResourceReduce:
02247     ResIdx = Cand.Policy.ReduceResIdx;
02248     break;
02249   case ResourceDemand:
02250     ResIdx = Cand.Policy.DemandResIdx;
02251     break;
02252   case TopDepthReduce:
02253     Latency = Cand.SU->getDepth();
02254     break;
02255   case TopPathReduce:
02256     Latency = Cand.SU->getHeight();
02257     break;
02258   case BotHeightReduce:
02259     Latency = Cand.SU->getHeight();
02260     break;
02261   case BotPathReduce:
02262     Latency = Cand.SU->getDepth();
02263     break;
02264   }
02265   dbgs() << "  SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
02266   if (P.isValid())
02267     dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
02268            << ":" << P.getUnitInc() << " ";
02269   else
02270     dbgs() << "      ";
02271   if (ResIdx)
02272     dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
02273   else
02274     dbgs() << "         ";
02275   if (Latency)
02276     dbgs() << " " << Latency << " cycles ";
02277   else
02278     dbgs() << "          ";
02279   dbgs() << '\n';
02280 }
02281 #endif
02282 
02283 /// Return true if this heuristic determines order.
02284 static bool tryLess(int TryVal, int CandVal,
02285                     GenericSchedulerBase::SchedCandidate &TryCand,
02286                     GenericSchedulerBase::SchedCandidate &Cand,
02287                     GenericSchedulerBase::CandReason Reason) {
02288   if (TryVal < CandVal) {
02289     TryCand.Reason = Reason;
02290     return true;
02291   }
02292   if (TryVal > CandVal) {
02293     if (Cand.Reason > Reason)
02294       Cand.Reason = Reason;
02295     return true;
02296   }
02297   Cand.setRepeat(Reason);
02298   return false;
02299 }
02300 
02301 static bool tryGreater(int TryVal, int CandVal,
02302                        GenericSchedulerBase::SchedCandidate &TryCand,
02303                        GenericSchedulerBase::SchedCandidate &Cand,
02304                        GenericSchedulerBase::CandReason Reason) {
02305   if (TryVal > CandVal) {
02306     TryCand.Reason = Reason;
02307     return true;
02308   }
02309   if (TryVal < CandVal) {
02310     if (Cand.Reason > Reason)
02311       Cand.Reason = Reason;
02312     return true;
02313   }
02314   Cand.setRepeat(Reason);
02315   return false;
02316 }
02317 
02318 static bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
02319                        GenericSchedulerBase::SchedCandidate &Cand,
02320                        SchedBoundary &Zone) {
02321   if (Zone.isTop()) {
02322     if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
02323       if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
02324                   TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
02325         return true;
02326     }
02327     if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
02328                    TryCand, Cand, GenericSchedulerBase::TopPathReduce))
02329       return true;
02330   }
02331   else {
02332     if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
02333       if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
02334                   TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
02335         return true;
02336     }
02337     if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
02338                    TryCand, Cand, GenericSchedulerBase::BotPathReduce))
02339       return true;
02340   }
02341   return false;
02342 }
02343 
02344 static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand,
02345                       bool IsTop) {
02346   DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
02347         << GenericSchedulerBase::getReasonStr(Cand.Reason) << '\n');
02348 }
02349 
02350 void GenericScheduler::initialize(ScheduleDAGMI *dag) {
02351   assert(dag->hasVRegLiveness() &&
02352          "(PreRA)GenericScheduler needs vreg liveness");
02353   DAG = static_cast<ScheduleDAGMILive*>(dag);
02354   SchedModel = DAG->getSchedModel();
02355   TRI = DAG->TRI;
02356 
02357   Rem.init(DAG, SchedModel);
02358   Top.init(DAG, SchedModel, &Rem);
02359   Bot.init(DAG, SchedModel, &Rem);
02360 
02361   // Initialize resource counts.
02362 
02363   // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
02364   // are disabled, then these HazardRecs will be disabled.
02365   const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
02366   const TargetMachine &TM = DAG->MF.getTarget();
02367   if (!Top.HazardRec) {
02368     Top.HazardRec =
02369         TM.getSubtargetImpl()->getInstrInfo()->CreateTargetMIHazardRecognizer(
02370             Itin, DAG);
02371   }
02372   if (!Bot.HazardRec) {
02373     Bot.HazardRec =
02374         TM.getSubtargetImpl()->getInstrInfo()->CreateTargetMIHazardRecognizer(
02375             Itin, DAG);
02376   }
02377 }
02378 
02379 /// Initialize the per-region scheduling policy.
02380 void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
02381                                   MachineBasicBlock::iterator End,
02382                                   unsigned NumRegionInstrs) {
02383   const TargetMachine &TM = Context->MF->getTarget();
02384   const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
02385 
02386   // Avoid setting up the register pressure tracker for small regions to save
02387   // compile time. As a rough heuristic, only track pressure when the number of
02388   // schedulable instructions exceeds half the integer register file.
02389   RegionPolicy.ShouldTrackPressure = true;
02390   for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) {
02391     MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
02392     if (TLI->isTypeLegal(LegalIntVT)) {
02393       unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
02394         TLI->getRegClassFor(LegalIntVT));
02395       RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
02396     }
02397   }
02398 
02399   // For generic targets, default to bottom-up scheduling: it is simpler, and more
02400   // compile-time optimizations have been implemented for that direction.
02401   RegionPolicy.OnlyBottomUp = true;
02402 
02403   // Allow the subtarget to override default policy.
02404   const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
02405   ST.overrideSchedPolicy(RegionPolicy, Begin, End, NumRegionInstrs);
02406 
02407   // After subtarget overrides, apply command line options.
02408   if (!EnableRegPressure)
02409     RegionPolicy.ShouldTrackPressure = false;
02410 
02411   // Check whether -misched-topdown/bottomup force or unforce the scheduling direction.
02412   // e.g. -misched-bottomup=false allows scheduling in both directions.
02413   assert((!ForceTopDown || !ForceBottomUp) &&
02414          "-misched-topdown incompatible with -misched-bottomup");
02415   if (ForceBottomUp.getNumOccurrences() > 0) {
02416     RegionPolicy.OnlyBottomUp = ForceBottomUp;
02417     if (RegionPolicy.OnlyBottomUp)
02418       RegionPolicy.OnlyTopDown = false;
02419   }
02420   if (ForceTopDown.getNumOccurrences() > 0) {
02421     RegionPolicy.OnlyTopDown = ForceTopDown;
02422     if (RegionPolicy.OnlyTopDown)
02423       RegionPolicy.OnlyBottomUp = false;
02424   }
02425 }
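
// A standalone sketch of the pressure-tracking threshold above (illustrative
// only, not part of this file):
//
//   bool shouldTrackPressure(unsigned NumRegionInstrs, unsigned NIntRegs) {
//     // Only pay for the register pressure tracker when the region is large
//     // relative to the integer register file.
//     return NumRegionInstrs > NIntRegs / 2;
//   }
//   // With 32 allocatable integer registers, a 10-instruction region skips
//   // pressure tracking while a 40-instruction region enables it.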
02426 
02427 /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
02428 /// critical path by more cycles than it takes to drain the instruction buffer.
02429 /// We estimate an upper bound on in-flight instructions as:
02430 ///
02431 /// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
02432 /// InFlightIterations = AcyclicPath / CyclesPerIteration
02433 /// InFlightResources = InFlightIterations * LoopResources
02434 ///
02435 /// TODO: Check execution resources in addition to IssueCount.
02436 void GenericScheduler::checkAcyclicLatency() {
02437   if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
02438     return;
02439 
02440   // Scaled number of cycles per loop iteration.
02441   unsigned IterCount =
02442     std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
02443              Rem.RemIssueCount);
02444   // Scaled acyclic critical path.
02445   unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
02446   // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
02447   unsigned InFlightCount =
02448     (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
02449   unsigned BufferLimit =
02450     SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
02451 
02452   Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
02453 
02454   DEBUG(dbgs() << "IssueCycles="
02455         << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
02456         << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
02457         << "c NumIters=" << (AcyclicCount + IterCount-1) / IterCount
02458         << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
02459         << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
02460         if (Rem.IsAcyclicLatencyLimited)
02461           dbgs() << "  ACYCLIC LATENCY LIMIT\n");
02462 }
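
// A numeric sketch of the comparison above (all values illustrative and
// already scaled by the model's factors): with CriticalPath == 40c,
// CyclicCritPath == 10c, RemIssueCount == 24, LatencyFactor == 2,
// MicroOpFactor == 1 and MicroOpBufferSize == 16:
//
//   IterCount     = max(10 * 2, 24)      = 24
//   AcyclicCount  = 40 * 2               = 80
//   InFlightCount = (80 * 24 + 23) / 24  = 80
//   BufferLimit   = 16 * 1               = 16
//
// InFlightCount (80) exceeds BufferLimit (16), so the loop is marked acyclic
// latency limited and the scheduler will aggressively hide latency.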
02463 
02464 void GenericScheduler::registerRoots() {
02465   Rem.CriticalPath = DAG->ExitSU.getDepth();
02466 
02467   // Some roots may not feed into ExitSU. Check all of them in case.
02468   for (std::vector<SUnit*>::const_iterator
02469          I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {
02470     if ((*I)->getDepth() > Rem.CriticalPath)
02471       Rem.CriticalPath = (*I)->getDepth();
02472   }
02473   DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
02474   if (DumpCriticalPathLength) {
02475     errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
02476   }
02477 
02478   if (EnableCyclicPath) {
02479     Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
02480     checkAcyclicLatency();
02481   }
02482 }
02483 
02484 static bool tryPressure(const PressureChange &TryP,
02485                         const PressureChange &CandP,
02486                         GenericSchedulerBase::SchedCandidate &TryCand,
02487                         GenericSchedulerBase::SchedCandidate &Cand,
02488                         GenericSchedulerBase::CandReason Reason) {
02489   int TryRank = TryP.getPSetOrMax();
02490   int CandRank = CandP.getPSetOrMax();
02491   // If both candidates affect the same set, go with the smallest increase.
02492   if (TryRank == CandRank) {
02493     return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
02494                    Reason);
02495   }
02496   // If one candidate decreases and the other increases, go with it.
02497   // Invalid candidates have UnitInc==0.
02498   if (tryLess(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
02499               Reason)) {
02500     return true;
02501   }
02502   // If the candidates are decreasing pressure, reverse priority.
02503   if (TryP.getUnitInc() < 0)
02504     std::swap(TryRank, CandRank);
02505   return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
02506 }
02507 
02508 static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
02509   return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
02510 }
02511 
02512 /// Minimize physical register live ranges. Regalloc wants them adjacent to
02513 /// their physreg def/use.
02514 ///
02515 /// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
02516 /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
02517 /// with the operation that produces or consumes the physreg. We'll do this when
02518 /// regalloc has support for parallel copies.
02519 static int biasPhysRegCopy(const SUnit *SU, bool isTop) {
02520   const MachineInstr *MI = SU->getInstr();
02521   if (!MI->isCopy())
02522     return 0;
02523 
02524   unsigned ScheduledOper = isTop ? 1 : 0;
02525   unsigned UnscheduledOper = isTop ? 0 : 1;
02526   // If we have already scheduled the physreg produce/consumer, immediately
02527   // schedule the copy.
02528   if (TargetRegisterInfo::isPhysicalRegister(
02529         MI->getOperand(ScheduledOper).getReg()))
02530     return 1;
02531   // If the physreg is at the boundary, defer it. Otherwise schedule it
02532   // immediately to free the dependent. We can hoist the copy later.
02533   bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
02534   if (TargetRegisterInfo::isPhysicalRegister(
02535         MI->getOperand(UnscheduledOper).getReg()))
02536     return AtBoundary ? -1 : 1;
02537   return 0;
02538 }
02539 
02540 /// Apply a set of heuristics to a new candidate. Heuristics are currently
02541 /// hierarchical. This may be more efficient than a graduated cost model because
02542 /// we don't need to evaluate all aspects of the model for each node in the
02543 /// queue. But it's really done to make the heuristics easier to debug and
02544 /// statistically analyze.
02545 ///
02546 /// \param Cand provides the policy and current best candidate.
02547 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
02548 /// \param Zone describes the scheduled zone that we are extending.
02549 /// \param RPTracker describes reg pressure within the scheduled zone.
02550 /// \param TempTracker is a scratch pressure tracker to reuse in queries.
02551 void GenericScheduler::tryCandidate(SchedCandidate &Cand,
02552                                     SchedCandidate &TryCand,
02553                                     SchedBoundary &Zone,
02554                                     const RegPressureTracker &RPTracker,
02555                                     RegPressureTracker &TempTracker) {
02556 
02557   if (DAG->isTrackingPressure()) {
02558     // Always initialize TryCand's RPDelta.
02559     if (Zone.isTop()) {
02560       TempTracker.getMaxDownwardPressureDelta(
02561         TryCand.SU->getInstr(),
02562         TryCand.RPDelta,
02563         DAG->getRegionCriticalPSets(),
02564         DAG->getRegPressure().MaxSetPressure);
02565     }
02566     else {
02567       if (VerifyScheduling) {
02568         TempTracker.getMaxUpwardPressureDelta(
02569           TryCand.SU->getInstr(),
02570           &DAG->getPressureDiff(TryCand.SU),
02571           TryCand.RPDelta,
02572           DAG->getRegionCriticalPSets(),
02573           DAG->getRegPressure().MaxSetPressure);
02574       }
02575       else {
02576         RPTracker.getUpwardPressureDelta(
02577           TryCand.SU->getInstr(),
02578           DAG->getPressureDiff(TryCand.SU),
02579           TryCand.RPDelta,
02580           DAG->getRegionCriticalPSets(),
02581           DAG->getRegPressure().MaxSetPressure);
02582       }
02583     }
02584   }
02585   DEBUG(if (TryCand.RPDelta.Excess.isValid())
02586           dbgs() << "  SU(" << TryCand.SU->NodeNum << ") "
02587                  << TRI->getRegPressureSetName(TryCand.RPDelta.Excess.getPSet())
02588                  << ":" << TryCand.RPDelta.Excess.getUnitInc() << "\n");
02589 
02590   // Initialize the candidate if needed.
02591   if (!Cand.isValid()) {
02592     TryCand.Reason = NodeOrder;
02593     return;
02594   }
02595 
02596   if (tryGreater(biasPhysRegCopy(TryCand.SU, Zone.isTop()),
02597                  biasPhysRegCopy(Cand.SU, Zone.isTop()),
02598                  TryCand, Cand, PhysRegCopy))
02599     return;
02600 
02601   // Avoid exceeding the target's limit. If signed PSetID is negative, it is
02602   // invalid; convert it to INT_MAX to give it lowest priority.
02603   if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
02604                                                Cand.RPDelta.Excess,
02605                                                TryCand, Cand, RegExcess))
02606     return;
02607 
02608   // Avoid increasing the max critical pressure in the scheduled region.
02609   if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
02610                                                Cand.RPDelta.CriticalMax,
02611                                                TryCand, Cand, RegCritical))
02612     return;
02613 
02614   // For loops that are acyclic path limited, aggressively schedule for latency.
02615   // This can result in very long dependence chains scheduled in sequence, so
02616   // once every cycle (when CurrMOps == 0), switch to normal heuristics.
02617   if (Rem.IsAcyclicLatencyLimited && !Zone.getCurrMOps()
02618       && tryLatency(TryCand, Cand, Zone))
02619     return;
02620 
02621   // Prioritize instructions that read unbuffered resources by stall cycles.
02622   if (tryLess(Zone.getLatencyStallCycles(TryCand.SU),
02623               Zone.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
02624     return;
02625 
02626   // Keep clustered nodes together to encourage downstream peephole
02627   // optimizations which may reduce resource requirements.
02628   //
02629   // This is a best effort to set things up for a post-RA pass. Optimizations
02630   // like generating loads of multiple registers should ideally be done within
02631   // the scheduler pass by combining the loads during DAG postprocessing.
02632   const SUnit *NextClusterSU =
02633     Zone.isTop() ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
02634   if (tryGreater(TryCand.SU == NextClusterSU, Cand.SU == NextClusterSU,
02635                  TryCand, Cand, Cluster))
02636     return;
02637 
02638   // Weak edges are for clustering and other constraints.
02639   if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()),
02640               getWeakLeft(Cand.SU, Zone.isTop()),
02641               TryCand, Cand, Weak)) {
02642     return;
02643   }
02644   // Avoid increasing the max pressure of the entire region.
02645   if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
02646                                                Cand.RPDelta.CurrentMax,
02647                                                TryCand, Cand, RegMax))
02648     return;
02649 
02650   // Avoid critical resource consumption and balance the schedule.
02651   TryCand.initResourceDelta(DAG, SchedModel);
02652   if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
02653               TryCand, Cand, ResourceReduce))
02654     return;
02655   if (tryGreater(TryCand.ResDelta.DemandedResources,
02656                  Cand.ResDelta.DemandedResources,
02657                  TryCand, Cand, ResourceDemand))
02658     return;
02659 
02660   // Avoid serializing long latency dependence chains.
02661   // For acyclic path limited loops, latency was already checked above.
02662   if (Cand.Policy.ReduceLatency && !Rem.IsAcyclicLatencyLimited
02663       && tryLatency(TryCand, Cand, Zone)) {
02664     return;
02665   }
02666 
02667   // Prefer immediate defs/users of the last scheduled instruction. This is a
02668   // local pressure avoidance strategy that also makes the machine code
02669   // readable.
02670   if (tryGreater(Zone.isNextSU(TryCand.SU), Zone.isNextSU(Cand.SU),
02671                  TryCand, Cand, NextDefUse))
02672     return;
02673 
02674   // Fall through to original instruction order.
02675   if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
02676       || (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
02677     TryCand.Reason = NodeOrder;
02678   }
02679 }
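// Editor's note: a minimal sketch of the tie-breaking pattern used throughout
// tryCandidate() above. Each tryLess/tryGreater helper compares one metric,
// records the deciding reason on the candidate that wins, and returns true as
// soon as the metric differs, so the cascade stops at the first heuristic that
// can decide. This is a simplified illustration, not the real helpers (which
// are defined earlier in this file and also track repeated reasons).
//
//   static bool tryLessSketch(int TryVal, int CandVal,
//                             GenericSchedulerBase::SchedCandidate &TryCand,
//                             GenericSchedulerBase::SchedCandidate &Cand,
//                             GenericSchedulerBase::CandReason Reason) {
//     if (TryVal < CandVal) {
//       TryCand.Reason = Reason;  // TryCand wins on this metric.
//       return true;
//     }
//     if (TryVal > CandVal)
//       return true;              // Cand wins; keep its existing reason.
//     return false;               // Tie: fall through to the next heuristic.
//   }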
02680 
02681 /// Pick the best candidate from the queue.
02682 ///
02683 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
02684 /// DAG building. To adjust for the current scheduling location we need to
02685 /// maintain the number of vreg uses remaining to be top-scheduled.
02686 void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
02687                                          const RegPressureTracker &RPTracker,
02688                                          SchedCandidate &Cand) {
02689   ReadyQueue &Q = Zone.Available;
02690 
02691   DEBUG(Q.dump());
02692 
02693   // getMaxPressureDelta temporarily modifies the tracker.
02694   RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
02695 
02696   for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
02697 
02698     SchedCandidate TryCand(Cand.Policy);
02699     TryCand.SU = *I;
02700     tryCandidate(Cand, TryCand, Zone, RPTracker, TempTracker);
02701     if (TryCand.Reason != NoCand) {
02702       // Initialize resource delta if needed in case future heuristics query it.
02703       if (TryCand.ResDelta == SchedResourceDelta())
02704         TryCand.initResourceDelta(DAG, SchedModel);
02705       Cand.setBest(TryCand);
02706       DEBUG(traceCandidate(Cand));
02707     }
02708   }
02709 }
02710 
02711 /// Pick the best candidate node from either the top or bottom queue.
02712 SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
02713   // Schedule as far as possible in the direction of no choice. This is most
02714   // efficient, but also provides the best heuristics for CriticalPSets.
02715   if (SUnit *SU = Bot.pickOnlyChoice()) {
02716     IsTopNode = false;
02717     DEBUG(dbgs() << "Pick Bot NOCAND\n");
02718     return SU;
02719   }
02720   if (SUnit *SU = Top.pickOnlyChoice()) {
02721     IsTopNode = true;
02722     DEBUG(dbgs() << "Pick Top NOCAND\n");
02723     return SU;
02724   }
02725   CandPolicy NoPolicy;
02726   SchedCandidate BotCand(NoPolicy);
02727   SchedCandidate TopCand(NoPolicy);
02728   // Set the bottom-up policy based on the state of the current bottom zone and
02729   // the instructions outside the zone, including the top zone.
02730   setPolicy(BotCand.Policy, /*IsPostRA=*/false, Bot, &Top);
02731   // Set the top-down policy based on the state of the current top zone and
02732   // the instructions outside the zone, including the bottom zone.
02733   setPolicy(TopCand.Policy, /*IsPostRA=*/false, Top, &Bot);
02734 
02735   // Prefer bottom scheduling when heuristics are silent.
02736   pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
02737   assert(BotCand.Reason != NoCand && "failed to find the first candidate");
02738 
02739   // If either Q has a single candidate that provides the least increase in
02740   // Excess pressure, we can immediately schedule from that Q.
02741   //
02742   // RegionCriticalPSets summarizes the pressure within the scheduled region and
02743   // affects picking from either Q. If scheduling in one direction must
02744   // increase pressure for one of the excess PSets, then schedule in that
02745   // direction first to provide more freedom in the other direction.
02746   if ((BotCand.Reason == RegExcess && !BotCand.isRepeat(RegExcess))
02747       || (BotCand.Reason == RegCritical
02748           && !BotCand.isRepeat(RegCritical)))
02749   {
02750     IsTopNode = false;
02751     tracePick(BotCand, IsTopNode);
02752     return BotCand.SU;
02753   }
02754   // Check if the top Q has a better candidate.
02755   pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
02756   assert(TopCand.Reason != NoCand && "failed to find the first candidate");
02757 
02758   // Choose the queue with the most important (lowest enum) reason.
02759   if (TopCand.Reason < BotCand.Reason) {
02760     IsTopNode = true;
02761     tracePick(TopCand, IsTopNode);
02762     return TopCand.SU;
02763   }
02764   // Otherwise prefer the bottom candidate, in node order if all else failed.
02765   IsTopNode = false;
02766   tracePick(BotCand, IsTopNode);
02767   return BotCand.SU;
02768 }
02769 
02770 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
02771 SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
02772   if (DAG->top() == DAG->bottom()) {
02773     assert(Top.Available.empty() && Top.Pending.empty() &&
02774            Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
02775     return nullptr;
02776   }
02777   SUnit *SU;
02778   do {
02779     if (RegionPolicy.OnlyTopDown) {
02780       SU = Top.pickOnlyChoice();
02781       if (!SU) {
02782         CandPolicy NoPolicy;
02783         SchedCandidate TopCand(NoPolicy);
02784         pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
02785         assert(TopCand.Reason != NoCand && "failed to find a candidate");
02786         tracePick(TopCand, true);
02787         SU = TopCand.SU;
02788       }
02789       IsTopNode = true;
02790     }
02791     else if (RegionPolicy.OnlyBottomUp) {
02792       SU = Bot.pickOnlyChoice();
02793       if (!SU) {
02794         CandPolicy NoPolicy;
02795         SchedCandidate BotCand(NoPolicy);
02796         pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
02797         assert(BotCand.Reason != NoCand && "failed to find a candidate");
02798         tracePick(BotCand, false);
02799         SU = BotCand.SU;
02800       }
02801       IsTopNode = false;
02802     }
02803     else {
02804       SU = pickNodeBidirectional(IsTopNode);
02805     }
02806   } while (SU->isScheduled);
02807 
02808   if (SU->isTopReady())
02809     Top.removeReady(SU);
02810   if (SU->isBottomReady())
02811     Bot.removeReady(SU);
02812 
02813   DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
02814   return SU;
02815 }
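// Editor's note: a rough sketch of how this strategy is driven. The scheduler
// DAG repeatedly asks the strategy for a node, moves the instruction, and then
// lets the strategy update its zones; the exact ordering and bookkeeping live
// in ScheduleDAGMI::schedule() and are simplified here.
//
//   bool IsTopNode = false;
//   while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
//     // ...move SU's MachineInstr to the top or bottom of the region...
//     SchedImpl->schedNode(SU, IsTopNode);
//     // ...release newly ready successors/predecessors into the queues...
//   }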
02816 
02817 void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
02818 
02819   MachineBasicBlock::iterator InsertPos = SU->getInstr();
02820   if (!isTop)
02821     ++InsertPos;
02822   SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
02823 
02824   // Find already scheduled copies with a single physreg dependence and move
02825   // them next to the scheduled instruction (above if top, below if bottom).
02826   for (SmallVectorImpl<SDep>::iterator I = Deps.begin(), E = Deps.end();
02827        I != E; ++I) {
02828     if (I->getKind() != SDep::Data || !TRI->isPhysicalRegister(I->getReg()))
02829       continue;
02830     SUnit *DepSU = I->getSUnit();
02831     if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
02832       continue;
02833     MachineInstr *Copy = DepSU->getInstr();
02834     if (!Copy->isCopy())
02835       continue;
02836     DEBUG(dbgs() << "  Rescheduling physreg copy ";
02837           I->getSUnit()->dump(DAG));
02838     DAG->moveInstruction(Copy, InsertPos);
02839   }
02840 }
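// Editor's note (hypothetical example, not from the original source): when the
// just-scheduled top-down node is a call taking an argument in a physreg, a
// previously scheduled single-use copy feeding it is pulled back next to it:
//
//   Before:                          After:
//     %EDI = COPY %vreg1               <other scheduled instructions>
//     <other scheduled instructions>   %EDI = COPY %vreg1
//     CALL @foo                        CALL @foo
//
// Keeping the copy adjacent minimizes the physreg live range, matching the
// intent of the biasPhysRegCopy heuristic mentioned above.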
02841 
02842 /// Update the scheduler's state after scheduling a node. This is the same node
02843 /// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
02844 /// update its state based on the current cycle before MachineSchedStrategy
02845 /// does.
02846 ///
02847 /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
02848 /// them here. See comments in biasPhysRegCopy.
02849 void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
02850   if (IsTopNode) {
02851     SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
02852     Top.bumpNode(SU);
02853     if (SU->hasPhysRegUses)
02854       reschedulePhysRegCopies(SU, true);
02855   }
02856   else {
02857     SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
02858     Bot.bumpNode(SU);
02859     if (SU->hasPhysRegDefs)
02860       reschedulePhysRegCopies(SU, false);
02861   }
02862 }
02863 
02864 /// Create the standard converging machine scheduler. This will be used as the
02865 /// default scheduler if the target does not set a default.
02866 static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C) {
02867   ScheduleDAGMILive *DAG = new ScheduleDAGMILive(C, make_unique<GenericScheduler>(C));
02868   // Register DAG post-processors.
02869   //
02870   // FIXME: extend the mutation API to allow earlier mutations to instantiate
02871   // data and pass it to later mutations. Have a single mutation that gathers
02872   // the interesting nodes in one pass.
02873   DAG->addMutation(make_unique<CopyConstrain>(DAG->TII, DAG->TRI));
02874   if (EnableLoadCluster && DAG->TII->enableClusterLoads())
02875     DAG->addMutation(make_unique<LoadClusterMutation>(DAG->TII, DAG->TRI));
02876   if (EnableMacroFusion)
02877     DAG->addMutation(make_unique<MacroFusion>(DAG->TII));
02878   return DAG;
02879 }
02880 
02881 static MachineSchedRegistry
02882 GenericSchedRegistry("converge", "Standard converging scheduler.",
02883                      createGenericSchedLive);
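// Editor's note: a hedged sketch of how another strategy can be plugged in the
// same way. The registry name is what the -misched=<name> option selects; the
// MyStrategy class and createMyStrategySched factory below are hypothetical.
//
//   static ScheduleDAGInstrs *createMyStrategySched(MachineSchedContext *C) {
//     return new ScheduleDAGMILive(C, make_unique<MyStrategy>(C));
//   }
//   static MachineSchedRegistry
//   MySchedRegistry("my-strategy", "Example custom converging scheduler.",
//                   createMyStrategySched);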
02884 
02885 //===----------------------------------------------------------------------===//
02886 // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
02887 //===----------------------------------------------------------------------===//
02888 
02889 void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
02890   DAG = Dag;
02891   SchedModel = DAG->getSchedModel();
02892   TRI = DAG->TRI;
02893 
02894   Rem.init(DAG, SchedModel);
02895   Top.init(DAG, SchedModel, &Rem);
02896   BotRoots.clear();
02897 
02898   // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
02899   // or are disabled, then these HazardRecs will be disabled.
02900   const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
02901   const TargetMachine &TM = DAG->MF.getTarget();
02902   if (!Top.HazardRec) {
02903     Top.HazardRec =
02904         TM.getSubtargetImpl()->getInstrInfo()->CreateTargetMIHazardRecognizer(
02905             Itin, DAG);
02906   }
02907 }
02908 
02909 
02910 void PostGenericScheduler::registerRoots() {
02911   Rem.CriticalPath = DAG->ExitSU.getDepth();
02912 
02913   // Some roots may not feed into ExitSU. Check all of them in case.
02914   for (SmallVectorImpl<SUnit*>::const_iterator
02915          I = BotRoots.begin(), E = BotRoots.end(); I != E; ++I) {
02916     if ((*I)->getDepth() > Rem.CriticalPath)
02917       Rem.CriticalPath = (*I)->getDepth();
02918   }
02919   DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
02920   if (DumpCriticalPathLength) {
02921     errs() << "Critical Path(PGS-RR): " << Rem.CriticalPath << "\n";
02922   }
02923 }
02924 
02925 /// Apply a set of heuristics to a new candidate for PostRA scheduling.
02926 ///
02927 /// \param Cand provides the policy and current best candidate.
02928 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
02929 void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
02930                                         SchedCandidate &TryCand) {
02931 
02932   // Initialize the candidate if needed.
02933   if (!Cand.isValid()) {
02934     TryCand.Reason = NodeOrder;
02935     return;
02936   }
02937 
02938   // Prioritize instructions that read unbuffered resources by stall cycles.
02939   if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
02940               Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
02941     return;
02942 
02943   // Avoid critical resource consumption and balance the schedule.
02944   if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
02945               TryCand, Cand, ResourceReduce))
02946     return;
02947   if (tryGreater(TryCand.ResDelta.DemandedResources,
02948                  Cand.ResDelta.DemandedResources,
02949                  TryCand, Cand, ResourceDemand))
02950     return;
02951 
02952   // Avoid serializing long latency dependence chains.
02953   if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
02954     return;
02955   }
02956 
02957   // Fall through to original instruction order.
02958   if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
02959     TryCand.Reason = NodeOrder;
02960 }
02961 
02962 void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
02963   ReadyQueue &Q = Top.Available;
02964 
02965   DEBUG(Q.dump());
02966 
02967   for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
02968     SchedCandidate TryCand(Cand.Policy);
02969     TryCand.SU = *I;
02970     TryCand.initResourceDelta(DAG, SchedModel);
02971     tryCandidate(Cand, TryCand);
02972     if (TryCand.Reason != NoCand) {
02973       Cand.setBest(TryCand);
02974       DEBUG(traceCandidate(Cand));
02975     }
02976   }
02977 }
02978 
02979 /// Pick the next node to schedule.
02980 SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
02981   if (DAG->top() == DAG->bottom()) {
02982     assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
02983     return nullptr;
02984   }
02985   SUnit *SU;
02986   do {
02987     SU = Top.pickOnlyChoice();
02988     if (!SU) {
02989       CandPolicy NoPolicy;
02990       SchedCandidate TopCand(NoPolicy);
02991       // Set the top-down policy based on the state of the current top zone and
02992       // the instructions outside the zone, including the bottom zone.
02993       setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
02994       pickNodeFromQueue(TopCand);
02995       assert(TopCand.Reason != NoCand && "failed to find a candidate");
02996       tracePick(TopCand, true);
02997       SU = TopCand.SU;
02998     }
02999   } while (SU->isScheduled);
03000 
03001   IsTopNode = true;
03002   Top.removeReady(SU);
03003 
03004   DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
03005   return SU;
03006 }
03007 
03008 /// Called after ScheduleDAGMI has scheduled an instruction and updated
03009 /// scheduled/remaining flags in the DAG nodes.
03010 void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
03011   SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
03012   Top.bumpNode(SU);
03013 }
03014 
03015 /// Create a generic scheduler with no vreg liveness or DAG mutation passes.
03016 static ScheduleDAGInstrs *createGenericSchedPostRA(MachineSchedContext *C) {
03017   return new ScheduleDAGMI(C, make_unique<PostGenericScheduler>(C), /*IsPostRA=*/true);
03018 }
03019 
03020 //===----------------------------------------------------------------------===//
03021 // ILP Scheduler. Currently for experimental analysis of heuristics.
03022 //===----------------------------------------------------------------------===//
03023 
03024 namespace {
03025 /// \brief Order nodes by the ILP metric.
03026 struct ILPOrder {
03027   const SchedDFSResult *DFSResult;
03028   const BitVector *ScheduledTrees;
03029   bool MaximizeILP;
03030 
03031   ILPOrder(bool MaxILP)
03032     : DFSResult(nullptr), ScheduledTrees(nullptr), MaximizeILP(MaxILP) {}
03033 
03034   /// \brief Apply a less-than relation on node priority.
03035   ///
03036   /// (Return true if A comes after B in the Q.)
03037   bool operator()(const SUnit *A, const SUnit *B) const {
03038     unsigned SchedTreeA = DFSResult->getSubtreeID(A);
03039     unsigned SchedTreeB = DFSResult->getSubtreeID(B);
03040     if (SchedTreeA != SchedTreeB) {
03041       // Unscheduled trees have lower priority.
03042       if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
03043         return ScheduledTrees->test(SchedTreeB);
03044 
03045       // Trees with shallower connections have lower priority.
03046       if (DFSResult->getSubtreeLevel(SchedTreeA)
03047           != DFSResult->getSubtreeLevel(SchedTreeB)) {
03048         return DFSResult->getSubtreeLevel(SchedTreeA)
03049           < DFSResult->getSubtreeLevel(SchedTreeB);
03050       }
03051     }
03052     if (MaximizeILP)
03053       return DFSResult->getILP(A) < DFSResult->getILP(B);
03054     else
03055       return DFSResult->getILP(A) > DFSResult->getILP(B);
03056   }
03057 };
03058 
03059 /// \brief Schedule based on the ILP metric.
03060 class ILPScheduler : public MachineSchedStrategy {
03061   ScheduleDAGMILive *DAG;
03062   ILPOrder Cmp;
03063 
03064   std::vector<SUnit*> ReadyQ;
03065 public:
03066   ILPScheduler(bool MaximizeILP): DAG(nullptr), Cmp(MaximizeILP) {}
03067 
03068   void initialize(ScheduleDAGMI *dag) override {
03069     assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
03070     DAG = static_cast<ScheduleDAGMILive*>(dag);
03071     DAG->computeDFSResult();
03072     Cmp.DFSResult = DAG->getDFSResult();
03073     Cmp.ScheduledTrees = &DAG->getScheduledTrees();
03074     ReadyQ.clear();
03075   }
03076 
03077   void registerRoots() override {
03078     // Restore the heap in ReadyQ with the updated DFS results.
03079     std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
03080   }
03081 
03082   /// Implement MachineSchedStrategy interface.
03083   /// -----------------------------------------
03084 
03085   /// Callback to select the highest priority node from the ready Q.
03086   SUnit *pickNode(bool &IsTopNode) override {
03087     if (ReadyQ.empty()) return nullptr;
03088     std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
03089     SUnit *SU = ReadyQ.back();
03090     ReadyQ.pop_back();
03091     IsTopNode = false;
03092     DEBUG(dbgs() << "Pick node " << "SU(" << SU->NodeNum << ") "
03093           << " ILP: " << DAG->getDFSResult()->getILP(SU)
03094           << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @"
03095           << DAG->getDFSResult()->getSubtreeLevel(
03096             DAG->getDFSResult()->getSubtreeID(SU)) << '\n'
03097           << "Scheduling " << *SU->getInstr());
03098     return SU;
03099   }
03100 
03101   /// \brief Scheduler callback to notify that a new subtree is scheduled.
03102   void scheduleTree(unsigned SubtreeID) override {
03103     std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
03104   }
03105 
03106   /// Callback after a node is scheduled. The DFS results and ready Q are
03107   /// refreshed in scheduleTree(), so this only verifies bottom-up order.
03108   void schedNode(SUnit *SU, bool IsTopNode) override {
03109     assert(!IsTopNode && "SchedDFSResult needs bottom-up");
03110   }
03111 
03112   void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
03113 
03114   void releaseBottomNode(SUnit *SU) override {
03115     ReadyQ.push_back(SU);
03116     std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
03117   }
03118 };
03119 } // namespace
03120 
03121 static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
03122   return new ScheduleDAGMILive(C, make_unique<ILPScheduler>(true));
03123 }
03124 static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
03125   return new ScheduleDAGMILive(C, make_unique<ILPScheduler>(false));
03126 }
03127 static MachineSchedRegistry ILPMaxRegistry(
03128   "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
03129 static MachineSchedRegistry ILPMinRegistry(
03130   "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
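// Editor's note: as registered above, these experimental strategies can be
// selected by name, e.g. `llc -misched=ilpmax foo.ll` or `-misched=ilpmin`
// (command line shown is a typical-usage assumption; depending on the target,
// -enable-misched may also be required to run the pass at all).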
03131 
03132 //===----------------------------------------------------------------------===//
03133 // Machine Instruction Shuffler for Correctness Testing
03134 //===----------------------------------------------------------------------===//
03135 
03136 #ifndef NDEBUG
03137 namespace {
03138 /// Apply a less-than relation on the node order, which corresponds to the
03139 /// instruction order prior to scheduling. IsReverse implements greater-than.
03140 template<bool IsReverse>
03141 struct SUnitOrder {
03142   bool operator()(SUnit *A, SUnit *B) const {
03143     if (IsReverse)
03144       return A->NodeNum > B->NodeNum;
03145     else
03146       return A->NodeNum < B->NodeNum;
03147   }
03148 };
03149 
03150 /// Reorder instructions as much as possible.
03151 class InstructionShuffler : public MachineSchedStrategy {
03152   bool IsAlternating;
03153   bool IsTopDown;
03154 
03155   // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
03156   // gives nodes with a higher number higher priority, causing the latest
03157   // instructions to be scheduled first.
03158   PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> >
03159     TopQ;
03160   // When scheduling bottom-up, use greater-than as the queue priority.
03161   PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> >
03162     BottomQ;
03163 public:
03164   InstructionShuffler(bool alternate, bool topdown)
03165     : IsAlternating(alternate), IsTopDown(topdown) {}
03166 
03167   void initialize(ScheduleDAGMI*) override {
03168     TopQ.clear();
03169     BottomQ.clear();
03170   }
03171 
03172   /// Implement MachineSchedStrategy interface.
03173   /// -----------------------------------------
03174 
03175   SUnit *pickNode(bool &IsTopNode) override {
03176     SUnit *SU;
03177     if (IsTopDown) {
03178       do {
03179         if (TopQ.empty()) return nullptr;
03180         SU = TopQ.top();
03181         TopQ.pop();
03182       } while (SU->isScheduled);
03183       IsTopNode = true;
03184     }
03185     else {
03186       do {
03187         if (BottomQ.empty()) return nullptr;
03188         SU = BottomQ.top();
03189         BottomQ.pop();
03190       } while (SU->isScheduled);
03191       IsTopNode = false;
03192     }
03193     if (IsAlternating)
03194       IsTopDown = !IsTopDown;
03195     return SU;
03196   }
03197 
03198   void schedNode(SUnit *SU, bool IsTopNode) override {}
03199 
03200   void releaseTopNode(SUnit *SU) override {
03201     TopQ.push(SU);
03202   }
03203   void releaseBottomNode(SUnit *SU) override {
03204     BottomQ.push(SU);
03205   }
03206 };
03207 } // namespace
03208 
03209 static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
03210   bool Alternate = !ForceTopDown && !ForceBottomUp;
03211   bool TopDown = !ForceBottomUp;
03212   assert((TopDown || !ForceTopDown) &&
03213          "-misched-topdown incompatible with -misched-bottomup");
03214   return new ScheduleDAGMILive(C, make_unique<InstructionShuffler>(Alternate, TopDown));
03215 }
03216 static MachineSchedRegistry ShufflerRegistry(
03217   "shuffle", "Shuffle machine instructions alternating directions",
03218   createInstructionShuffler);
03219 #endif // !NDEBUG
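// Editor's note: the shuffler above exists only in asserts builds and is meant
// purely for correctness testing; it is selected the same way as the other
// registered strategies, e.g. `llc -misched=shuffle` (usage shown is an
// assumption, as above).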
03220 
03221 //===----------------------------------------------------------------------===//
03222 // GraphWriter support for ScheduleDAGMILive.
03223 //===----------------------------------------------------------------------===//
03224 
03225 #ifndef NDEBUG
03226 namespace llvm {
03227 
03228 template<> struct GraphTraits<
03229   ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
03230 
03231 template<>
03232 struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
03233 
03234   DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
03235 
03236   static std::string getGraphName(const ScheduleDAG *G) {
03237     return G->MF.getName();
03238   }
03239 
03240   static bool renderGraphFromBottomUp() {
03241     return true;
03242   }
03243 
03244   static bool isNodeHidden(const SUnit *Node) {
03245     return (Node->Preds.size() > 10 || Node->Succs.size() > 10);
03246   }
03247 
03248   static bool hasNodeAddressLabel(const SUnit *Node,
03249                                   const ScheduleDAG *Graph) {
03250     return false;
03251   }
03252 
03253   /// If you want to override the dot attributes printed for a particular
03254   /// edge, override this method.
03255   static std::string getEdgeAttributes(const SUnit *Node,
03256                                        SUnitIterator EI,
03257                                        const ScheduleDAG *Graph) {
03258     if (EI.isArtificialDep())
03259       return "color=cyan,style=dashed";
03260     if (EI.isCtrlDep())
03261       return "color=blue,style=dashed";
03262     return "";
03263   }
03264 
03265   static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
03266     std::string Str;
03267     raw_string_ostream SS(Str);
03268     const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
03269     const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
03270       static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
03271     SS << "SU:" << SU->NodeNum;
03272     if (DFS)
03273       SS << " I:" << DFS->getNumInstrs(SU);
03274     return SS.str();
03275   }
03276   static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
03277     return G->getGraphNodeLabel(SU);
03278   }
03279 
03280   static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
03281     std::string Str("shape=Mrecord");
03282     const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
03283     const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
03284       static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
03285     if (DFS) {
03286       Str += ",style=filled,fillcolor=\"#";
03287       Str += DOT::getColorString(DFS->getSubtreeID(N));
03288       Str += '"';
03289     }
03290     return Str;
03291   }
03292 };
03293 } // namespace llvm
03294 #endif // NDEBUG
03295 
03296 /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
03297 /// rendered using 'dot'.
03298 ///
03299 void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
03300 #ifndef NDEBUG
03301   ViewGraph(this, Name, false, Title);
03302 #else
03303   errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
03304          << "systems with Graphviz or gv!\n";
03305 #endif  // NDEBUG
03306 }
03307 
03308 /// Out-of-line implementation with no arguments is handy for gdb.
03309 void ScheduleDAGMI::viewGraph() {
03310   viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
03311 }