//===- MachineScheduler.cpp - Machine Instruction Scheduler --------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// MachineScheduler schedules machine instructions after phi elimination. It
// preserves LiveIntervals so it can be invoked before register allocation.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/ScheduleDFS.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include <queue>

using namespace llvm;

#define DEBUG_TYPE "misched"

namespace llvm {
cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
                           cl::desc("Force top-down list scheduling"));
cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
                            cl::desc("Force bottom-up list scheduling"));
cl::opt<bool>
DumpCriticalPathLength("misched-dcpl", cl::Hidden,
                       cl::desc("Print critical path length to stdout"));
}

#ifndef NDEBUG
static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
  cl::desc("Pop up a window to show MISched dags after they are processed"));

static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
  cl::desc("Stop scheduling after N instructions"), cl::init(~0U));

static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
  cl::desc("Only schedule this function"));
static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
  cl::desc("Only schedule this MBB#"));
#else
static bool ViewMISchedDAGs = false;
#endif // NDEBUG

static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
  cl::desc("Enable register pressure scheduling."), cl::init(true));

static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
  cl::desc("Enable cyclic critical path analysis."), cl::init(true));

static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
  cl::desc("Enable load clustering."), cl::init(true));

// Experimental heuristics
static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
  cl::desc("Enable scheduling for macro fusion."), cl::init(true));

static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
  cl::desc("Verify machine instrs before and after machine scheduling"));

// DAG subtrees must have at least this many nodes.
static const unsigned MinSubtreeSize = 8;

// Pin the vtables to this file.
void MachineSchedStrategy::anchor() {}
void ScheduleDAGMutation::anchor() {}

//===----------------------------------------------------------------------===//
// Machine Instruction Scheduling Pass and Registry
//===----------------------------------------------------------------------===//

MachineSchedContext::MachineSchedContext():
  MF(nullptr), MLI(nullptr), MDT(nullptr), PassConfig(nullptr), AA(nullptr), LIS(nullptr) {
  RegClassInfo = new RegisterClassInfo();
}

MachineSchedContext::~MachineSchedContext() {
  delete RegClassInfo;
}

namespace {
/// Base class for a machine scheduler class that can run at any point.
class MachineSchedulerBase : public MachineSchedContext,
                             public MachineFunctionPass {
public:
  MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}

  void print(raw_ostream &O, const Module* = nullptr) const override;

protected:
  void scheduleRegions(ScheduleDAGInstrs &Scheduler);
};

/// MachineScheduler runs after coalescing and before register allocation.
class MachineScheduler : public MachineSchedulerBase {
public:
  MachineScheduler();

  void getAnalysisUsage(AnalysisUsage &AU) const override;

  bool runOnMachineFunction(MachineFunction&) override;

  static char ID; // Class identification, replacement for typeinfo

protected:
  ScheduleDAGInstrs *createMachineScheduler();
};

/// PostMachineScheduler runs shortly before code emission.
class PostMachineScheduler : public MachineSchedulerBase {
public:
  PostMachineScheduler();

  void getAnalysisUsage(AnalysisUsage &AU) const override;

  bool runOnMachineFunction(MachineFunction&) override;

  static char ID; // Class identification, replacement for typeinfo

protected:
  ScheduleDAGInstrs *createPostMachineScheduler();
};
} // namespace

char MachineScheduler::ID = 0;

char &llvm::MachineSchedulerID = MachineScheduler::ID;

INITIALIZE_PASS_BEGIN(MachineScheduler, "misched",
                      "Machine Instruction Scheduler", false, false)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_END(MachineScheduler, "misched",
                    "Machine Instruction Scheduler", false, false)

MachineScheduler::MachineScheduler()
: MachineSchedulerBase(ID) {
  initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
}

void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  AU.addRequiredID(MachineDominatorsID);
  AU.addRequired<MachineLoopInfo>();
  AU.addRequired<AliasAnalysis>();
  AU.addRequired<TargetPassConfig>();
  AU.addRequired<SlotIndexes>();
  AU.addPreserved<SlotIndexes>();
  AU.addRequired<LiveIntervals>();
  AU.addPreserved<LiveIntervals>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

char PostMachineScheduler::ID = 0;

char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;

INITIALIZE_PASS(PostMachineScheduler, "postmisched",
                "PostRA Machine Instruction Scheduler", false, false)

PostMachineScheduler::PostMachineScheduler()
: MachineSchedulerBase(ID) {
  initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry());
}

void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  AU.addRequiredID(MachineDominatorsID);
  AU.addRequired<MachineLoopInfo>();
  AU.addRequired<TargetPassConfig>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

MachinePassRegistry MachineSchedRegistry::Registry;

/// A dummy default scheduler factory indicates whether the scheduler
/// is overridden on the command line.
static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
  return nullptr;
}

/// MachineSchedOpt allows command line selection of the scheduler.
static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
               RegisterPassParser<MachineSchedRegistry> >
MachineSchedOpt("misched",
                cl::init(&useDefaultMachineSched), cl::Hidden,
                cl::desc("Machine instruction scheduler to use"));

static MachineSchedRegistry
DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
                     useDefaultMachineSched);

/// Forward declare the standard machine scheduler. This will be used as the
/// default scheduler if the target does not set a default.
static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C);
static ScheduleDAGInstrs *createGenericSchedPostRA(MachineSchedContext *C);

/// Decrement this iterator until reaching the top or a non-debug instr.
static MachineBasicBlock::const_iterator
priorNonDebug(MachineBasicBlock::const_iterator I,
              MachineBasicBlock::const_iterator Beg) {
  assert(I != Beg && "reached the top of the region, cannot decrement");
  while (--I != Beg) {
    if (!I->isDebugValue())
      break;
  }
  return I;
}

/// Non-const version.
static MachineBasicBlock::iterator
priorNonDebug(MachineBasicBlock::iterator I,
              MachineBasicBlock::const_iterator Beg) {
  return const_cast<MachineInstr*>(
    &*priorNonDebug(MachineBasicBlock::const_iterator(I), Beg));
}

/// If this iterator is a debug value, increment until reaching the End or a
/// non-debug instruction.
static MachineBasicBlock::const_iterator
nextIfDebug(MachineBasicBlock::const_iterator I,
            MachineBasicBlock::const_iterator End) {
  for (; I != End; ++I) {
    if (!I->isDebugValue())
      break;
  }
  return I;
}

/// Non-const version.
static MachineBasicBlock::iterator
nextIfDebug(MachineBasicBlock::iterator I,
            MachineBasicBlock::const_iterator End) {
  // Cast the return value to nonconst MachineInstr, then cast to an
  // instr_iterator, which does not check for null, finally return a
  // bundle_iterator.
  return MachineBasicBlock::instr_iterator(
    const_cast<MachineInstr*>(
      &*nextIfDebug(MachineBasicBlock::const_iterator(I), End)));
}

/// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
  // Select the scheduler, or set the default.
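  // Selection precedence: an explicit -misched=<name> command line choice
  // overrides the target; otherwise the target's TargetPassConfig hook picks
  // one; otherwise fall back to the generic scheduler below.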
  MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
  if (Ctor != useDefaultMachineSched)
    return Ctor(this);

  // Get the default scheduler set by the target for this function.
  ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
  if (Scheduler)
    return Scheduler;

  // Default to GenericScheduler.
  return createGenericSchedLive(this);
}

/// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
/// the caller. We don't have a command line option to override the postRA
/// scheduler. The Target must configure it.
ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
  // Get the postRA scheduler set by the target for this function.
  ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
  if (Scheduler)
    return Scheduler;

  // Default to GenericScheduler.
  return createGenericSchedPostRA(this);
}

/// Top-level MachineScheduler pass driver.
///
/// Visit blocks in function order. Divide each block into scheduling regions
/// and visit them bottom-up. Visiting regions bottom-up is not required, but is
/// consistent with the DAG builder, which traverses the interior of the
/// scheduling regions bottom-up.
///
/// This design avoids exposing scheduling boundaries to the DAG builder,
/// simplifying the DAG builder's support for "special" target instructions.
/// At the same time the design allows target schedulers to operate across
/// scheduling boundaries, for example to bundle the boundary instructions
/// without reordering them. This creates complexity, because the target
/// scheduler must update the RegionBegin and RegionEnd positions cached by
/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
/// design would be to split blocks at scheduling boundaries, but LLVM has a
/// general bias against block splitting purely for implementation simplicity.
bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
  DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));

  // Initialize the context of the pass.
  MF = &mf;
  MLI = &getAnalysis<MachineLoopInfo>();
  MDT = &getAnalysis<MachineDominatorTree>();
  PassConfig = &getAnalysis<TargetPassConfig>();
  AA = &getAnalysis<AliasAnalysis>();

  LIS = &getAnalysis<LiveIntervals>();

  if (VerifyScheduling) {
    DEBUG(LIS->dump());
    MF->verify(this, "Before machine scheduling.");
  }
  RegClassInfo->runOnMachineFunction(*MF);

  // Instantiate the selected scheduler for this target, function, and
  // optimization level.
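  // The scheduler instance is owned by this pass for the duration of the
  // function; every region below is scheduled with the same instance.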
  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
  scheduleRegions(*Scheduler);

  DEBUG(LIS->dump());
  if (VerifyScheduling)
    MF->verify(this, "After machine scheduling.");
  return true;
}

bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
  if (skipOptnoneFunction(*mf.getFunction()))
    return false;

  const TargetSubtargetInfo &ST =
    mf.getTarget().getSubtarget<TargetSubtargetInfo>();
  if (!ST.enablePostMachineScheduler()) {
    DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
    return false;
  }
  DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));

  // Initialize the context of the pass.
  MF = &mf;
  PassConfig = &getAnalysis<TargetPassConfig>();

  if (VerifyScheduling)
    MF->verify(this, "Before post machine scheduling.");

  // Instantiate the selected scheduler for this target, function, and
  // optimization level.
  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
  scheduleRegions(*Scheduler);

  if (VerifyScheduling)
    MF->verify(this, "After post machine scheduling.");
  return true;
}

/// Return true if the given instruction should not be included in a scheduling
/// region.
///
/// MachineScheduler does not currently support scheduling across calls. To
/// handle calls, the DAG builder needs to be modified to create register
/// anti/output dependencies on the registers clobbered by the call's regmask
/// operand. In PreRA scheduling, the stack pointer adjustment already prevents
/// scheduling across calls. In PostRA scheduling, we need the isCall to enforce
/// the boundary, but there would be no benefit to postRA scheduling across
/// calls this late anyway.
static bool isSchedBoundary(MachineBasicBlock::iterator MI,
                            MachineBasicBlock *MBB,
                            MachineFunction *MF,
                            const TargetInstrInfo *TII,
                            bool IsPostRA) {
  return MI->isCall() || TII->isSchedulingBoundary(MI, MBB, *MF);
}

/// Main driver for both MachineScheduler and PostMachineScheduler.
void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) {
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
  bool IsPostRA = Scheduler.isPostRA();

  // Visit all machine basic blocks.
  //
  // TODO: Visit blocks in global postorder or postorder within the bottom-up
  // loop tree. Then we can optionally compute global RegPressure.
  for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
       MBB != MBBEnd; ++MBB) {

    Scheduler.startBlock(MBB);

#ifndef NDEBUG
    if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
      continue;
    if (SchedOnlyBlock.getNumOccurrences()
        && (int)SchedOnlyBlock != MBB->getNumber())
      continue;
#endif

    // Break the block into scheduling regions [I, RegionEnd), and schedule each
    // region as soon as it is discovered. RegionEnd points to the scheduling
    // boundary at the bottom of the region. The DAG does not include RegionEnd,
    // but the region does (i.e. the next RegionEnd is above the previous
    // RegionBegin). If the current block has no terminator then RegionEnd ==
    // MBB->end() for the bottom region.
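    //
    // As a hypothetical illustration: in a block "%a = ...; %b = ...; CALL;
    // %c = ...; RET", the call and the return are boundaries, so %a/%b form
    // one region and %c forms another.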
    //
    // The Scheduler may insert instructions during either schedule() or
    // exitRegion(), even for empty regions. So the local iterators 'I' and
    // 'RegionEnd' are invalid across these calls.
    //
    // MBB::size() uses instr_iterator to count. Here we need a bundle to count
    // as a single instruction.
    unsigned RemainingInstrs = std::distance(MBB->begin(), MBB->end());
    for (MachineBasicBlock::iterator RegionEnd = MBB->end();
         RegionEnd != MBB->begin(); RegionEnd = Scheduler.begin()) {

      // Avoid decrementing RegionEnd for blocks with no terminator.
      if (RegionEnd != MBB->end() ||
          isSchedBoundary(std::prev(RegionEnd), MBB, MF, TII, IsPostRA)) {
        --RegionEnd;
        // Count the boundary instruction.
        --RemainingInstrs;
      }

      // The next region starts above the previous region. Look backward in the
      // instruction stream until we find the nearest boundary.
      unsigned NumRegionInstrs = 0;
      MachineBasicBlock::iterator I = RegionEnd;
      for (; I != MBB->begin(); --I, --RemainingInstrs, ++NumRegionInstrs) {
        if (isSchedBoundary(std::prev(I), MBB, MF, TII, IsPostRA))
          break;
      }
      // Notify the scheduler of the region, even if we may skip scheduling
      // it. Perhaps it still needs to be bundled.
      Scheduler.enterRegion(MBB, I, RegionEnd, NumRegionInstrs);

      // Skip empty scheduling regions (0 or 1 schedulable instructions).
      if (I == RegionEnd || I == std::prev(RegionEnd)) {
        // Close the current region. Bundle the terminator if needed.
        // This invalidates 'RegionEnd' and 'I'.
        Scheduler.exitRegion();
        continue;
      }
      DEBUG(dbgs() << "********** " << ((Scheduler.isPostRA()) ? "PostRA " : "")
            << "MI Scheduling **********\n");
      DEBUG(dbgs() << MF->getName()
            << ":BB#" << MBB->getNumber() << " " << MBB->getName()
            << "\n  From: " << *I << "    To: ";
            if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
            else dbgs() << "End";
            dbgs() << " RegionInstrs: " << NumRegionInstrs
            << " Remaining: " << RemainingInstrs << "\n");
      if (DumpCriticalPathLength) {
        errs() << MF->getName();
        errs() << ":BB# " << MBB->getNumber();
        errs() << " " << MBB->getName() << " \n";
      }

      // Schedule a region: possibly reorder instructions.
      // This invalidates 'RegionEnd' and 'I'.
      Scheduler.schedule();

      // Close the current region.
      Scheduler.exitRegion();

      // Scheduling has invalidated the current iterator 'I'. Ask the
      // scheduler for the top of its scheduled region.
      RegionEnd = Scheduler.begin();
    }
    assert(RemainingInstrs == 0 && "Instruction count mismatch!");
    Scheduler.finishBlock();
    if (Scheduler.isPostRA()) {
      // FIXME: Ideally, no further passes should rely on kill flags. However,
      // thumb2 size reduction is currently an exception.
      Scheduler.fixupKills(MBB);
    }
  }
  Scheduler.finalizeSchedule();
}

void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
  // unimplemented
}

LLVM_DUMP_METHOD
void ReadyQueue::dump() {
  dbgs() << Name << ": ";
  for (unsigned i = 0, e = Queue.size(); i < e; ++i)
    dbgs() << Queue[i]->NodeNum << " ";
  dbgs() << "\n";
}

//===----------------------------------------------------------------------===//
// ScheduleDAGMI - Basic machine instruction scheduling. This is
// independent of PreRA/PostRA scheduling and involves no extra book-keeping for
// virtual registers.
//===----------------------------------------------------------------------===//

// Provide a vtable anchor.
ScheduleDAGMI::~ScheduleDAGMI() {
}

bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
  return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
}

bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
  if (SuccSU != &ExitSU) {
    // Do not use WillCreateCycle, it assumes SD scheduling.
    // If Pred is reachable from Succ, then the edge creates a cycle.
    if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
      return false;
    Topo.AddPred(SuccSU, PredDep.getSUnit());
  }
  SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
  // Return true regardless of whether a new edge needed to be inserted.
  return true;
}

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
/// NumPredsLeft reaches zero, release the successor node.
///
/// FIXME: Adjust SuccSU height based on MinLatency.
void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();

  if (SuccEdge->isWeak()) {
    --SuccSU->WeakPredsLeft;
    if (SuccEdge->isCluster())
      NextClusterSucc = SuccSU;
    return;
  }
#ifndef NDEBUG
  if (SuccSU->NumPredsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(nullptr);
  }
#endif
  // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
  // CurrCycle may have advanced since then.
  if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
    SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();

  --SuccSU->NumPredsLeft;
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
    SchedImpl->releaseTopNode(SuccSU);
}

/// releaseSuccessors - Call releaseSucc on each of SU's successors.
void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    releaseSucc(SU, &*I);
  }
}

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
/// NumSuccsLeft reaches zero, release the predecessor node.
///
/// FIXME: Adjust PredSU height based on MinLatency.
void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();

  if (PredEdge->isWeak()) {
    --PredSU->WeakSuccsLeft;
    if (PredEdge->isCluster())
      NextClusterPred = PredSU;
    return;
  }
#ifndef NDEBUG
  if (PredSU->NumSuccsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    PredSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(nullptr);
  }
#endif
  // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
  // CurrCycle may have advanced since then.
  if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
    PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();

  --PredSU->NumSuccsLeft;
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
    SchedImpl->releaseBottomNode(PredSU);
}

/// releasePredecessors - Call releasePred on each of SU's predecessors.
void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    releasePred(SU, &*I);
  }
}

/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
/// crossing a scheduling boundary. [begin, end) includes all instructions in
/// the region, including the boundary itself and single-instruction regions
/// that don't get scheduled.
void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
                                MachineBasicBlock::iterator begin,
                                MachineBasicBlock::iterator end,
                                unsigned regioninstrs)
{
  ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);

  SchedImpl->initPolicy(begin, end, regioninstrs);
}

/// This is normally called from the main scheduler loop but may also be invoked
/// by the scheduling strategy to perform additional code motion.
void ScheduleDAGMI::moveInstruction(
  MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {
  // Advance RegionBegin if the first instruction moves down.
  if (&*RegionBegin == MI)
    ++RegionBegin;

  // Update the instruction stream.
  BB->splice(InsertPos, BB, MI);

  // Update LiveIntervals
  if (LIS)
    LIS->handleMove(MI, /*UpdateFlags=*/true);

  // Recede RegionBegin if an instruction moves above the first.
  if (RegionBegin == InsertPos)
    RegionBegin = MI;
}

bool ScheduleDAGMI::checkSchedLimit() {
#ifndef NDEBUG
  if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
    CurrentTop = CurrentBottom;
    return false;
  }
  ++NumInstrsScheduled;
#endif
  return true;
}

/// Per-region scheduling driver, called back from
/// MachineScheduler::runOnMachineFunction. This is a simplified driver that
/// does not consider liveness or register pressure. It is useful for PostRA
/// scheduling and potentially other custom schedulers.
void ScheduleDAGMI::schedule() {
  // Build the DAG.
  buildSchedGraph(AA);

  Topo.InitDAGTopologicalSorting();

  postprocessDAG();

  SmallVector<SUnit*, 8> TopRoots, BotRoots;
  findRootsAndBiasEdges(TopRoots, BotRoots);

  // Initialize the strategy before modifying the DAG.
  // This may initialize a DFSResult to be used for queue priority.
  SchedImpl->initialize(this);

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));
  if (ViewMISchedDAGs) viewGraph();

  // Initialize ready queues now that the DAG and priority data are finalized.
  initQueues(TopRoots, BotRoots);

  bool IsTopNode = false;
  while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
    assert(!SU->isScheduled && "Node already scheduled");
    if (!checkSchedLimit())
      break;

    MachineInstr *MI = SU->getInstr();
    if (IsTopNode) {
      assert(SU->isTopReady() && "node still has unscheduled dependencies");
      if (&*CurrentTop == MI)
        CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
      else
        moveInstruction(MI, CurrentTop);
    }
    else {
      assert(SU->isBottomReady() && "node still has unscheduled dependencies");
      MachineBasicBlock::iterator priorII =
        priorNonDebug(CurrentBottom, CurrentTop);
      if (&*priorII == MI)
        CurrentBottom = priorII;
      else {
        if (&*CurrentTop == MI)
          CurrentTop = nextIfDebug(++CurrentTop, priorII);
        moveInstruction(MI, CurrentBottom);
        CurrentBottom = MI;
      }
    }
    // Notify the scheduling strategy before updating the DAG.
    // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
    // runs, it can then use the accurate ReadyCycle time to determine whether
    // newly released nodes can move to the readyQ.
    SchedImpl->schedNode(SU, IsTopNode);

    updateQueues(SU, IsTopNode);
  }
  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");

  placeDebugValues();

  DEBUG({
      unsigned BBNum = begin()->getParent()->getNumber();
      dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
      dumpSchedule();
      dbgs() << '\n';
    });
}

/// Apply each ScheduleDAGMutation step in order.
void ScheduleDAGMI::postprocessDAG() {
  for (unsigned i = 0, e = Mutations.size(); i < e; ++i) {
    Mutations[i]->apply(this);
  }
}

void ScheduleDAGMI::
findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
                      SmallVectorImpl<SUnit*> &BotRoots) {
  for (std::vector<SUnit>::iterator
         I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
    SUnit *SU = &(*I);
    assert(!SU->isBoundaryNode() && "Boundary node should not be in SUnits");

    // Order predecessors so DFSResult follows the critical path.
    SU->biasCriticalPath();

    // A SUnit is ready to top schedule if it has no predecessors.
    if (!I->NumPredsLeft)
      TopRoots.push_back(SU);
    // A SUnit is ready to bottom schedule if it has no successors.
    if (!I->NumSuccsLeft)
      BotRoots.push_back(SU);
  }
  ExitSU.biasCriticalPath();
}

/// Identify DAG roots and setup scheduler queues.
void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
                               ArrayRef<SUnit*> BotRoots) {
  NextClusterSucc = nullptr;
  NextClusterPred = nullptr;

  // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
  //
  // Nodes with unreleased weak edges can still be roots.
  // Release top roots in forward order.
  for (SmallVectorImpl<SUnit*>::const_iterator
         I = TopRoots.begin(), E = TopRoots.end(); I != E; ++I) {
    SchedImpl->releaseTopNode(*I);
  }
  // Release bottom roots in reverse order so the higher priority nodes appear
  // first. This is more natural and slightly more efficient.
  for (SmallVectorImpl<SUnit*>::const_reverse_iterator
         I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
    SchedImpl->releaseBottomNode(*I);
  }

  releaseSuccessors(&EntrySU);
  releasePredecessors(&ExitSU);

  SchedImpl->registerRoots();

  // Advance past initial DebugValues.
  CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
  CurrentBottom = RegionEnd;
}

/// Update scheduler queues after scheduling an instruction.
void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
  // Release dependent instructions for scheduling.
  if (IsTopNode)
    releaseSuccessors(SU);
  else
    releasePredecessors(SU);

  SU->isScheduled = true;
}

/// Reinsert any remaining debug_values, just like the PostRA scheduler.
void ScheduleDAGMI::placeDebugValues() {
  // If first instruction was a DBG_VALUE then put it back.
  if (FirstDbgValue) {
    BB->splice(RegionBegin, BB, FirstDbgValue);
    RegionBegin = FirstDbgValue;
  }

  for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
         DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
    std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
    MachineInstr *DbgValue = P.first;
    MachineBasicBlock::iterator OrigPrevMI = P.second;
    if (&*RegionBegin == DbgValue)
      ++RegionBegin;
    BB->splice(++OrigPrevMI, BB, DbgValue);
    if (OrigPrevMI == std::prev(RegionEnd))
      RegionEnd = DbgValue;
  }
  DbgValues.clear();
  FirstDbgValue = nullptr;
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void ScheduleDAGMI::dumpSchedule() const {
  for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
    if (SUnit *SU = getSUnit(&(*MI)))
      SU->dump(this);
    else
      dbgs() << "Missing SUnit\n";
  }
}
#endif

//===----------------------------------------------------------------------===//
// ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
// preservation.
//===----------------------------------------------------------------------===//

ScheduleDAGMILive::~ScheduleDAGMILive() {
  delete DFSResult;
}

/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
/// crossing a scheduling boundary. [begin, end) includes all instructions in
/// the region, including the boundary itself and single-instruction regions
/// that don't get scheduled.
void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
                                    MachineBasicBlock::iterator begin,
                                    MachineBasicBlock::iterator end,
                                    unsigned regioninstrs)
{
  // ScheduleDAGMI initializes SchedImpl's per-region policy.
  ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);

  // For convenience remember the end of the liveness region.
  LiveRegionEnd = (RegionEnd == bb->end()) ?
    RegionEnd : std::next(RegionEnd);

  SUPressureDiffs.clear();

  ShouldTrackPressure = SchedImpl->shouldTrackPressure();
}

// Setup the register pressure trackers for the top and bottom scheduled
// regions.
void ScheduleDAGMILive::initRegPressure() {
  TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin);
  BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);

  // Close the RPTracker to finalize live ins.
  RPTracker.closeRegion();

  DEBUG(RPTracker.dump());

  // Initialize the live ins and live outs.
  TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
  BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);

  // Close one end of the tracker so we can call
  // getMaxUpward/DownwardPressureDelta before advancing across any
  // instructions. This converts currently live regs into live ins/outs.
  TopRPTracker.closeTop();
  BotRPTracker.closeBottom();

  BotRPTracker.initLiveThru(RPTracker);
  if (!BotRPTracker.getLiveThru().empty()) {
    TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());
    DEBUG(dbgs() << "Live Thru: ";
          dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
  };

  // For each live out vreg reduce the pressure change associated with other
  // uses of the same vreg below the live-out reaching def.
  updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);

  // Account for liveness generated by the region boundary.
  if (LiveRegionEnd != RegionEnd) {
    SmallVector<unsigned, 8> LiveUses;
    BotRPTracker.recede(&LiveUses);
    updatePressureDiffs(LiveUses);
  }

  assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");

  // Cache the list of excess pressure sets in this region. This will also track
  // the max pressure in the scheduled code for these sets.
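  // For example, on a hypothetical target whose integer pressure set has a
  // limit of 24, a region whose max pressure reaches 30 records that set here
  // so the strategy can try to reduce it while scheduling.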
  RegionCriticalPSets.clear();
  const std::vector<unsigned> &RegionPressure =
    RPTracker.getPressure().MaxSetPressure;
  for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
    unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
    if (RegionPressure[i] > Limit) {
      DEBUG(dbgs() << TRI->getRegPressureSetName(i)
            << " Limit " << Limit
            << " Actual " << RegionPressure[i] << "\n");
      RegionCriticalPSets.push_back(PressureChange(i));
    }
  }
  DEBUG(dbgs() << "Excess PSets: ";
        for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
          dbgs() << TRI->getRegPressureSetName(
            RegionCriticalPSets[i].getPSet()) << " ";
        dbgs() << "\n");
}

void ScheduleDAGMILive::
updateScheduledPressure(const SUnit *SU,
                        const std::vector<unsigned> &NewMaxPressure) {
  const PressureDiff &PDiff = getPressureDiff(SU);
  unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
  for (PressureDiff::const_iterator I = PDiff.begin(), E = PDiff.end();
       I != E; ++I) {
    if (!I->isValid())
      break;
    unsigned ID = I->getPSet();
    while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
      ++CritIdx;
    if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
      if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
          && NewMaxPressure[ID] <= INT16_MAX)
        RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
    }
    unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
    if (NewMaxPressure[ID] >= Limit - 2) {
      DEBUG(dbgs() << "  " << TRI->getRegPressureSetName(ID) << ": "
            << NewMaxPressure[ID] << " > " << Limit << "(+ "
            << BotRPTracker.getLiveThru()[ID] << " livethru)\n");
    }
  }
}

/// Update the PressureDiff array for liveness after scheduling this
/// instruction.
void ScheduleDAGMILive::updatePressureDiffs(ArrayRef<unsigned> LiveUses) {
  for (unsigned LUIdx = 0, LUEnd = LiveUses.size(); LUIdx != LUEnd; ++LUIdx) {
    /// FIXME: Currently assuming single-use physregs.
    unsigned Reg = LiveUses[LUIdx];
    DEBUG(dbgs() << "  LiveReg: " << PrintVRegOrUnit(Reg, TRI) << "\n");
    if (!TRI->isVirtualRegister(Reg))
      continue;

    // This may be called before CurrentBottom has been initialized. However,
    // BotRPTracker must have a valid position. We want the value live into the
    // instruction or live out of the block, so ask for the previous
    // instruction's live-out.
    const LiveInterval &LI = LIS->getInterval(Reg);
    VNInfo *VNI;
    MachineBasicBlock::const_iterator I =
      nextIfDebug(BotRPTracker.getPos(), BB->end());
    if (I == BB->end())
      VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
    else {
      LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(I));
      VNI = LRQ.valueIn();
    }
    // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
    assert(VNI && "No live value at use.");
    for (VReg2UseMap::iterator
           UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) {
      SUnit *SU = UI->SU;
      DEBUG(dbgs() << "  UpdateRegP: SU(" << SU->NodeNum << ") "
            << *SU->getInstr());
      // If this use comes before the reaching def, it cannot be a last use, so
      // decrease its pressure change.
      if (!SU->isScheduled && SU != &ExitSU) {
        LiveQueryResult LRQ
          = LI.Query(LIS->getInstructionIndex(SU->getInstr()));
        if (LRQ.valueIn() == VNI)
          getPressureDiff(SU).addPressureChange(Reg, true, &MRI);
      }
    }
  }
}

/// schedule - Called back from MachineScheduler::runOnMachineFunction
/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
/// only includes instructions that have DAG nodes, not scheduling boundaries.
///
/// This is a skeletal driver, with all the functionality pushed into helpers,
/// so that it can be easily extended by experimental schedulers. Generally,
/// implementing MachineSchedStrategy should be sufficient to implement a new
/// scheduling algorithm. However, if a scheduler further subclasses
/// ScheduleDAGMILive then it will want to override this virtual method in order
/// to update any specialized state.
void ScheduleDAGMILive::schedule() {
  buildDAGWithRegPressure();

  Topo.InitDAGTopologicalSorting();

  postprocessDAG();

  SmallVector<SUnit*, 8> TopRoots, BotRoots;
  findRootsAndBiasEdges(TopRoots, BotRoots);

  // Initialize the strategy before modifying the DAG.
  // This may initialize a DFSResult to be used for queue priority.
  SchedImpl->initialize(this);

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));
  if (ViewMISchedDAGs) viewGraph();

  // Initialize ready queues now that the DAG and priority data are finalized.
  initQueues(TopRoots, BotRoots);

  if (ShouldTrackPressure) {
    assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
    TopRPTracker.setPos(CurrentTop);
  }

  bool IsTopNode = false;
  while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
    assert(!SU->isScheduled && "Node already scheduled");
    if (!checkSchedLimit())
      break;

    scheduleMI(SU, IsTopNode);

    updateQueues(SU, IsTopNode);

    if (DFSResult) {
      unsigned SubtreeID = DFSResult->getSubtreeID(SU);
      if (!ScheduledTrees.test(SubtreeID)) {
        ScheduledTrees.set(SubtreeID);
        DFSResult->scheduleTree(SubtreeID);
        SchedImpl->scheduleTree(SubtreeID);
      }
    }

    // Notify the scheduling strategy after updating the DAG.
    SchedImpl->schedNode(SU, IsTopNode);
  }
  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");

  placeDebugValues();

  DEBUG({
      unsigned BBNum = begin()->getParent()->getNumber();
      dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
      dumpSchedule();
      dbgs() << '\n';
    });
}

/// Build the DAG and setup three register pressure trackers.
void ScheduleDAGMILive::buildDAGWithRegPressure() {
  if (!ShouldTrackPressure) {
    RPTracker.reset();
    RegionCriticalPSets.clear();
    buildSchedGraph(AA);
    return;
  }

  // Initialize the register pressure tracker used by buildSchedGraph.
  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
                 /*TrackUntiedDefs=*/true);

  // Account for liveness generated by the region boundary.
  if (LiveRegionEnd != RegionEnd)
    RPTracker.recede();

  // Build the DAG, and compute current register pressure.
  buildSchedGraph(AA, &RPTracker, &SUPressureDiffs);

  // Initialize top/bottom trackers after computing region pressure.
  initRegPressure();
}

void ScheduleDAGMILive::computeDFSResult() {
  if (!DFSResult)
    DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
  DFSResult->clear();
  ScheduledTrees.clear();
  DFSResult->resize(SUnits.size());
  DFSResult->compute(SUnits);
  ScheduledTrees.resize(DFSResult->getNumSubtrees());
}

/// Compute the max cyclic critical path through the DAG. The scheduling DAG
/// only provides the critical path for single block loops. To handle loops that
/// span blocks, we could use the vreg path latencies provided by
/// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
/// available for use in the scheduler.
///
/// The cyclic path estimation identifies a def-use pair that crosses the back
/// edge and considers the depth and height of the nodes. For example, consider
/// the following instruction sequence where each instruction has unit latency
/// and defines an eponymous virtual register:
///
/// a->b(a,c)->c(b)->d(c)->exit
///
/// The cyclic critical path is two cycles: b->c->b
/// The acyclic critical path is four cycles: a->b->c->d->exit
/// LiveOutHeight = height(c) = len(c->d->exit) = 2
/// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
/// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
/// LiveInDepth = depth(b) = len(a->b) = 1
///
/// LiveOutDepth - LiveInDepth = 3 - 1 = 2
/// LiveInHeight - LiveOutHeight = 4 - 2 = 2
/// CyclicCriticalPath = min(2, 2) = 2
///
/// This could be relevant to PostRA scheduling, but is currently implemented
/// assuming LiveIntervals.
unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
  // This only applies to single block loops.
  if (!BB->isSuccessor(BB))
    return 0;

  unsigned MaxCyclicLatency = 0;
  // Visit each live out vreg def to find def/use pairs that cross iterations.
  ArrayRef<unsigned> LiveOuts = RPTracker.getPressure().LiveOutRegs;
  for (ArrayRef<unsigned>::iterator RI = LiveOuts.begin(), RE = LiveOuts.end();
       RI != RE; ++RI) {
    unsigned Reg = *RI;
    if (!TRI->isVirtualRegister(Reg))
      continue;
    const LiveInterval &LI = LIS->getInterval(Reg);
    const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
    if (!DefVNI)
      continue;

    MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
    const SUnit *DefSU = getSUnit(DefMI);
    if (!DefSU)
      continue;

    unsigned LiveOutHeight = DefSU->getHeight();
    unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
    // Visit all local users of the vreg def.
    for (VReg2UseMap::iterator
           UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) {
      if (UI->SU == &ExitSU)
        continue;

      // Only consider uses of the phi.
      LiveQueryResult LRQ =
        LI.Query(LIS->getInstructionIndex(UI->SU->getInstr()));
      if (!LRQ.valueIn()->isPHIDef())
        continue;

      // Assume that a path spanning two iterations is a cycle, which could
      // overestimate in strange cases. This allows cyclic latency to be
      // estimated as the minimum slack of the vreg's depth or height.
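      //
      // In terms of the a->b(a,c)->c(b)->d example above, the code below
      // evaluates min(LiveOutDepth - LiveInDepth, LiveInHeight - LiveOutHeight)
      // for this def-use pair.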
      unsigned CyclicLatency = 0;
      if (LiveOutDepth > UI->SU->getDepth())
        CyclicLatency = LiveOutDepth - UI->SU->getDepth();

      unsigned LiveInHeight = UI->SU->getHeight() + DefSU->Latency;
      if (LiveInHeight > LiveOutHeight) {
        if (LiveInHeight - LiveOutHeight < CyclicLatency)
          CyclicLatency = LiveInHeight - LiveOutHeight;
      }
      else
        CyclicLatency = 0;

      DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
            << UI->SU->NodeNum << ") = " << CyclicLatency << "c\n");
      if (CyclicLatency > MaxCyclicLatency)
        MaxCyclicLatency = CyclicLatency;
    }
  }
  DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
  return MaxCyclicLatency;
}

/// Move an instruction and update register pressure.
void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
  // Move the instruction to its new location in the instruction stream.
  MachineInstr *MI = SU->getInstr();

  if (IsTopNode) {
    assert(SU->isTopReady() && "node still has unscheduled dependencies");
    if (&*CurrentTop == MI)
      CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
    else {
      moveInstruction(MI, CurrentTop);
      TopRPTracker.setPos(MI);
    }

    if (ShouldTrackPressure) {
      // Update top scheduled pressure.
      TopRPTracker.advance();
      assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
      updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
    }
  }
  else {
    assert(SU->isBottomReady() && "node still has unscheduled dependencies");
    MachineBasicBlock::iterator priorII =
      priorNonDebug(CurrentBottom, CurrentTop);
    if (&*priorII == MI)
      CurrentBottom = priorII;
    else {
      if (&*CurrentTop == MI) {
        CurrentTop = nextIfDebug(++CurrentTop, priorII);
        TopRPTracker.setPos(CurrentTop);
      }
      moveInstruction(MI, CurrentBottom);
      CurrentBottom = MI;
    }
    if (ShouldTrackPressure) {
      // Update bottom scheduled pressure.
      SmallVector<unsigned, 8> LiveUses;
      BotRPTracker.recede(&LiveUses);
      assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
      updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
      updatePressureDiffs(LiveUses);
    }
  }
}

//===----------------------------------------------------------------------===//
// LoadClusterMutation - DAG post-processing to cluster loads.
//===----------------------------------------------------------------------===//

namespace {
/// \brief Post-process the DAG to create cluster edges between neighboring
/// loads.
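///
/// Loads that share a base register are sorted by offset and chained with
/// weak cluster edges so the scheduler tends to keep them adjacent.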
class LoadClusterMutation : public ScheduleDAGMutation {
  struct LoadInfo {
    SUnit *SU;
    unsigned BaseReg;
    unsigned Offset;
    LoadInfo(SUnit *su, unsigned reg, unsigned ofs)
      : SU(su), BaseReg(reg), Offset(ofs) {}

    bool operator<(const LoadInfo &RHS) const {
      return std::tie(BaseReg, Offset) < std::tie(RHS.BaseReg, RHS.Offset);
    }
  };

  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
public:
  LoadClusterMutation(const TargetInstrInfo *tii,
                      const TargetRegisterInfo *tri)
    : TII(tii), TRI(tri) {}

  void apply(ScheduleDAGMI *DAG) override;
protected:
  void clusterNeighboringLoads(ArrayRef<SUnit*> Loads, ScheduleDAGMI *DAG);
};
} // anonymous

void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads,
                                                  ScheduleDAGMI *DAG) {
  SmallVector<LoadClusterMutation::LoadInfo,32> LoadRecords;
  for (unsigned Idx = 0, End = Loads.size(); Idx != End; ++Idx) {
    SUnit *SU = Loads[Idx];
    unsigned BaseReg;
    unsigned Offset;
    if (TII->getLdStBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI))
      LoadRecords.push_back(LoadInfo(SU, BaseReg, Offset));
  }
  if (LoadRecords.size() < 2)
    return;
  std::sort(LoadRecords.begin(), LoadRecords.end());
  unsigned ClusterLength = 1;
  for (unsigned Idx = 0, End = LoadRecords.size(); Idx < (End - 1); ++Idx) {
    if (LoadRecords[Idx].BaseReg != LoadRecords[Idx+1].BaseReg) {
      ClusterLength = 1;
      continue;
    }

    SUnit *SUa = LoadRecords[Idx].SU;
    SUnit *SUb = LoadRecords[Idx+1].SU;
    if (TII->shouldClusterLoads(SUa->getInstr(), SUb->getInstr(), ClusterLength)
        && DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {

      DEBUG(dbgs() << "Cluster loads SU(" << SUa->NodeNum << ") - SU("
            << SUb->NodeNum << ")\n");
      // Copy successor edges from SUa to SUb. Interleaving computation
      // dependent on SUa can prevent load combining due to register reuse.
      // Predecessor edges do not need to be copied from SUb to SUa since nearby
      // loads should have effectively the same inputs.
      for (SUnit::const_succ_iterator
             SI = SUa->Succs.begin(), SE = SUa->Succs.end(); SI != SE; ++SI) {
        if (SI->getSUnit() == SUb)
          continue;
        DEBUG(dbgs() << "  Copy Succ SU(" << SI->getSUnit()->NodeNum << ")\n");
        DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial));
      }
      ++ClusterLength;
    }
    else
      ClusterLength = 1;
  }
}

/// \brief Callback from DAG postProcessing to create cluster edges for loads.
void LoadClusterMutation::apply(ScheduleDAGMI *DAG) {
  // Map DAG NodeNum to store chain ID.
  DenseMap<unsigned, unsigned> StoreChainIDs;
  // Map each store chain to a set of dependent loads.
  SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
  for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
    SUnit *SU = &DAG->SUnits[Idx];
    if (!SU->getInstr()->mayLoad())
      continue;
    unsigned ChainPredID = DAG->SUnits.size();
    for (SUnit::const_pred_iterator
           PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
      if (PI->isCtrl()) {
        ChainPredID = PI->getSUnit()->NodeNum;
        break;
      }
    }
    // Check if this chain-like pred has been seen
    // before. ChainPredID==MaxNodeID for loads at the top of the schedule.
    unsigned NumChains = StoreChainDependents.size();
    std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
      StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
    if (Result.second)
      StoreChainDependents.resize(NumChains + 1);
    StoreChainDependents[Result.first->second].push_back(SU);
  }
  // Iterate over the store chains.
  for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx)
    clusterNeighboringLoads(StoreChainDependents[Idx], DAG);
}

//===----------------------------------------------------------------------===//
// MacroFusion - DAG post-processing to encourage fusion of macro ops.
//===----------------------------------------------------------------------===//

namespace {
/// \brief Post-process the DAG to create cluster edges between instructions
/// that may be fused by the processor into a single operation.
class MacroFusion : public ScheduleDAGMutation {
  const TargetInstrInfo *TII;
public:
  MacroFusion(const TargetInstrInfo *tii): TII(tii) {}

  void apply(ScheduleDAGMI *DAG) override;
};
} // anonymous

/// \brief Callback from DAG postProcessing to create cluster edges to encourage
/// fused operations.
void MacroFusion::apply(ScheduleDAGMI *DAG) {
  // For now, assume targets can only fuse with the branch.
  MachineInstr *Branch = DAG->ExitSU.getInstr();
  if (!Branch)
    return;

  for (unsigned Idx = DAG->SUnits.size(); Idx > 0;) {
    SUnit *SU = &DAG->SUnits[--Idx];
    if (!TII->shouldScheduleAdjacent(SU->getInstr(), Branch))
      continue;

    // Create a single weak edge from SU to ExitSU. The only effect is to cause
    // bottom-up scheduling to heavily prioritize the clustered SU. There is no
    // need to copy predecessor edges from ExitSU to SU, since top-down
    // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling
    // of SU, we could create an artificial edge from the deepest root, but it
    // hasn't been needed yet.
    bool Success = DAG->addEdge(&DAG->ExitSU, SDep(SU, SDep::Cluster));
    (void)Success;
    assert(Success && "No DAG nodes should be reachable from ExitSU");

    DEBUG(dbgs() << "Macro Fuse SU(" << SU->NodeNum << ")\n");
    break;
  }
}

//===----------------------------------------------------------------------===//
// CopyConstrain - DAG post-processing to encourage copy elimination.
//===----------------------------------------------------------------------===//

namespace {
/// \brief Post-process the DAG to create weak edges from all uses of a copy to
/// the one use that defines the copy's source vreg, most likely an induction
/// variable increment.
class CopyConstrain : public ScheduleDAGMutation {
  // Transient state.
  SlotIndex RegionBeginIdx;
  // RegionEndIdx is the slot index of the last non-debug instruction in the
  // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
  SlotIndex RegionEndIdx;
public:
  CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}

  void apply(ScheduleDAGMI *DAG) override;

protected:
  void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
};
} // anonymous

/// constrainLocalCopy handles two possibilities:
/// 1) Local src:
/// I0:     = dst
/// I1: src = ...
/// I2:     = dst
/// I3: dst = src (copy)
/// (create pred->succ edges I0->I1, I2->I1)
///
/// 2) Local copy:
/// I0: dst = src (copy)
/// I1:     = dst
/// I2: src = ...
/// I3:     = dst
/// (create pred->succ edges I1->I2, I3->I2)
///
/// Although the MachineScheduler is currently constrained to single blocks,
/// this algorithm should handle extended blocks. An EBB is a set of
/// contiguously numbered blocks such that the previous block in the EBB is
/// always the single predecessor.
void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
  LiveIntervals *LIS = DAG->getLIS();
  MachineInstr *Copy = CopySU->getInstr();

  // Check for pure vreg copies.
  unsigned SrcReg = Copy->getOperand(1).getReg();
  if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
    return;

  unsigned DstReg = Copy->getOperand(0).getReg();
  if (!TargetRegisterInfo::isVirtualRegister(DstReg))
    return;

  // Check if either the dest or source is local. If it's live across a back
  // edge, it's not local. Note that if both vregs are live across the back
  // edge, we cannot successfully constrain the copy without cyclic scheduling.
  unsigned LocalReg = DstReg;
  unsigned GlobalReg = SrcReg;
  LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
    LocalReg = SrcReg;
    GlobalReg = DstReg;
    LocalLI = &LIS->getInterval(LocalReg);
    if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
      return;
  }
  LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);

  // Find the global segment after the start of the local LI.
  LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
  // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
  // local live range. We could create edges from other global uses to the local
  // start, but the coalescer should have already eliminated these cases, so
  // don't bother dealing with it.
  if (GlobalSegment == GlobalLI->end())
    return;

  // If GlobalSegment is killed at the LocalLI->start, the call to find()
  // returned the next global segment. But if GlobalSegment overlaps with
  // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
  // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
  if (GlobalSegment->contains(LocalLI->beginIndex()))
    ++GlobalSegment;

  if (GlobalSegment == GlobalLI->end())
    return;

  // Check if GlobalLI contains a hole in the vicinity of LocalLI.
  if (GlobalSegment != GlobalLI->begin()) {
    // Two address defs have no hole.
01469 if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end, 01470 GlobalSegment->start)) { 01471 return; 01472 } 01473 // If the prior global segment may be defined by the same two-address 01474 // instruction that also defines LocalLI, then can't make a hole here. 01475 if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start, 01476 LocalLI->beginIndex())) { 01477 return; 01478 } 01479 // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise 01480 // it would be a disconnected component in the live range. 01481 assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() && 01482 "Disconnected LRG within the scheduling region."); 01483 } 01484 MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start); 01485 if (!GlobalDef) 01486 return; 01487 01488 SUnit *GlobalSU = DAG->getSUnit(GlobalDef); 01489 if (!GlobalSU) 01490 return; 01491 01492 // GlobalDef is the bottom of the GlobalLI hole. Open the hole by 01493 // constraining the uses of the last local def to precede GlobalDef. 01494 SmallVector<SUnit*,8> LocalUses; 01495 const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex()); 01496 MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def); 01497 SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef); 01498 for (SUnit::const_succ_iterator 01499 I = LastLocalSU->Succs.begin(), E = LastLocalSU->Succs.end(); 01500 I != E; ++I) { 01501 if (I->getKind() != SDep::Data || I->getReg() != LocalReg) 01502 continue; 01503 if (I->getSUnit() == GlobalSU) 01504 continue; 01505 if (!DAG->canAddEdge(GlobalSU, I->getSUnit())) 01506 return; 01507 LocalUses.push_back(I->getSUnit()); 01508 } 01509 // Open the top of the GlobalLI hole by constraining any earlier global uses 01510 // to precede the start of LocalLI. 01511 SmallVector<SUnit*,8> GlobalUses; 01512 MachineInstr *FirstLocalDef = 01513 LIS->getInstructionFromIndex(LocalLI->beginIndex()); 01514 SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef); 01515 for (SUnit::const_pred_iterator 01516 I = GlobalSU->Preds.begin(), E = GlobalSU->Preds.end(); I != E; ++I) { 01517 if (I->getKind() != SDep::Anti || I->getReg() != GlobalReg) 01518 continue; 01519 if (I->getSUnit() == FirstLocalSU) 01520 continue; 01521 if (!DAG->canAddEdge(FirstLocalSU, I->getSUnit())) 01522 return; 01523 GlobalUses.push_back(I->getSUnit()); 01524 } 01525 DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n"); 01526 // Add the weak edges. 01527 for (SmallVectorImpl<SUnit*>::const_iterator 01528 I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) { 01529 DEBUG(dbgs() << " Local use SU(" << (*I)->NodeNum << ") -> SU(" 01530 << GlobalSU->NodeNum << ")\n"); 01531 DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak)); 01532 } 01533 for (SmallVectorImpl<SUnit*>::const_iterator 01534 I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) { 01535 DEBUG(dbgs() << " Global use SU(" << (*I)->NodeNum << ") -> SU(" 01536 << FirstLocalSU->NodeNum << ")\n"); 01537 DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak)); 01538 } 01539 } 01540 01541 /// \brief Callback from DAG postProcessing to create weak edges to encourage 01542 /// copy elimination. 
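/// For example (sketch of how this mutation is wired up): the generic
/// scheduler factory registers it on the DAG roughly as
///   DAG->addMutation(make_unique<CopyConstrain>(DAG->TII, DAG->TRI));
/// (see createGenericSchedLive() later in this file). The edges added here are
/// SDep::Weak, so they do not force an ordering; they only bias the
/// GenericScheduler heuristics through the WeakPredsLeft/WeakSuccsLeft counts
/// consulted by getWeakLeft().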
01543 void CopyConstrain::apply(ScheduleDAGMI *DAG) { 01544 assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals"); 01545 01546 MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end()); 01547 if (FirstPos == DAG->end()) 01548 return; 01549 RegionBeginIdx = DAG->getLIS()->getInstructionIndex(&*FirstPos); 01550 RegionEndIdx = DAG->getLIS()->getInstructionIndex( 01551 &*priorNonDebug(DAG->end(), DAG->begin())); 01552 01553 for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) { 01554 SUnit *SU = &DAG->SUnits[Idx]; 01555 if (!SU->getInstr()->isCopy()) 01556 continue; 01557 01558 constrainLocalCopy(SU, static_cast<ScheduleDAGMILive*>(DAG)); 01559 } 01560 } 01561 01562 //===----------------------------------------------------------------------===// 01563 // MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler 01564 // and possibly other custom schedulers. 01565 //===----------------------------------------------------------------------===// 01566 01567 static const unsigned InvalidCycle = ~0U; 01568 01569 SchedBoundary::~SchedBoundary() { delete HazardRec; } 01570 01571 void SchedBoundary::reset() { 01572 // A new HazardRec is created for each DAG and owned by SchedBoundary. 01573 // Destroying and reconstructing it is very expensive though. So keep 01574 // invalid, placeholder HazardRecs. 01575 if (HazardRec && HazardRec->isEnabled()) { 01576 delete HazardRec; 01577 HazardRec = nullptr; 01578 } 01579 Available.clear(); 01580 Pending.clear(); 01581 CheckPending = false; 01582 NextSUs.clear(); 01583 CurrCycle = 0; 01584 CurrMOps = 0; 01585 MinReadyCycle = UINT_MAX; 01586 ExpectedLatency = 0; 01587 DependentLatency = 0; 01588 RetiredMOps = 0; 01589 MaxExecutedResCount = 0; 01590 ZoneCritResIdx = 0; 01591 IsResourceLimited = false; 01592 ReservedCycles.clear(); 01593 #ifndef NDEBUG 01594 // Track the maximum number of stall cycles that could arise either from the 01595 // latency of a DAG edge or the number of cycles that a processor resource is 01596 // reserved (SchedBoundary::ReservedCycles). 01597 MaxObservedStall = 0; 01598 #endif 01599 // Reserve a zero-count for invalid CritResIdx. 
01600 ExecutedResCounts.resize(1);
01601 assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
01602 }
01603 
01604 void SchedRemainder::
01605 init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
01606 reset();
01607 if (!SchedModel->hasInstrSchedModel())
01608 return;
01609 RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
01610 for (std::vector<SUnit>::iterator
01611 I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) {
01612 const MCSchedClassDesc *SC = DAG->getSchedClass(&*I);
01613 RemIssueCount += SchedModel->getNumMicroOps(I->getInstr(), SC)
01614 * SchedModel->getMicroOpFactor();
01615 for (TargetSchedModel::ProcResIter
01616 PI = SchedModel->getWriteProcResBegin(SC),
01617 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
01618 unsigned PIdx = PI->ProcResourceIdx;
01619 unsigned Factor = SchedModel->getResourceFactor(PIdx);
01620 RemainingCounts[PIdx] += (Factor * PI->Cycles);
01621 }
01622 }
01623 }
01624 
01625 void SchedBoundary::
01626 init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
01627 reset();
01628 DAG = dag;
01629 SchedModel = smodel;
01630 Rem = rem;
01631 if (SchedModel->hasInstrSchedModel()) {
01632 ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
01633 ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle);
01634 }
01635 }
01636 
01637 /// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
01638 /// these "soft stalls" differently than the hard stall cycles based on CPU
01639 /// resources and computed by checkHazard(). A fully in-order model
01640 /// (MicroOpBufferSize==0) will not make use of this since instructions are not
01641 /// available for scheduling until they are ready. However, a weaker in-order
01642 /// model may use this for heuristics. For example, if a processor has in-order
01643 /// behavior when reading certain resources, this may come into play.
01644 unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
01645 if (!SU->isUnbuffered)
01646 return 0;
01647 
01648 unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
01649 if (ReadyCycle > CurrCycle)
01650 return ReadyCycle - CurrCycle;
01651 return 0;
01652 }
01653 
01654 /// Compute the next cycle at which the given processor resource can be
01655 /// scheduled.
01656 unsigned SchedBoundary::
01657 getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
01658 unsigned NextUnreserved = ReservedCycles[PIdx];
01659 // If this resource has never been used, always return cycle zero.
01660 if (NextUnreserved == InvalidCycle)
01661 return 0;
01662 // For bottom-up scheduling add the cycles needed for the current operation.
01663 if (!isTop())
01664 NextUnreserved += Cycles;
01665 return NextUnreserved;
01666 }
01667 
01668 /// Does this SU have a hazard within the current instruction group.
01669 ///
01670 /// The scheduler supports two modes of hazard recognition. The first is the
01671 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
01672 /// supports highly complicated in-order reservation tables
01673 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
01674 ///
01675 /// The second is a streamlined mechanism that checks for hazards based on
01676 /// simple counters that the scheduler itself maintains. It explicitly checks
01677 /// for instruction dispatch limitations, including the number of micro-ops that
01678 /// can dispatch per cycle.
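/// As a purely illustrative example of the second mechanism: with an issue
/// width of 4 micro-ops, if CurrMOps is already 3 in the current cycle and the
/// candidate SU decodes to 2 micro-ops, then 3 + 2 > 4, so checkHazard()
/// reports a hazard and the node is kept in the Pending queue until the cycle
/// is bumped and a new instruction group begins.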
01679 ///
01680 /// TODO: Also check whether the SU must start a new group.
01681 bool SchedBoundary::checkHazard(SUnit *SU) {
01682 if (HazardRec->isEnabled()
01683 && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
01684 return true;
01685 }
01686 unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
01687 if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
01688 DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
01689 << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
01690 return true;
01691 }
01692 if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
01693 const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
01694 for (TargetSchedModel::ProcResIter
01695 PI = SchedModel->getWriteProcResBegin(SC),
01696 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
01697 unsigned NRCycle = getNextResourceCycle(PI->ProcResourceIdx, PI->Cycles);
01698 if (NRCycle > CurrCycle) {
01699 #ifndef NDEBUG
01700 MaxObservedStall = std::max(PI->Cycles, MaxObservedStall);
01701 #endif
01702 DEBUG(dbgs() << " SU(" << SU->NodeNum << ") "
01703 << SchedModel->getResourceName(PI->ProcResourceIdx)
01704 << "=" << NRCycle << "c\n");
01705 return true;
01706 }
01707 }
01708 }
01709 return false;
01710 }
01711 
01712 // Find the unscheduled node in ReadySUs with the highest latency.
01713 unsigned SchedBoundary::
01714 findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
01715 SUnit *LateSU = nullptr;
01716 unsigned RemLatency = 0;
01717 for (ArrayRef<SUnit*>::iterator I = ReadySUs.begin(), E = ReadySUs.end();
01718 I != E; ++I) {
01719 unsigned L = getUnscheduledLatency(*I);
01720 if (L > RemLatency) {
01721 RemLatency = L;
01722 LateSU = *I;
01723 }
01724 }
01725 if (LateSU) {
01726 DEBUG(dbgs() << Available.getName() << " RemLatency SU("
01727 << LateSU->NodeNum << ") " << RemLatency << "c\n");
01728 }
01729 return RemLatency;
01730 }
01731 
01732 // Count resources in this zone and the remaining unscheduled
01733 // instructions. Return the max count, scaled. Set OtherCritIdx to the critical
01734 // resource index, or zero if the zone is issue limited.
01735 unsigned SchedBoundary::
01736 getOtherResourceCount(unsigned &OtherCritIdx) {
01737 OtherCritIdx = 0;
01738 if (!SchedModel->hasInstrSchedModel())
01739 return 0;
01740 
01741 unsigned OtherCritCount = Rem->RemIssueCount
01742 + (RetiredMOps * SchedModel->getMicroOpFactor());
01743 DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: "
01744 << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
01745 for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
01746 PIdx != PEnd; ++PIdx) {
01747 unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
01748 if (OtherCount > OtherCritCount) {
01749 OtherCritCount = OtherCount;
01750 OtherCritIdx = PIdx;
01751 }
01752 }
01753 if (OtherCritIdx) {
01754 DEBUG(dbgs() << " " << Available.getName() << " + Remain CritRes: "
01755 << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
01756 << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
01757 }
01758 return OtherCritCount;
01759 }
01760 
01761 void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
01762 assert(SU->getInstr() && "Scheduled SUnit must have instr");
01763 
01764 #ifndef NDEBUG
01765 // ReadyCycle has been bumped up to the CurrCycle when this node was
01766 // scheduled, but CurrCycle may have been eagerly advanced immediately after
01767 // scheduling, so may now be greater than ReadyCycle.
01768 if (ReadyCycle > CurrCycle) 01769 MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall); 01770 #endif 01771 01772 if (ReadyCycle < MinReadyCycle) 01773 MinReadyCycle = ReadyCycle; 01774 01775 // Check for interlocks first. For the purpose of other heuristics, an 01776 // instruction that cannot issue appears as if it's not in the ReadyQueue. 01777 bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0; 01778 if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU)) 01779 Pending.push(SU); 01780 else 01781 Available.push(SU); 01782 01783 // Record this node as an immediate dependent of the scheduled node. 01784 NextSUs.insert(SU); 01785 } 01786 01787 void SchedBoundary::releaseTopNode(SUnit *SU) { 01788 if (SU->isScheduled) 01789 return; 01790 01791 releaseNode(SU, SU->TopReadyCycle); 01792 } 01793 01794 void SchedBoundary::releaseBottomNode(SUnit *SU) { 01795 if (SU->isScheduled) 01796 return; 01797 01798 releaseNode(SU, SU->BotReadyCycle); 01799 } 01800 01801 /// Move the boundary of scheduled code by one cycle. 01802 void SchedBoundary::bumpCycle(unsigned NextCycle) { 01803 if (SchedModel->getMicroOpBufferSize() == 0) { 01804 assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized"); 01805 if (MinReadyCycle > NextCycle) 01806 NextCycle = MinReadyCycle; 01807 } 01808 // Update the current micro-ops, which will issue in the next cycle. 01809 unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle); 01810 CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps; 01811 01812 // Decrement DependentLatency based on the next cycle. 01813 if ((NextCycle - CurrCycle) > DependentLatency) 01814 DependentLatency = 0; 01815 else 01816 DependentLatency -= (NextCycle - CurrCycle); 01817 01818 if (!HazardRec->isEnabled()) { 01819 // Bypass HazardRec virtual calls. 01820 CurrCycle = NextCycle; 01821 } 01822 else { 01823 // Bypass getHazardType calls in case of long latency. 01824 for (; CurrCycle != NextCycle; ++CurrCycle) { 01825 if (isTop()) 01826 HazardRec->AdvanceCycle(); 01827 else 01828 HazardRec->RecedeCycle(); 01829 } 01830 } 01831 CheckPending = true; 01832 unsigned LFactor = SchedModel->getLatencyFactor(); 01833 IsResourceLimited = 01834 (int)(getCriticalCount() - (getScheduledLatency() * LFactor)) 01835 > (int)LFactor; 01836 01837 DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n'); 01838 } 01839 01840 void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) { 01841 ExecutedResCounts[PIdx] += Count; 01842 if (ExecutedResCounts[PIdx] > MaxExecutedResCount) 01843 MaxExecutedResCount = ExecutedResCounts[PIdx]; 01844 } 01845 01846 /// Add the given processor resource to this scheduled zone. 01847 /// 01848 /// \param Cycles indicates the number of consecutive (non-pipelined) cycles 01849 /// during which this resource is consumed. 01850 /// 01851 /// \return the next cycle at which the instruction may execute without 01852 /// oversubscribing resources. 01853 unsigned SchedBoundary:: 01854 countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) { 01855 unsigned Factor = SchedModel->getResourceFactor(PIdx); 01856 unsigned Count = Factor * Cycles; 01857 DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) 01858 << " +" << Cycles << "x" << Factor << "u\n"); 01859 01860 // Update Executed resources counts. 
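// Illustrative example of the scaling above (made-up numbers): if this
// resource kind has a resource factor of 2 and the instruction occupies it for
// 3 cycles, Count = 2 * 3 = 6 scaled units are charged below against both the
// zone (ExecutedResCounts) and the remaining work (Rem->RemainingCounts).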
01861 incExecutedResources(PIdx, Count); 01862 assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted"); 01863 Rem->RemainingCounts[PIdx] -= Count; 01864 01865 // Check if this resource exceeds the current critical resource. If so, it 01866 // becomes the critical resource. 01867 if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) { 01868 ZoneCritResIdx = PIdx; 01869 DEBUG(dbgs() << " *** Critical resource " 01870 << SchedModel->getResourceName(PIdx) << ": " 01871 << getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n"); 01872 } 01873 // For reserved resources, record the highest cycle using the resource. 01874 unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles); 01875 if (NextAvailable > CurrCycle) { 01876 DEBUG(dbgs() << " Resource conflict: " 01877 << SchedModel->getProcResource(PIdx)->Name << " reserved until @" 01878 << NextAvailable << "\n"); 01879 } 01880 return NextAvailable; 01881 } 01882 01883 /// Move the boundary of scheduled code by one SUnit. 01884 void SchedBoundary::bumpNode(SUnit *SU) { 01885 // Update the reservation table. 01886 if (HazardRec->isEnabled()) { 01887 if (!isTop() && SU->isCall) { 01888 // Calls are scheduled with their preceding instructions. For bottom-up 01889 // scheduling, clear the pipeline state before emitting. 01890 HazardRec->Reset(); 01891 } 01892 HazardRec->EmitInstruction(SU); 01893 } 01894 // checkHazard should prevent scheduling multiple instructions per cycle that 01895 // exceed the issue width. 01896 const MCSchedClassDesc *SC = DAG->getSchedClass(SU); 01897 unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr()); 01898 assert( 01899 (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) && 01900 "Cannot schedule this instruction's MicroOps in the current cycle."); 01901 01902 unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle); 01903 DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n"); 01904 01905 unsigned NextCycle = CurrCycle; 01906 switch (SchedModel->getMicroOpBufferSize()) { 01907 case 0: 01908 assert(ReadyCycle <= CurrCycle && "Broken PendingQueue"); 01909 break; 01910 case 1: 01911 if (ReadyCycle > NextCycle) { 01912 NextCycle = ReadyCycle; 01913 DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n"); 01914 } 01915 break; 01916 default: 01917 // We don't currently model the OOO reorder buffer, so consider all 01918 // scheduled MOps to be "retired". We do loosely model in-order resource 01919 // latency. If this instruction uses an in-order resource, account for any 01920 // likely stall cycles. 01921 if (SU->isUnbuffered && ReadyCycle > NextCycle) 01922 NextCycle = ReadyCycle; 01923 break; 01924 } 01925 RetiredMOps += IncMOps; 01926 01927 // Update resource counts and critical resource. 01928 if (SchedModel->hasInstrSchedModel()) { 01929 unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor(); 01930 assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted"); 01931 Rem->RemIssueCount -= DecRemIssue; 01932 if (ZoneCritResIdx) { 01933 // Scale scheduled micro-ops for comparing with the critical resource. 01934 unsigned ScaledMOps = 01935 RetiredMOps * SchedModel->getMicroOpFactor(); 01936 01937 // If scaled micro-ops are now more than the previous critical resource by 01938 // a full cycle, then micro-ops issue becomes critical. 
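// Illustrative numbers for the check below: with a latency factor of 2, if the
// critical resource currently has 10 scaled units and ScaledMOps reaches 12,
// the difference (2) amounts to a full cycle, so ZoneCritResIdx is reset to 0
// and micro-op issue itself is treated as the critical "resource".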
01939 if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
01940 >= (int)SchedModel->getLatencyFactor()) {
01941 ZoneCritResIdx = 0;
01942 DEBUG(dbgs() << " *** Critical resource NumMicroOps: "
01943 << ScaledMOps / SchedModel->getLatencyFactor() << "c\n");
01944 }
01945 }
01946 for (TargetSchedModel::ProcResIter
01947 PI = SchedModel->getWriteProcResBegin(SC),
01948 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
01949 unsigned RCycle =
01950 countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle);
01951 if (RCycle > NextCycle)
01952 NextCycle = RCycle;
01953 }
01954 if (SU->hasReservedResource) {
01955 // For reserved resources, record the highest cycle using the resource.
01956 // For top-down scheduling, this is the cycle in which we schedule this
01957 // instruction plus the number of cycles the operation reserves the
01958 // resource. For bottom-up, it is simply the instruction's cycle.
01959 for (TargetSchedModel::ProcResIter
01960 PI = SchedModel->getWriteProcResBegin(SC),
01961 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
01962 unsigned PIdx = PI->ProcResourceIdx;
01963 if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
01964 if (isTop()) {
01965 ReservedCycles[PIdx] =
01966 std::max(getNextResourceCycle(PIdx, 0), NextCycle + PI->Cycles);
01967 }
01968 else
01969 ReservedCycles[PIdx] = NextCycle;
01970 }
01971 }
01972 }
01973 }
01974 // Update ExpectedLatency and DependentLatency.
01975 unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
01976 unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
01977 if (SU->getDepth() > TopLatency) {
01978 TopLatency = SU->getDepth();
01979 DEBUG(dbgs() << " " << Available.getName()
01980 << " TopLatency SU(" << SU->NodeNum << ") " << TopLatency << "c\n");
01981 }
01982 if (SU->getHeight() > BotLatency) {
01983 BotLatency = SU->getHeight();
01984 DEBUG(dbgs() << " " << Available.getName()
01985 << " BotLatency SU(" << SU->NodeNum << ") " << BotLatency << "c\n");
01986 }
01987 // If we stall for any reason, bump the cycle.
01988 if (NextCycle > CurrCycle) {
01989 bumpCycle(NextCycle);
01990 }
01991 else {
01992 // After updating ZoneCritResIdx and ExpectedLatency, check if we're
01993 // resource limited. If a stall occurred, bumpCycle does this.
01994 unsigned LFactor = SchedModel->getLatencyFactor();
01995 IsResourceLimited =
01996 (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
01997 > (int)LFactor;
01998 }
01999 // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
02000 // resets CurrMOps. Loop to handle instructions with more MOps than issue in
02001 // one cycle. Since we commonly reach the max MOps here, opportunistically
02002 // bump the cycle to avoid uselessly checking everything in the readyQ.
02003 CurrMOps += IncMOps;
02004 while (CurrMOps >= SchedModel->getIssueWidth()) {
02005 DEBUG(dbgs() << " *** Max MOps " << CurrMOps
02006 << " at cycle " << CurrCycle << '\n');
02007 bumpCycle(++NextCycle);
02008 }
02009 DEBUG(dumpScheduledState());
02010 }
02011 
02012 /// Release pending ready nodes into the available queue. This makes them
02013 /// visible to heuristics.
02014 void SchedBoundary::releasePending() {
02015 // If the available queue is empty, it is safe to reset MinReadyCycle.
02016 if (Available.empty())
02017 MinReadyCycle = UINT_MAX;
02018 
02019 // Check to see if any of the pending instructions are ready to issue. If
02020 // so, add them to the available queue.
02021 bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0; 02022 for (unsigned i = 0, e = Pending.size(); i != e; ++i) { 02023 SUnit *SU = *(Pending.begin()+i); 02024 unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle; 02025 02026 if (ReadyCycle < MinReadyCycle) 02027 MinReadyCycle = ReadyCycle; 02028 02029 if (!IsBuffered && ReadyCycle > CurrCycle) 02030 continue; 02031 02032 if (checkHazard(SU)) 02033 continue; 02034 02035 Available.push(SU); 02036 Pending.remove(Pending.begin()+i); 02037 --i; --e; 02038 } 02039 DEBUG(if (!Pending.empty()) Pending.dump()); 02040 CheckPending = false; 02041 } 02042 02043 /// Remove SU from the ready set for this boundary. 02044 void SchedBoundary::removeReady(SUnit *SU) { 02045 if (Available.isInQueue(SU)) 02046 Available.remove(Available.find(SU)); 02047 else { 02048 assert(Pending.isInQueue(SU) && "bad ready count"); 02049 Pending.remove(Pending.find(SU)); 02050 } 02051 } 02052 02053 /// If this queue only has one ready candidate, return it. As a side effect, 02054 /// defer any nodes that now hit a hazard, and advance the cycle until at least 02055 /// one node is ready. If multiple instructions are ready, return NULL. 02056 SUnit *SchedBoundary::pickOnlyChoice() { 02057 if (CheckPending) 02058 releasePending(); 02059 02060 if (CurrMOps > 0) { 02061 // Defer any ready instrs that now have a hazard. 02062 for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) { 02063 if (checkHazard(*I)) { 02064 Pending.push(*I); 02065 I = Available.remove(I); 02066 continue; 02067 } 02068 ++I; 02069 } 02070 } 02071 for (unsigned i = 0; Available.empty(); ++i) { 02072 // FIXME: Re-enable assert once PR20057 is resolved. 02073 // assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) && 02074 // "permanent hazard"); 02075 (void)i; 02076 bumpCycle(CurrCycle + 1); 02077 releasePending(); 02078 } 02079 if (Available.size() == 1) 02080 return *Available.begin(); 02081 return nullptr; 02082 } 02083 02084 #ifndef NDEBUG 02085 // This is useful information to dump after bumpNode. 02086 // Note that the Queue contents are more useful before pickNodeFromQueue. 02087 void SchedBoundary::dumpScheduledState() { 02088 unsigned ResFactor; 02089 unsigned ResCount; 02090 if (ZoneCritResIdx) { 02091 ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx); 02092 ResCount = getResourceCount(ZoneCritResIdx); 02093 } 02094 else { 02095 ResFactor = SchedModel->getMicroOpFactor(); 02096 ResCount = RetiredMOps * SchedModel->getMicroOpFactor(); 02097 } 02098 unsigned LFactor = SchedModel->getLatencyFactor(); 02099 dbgs() << Available.getName() << " @" << CurrCycle << "c\n" 02100 << " Retired: " << RetiredMOps; 02101 dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c"; 02102 dbgs() << "\n Critical: " << ResCount / LFactor << "c, " 02103 << ResCount / ResFactor << " " 02104 << SchedModel->getResourceName(ZoneCritResIdx) 02105 << "\n ExpectedLatency: " << ExpectedLatency << "c\n" 02106 << (IsResourceLimited ? " - Resource" : " - Latency") 02107 << " limited.\n"; 02108 } 02109 #endif 02110 02111 //===----------------------------------------------------------------------===// 02112 // GenericScheduler - Generic implementation of MachineSchedStrategy. 
02113 //===----------------------------------------------------------------------===//
02114 
02115 void GenericSchedulerBase::SchedCandidate::
02116 initResourceDelta(const ScheduleDAGMI *DAG,
02117 const TargetSchedModel *SchedModel) {
02118 if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
02119 return;
02120 
02121 const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
02122 for (TargetSchedModel::ProcResIter
02123 PI = SchedModel->getWriteProcResBegin(SC),
02124 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
02125 if (PI->ProcResourceIdx == Policy.ReduceResIdx)
02126 ResDelta.CritResources += PI->Cycles;
02127 if (PI->ProcResourceIdx == Policy.DemandResIdx)
02128 ResDelta.DemandedResources += PI->Cycles;
02129 }
02130 }
02131 
02132 /// Set the CandPolicy for a scheduling zone given the current resources and
02133 /// latencies inside and outside the zone.
02134 void GenericSchedulerBase::setPolicy(CandPolicy &Policy,
02135 bool IsPostRA,
02136 SchedBoundary &CurrZone,
02137 SchedBoundary *OtherZone) {
02138 // Apply preemptive heuristics based on the total latency and resources
02139 // inside and outside this zone. Potential stalls should be considered before
02140 // following this policy.
02141 
02142 // Compute remaining latency. We need this both to determine whether the
02143 // overall schedule has become latency-limited and whether the instructions
02144 // outside this zone are resource or latency limited.
02145 //
02146 // The "dependent" latency is updated incrementally during scheduling as the
02147 // max height/depth of scheduled nodes minus the cycles since it was
02148 // scheduled:
02149 // DLat = max (N.depth - (CurrCycle - N.ReadyCycle)) for N in Zone
02150 //
02151 // The "independent" latency is the max ready queue depth:
02152 // ILat = max N.depth for N in Available|Pending
02153 //
02154 // RemainingLatency is the greater of independent and dependent latency.
02155 unsigned RemLatency = CurrZone.getDependentLatency();
02156 RemLatency = std::max(RemLatency,
02157 CurrZone.findMaxLatency(CurrZone.Available.elements()));
02158 RemLatency = std::max(RemLatency,
02159 CurrZone.findMaxLatency(CurrZone.Pending.elements()));
02160 
02161 // Compute the critical resource outside the zone.
02162 unsigned OtherCritIdx = 0;
02163 unsigned OtherCount =
02164 OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
02165 
02166 bool OtherResLimited = false;
02167 if (SchedModel->hasInstrSchedModel()) {
02168 unsigned LFactor = SchedModel->getLatencyFactor();
02169 OtherResLimited = (int)(OtherCount - (RemLatency * LFactor)) > (int)LFactor;
02170 }
02171 // Schedule aggressively for latency in PostRA mode. We don't check for
02172 // acyclic latency during PostRA, and highly out-of-order processors will
02173 // skip PostRA scheduling.
02174 if (!OtherResLimited) {
02175 if (IsPostRA || (RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath)) {
02176 Policy.ReduceLatency |= true;
02177 DEBUG(dbgs() << " " << CurrZone.Available.getName()
02178 << " RemainingLatency " << RemLatency << " + "
02179 << CurrZone.getCurrCycle() << "c > CritPath "
02180 << Rem.CriticalPath << "\n");
02181 }
02182 }
02183 // If the same resource is limiting inside and outside the zone, do nothing.
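// (Illustrative, with hypothetical resource names: if this zone were
// critically limited by an "ALU" unit while the remaining unscheduled work
// were limited by a "LdSt" unit, the code below would set Policy.ReduceResIdx
// to the ALU index and Policy.DemandResIdx to the LdSt index, steering
// tryCandidate() away from further ALU consumers and toward instructions that
// feed the other zone's LdSt bottleneck.)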
02184 if (CurrZone.getZoneCritResIdx() == OtherCritIdx) 02185 return; 02186 02187 DEBUG( 02188 if (CurrZone.isResourceLimited()) { 02189 dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: " 02190 << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) 02191 << "\n"; 02192 } 02193 if (OtherResLimited) 02194 dbgs() << " RemainingLimit: " 02195 << SchedModel->getResourceName(OtherCritIdx) << "\n"; 02196 if (!CurrZone.isResourceLimited() && !OtherResLimited) 02197 dbgs() << " Latency limited both directions.\n"); 02198 02199 if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx) 02200 Policy.ReduceResIdx = CurrZone.getZoneCritResIdx(); 02201 02202 if (OtherResLimited) 02203 Policy.DemandResIdx = OtherCritIdx; 02204 } 02205 02206 #ifndef NDEBUG 02207 const char *GenericSchedulerBase::getReasonStr( 02208 GenericSchedulerBase::CandReason Reason) { 02209 switch (Reason) { 02210 case NoCand: return "NOCAND "; 02211 case PhysRegCopy: return "PREG-COPY"; 02212 case RegExcess: return "REG-EXCESS"; 02213 case RegCritical: return "REG-CRIT "; 02214 case Stall: return "STALL "; 02215 case Cluster: return "CLUSTER "; 02216 case Weak: return "WEAK "; 02217 case RegMax: return "REG-MAX "; 02218 case ResourceReduce: return "RES-REDUCE"; 02219 case ResourceDemand: return "RES-DEMAND"; 02220 case TopDepthReduce: return "TOP-DEPTH "; 02221 case TopPathReduce: return "TOP-PATH "; 02222 case BotHeightReduce:return "BOT-HEIGHT"; 02223 case BotPathReduce: return "BOT-PATH "; 02224 case NextDefUse: return "DEF-USE "; 02225 case NodeOrder: return "ORDER "; 02226 }; 02227 llvm_unreachable("Unknown reason!"); 02228 } 02229 02230 void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) { 02231 PressureChange P; 02232 unsigned ResIdx = 0; 02233 unsigned Latency = 0; 02234 switch (Cand.Reason) { 02235 default: 02236 break; 02237 case RegExcess: 02238 P = Cand.RPDelta.Excess; 02239 break; 02240 case RegCritical: 02241 P = Cand.RPDelta.CriticalMax; 02242 break; 02243 case RegMax: 02244 P = Cand.RPDelta.CurrentMax; 02245 break; 02246 case ResourceReduce: 02247 ResIdx = Cand.Policy.ReduceResIdx; 02248 break; 02249 case ResourceDemand: 02250 ResIdx = Cand.Policy.DemandResIdx; 02251 break; 02252 case TopDepthReduce: 02253 Latency = Cand.SU->getDepth(); 02254 break; 02255 case TopPathReduce: 02256 Latency = Cand.SU->getHeight(); 02257 break; 02258 case BotHeightReduce: 02259 Latency = Cand.SU->getHeight(); 02260 break; 02261 case BotPathReduce: 02262 Latency = Cand.SU->getDepth(); 02263 break; 02264 } 02265 dbgs() << " SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason); 02266 if (P.isValid()) 02267 dbgs() << " " << TRI->getRegPressureSetName(P.getPSet()) 02268 << ":" << P.getUnitInc() << " "; 02269 else 02270 dbgs() << " "; 02271 if (ResIdx) 02272 dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " "; 02273 else 02274 dbgs() << " "; 02275 if (Latency) 02276 dbgs() << " " << Latency << " cycles "; 02277 else 02278 dbgs() << " "; 02279 dbgs() << '\n'; 02280 } 02281 #endif 02282 02283 /// Return true if this heuristic determines order. 
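/// For example, in a call such as
///   tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(), TryCand, Cand, TopDepthReduce)
/// a strictly smaller depth makes TryCand the winner (its Reason is set and
/// true is returned); a strictly larger depth keeps Cand as the best candidate,
/// possibly lowering its recorded Reason, and still returns true; a tie records
/// a repeat and returns false so the next heuristic in tryCandidate() decides.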
02284 static bool tryLess(int TryVal, int CandVal, 02285 GenericSchedulerBase::SchedCandidate &TryCand, 02286 GenericSchedulerBase::SchedCandidate &Cand, 02287 GenericSchedulerBase::CandReason Reason) { 02288 if (TryVal < CandVal) { 02289 TryCand.Reason = Reason; 02290 return true; 02291 } 02292 if (TryVal > CandVal) { 02293 if (Cand.Reason > Reason) 02294 Cand.Reason = Reason; 02295 return true; 02296 } 02297 Cand.setRepeat(Reason); 02298 return false; 02299 } 02300 02301 static bool tryGreater(int TryVal, int CandVal, 02302 GenericSchedulerBase::SchedCandidate &TryCand, 02303 GenericSchedulerBase::SchedCandidate &Cand, 02304 GenericSchedulerBase::CandReason Reason) { 02305 if (TryVal > CandVal) { 02306 TryCand.Reason = Reason; 02307 return true; 02308 } 02309 if (TryVal < CandVal) { 02310 if (Cand.Reason > Reason) 02311 Cand.Reason = Reason; 02312 return true; 02313 } 02314 Cand.setRepeat(Reason); 02315 return false; 02316 } 02317 02318 static bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, 02319 GenericSchedulerBase::SchedCandidate &Cand, 02320 SchedBoundary &Zone) { 02321 if (Zone.isTop()) { 02322 if (Cand.SU->getDepth() > Zone.getScheduledLatency()) { 02323 if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(), 02324 TryCand, Cand, GenericSchedulerBase::TopDepthReduce)) 02325 return true; 02326 } 02327 if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(), 02328 TryCand, Cand, GenericSchedulerBase::TopPathReduce)) 02329 return true; 02330 } 02331 else { 02332 if (Cand.SU->getHeight() > Zone.getScheduledLatency()) { 02333 if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(), 02334 TryCand, Cand, GenericSchedulerBase::BotHeightReduce)) 02335 return true; 02336 } 02337 if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(), 02338 TryCand, Cand, GenericSchedulerBase::BotPathReduce)) 02339 return true; 02340 } 02341 return false; 02342 } 02343 02344 static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand, 02345 bool IsTop) { 02346 DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ") 02347 << GenericSchedulerBase::getReasonStr(Cand.Reason) << '\n'); 02348 } 02349 02350 void GenericScheduler::initialize(ScheduleDAGMI *dag) { 02351 assert(dag->hasVRegLiveness() && 02352 "(PreRA)GenericScheduler needs vreg liveness"); 02353 DAG = static_cast<ScheduleDAGMILive*>(dag); 02354 SchedModel = DAG->getSchedModel(); 02355 TRI = DAG->TRI; 02356 02357 Rem.init(DAG, SchedModel); 02358 Top.init(DAG, SchedModel, &Rem); 02359 Bot.init(DAG, SchedModel, &Rem); 02360 02361 // Initialize resource counts. 02362 02363 // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or 02364 // are disabled, then these HazardRecs will be disabled. 02365 const InstrItineraryData *Itin = SchedModel->getInstrItineraries(); 02366 const TargetMachine &TM = DAG->MF.getTarget(); 02367 if (!Top.HazardRec) { 02368 Top.HazardRec = 02369 TM.getSubtargetImpl()->getInstrInfo()->CreateTargetMIHazardRecognizer( 02370 Itin, DAG); 02371 } 02372 if (!Bot.HazardRec) { 02373 Bot.HazardRec = 02374 TM.getSubtargetImpl()->getInstrInfo()->CreateTargetMIHazardRecognizer( 02375 Itin, DAG); 02376 } 02377 } 02378 02379 /// Initialize the per-region scheduling policy. 
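/// For example (sketch), a subtarget's overrideSchedPolicy() hook, invoked
/// below, may flip these defaults: a hypothetical strictly in-order target
/// could set Policy.OnlyTopDown = true and Policy.OnlyBottomUp = false, or
/// clear Policy.ShouldTrackPressure. The -misched-topdown/-misched-bottomup
/// command line options are then applied on top of any such override.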
02380 void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
02381 MachineBasicBlock::iterator End,
02382 unsigned NumRegionInstrs) {
02383 const TargetMachine &TM = Context->MF->getTarget();
02384 const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
02385 
02386 // Avoid setting up the register pressure tracker for small regions to save
02387 // compile time. As a rough heuristic, only track pressure when the number of
02388 // schedulable instructions exceeds half the integer register file.
02389 RegionPolicy.ShouldTrackPressure = true;
02390 for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) {
02391 MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
02392 if (TLI->isTypeLegal(LegalIntVT)) {
02393 unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
02394 TLI->getRegClassFor(LegalIntVT));
02395 RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
02396 }
02397 }
02398 
02399 // For generic targets, we default to bottom-up, because it's simpler and more
02400 // of the compile-time optimizations have been implemented in that direction.
02401 RegionPolicy.OnlyBottomUp = true;
02402 
02403 // Allow the subtarget to override default policy.
02404 const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
02405 ST.overrideSchedPolicy(RegionPolicy, Begin, End, NumRegionInstrs);
02406 
02407 // After subtarget overrides, apply command line options.
02408 if (!EnableRegPressure)
02409 RegionPolicy.ShouldTrackPressure = false;
02410 
02411 // Check whether -misched-topdown/bottomup can force or unforce scheduling direction.
02412 // e.g. -misched-bottomup=false allows scheduling in both directions.
02413 assert((!ForceTopDown || !ForceBottomUp) &&
02414 "-misched-topdown incompatible with -misched-bottomup");
02415 if (ForceBottomUp.getNumOccurrences() > 0) {
02416 RegionPolicy.OnlyBottomUp = ForceBottomUp;
02417 if (RegionPolicy.OnlyBottomUp)
02418 RegionPolicy.OnlyTopDown = false;
02419 }
02420 if (ForceTopDown.getNumOccurrences() > 0) {
02421 RegionPolicy.OnlyTopDown = ForceTopDown;
02422 if (RegionPolicy.OnlyTopDown)
02423 RegionPolicy.OnlyBottomUp = false;
02424 }
02425 }
02426 
02427 /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
02428 /// critical path by more cycles than it takes to drain the instruction buffer.
02429 /// We estimate an upper bound on in-flight instructions as:
02430 ///
02431 /// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
02432 /// InFlightIterations = AcyclicPath / CyclesPerIteration
02433 /// InFlightResources = InFlightIterations * LoopResources
02434 ///
02435 /// TODO: Check execution resources in addition to IssueCount.
02436 void GenericScheduler::checkAcyclicLatency() {
02437 if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
02438 return;
02439 
02440 // Scaled number of cycles per loop iteration.
02441 unsigned IterCount =
02442 std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
02443 Rem.RemIssueCount);
02444 // Scaled acyclic critical path.
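// Illustrative walk-through of this estimate (made-up numbers, with all model
// factors taken as 1): CyclicCritPath = 4, CriticalPath = 20 and
// RemIssueCount = 30 give IterCount = max(4, 30) = 30 and AcyclicCount = 20,
// so InFlightCount = (20 * 30 + 29) / 30 = 20; with a micro-op buffer of 16
// that exceeds BufferLimit, and the region is flagged acyclic-latency limited.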
02445 unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor(); 02446 // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop 02447 unsigned InFlightCount = 02448 (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount; 02449 unsigned BufferLimit = 02450 SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor(); 02451 02452 Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit; 02453 02454 DEBUG(dbgs() << "IssueCycles=" 02455 << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c " 02456 << "IterCycles=" << IterCount / SchedModel->getLatencyFactor() 02457 << "c NumIters=" << (AcyclicCount + IterCount-1) / IterCount 02458 << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor() 02459 << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n"; 02460 if (Rem.IsAcyclicLatencyLimited) 02461 dbgs() << " ACYCLIC LATENCY LIMIT\n"); 02462 } 02463 02464 void GenericScheduler::registerRoots() { 02465 Rem.CriticalPath = DAG->ExitSU.getDepth(); 02466 02467 // Some roots may not feed into ExitSU. Check all of them in case. 02468 for (std::vector<SUnit*>::const_iterator 02469 I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) { 02470 if ((*I)->getDepth() > Rem.CriticalPath) 02471 Rem.CriticalPath = (*I)->getDepth(); 02472 } 02473 DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n'); 02474 if (DumpCriticalPathLength) { 02475 errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n"; 02476 } 02477 02478 if (EnableCyclicPath) { 02479 Rem.CyclicCritPath = DAG->computeCyclicCriticalPath(); 02480 checkAcyclicLatency(); 02481 } 02482 } 02483 02484 static bool tryPressure(const PressureChange &TryP, 02485 const PressureChange &CandP, 02486 GenericSchedulerBase::SchedCandidate &TryCand, 02487 GenericSchedulerBase::SchedCandidate &Cand, 02488 GenericSchedulerBase::CandReason Reason) { 02489 int TryRank = TryP.getPSetOrMax(); 02490 int CandRank = CandP.getPSetOrMax(); 02491 // If both candidates affect the same set, go with the smallest increase. 02492 if (TryRank == CandRank) { 02493 return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand, 02494 Reason); 02495 } 02496 // If one candidate decreases and the other increases, go with it. 02497 // Invalid candidates have UnitInc==0. 02498 if (tryLess(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand, 02499 Reason)) { 02500 return true; 02501 } 02502 // If the candidates are decreasing pressure, reverse priority. 02503 if (TryP.getUnitInc() < 0) 02504 std::swap(TryRank, CandRank); 02505 return tryGreater(TryRank, CandRank, TryCand, Cand, Reason); 02506 } 02507 02508 static unsigned getWeakLeft(const SUnit *SU, bool isTop) { 02509 return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft; 02510 } 02511 02512 /// Minimize physical register live ranges. Regalloc wants them adjacent to 02513 /// their physreg def/use. 02514 /// 02515 /// FIXME: This is an unnecessary check on the critical path. Most are root/leaf 02516 /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled 02517 /// with the operation that produces or consumes the physreg. We'll do this when 02518 /// regalloc has support for parallel copies. 02519 static int biasPhysRegCopy(const SUnit *SU, bool isTop) { 02520 const MachineInstr *MI = SU->getInstr(); 02521 if (!MI->isCopy()) 02522 return 0; 02523 02524 unsigned ScheduledOper = isTop ? 1 : 0; 02525 unsigned UnscheduledOper = isTop ? 
0 : 1;
02526 // If we have already scheduled the physreg producer/consumer, immediately
02527 // schedule the copy.
02528 if (TargetRegisterInfo::isPhysicalRegister(
02529 MI->getOperand(ScheduledOper).getReg()))
02530 return 1;
02531 // If the physreg is at the boundary, defer it. Otherwise schedule it
02532 // immediately to free the dependent. We can hoist the copy later.
02533 bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
02534 if (TargetRegisterInfo::isPhysicalRegister(
02535 MI->getOperand(UnscheduledOper).getReg()))
02536 return AtBoundary ? -1 : 1;
02537 return 0;
02538 }
02539 
02540 /// Apply a set of heuristics to a new candidate. Heuristics are currently
02541 /// hierarchical. This may be more efficient than a graduated cost model because
02542 /// we don't need to evaluate all aspects of the model for each node in the
02543 /// queue. But it's really done to make the heuristics easier to debug and
02544 /// statistically analyze.
02545 ///
02546 /// \param Cand provides the policy and current best candidate.
02547 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
02548 /// \param Zone describes the scheduled zone that we are extending.
02549 /// \param RPTracker describes reg pressure within the scheduled zone.
02550 /// \param TempTracker is a scratch pressure tracker to reuse in queries.
02551 void GenericScheduler::tryCandidate(SchedCandidate &Cand,
02552 SchedCandidate &TryCand,
02553 SchedBoundary &Zone,
02554 const RegPressureTracker &RPTracker,
02555 RegPressureTracker &TempTracker) {
02556 
02557 if (DAG->isTrackingPressure()) {
02558 // Always initialize TryCand's RPDelta.
02559 if (Zone.isTop()) {
02560 TempTracker.getMaxDownwardPressureDelta(
02561 TryCand.SU->getInstr(),
02562 TryCand.RPDelta,
02563 DAG->getRegionCriticalPSets(),
02564 DAG->getRegPressure().MaxSetPressure);
02565 }
02566 else {
02567 if (VerifyScheduling) {
02568 TempTracker.getMaxUpwardPressureDelta(
02569 TryCand.SU->getInstr(),
02570 &DAG->getPressureDiff(TryCand.SU),
02571 TryCand.RPDelta,
02572 DAG->getRegionCriticalPSets(),
02573 DAG->getRegPressure().MaxSetPressure);
02574 }
02575 else {
02576 RPTracker.getUpwardPressureDelta(
02577 TryCand.SU->getInstr(),
02578 DAG->getPressureDiff(TryCand.SU),
02579 TryCand.RPDelta,
02580 DAG->getRegionCriticalPSets(),
02581 DAG->getRegPressure().MaxSetPressure);
02582 }
02583 }
02584 }
02585 DEBUG(if (TryCand.RPDelta.Excess.isValid())
02586 dbgs() << " SU(" << TryCand.SU->NodeNum << ") "
02587 << TRI->getRegPressureSetName(TryCand.RPDelta.Excess.getPSet())
02588 << ":" << TryCand.RPDelta.Excess.getUnitInc() << "\n");
02589 
02590 // Initialize the candidate if needed.
02591 if (!Cand.isValid()) {
02592 TryCand.Reason = NodeOrder;
02593 return;
02594 }
02595 
02596 if (tryGreater(biasPhysRegCopy(TryCand.SU, Zone.isTop()),
02597 biasPhysRegCopy(Cand.SU, Zone.isTop()),
02598 TryCand, Cand, PhysRegCopy))
02599 return;
02600 
02601 // Avoid exceeding the target's limit. If signed PSetID is negative, it is
02602 // invalid; convert it to INT_MAX to give it lowest priority.
02603 if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
02604 Cand.RPDelta.Excess,
02605 TryCand, Cand, RegExcess))
02606 return;
02607 
02608 // Avoid increasing the max critical pressure in the scheduled region.
02609 if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax, 02610 Cand.RPDelta.CriticalMax, 02611 TryCand, Cand, RegCritical)) 02612 return; 02613 02614 // For loops that are acyclic path limited, aggressively schedule for latency. 02615 // This can result in very long dependence chains scheduled in sequence, so 02616 // once every cycle (when CurrMOps == 0), switch to normal heuristics. 02617 if (Rem.IsAcyclicLatencyLimited && !Zone.getCurrMOps() 02618 && tryLatency(TryCand, Cand, Zone)) 02619 return; 02620 02621 // Prioritize instructions that read unbuffered resources by stall cycles. 02622 if (tryLess(Zone.getLatencyStallCycles(TryCand.SU), 02623 Zone.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall)) 02624 return; 02625 02626 // Keep clustered nodes together to encourage downstream peephole 02627 // optimizations which may reduce resource requirements. 02628 // 02629 // This is a best effort to set things up for a post-RA pass. Optimizations 02630 // like generating loads of multiple registers should ideally be done within 02631 // the scheduler pass by combining the loads during DAG postprocessing. 02632 const SUnit *NextClusterSU = 02633 Zone.isTop() ? DAG->getNextClusterSucc() : DAG->getNextClusterPred(); 02634 if (tryGreater(TryCand.SU == NextClusterSU, Cand.SU == NextClusterSU, 02635 TryCand, Cand, Cluster)) 02636 return; 02637 02638 // Weak edges are for clustering and other constraints. 02639 if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()), 02640 getWeakLeft(Cand.SU, Zone.isTop()), 02641 TryCand, Cand, Weak)) { 02642 return; 02643 } 02644 // Avoid increasing the max pressure of the entire region. 02645 if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax, 02646 Cand.RPDelta.CurrentMax, 02647 TryCand, Cand, RegMax)) 02648 return; 02649 02650 // Avoid critical resource consumption and balance the schedule. 02651 TryCand.initResourceDelta(DAG, SchedModel); 02652 if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, 02653 TryCand, Cand, ResourceReduce)) 02654 return; 02655 if (tryGreater(TryCand.ResDelta.DemandedResources, 02656 Cand.ResDelta.DemandedResources, 02657 TryCand, Cand, ResourceDemand)) 02658 return; 02659 02660 // Avoid serializing long latency dependence chains. 02661 // For acyclic path limited loops, latency was already checked above. 02662 if (Cand.Policy.ReduceLatency && !Rem.IsAcyclicLatencyLimited 02663 && tryLatency(TryCand, Cand, Zone)) { 02664 return; 02665 } 02666 02667 // Prefer immediate defs/users of the last scheduled instruction. This is a 02668 // local pressure avoidance strategy that also makes the machine code 02669 // readable. 02670 if (tryGreater(Zone.isNextSU(TryCand.SU), Zone.isNextSU(Cand.SU), 02671 TryCand, Cand, NextDefUse)) 02672 return; 02673 02674 // Fall through to original instruction order. 02675 if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) 02676 || (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) { 02677 TryCand.Reason = NodeOrder; 02678 } 02679 } 02680 02681 /// Pick the best candidate from the queue. 02682 /// 02683 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during 02684 /// DAG building. To adjust for the current scheduling location we need to 02685 /// maintain the number of vreg uses remaining to be top-scheduled. 
02686 void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone, 02687 const RegPressureTracker &RPTracker, 02688 SchedCandidate &Cand) { 02689 ReadyQueue &Q = Zone.Available; 02690 02691 DEBUG(Q.dump()); 02692 02693 // getMaxPressureDelta temporarily modifies the tracker. 02694 RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker); 02695 02696 for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) { 02697 02698 SchedCandidate TryCand(Cand.Policy); 02699 TryCand.SU = *I; 02700 tryCandidate(Cand, TryCand, Zone, RPTracker, TempTracker); 02701 if (TryCand.Reason != NoCand) { 02702 // Initialize resource delta if needed in case future heuristics query it. 02703 if (TryCand.ResDelta == SchedResourceDelta()) 02704 TryCand.initResourceDelta(DAG, SchedModel); 02705 Cand.setBest(TryCand); 02706 DEBUG(traceCandidate(Cand)); 02707 } 02708 } 02709 } 02710 02711 /// Pick the best candidate node from either the top or bottom queue. 02712 SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) { 02713 // Schedule as far as possible in the direction of no choice. This is most 02714 // efficient, but also provides the best heuristics for CriticalPSets. 02715 if (SUnit *SU = Bot.pickOnlyChoice()) { 02716 IsTopNode = false; 02717 DEBUG(dbgs() << "Pick Bot NOCAND\n"); 02718 return SU; 02719 } 02720 if (SUnit *SU = Top.pickOnlyChoice()) { 02721 IsTopNode = true; 02722 DEBUG(dbgs() << "Pick Top NOCAND\n"); 02723 return SU; 02724 } 02725 CandPolicy NoPolicy; 02726 SchedCandidate BotCand(NoPolicy); 02727 SchedCandidate TopCand(NoPolicy); 02728 // Set the bottom-up policy based on the state of the current bottom zone and 02729 // the instructions outside the zone, including the top zone. 02730 setPolicy(BotCand.Policy, /*IsPostRA=*/false, Bot, &Top); 02731 // Set the top-down policy based on the state of the current top zone and 02732 // the instructions outside the zone, including the bottom zone. 02733 setPolicy(TopCand.Policy, /*IsPostRA=*/false, Top, &Bot); 02734 02735 // Prefer bottom scheduling when heuristics are silent. 02736 pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand); 02737 assert(BotCand.Reason != NoCand && "failed to find the first candidate"); 02738 02739 // If either Q has a single candidate that provides the least increase in 02740 // Excess pressure, we can immediately schedule from that Q. 02741 // 02742 // RegionCriticalPSets summarizes the pressure within the scheduled region and 02743 // affects picking from either Q. If scheduling in one direction must 02744 // increase pressure for one of the excess PSets, then schedule in that 02745 // direction first to provide more freedom in the other direction. 02746 if ((BotCand.Reason == RegExcess && !BotCand.isRepeat(RegExcess)) 02747 || (BotCand.Reason == RegCritical 02748 && !BotCand.isRepeat(RegCritical))) 02749 { 02750 IsTopNode = false; 02751 tracePick(BotCand, IsTopNode); 02752 return BotCand.SU; 02753 } 02754 // Check if the top Q has a better candidate. 02755 pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand); 02756 assert(TopCand.Reason != NoCand && "failed to find the first candidate"); 02757 02758 // Choose the queue with the most important (lowest enum) reason. 02759 if (TopCand.Reason < BotCand.Reason) { 02760 IsTopNode = true; 02761 tracePick(TopCand, IsTopNode); 02762 return TopCand.SU; 02763 } 02764 // Otherwise prefer the bottom candidate, in node order if all else failed. 
02765 IsTopNode = false;
02766 tracePick(BotCand, IsTopNode);
02767 return BotCand.SU;
02768 }
02769 
02770 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
02771 SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
02772 if (DAG->top() == DAG->bottom()) {
02773 assert(Top.Available.empty() && Top.Pending.empty() &&
02774 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
02775 return nullptr;
02776 }
02777 SUnit *SU;
02778 do {
02779 if (RegionPolicy.OnlyTopDown) {
02780 SU = Top.pickOnlyChoice();
02781 if (!SU) {
02782 CandPolicy NoPolicy;
02783 SchedCandidate TopCand(NoPolicy);
02784 pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
02785 assert(TopCand.Reason != NoCand && "failed to find a candidate");
02786 tracePick(TopCand, true);
02787 SU = TopCand.SU;
02788 }
02789 IsTopNode = true;
02790 }
02791 else if (RegionPolicy.OnlyBottomUp) {
02792 SU = Bot.pickOnlyChoice();
02793 if (!SU) {
02794 CandPolicy NoPolicy;
02795 SchedCandidate BotCand(NoPolicy);
02796 pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
02797 assert(BotCand.Reason != NoCand && "failed to find a candidate");
02798 tracePick(BotCand, false);
02799 SU = BotCand.SU;
02800 }
02801 IsTopNode = false;
02802 }
02803 else {
02804 SU = pickNodeBidirectional(IsTopNode);
02805 }
02806 } while (SU->isScheduled);
02807 
02808 if (SU->isTopReady())
02809 Top.removeReady(SU);
02810 if (SU->isBottomReady())
02811 Bot.removeReady(SU);
02812 
02813 DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
02814 return SU;
02815 }
02816 
02817 void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
02818 
02819 MachineBasicBlock::iterator InsertPos = SU->getInstr();
02820 if (!isTop)
02821 ++InsertPos;
02822 SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
02823 
02824 // Find already scheduled copies with a single physreg dependence and move
02825 // them just above the scheduled instruction.
02826 for (SmallVectorImpl<SDep>::iterator I = Deps.begin(), E = Deps.end();
02827 I != E; ++I) {
02828 if (I->getKind() != SDep::Data || !TRI->isPhysicalRegister(I->getReg()))
02829 continue;
02830 SUnit *DepSU = I->getSUnit();
02831 if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
02832 continue;
02833 MachineInstr *Copy = DepSU->getInstr();
02834 if (!Copy->isCopy())
02835 continue;
02836 DEBUG(dbgs() << " Rescheduling physreg copy ";
02837 I->getSUnit()->dump(DAG));
02838 DAG->moveInstruction(Copy, InsertPos);
02839 }
02840 }
02841 
02842 /// Update the scheduler's state after scheduling a node. This is the same node
02843 /// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
02844 /// update its state based on the current cycle before MachineSchedStrategy
02845 /// does.
02846 ///
02847 /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
02848 /// them here. See comments in biasPhysRegCopy.
02849 void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
02850 if (IsTopNode) {
02851 SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
02852 Top.bumpNode(SU);
02853 if (SU->hasPhysRegUses)
02854 reschedulePhysRegCopies(SU, true);
02855 }
02856 else {
02857 SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
02858 Bot.bumpNode(SU);
02859 if (SU->hasPhysRegDefs)
02860 reschedulePhysRegCopies(SU, false);
02861 }
02862 }
02863 
02864 /// Create the standard converging machine scheduler. This will be used as the
02865 /// default scheduler if the target does not set a default.
02866 static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C) {
02867 ScheduleDAGMILive *DAG = new ScheduleDAGMILive(C, make_unique<GenericScheduler>(C));
02868 // Register DAG post-processors.
02869 //
02870 // FIXME: extend the mutation API to allow earlier mutations to instantiate
02871 // data and pass it to later mutations. Have a single mutation that gathers
02872 // the interesting nodes in one pass.
02873 DAG->addMutation(make_unique<CopyConstrain>(DAG->TII, DAG->TRI));
02874 if (EnableLoadCluster && DAG->TII->enableClusterLoads())
02875 DAG->addMutation(make_unique<LoadClusterMutation>(DAG->TII, DAG->TRI));
02876 if (EnableMacroFusion)
02877 DAG->addMutation(make_unique<MacroFusion>(DAG->TII));
02878 return DAG;
02879 }
02880 
02881 static MachineSchedRegistry
02882 GenericSchedRegistry("converge", "Standard converging scheduler.",
02883 createGenericSchedLive);
02884 
02885 //===----------------------------------------------------------------------===//
02886 // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
02887 //===----------------------------------------------------------------------===//
02888 
02889 void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
02890 DAG = Dag;
02891 SchedModel = DAG->getSchedModel();
02892 TRI = DAG->TRI;
02893 
02894 Rem.init(DAG, SchedModel);
02895 Top.init(DAG, SchedModel, &Rem);
02896 BotRoots.clear();
02897 
02898 // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
02899 // or are disabled, then these HazardRecs will be disabled.
02900 const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
02901 const TargetMachine &TM = DAG->MF.getTarget();
02902 if (!Top.HazardRec) {
02903 Top.HazardRec =
02904 TM.getSubtargetImpl()->getInstrInfo()->CreateTargetMIHazardRecognizer(
02905 Itin, DAG);
02906 }
02907 }
02908 
02909 
02910 void PostGenericScheduler::registerRoots() {
02911 Rem.CriticalPath = DAG->ExitSU.getDepth();
02912 
02913 // Some roots may not feed into ExitSU. Check all of them in case.
02914 for (SmallVectorImpl<SUnit*>::const_iterator
02915 I = BotRoots.begin(), E = BotRoots.end(); I != E; ++I) {
02916 if ((*I)->getDepth() > Rem.CriticalPath)
02917 Rem.CriticalPath = (*I)->getDepth();
02918 }
02919 DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
02920 if (DumpCriticalPathLength) {
02921 errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
02922 }
02923 }
02924 
02925 /// Apply a set of heuristics to a new candidate for PostRA scheduling.
02926 ///
02927 /// \param Cand provides the policy and current best candidate.
02928 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
02929 void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
02930 SchedCandidate &TryCand) {
02931 
02932 // Initialize the candidate if needed.
02933 if (!Cand.isValid()) {
02934 TryCand.Reason = NodeOrder;
02935 return;
02936 }
02937 
02938 // Prioritize instructions that read unbuffered resources by stall cycles.
02939 if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
02940 Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
02941 return;
02942 
02943 // Avoid critical resource consumption and balance the schedule.
02944 if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, 02945 TryCand, Cand, ResourceReduce)) 02946 return; 02947 if (tryGreater(TryCand.ResDelta.DemandedResources, 02948 Cand.ResDelta.DemandedResources, 02949 TryCand, Cand, ResourceDemand)) 02950 return; 02951 02952 // Avoid serializing long latency dependence chains. 02953 if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) { 02954 return; 02955 } 02956 02957 // Fall through to original instruction order. 02958 if (TryCand.SU->NodeNum < Cand.SU->NodeNum) 02959 TryCand.Reason = NodeOrder; 02960 } 02961 02962 void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) { 02963 ReadyQueue &Q = Top.Available; 02964 02965 DEBUG(Q.dump()); 02966 02967 for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) { 02968 SchedCandidate TryCand(Cand.Policy); 02969 TryCand.SU = *I; 02970 TryCand.initResourceDelta(DAG, SchedModel); 02971 tryCandidate(Cand, TryCand); 02972 if (TryCand.Reason != NoCand) { 02973 Cand.setBest(TryCand); 02974 DEBUG(traceCandidate(Cand)); 02975 } 02976 } 02977 } 02978 02979 /// Pick the next node to schedule. 02980 SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) { 02981 if (DAG->top() == DAG->bottom()) { 02982 assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage"); 02983 return nullptr; 02984 } 02985 SUnit *SU; 02986 do { 02987 SU = Top.pickOnlyChoice(); 02988 if (!SU) { 02989 CandPolicy NoPolicy; 02990 SchedCandidate TopCand(NoPolicy); 02991 // Set the top-down policy based on the state of the current top zone and 02992 // the instructions outside the zone, including the bottom zone. 02993 setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr); 02994 pickNodeFromQueue(TopCand); 02995 assert(TopCand.Reason != NoCand && "failed to find a candidate"); 02996 tracePick(TopCand, true); 02997 SU = TopCand.SU; 02998 } 02999 } while (SU->isScheduled); 03000 03001 IsTopNode = true; 03002 Top.removeReady(SU); 03003 03004 DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr()); 03005 return SU; 03006 } 03007 03008 /// Called after ScheduleDAGMI has scheduled an instruction and updated 03009 /// scheduled/remaining flags in the DAG nodes. 03010 void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { 03011 SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle()); 03012 Top.bumpNode(SU); 03013 } 03014 03015 /// Create a generic scheduler with no vreg liveness or DAG mutation passes. 03016 static ScheduleDAGInstrs *createGenericSchedPostRA(MachineSchedContext *C) { 03017 return new ScheduleDAGMI(C, make_unique<PostGenericScheduler>(C), /*IsPostRA=*/true); 03018 } 03019 03020 //===----------------------------------------------------------------------===// 03021 // ILP Scheduler. Currently for experimental analysis of heuristics. 03022 //===----------------------------------------------------------------------===// 03023 03024 namespace { 03025 /// \brief Order nodes by the ILP metric. 03026 struct ILPOrder { 03027 const SchedDFSResult *DFSResult; 03028 const BitVector *ScheduledTrees; 03029 bool MaximizeILP; 03030 03031 ILPOrder(bool MaxILP) 03032 : DFSResult(nullptr), ScheduledTrees(nullptr), MaximizeILP(MaxILP) {} 03033 03034 /// \brief Apply a less-than relation on node priority. 03035 /// 03036 /// (Return true if A comes after B in the Q.) 
03037 bool operator()(const SUnit *A, const SUnit *B) const { 03038 unsigned SchedTreeA = DFSResult->getSubtreeID(A); 03039 unsigned SchedTreeB = DFSResult->getSubtreeID(B); 03040 if (SchedTreeA != SchedTreeB) { 03041 // Unscheduled trees have lower priority. 03042 if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB)) 03043 return ScheduledTrees->test(SchedTreeB); 03044 03045 // Trees with shallower connections have lower priority. 03046 if (DFSResult->getSubtreeLevel(SchedTreeA) 03047 != DFSResult->getSubtreeLevel(SchedTreeB)) { 03048 return DFSResult->getSubtreeLevel(SchedTreeA) 03049 < DFSResult->getSubtreeLevel(SchedTreeB); 03050 } 03051 } 03052 if (MaximizeILP) 03053 return DFSResult->getILP(A) < DFSResult->getILP(B); 03054 else 03055 return DFSResult->getILP(A) > DFSResult->getILP(B); 03056 } 03057 }; 03058 03059 /// \brief Schedule based on the ILP metric. 03060 class ILPScheduler : public MachineSchedStrategy { 03061 ScheduleDAGMILive *DAG; 03062 ILPOrder Cmp; 03063 03064 std::vector<SUnit*> ReadyQ; 03065 public: 03066 ILPScheduler(bool MaximizeILP): DAG(nullptr), Cmp(MaximizeILP) {} 03067 03068 void initialize(ScheduleDAGMI *dag) override { 03069 assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness"); 03070 DAG = static_cast<ScheduleDAGMILive*>(dag); 03071 DAG->computeDFSResult(); 03072 Cmp.DFSResult = DAG->getDFSResult(); 03073 Cmp.ScheduledTrees = &DAG->getScheduledTrees(); 03074 ReadyQ.clear(); 03075 } 03076 03077 void registerRoots() override { 03078 // Restore the heap in ReadyQ with the updated DFS results. 03079 std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); 03080 } 03081 03082 /// Implement MachineSchedStrategy interface. 03083 /// ----------------------------------------- 03084 03085 /// Callback to select the highest priority node from the ready Q. 03086 SUnit *pickNode(bool &IsTopNode) override { 03087 if (ReadyQ.empty()) return nullptr; 03088 std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); 03089 SUnit *SU = ReadyQ.back(); 03090 ReadyQ.pop_back(); 03091 IsTopNode = false; 03092 DEBUG(dbgs() << "Pick node " << "SU(" << SU->NodeNum << ") " 03093 << " ILP: " << DAG->getDFSResult()->getILP(SU) 03094 << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @" 03095 << DAG->getDFSResult()->getSubtreeLevel( 03096 DAG->getDFSResult()->getSubtreeID(SU)) << '\n' 03097 << "Scheduling " << *SU->getInstr()); 03098 return SU; 03099 } 03100 03101 /// \brief Scheduler callback to notify that a new subtree is scheduled. 03102 void scheduleTree(unsigned SubtreeID) override { 03103 std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); 03104 } 03105 03106 /// Callback after a node is scheduled. Mark a newly scheduled tree, notify 03107 /// DFSResults, and resort the priority Q.
03108 void schedNode(SUnit *SU, bool IsTopNode) override { 03109 assert(!IsTopNode && "SchedDFSResult needs bottom-up"); 03110 } 03111 03112 void releaseTopNode(SUnit *) override { /*only called for top roots*/ } 03113 03114 void releaseBottomNode(SUnit *SU) override { 03115 ReadyQ.push_back(SU); 03116 std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); 03117 } 03118 }; 03119 } // namespace 03120 03121 static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) { 03122 return new ScheduleDAGMILive(C, make_unique<ILPScheduler>(true)); 03123 } 03124 static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) { 03125 return new ScheduleDAGMILive(C, make_unique<ILPScheduler>(false)); 03126 } 03127 static MachineSchedRegistry ILPMaxRegistry( 03128 "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler); 03129 static MachineSchedRegistry ILPMinRegistry( 03130 "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler); 03131 03132 //===----------------------------------------------------------------------===// 03133 // Machine Instruction Shuffler for Correctness Testing 03134 //===----------------------------------------------------------------------===// 03135 03136 #ifndef NDEBUG 03137 namespace { 03138 /// Apply a less-than relation on the node order, which corresponds to the 03139 /// instruction order prior to scheduling. IsReverse implements greater-than. 03140 template<bool IsReverse> 03141 struct SUnitOrder { 03142 bool operator()(SUnit *A, SUnit *B) const { 03143 if (IsReverse) 03144 return A->NodeNum > B->NodeNum; 03145 else 03146 return A->NodeNum < B->NodeNum; 03147 } 03148 }; 03149 03150 /// Reorder instructions as much as possible. 03151 class InstructionShuffler : public MachineSchedStrategy { 03152 bool IsAlternating; 03153 bool IsTopDown; 03154 03155 // Using a less-than relation (SUnitOrder<false>) for the TopQ priority 03156 // gives nodes with a higher number higher priority causing the latest 03157 // instructions to be scheduled first. 03158 PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> > 03159 TopQ; 03160 // When scheduling bottom-up, use greater-than as the queue priority. 03161 PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> > 03162 BottomQ; 03163 public: 03164 InstructionShuffler(bool alternate, bool topdown) 03165 : IsAlternating(alternate), IsTopDown(topdown) {} 03166 03167 void initialize(ScheduleDAGMI*) override { 03168 TopQ.clear(); 03169 BottomQ.clear(); 03170 } 03171 03172 /// Implement MachineSchedStrategy interface. 
03173 /// ----------------------------------------- 03174 03175 SUnit *pickNode(bool &IsTopNode) override { 03176 SUnit *SU; 03177 if (IsTopDown) { 03178 do { 03179 if (TopQ.empty()) return nullptr; 03180 SU = TopQ.top(); 03181 TopQ.pop(); 03182 } while (SU->isScheduled); 03183 IsTopNode = true; 03184 } 03185 else { 03186 do { 03187 if (BottomQ.empty()) return nullptr; 03188 SU = BottomQ.top(); 03189 BottomQ.pop(); 03190 } while (SU->isScheduled); 03191 IsTopNode = false; 03192 } 03193 if (IsAlternating) 03194 IsTopDown = !IsTopDown; 03195 return SU; 03196 } 03197 03198 void schedNode(SUnit *SU, bool IsTopNode) override {} 03199 03200 void releaseTopNode(SUnit *SU) override { 03201 TopQ.push(SU); 03202 } 03203 void releaseBottomNode(SUnit *SU) override { 03204 BottomQ.push(SU); 03205 } 03206 }; 03207 } // namespace 03208 03209 static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) { 03210 bool Alternate = !ForceTopDown && !ForceBottomUp; 03211 bool TopDown = !ForceBottomUp; 03212 assert((TopDown || !ForceTopDown) && 03213 "-misched-topdown incompatible with -misched-bottomup"); 03214 return new ScheduleDAGMILive(C, make_unique<InstructionShuffler>(Alternate, TopDown)); 03215 } 03216 static MachineSchedRegistry ShufflerRegistry( 03217 "shuffle", "Shuffle machine instructions alternating directions", 03218 createInstructionShuffler); 03219 #endif // !NDEBUG 03220 03221 //===----------------------------------------------------------------------===// 03222 // GraphWriter support for ScheduleDAGMILive. 03223 //===----------------------------------------------------------------------===// 03224 03225 #ifndef NDEBUG 03226 namespace llvm { 03227 03228 template<> struct GraphTraits< 03229 ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {}; 03230 03231 template<> 03232 struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits { 03233 03234 DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} 03235 03236 static std::string getGraphName(const ScheduleDAG *G) { 03237 return G->MF.getName(); 03238 } 03239 03240 static bool renderGraphFromBottomUp() { 03241 return true; 03242 } 03243 03244 static bool isNodeHidden(const SUnit *Node) { 03245 return (Node->Preds.size() > 10 || Node->Succs.size() > 10); 03246 } 03247 03248 static bool hasNodeAddressLabel(const SUnit *Node, 03249 const ScheduleDAG *Graph) { 03250 return false; 03251 } 03252 03253 /// If you want to override the dot attributes printed for a particular 03254 /// edge, override this method. 03255 static std::string getEdgeAttributes(const SUnit *Node, 03256 SUnitIterator EI, 03257 const ScheduleDAG *Graph) { 03258 if (EI.isArtificialDep()) 03259 return "color=cyan,style=dashed"; 03260 if (EI.isCtrlDep()) 03261 return "color=blue,style=dashed"; 03262 return ""; 03263 } 03264 03265 static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) { 03266 std::string Str; 03267 raw_string_ostream SS(Str); 03268 const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G); 03269 const SchedDFSResult *DFS = DAG->hasVRegLiveness() ? 
03270 static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr; 03271 SS << "SU:" << SU->NodeNum; 03272 if (DFS) 03273 SS << " I:" << DFS->getNumInstrs(SU); 03274 return SS.str(); 03275 } 03276 static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) { 03277 return G->getGraphNodeLabel(SU); 03278 } 03279 03280 static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) { 03281 std::string Str("shape=Mrecord"); 03282 const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G); 03283 const SchedDFSResult *DFS = DAG->hasVRegLiveness() ? 03284 static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr; 03285 if (DFS) { 03286 Str += ",style=filled,fillcolor=\"#"; 03287 Str += DOT::getColorString(DFS->getSubtreeID(N)); 03288 Str += '"'; 03289 } 03290 return Str; 03291 } 03292 }; 03293 } // namespace llvm 03294 #endif // NDEBUG 03295 03296 /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG 03297 /// rendered using 'dot'. 03298 /// 03299 void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) { 03300 #ifndef NDEBUG 03301 ViewGraph(this, Name, false, Title); 03302 #else 03303 errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on " 03304 << "systems with Graphviz or gv!\n"; 03305 #endif // NDEBUG 03306 } 03307 03308 /// Out-of-line implementation with no arguments is handy for gdb. 03309 void ScheduleDAGMI::viewGraph() { 03310 viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName()); 03311 }
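
The scheduler registrations in this file ("converge", "ilpmax"/"ilpmin", and the debug-only "shuffle") all follow the same pattern: a factory function that wraps a MachineSchedStrategy in a ScheduleDAGMI or ScheduleDAGMILive, plus a MachineSchedRegistry entry that makes the scheduler selectable by name. The following is a minimal sketch of that pattern, assuming a hypothetical strategy class MyStrategy that implements the MachineSchedStrategy callbacks (initialize, pickNode, schedNode, releaseTopNode, releaseBottomNode); it is an illustration, not part of the file above.

// Sketch only: MyStrategy is a hypothetical MachineSchedStrategy subclass.
static ScheduleDAGInstrs *createMySched(MachineSchedContext *C) {
  // Use ScheduleDAGMILive when the strategy needs vreg liveness (pre-RA);
  // a plain ScheduleDAGMI suffices for a post-RA strategy.
  return new ScheduleDAGMILive(C, make_unique<MyStrategy>());
}
static MachineSchedRegistry
MySchedRegistry("mysched", "Example custom scheduler registration.",
                createMySched);

Once registered, the strategy can be chosen by name through the scheduler registry (typically via llc's -misched=mysched option), the same mechanism that exposes the "shuffle" and ILP schedulers defined in this section.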