LLVM API Documentation
00001 //===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This implements bottom-up and top-down register pressure reduction list 00011 // schedulers, using standard algorithms. The basic approach uses a priority 00012 // queue of available nodes to schedule. One at a time, nodes are taken from 00013 // the priority queue (thus in priority order), checked for legality to 00014 // schedule, and emitted if legal. 00015 // 00016 //===----------------------------------------------------------------------===// 00017 00018 #include "llvm/CodeGen/SchedulerRegistry.h" 00019 #include "ScheduleDAGSDNodes.h" 00020 #include "llvm/ADT/STLExtras.h" 00021 #include "llvm/ADT/SmallSet.h" 00022 #include "llvm/ADT/Statistic.h" 00023 #include "llvm/CodeGen/MachineRegisterInfo.h" 00024 #include "llvm/CodeGen/ScheduleHazardRecognizer.h" 00025 #include "llvm/CodeGen/SelectionDAGISel.h" 00026 #include "llvm/IR/DataLayout.h" 00027 #include "llvm/IR/InlineAsm.h" 00028 #include "llvm/Support/Debug.h" 00029 #include "llvm/Support/ErrorHandling.h" 00030 #include "llvm/Support/raw_ostream.h" 00031 #include "llvm/Target/TargetInstrInfo.h" 00032 #include "llvm/Target/TargetLowering.h" 00033 #include "llvm/Target/TargetMachine.h" 00034 #include "llvm/Target/TargetRegisterInfo.h" 00035 #include "llvm/Target/TargetSubtargetInfo.h" 00036 #include <climits> 00037 using namespace llvm; 00038 00039 #define DEBUG_TYPE "pre-RA-sched" 00040 00041 STATISTIC(NumBacktracks, "Number of times scheduler backtracked"); 00042 STATISTIC(NumUnfolds, "Number of nodes unfolded"); 00043 STATISTIC(NumDups, "Number of duplicated nodes"); 00044 STATISTIC(NumPRCopies, "Number of physical register copies"); 00045 00046 static RegisterScheduler 00047 burrListDAGScheduler("list-burr", 00048 "Bottom-up register reduction list scheduling", 00049 createBURRListDAGScheduler); 00050 static RegisterScheduler 00051 sourceListDAGScheduler("source", 00052 "Similar to list-burr but schedules in source " 00053 "order when possible", 00054 createSourceListDAGScheduler); 00055 00056 static RegisterScheduler 00057 hybridListDAGScheduler("list-hybrid", 00058 "Bottom-up register pressure aware list scheduling " 00059 "which tries to balance latency and register pressure", 00060 createHybridListDAGScheduler); 00061 00062 static RegisterScheduler 00063 ILPListDAGScheduler("list-ilp", 00064 "Bottom-up register pressure aware list scheduling " 00065 "which tries to balance ILP and register pressure", 00066 createILPListDAGScheduler); 00067 00068 static cl::opt<bool> DisableSchedCycles( 00069 "disable-sched-cycles", cl::Hidden, cl::init(false), 00070 cl::desc("Disable cycle-level precision during preRA scheduling")); 00071 00072 // Temporary sched=list-ilp flags until the heuristics are robust. 00073 // Some options are also available under sched=list-hybrid. 
00074 static cl::opt<bool> DisableSchedRegPressure(
00075   "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
00076   cl::desc("Disable regpressure priority in sched=list-ilp"));
00077 static cl::opt<bool> DisableSchedLiveUses(
00078   "disable-sched-live-uses", cl::Hidden, cl::init(true),
00079   cl::desc("Disable live use priority in sched=list-ilp"));
00080 static cl::opt<bool> DisableSchedVRegCycle(
00081   "disable-sched-vrcycle", cl::Hidden, cl::init(false),
00082   cl::desc("Disable virtual register cycle interference checks"));
00083 static cl::opt<bool> DisableSchedPhysRegJoin(
00084   "disable-sched-physreg-join", cl::Hidden, cl::init(false),
00085   cl::desc("Disable physreg def-use affinity"));
00086 static cl::opt<bool> DisableSchedStalls(
00087   "disable-sched-stalls", cl::Hidden, cl::init(true),
00088   cl::desc("Disable no-stall priority in sched=list-ilp"));
00089 static cl::opt<bool> DisableSchedCriticalPath(
00090   "disable-sched-critical-path", cl::Hidden, cl::init(false),
00091   cl::desc("Disable critical path priority in sched=list-ilp"));
00092 static cl::opt<bool> DisableSchedHeight(
00093   "disable-sched-height", cl::Hidden, cl::init(false),
00094   cl::desc("Disable scheduled-height priority in sched=list-ilp"));
00095 static cl::opt<bool> Disable2AddrHack(
00096   "disable-2addr-hack", cl::Hidden, cl::init(true),
00097   cl::desc("Disable scheduler's two-address hack"));
00098 
00099 static cl::opt<int> MaxReorderWindow(
00100   "max-sched-reorder", cl::Hidden, cl::init(6),
00101   cl::desc("Number of instructions to allow ahead of the critical path "
00102            "in sched=list-ilp"));
00103 
00104 static cl::opt<unsigned> AvgIPC(
00105   "sched-avg-ipc", cl::Hidden, cl::init(1),
00106   cl::desc("Average inst/cycle when no target itinerary exists."));
00107 
00108 namespace {
00109 //===----------------------------------------------------------------------===//
00110 /// ScheduleDAGRRList - The actual register reduction list scheduler
00111 /// implementation. This supports both top-down and bottom-up scheduling.
00112 ///
00113 class ScheduleDAGRRList : public ScheduleDAGSDNodes {
00114 private:
00115   /// NeedLatency - True if the scheduler will make use of latency information.
00116   ///
00117   bool NeedLatency;
00118 
00119   /// AvailableQueue - The priority queue to use for the available SUnits.
00120   SchedulingPriorityQueue *AvailableQueue;
00121 
00122   /// PendingQueue - This contains all of the instructions whose operands have
00123   /// been issued, but their results are not ready yet (due to the latency of
00124   /// the operation). Once the operands become available, the instruction is
00125   /// added to the AvailableQueue.
00126   std::vector<SUnit*> PendingQueue;
00127 
00128   /// HazardRec - The hazard recognizer to use.
00129   ScheduleHazardRecognizer *HazardRec;
00130 
00131   /// CurCycle - The current scheduler state corresponds to this cycle.
00132   unsigned CurCycle;
00133 
00134   /// MinAvailableCycle - Cycle of the soonest available instruction.
00135   unsigned MinAvailableCycle;
00136 
00137   /// IssueCount - Count instructions issued in this cycle.
00138   /// Currently valid only for bottom-up scheduling.
00139   unsigned IssueCount;
00140 
00141   /// LiveRegDefs - A set of physical registers and their definitions that are
00142   /// "live". These nodes must be scheduled before any other nodes that
00143   /// modify the registers can be scheduled.
00144 unsigned NumLiveRegs; 00145 std::vector<SUnit*> LiveRegDefs; 00146 std::vector<SUnit*> LiveRegGens; 00147 00148 // Collect interferences between physical register use/defs. 00149 // Each interference is an SUnit and set of physical registers. 00150 SmallVector<SUnit*, 4> Interferences; 00151 typedef DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMapT; 00152 LRegsMapT LRegsMap; 00153 00154 /// Topo - A topological ordering for SUnits which permits fast IsReachable 00155 /// and similar queries. 00156 ScheduleDAGTopologicalSort Topo; 00157 00158 // Hack to keep track of the inverse of FindCallSeqStart without more crazy 00159 // DAG crawling. 00160 DenseMap<SUnit*, SUnit*> CallSeqEndForStart; 00161 00162 public: 00163 ScheduleDAGRRList(MachineFunction &mf, bool needlatency, 00164 SchedulingPriorityQueue *availqueue, 00165 CodeGenOpt::Level OptLevel) 00166 : ScheduleDAGSDNodes(mf), 00167 NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0), 00168 Topo(SUnits, nullptr) { 00169 00170 const TargetMachine &tm = mf.getTarget(); 00171 if (DisableSchedCycles || !NeedLatency) 00172 HazardRec = new ScheduleHazardRecognizer(); 00173 else 00174 HazardRec = 00175 tm.getSubtargetImpl()->getInstrInfo()->CreateTargetHazardRecognizer( 00176 tm.getSubtargetImpl(), this); 00177 } 00178 00179 ~ScheduleDAGRRList() { 00180 delete HazardRec; 00181 delete AvailableQueue; 00182 } 00183 00184 void Schedule() override; 00185 00186 ScheduleHazardRecognizer *getHazardRec() { return HazardRec; } 00187 00188 /// IsReachable - Checks if SU is reachable from TargetSU. 00189 bool IsReachable(const SUnit *SU, const SUnit *TargetSU) { 00190 return Topo.IsReachable(SU, TargetSU); 00191 } 00192 00193 /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will 00194 /// create a cycle. 00195 bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) { 00196 return Topo.WillCreateCycle(SU, TargetSU); 00197 } 00198 00199 /// AddPred - adds a predecessor edge to SUnit SU. 00200 /// This returns true if this is a new predecessor. 00201 /// Updates the topological ordering if required. 00202 void AddPred(SUnit *SU, const SDep &D) { 00203 Topo.AddPred(SU, D.getSUnit()); 00204 SU->addPred(D); 00205 } 00206 00207 /// RemovePred - removes a predecessor edge from SUnit SU. 00208 /// This returns true if an edge was removed. 00209 /// Updates the topological ordering if required. 
00210 void RemovePred(SUnit *SU, const SDep &D) { 00211 Topo.RemovePred(SU, D.getSUnit()); 00212 SU->removePred(D); 00213 } 00214 00215 private: 00216 bool isReady(SUnit *SU) { 00217 return DisableSchedCycles || !AvailableQueue->hasReadyFilter() || 00218 AvailableQueue->isReady(SU); 00219 } 00220 00221 void ReleasePred(SUnit *SU, const SDep *PredEdge); 00222 void ReleasePredecessors(SUnit *SU); 00223 void ReleasePending(); 00224 void AdvanceToCycle(unsigned NextCycle); 00225 void AdvancePastStalls(SUnit *SU); 00226 void EmitNode(SUnit *SU); 00227 void ScheduleNodeBottomUp(SUnit*); 00228 void CapturePred(SDep *PredEdge); 00229 void UnscheduleNodeBottomUp(SUnit*); 00230 void RestoreHazardCheckerBottomUp(); 00231 void BacktrackBottomUp(SUnit*, SUnit*); 00232 SUnit *CopyAndMoveSuccessors(SUnit*); 00233 void InsertCopiesAndMoveSuccs(SUnit*, unsigned, 00234 const TargetRegisterClass*, 00235 const TargetRegisterClass*, 00236 SmallVectorImpl<SUnit*>&); 00237 bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&); 00238 00239 void releaseInterferences(unsigned Reg = 0); 00240 00241 SUnit *PickNodeToScheduleBottomUp(); 00242 void ListScheduleBottomUp(); 00243 00244 /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it. 00245 /// Updates the topological ordering if required. 00246 SUnit *CreateNewSUnit(SDNode *N) { 00247 unsigned NumSUnits = SUnits.size(); 00248 SUnit *NewNode = newSUnit(N); 00249 // Update the topological ordering. 00250 if (NewNode->NodeNum >= NumSUnits) 00251 Topo.InitDAGTopologicalSorting(); 00252 return NewNode; 00253 } 00254 00255 /// CreateClone - Creates a new SUnit from an existing one. 00256 /// Updates the topological ordering if required. 00257 SUnit *CreateClone(SUnit *N) { 00258 unsigned NumSUnits = SUnits.size(); 00259 SUnit *NewNode = Clone(N); 00260 // Update the topological ordering. 00261 if (NewNode->NodeNum >= NumSUnits) 00262 Topo.InitDAGTopologicalSorting(); 00263 return NewNode; 00264 } 00265 00266 /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't 00267 /// need actual latency information but the hybrid scheduler does. 00268 bool forceUnitLatencies() const override { 00269 return !NeedLatency; 00270 } 00271 }; 00272 } // end anonymous namespace 00273 00274 /// GetCostForDef - Looks up the register class and cost for a given definition. 00275 /// Typically this just means looking up the representative register class, 00276 /// but for untyped values (MVT::Untyped) it means inspecting the node's 00277 /// opcode to determine what register class is being generated. 00278 static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, 00279 const TargetLowering *TLI, 00280 const TargetInstrInfo *TII, 00281 const TargetRegisterInfo *TRI, 00282 unsigned &RegClass, unsigned &Cost, 00283 const MachineFunction &MF) { 00284 MVT VT = RegDefPos.GetValue(); 00285 00286 // Special handling for untyped values. These values can only come from 00287 // the expansion of custom DAG-to-DAG patterns. 00288 if (VT == MVT::Untyped) { 00289 const SDNode *Node = RegDefPos.GetNode(); 00290 00291 // Special handling for CopyFromReg of untyped values. 
00292 if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) { 00293 unsigned Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); 00294 const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg); 00295 RegClass = RC->getID(); 00296 Cost = 1; 00297 return; 00298 } 00299 00300 unsigned Opcode = Node->getMachineOpcode(); 00301 if (Opcode == TargetOpcode::REG_SEQUENCE) { 00302 unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); 00303 const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); 00304 RegClass = RC->getID(); 00305 Cost = 1; 00306 return; 00307 } 00308 00309 unsigned Idx = RegDefPos.GetIdx(); 00310 const MCInstrDesc Desc = TII->get(Opcode); 00311 const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF); 00312 RegClass = RC->getID(); 00313 // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a 00314 // better way to determine it. 00315 Cost = 1; 00316 } else { 00317 RegClass = TLI->getRepRegClassFor(VT)->getID(); 00318 Cost = TLI->getRepRegClassCostFor(VT); 00319 } 00320 } 00321 00322 /// Schedule - Schedule the DAG using list scheduling. 00323 void ScheduleDAGRRList::Schedule() { 00324 DEBUG(dbgs() 00325 << "********** List Scheduling BB#" << BB->getNumber() 00326 << " '" << BB->getName() << "' **********\n"); 00327 00328 CurCycle = 0; 00329 IssueCount = 0; 00330 MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX; 00331 NumLiveRegs = 0; 00332 // Allocate slots for each physical register, plus one for a special register 00333 // to track the virtual resource of a calling sequence. 00334 LiveRegDefs.resize(TRI->getNumRegs() + 1, nullptr); 00335 LiveRegGens.resize(TRI->getNumRegs() + 1, nullptr); 00336 CallSeqEndForStart.clear(); 00337 assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences"); 00338 00339 // Build the scheduling graph. 00340 BuildSchedGraph(nullptr); 00341 00342 DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) 00343 SUnits[su].dumpAll(this)); 00344 Topo.InitDAGTopologicalSorting(); 00345 00346 AvailableQueue->initNodes(SUnits); 00347 00348 HazardRec->Reset(); 00349 00350 // Execute the actual scheduling loop. 00351 ListScheduleBottomUp(); 00352 00353 AvailableQueue->releaseState(); 00354 00355 DEBUG({ 00356 dbgs() << "*** Final schedule ***\n"; 00357 dumpSchedule(); 00358 dbgs() << '\n'; 00359 }); 00360 } 00361 00362 //===----------------------------------------------------------------------===// 00363 // Bottom-Up Scheduling 00364 //===----------------------------------------------------------------------===// 00365 00366 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to 00367 /// the AvailableQueue if the count reaches zero. Also update its cycle bound. 00368 void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { 00369 SUnit *PredSU = PredEdge->getSUnit(); 00370 00371 #ifndef NDEBUG 00372 if (PredSU->NumSuccsLeft == 0) { 00373 dbgs() << "*** Scheduling failed! ***\n"; 00374 PredSU->dump(this); 00375 dbgs() << " has been released too many times!\n"; 00376 llvm_unreachable(nullptr); 00377 } 00378 #endif 00379 --PredSU->NumSuccsLeft; 00380 00381 if (!forceUnitLatencies()) { 00382 // Updating predecessor's height. This is now the cycle when the 00383 // predecessor can be scheduled without causing a pipeline stall. 00384 PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency()); 00385 } 00386 00387 // If all the node's successors are scheduled, this node is ready 00388 // to be scheduled. 
Ignore the special EntrySU node.
00389   if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
00390     PredSU->isAvailable = true;
00391 
00392     unsigned Height = PredSU->getHeight();
00393     if (Height < MinAvailableCycle)
00394       MinAvailableCycle = Height;
00395 
00396     if (isReady(PredSU)) {
00397       AvailableQueue->push(PredSU);
00398     }
00399     // CapturePred and others may have left the node in the pending queue;
00400     // avoid adding it twice.
00401     else if (!PredSU->isPending) {
00402       PredSU->isPending = true;
00403       PendingQueue.push_back(PredSU);
00404     }
00405   }
00406 }
00407 
00408 /// IsChainDependent - Test if Outer is reachable from Inner through
00409 /// chain dependencies.
00410 static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
00411                              unsigned NestLevel,
00412                              const TargetInstrInfo *TII) {
00413   SDNode *N = Outer;
00414   for (;;) {
00415     if (N == Inner)
00416       return true;
00417     // For a TokenFactor, examine each operand. There may be multiple ways
00418     // to get to the CALLSEQ_BEGIN, but we need to find the path with the
00419     // most nesting in order to ensure that we find the corresponding match.
00420     if (N->getOpcode() == ISD::TokenFactor) {
00421       for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
00422         if (IsChainDependent(N->getOperand(i).getNode(), Inner, NestLevel, TII))
00423           return true;
00424       return false;
00425     }
00426     // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
00427     if (N->isMachineOpcode()) {
00428       if (N->getMachineOpcode() ==
00429           (unsigned)TII->getCallFrameDestroyOpcode()) {
00430         ++NestLevel;
00431       } else if (N->getMachineOpcode() ==
00432                  (unsigned)TII->getCallFrameSetupOpcode()) {
00433         if (NestLevel == 0)
00434           return false;
00435         --NestLevel;
00436       }
00437     }
00438     // Otherwise, find the chain and continue climbing.
00439     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
00440       if (N->getOperand(i).getValueType() == MVT::Other) {
00441         N = N->getOperand(i).getNode();
00442         goto found_chain_operand;
00443       }
00444     return false;
00445   found_chain_operand:;
00446     if (N->getOpcode() == ISD::EntryToken)
00447       return false;
00448   }
00449 }
00450 
00451 /// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
00452 /// the corresponding (lowered) CALLSEQ_BEGIN node.
00453 ///
00454 /// NestLevel and MaxNest are used in recursion to indicate the current level
00455 /// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum
00456 /// level seen so far.
00457 ///
00458 /// TODO: It would be better to give CALLSEQ_END an explicit operand to point
00459 /// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it.
00460 static SDNode *
00461 FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
00462                  const TargetInstrInfo *TII) {
00463   for (;;) {
00464     // For a TokenFactor, examine each operand. There may be multiple ways
00465     // to get to the CALLSEQ_BEGIN, but we need to find the path with the
00466     // most nesting in order to ensure that we find the corresponding match.
00467     if (N->getOpcode() == ISD::TokenFactor) {
00468       SDNode *Best = nullptr;
00469       unsigned BestMaxNest = MaxNest;
00470       for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
00471         unsigned MyNestLevel = NestLevel;
00472         unsigned MyMaxNest = MaxNest;
00473         if (SDNode *New = FindCallSeqStart(N->getOperand(i).getNode(),
00474                                            MyNestLevel, MyMaxNest, TII))
00475           if (!Best || (MyMaxNest > BestMaxNest)) {
00476             Best = New;
00477             BestMaxNest = MyMaxNest;
00478           }
00479       }
00480       assert(Best);
00481       MaxNest = BestMaxNest;
00482       return Best;
00483     }
00484     // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
00485     if (N->isMachineOpcode()) {
00486       if (N->getMachineOpcode() ==
00487           (unsigned)TII->getCallFrameDestroyOpcode()) {
00488         ++NestLevel;
00489         MaxNest = std::max(MaxNest, NestLevel);
00490       } else if (N->getMachineOpcode() ==
00491                  (unsigned)TII->getCallFrameSetupOpcode()) {
00492         assert(NestLevel != 0);
00493         --NestLevel;
00494         if (NestLevel == 0)
00495           return N;
00496       }
00497     }
00498     // Otherwise, find the chain and continue climbing.
00499     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
00500       if (N->getOperand(i).getValueType() == MVT::Other) {
00501         N = N->getOperand(i).getNode();
00502         goto found_chain_operand;
00503       }
00504     return nullptr;
00505   found_chain_operand:;
00506     if (N->getOpcode() == ISD::EntryToken)
00507       return nullptr;
00508   }
00509 }
00510 
00511 /// Call ReleasePred for each predecessor, then update register live def/gen.
00512 /// Always update LiveRegDefs for a register dependence even if the current SU
00513 /// also defines the register. This effectively creates one large live range
00514 /// across a sequence of two-address nodes. This is important because the
00515 /// entire chain must be scheduled together. Example:
00516 ///
00517 /// flags = (3) add
00518 /// flags = (2) addc flags
00519 /// flags = (1) addc flags
00520 ///
00521 /// results in
00522 ///
00523 /// LiveRegDefs[flags] = 3
00524 /// LiveRegGens[flags] = 1
00525 ///
00526 /// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
00527 /// interference on flags.
00528 void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
00529   // Bottom up: release predecessors
00530   for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
00531        I != E; ++I) {
00532     ReleasePred(SU, &*I);
00533     if (I->isAssignedRegDep()) {
00534       // This is a physical register dependency and it's impossible or
00535       // expensive to copy the register. Make sure nothing that can
00536       // clobber the register is scheduled between the predecessor and
00537       // this node.
00538       SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef;
00539       assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) &&
00540              "interference on register dependence");
00541       LiveRegDefs[I->getReg()] = I->getSUnit();
00542       if (!LiveRegGens[I->getReg()]) {
00543         ++NumLiveRegs;
00544         LiveRegGens[I->getReg()] = SU;
00545       }
00546     }
00547   }
00548 
00549   // If we're scheduling a lowered CALLSEQ_END, find the corresponding
00550   // CALLSEQ_BEGIN. Inject an artificial physical register dependence between
00551   // these nodes, to prevent other calls from being interscheduled with them.
00552 unsigned CallResource = TRI->getNumRegs(); 00553 if (!LiveRegDefs[CallResource]) 00554 for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) 00555 if (Node->isMachineOpcode() && 00556 Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) { 00557 unsigned NestLevel = 0; 00558 unsigned MaxNest = 0; 00559 SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII); 00560 00561 SUnit *Def = &SUnits[N->getNodeId()]; 00562 CallSeqEndForStart[Def] = SU; 00563 00564 ++NumLiveRegs; 00565 LiveRegDefs[CallResource] = Def; 00566 LiveRegGens[CallResource] = SU; 00567 break; 00568 } 00569 } 00570 00571 /// Check to see if any of the pending instructions are ready to issue. If 00572 /// so, add them to the available queue. 00573 void ScheduleDAGRRList::ReleasePending() { 00574 if (DisableSchedCycles) { 00575 assert(PendingQueue.empty() && "pending instrs not allowed in this mode"); 00576 return; 00577 } 00578 00579 // If the available queue is empty, it is safe to reset MinAvailableCycle. 00580 if (AvailableQueue->empty()) 00581 MinAvailableCycle = UINT_MAX; 00582 00583 // Check to see if any of the pending instructions are ready to issue. If 00584 // so, add them to the available queue. 00585 for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) { 00586 unsigned ReadyCycle = PendingQueue[i]->getHeight(); 00587 if (ReadyCycle < MinAvailableCycle) 00588 MinAvailableCycle = ReadyCycle; 00589 00590 if (PendingQueue[i]->isAvailable) { 00591 if (!isReady(PendingQueue[i])) 00592 continue; 00593 AvailableQueue->push(PendingQueue[i]); 00594 } 00595 PendingQueue[i]->isPending = false; 00596 PendingQueue[i] = PendingQueue.back(); 00597 PendingQueue.pop_back(); 00598 --i; --e; 00599 } 00600 } 00601 00602 /// Move the scheduler state forward by the specified number of Cycles. 00603 void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) { 00604 if (NextCycle <= CurCycle) 00605 return; 00606 00607 IssueCount = 0; 00608 AvailableQueue->setCurCycle(NextCycle); 00609 if (!HazardRec->isEnabled()) { 00610 // Bypass lots of virtual calls in case of long latency. 00611 CurCycle = NextCycle; 00612 } 00613 else { 00614 for (; CurCycle != NextCycle; ++CurCycle) { 00615 HazardRec->RecedeCycle(); 00616 } 00617 } 00618 // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the 00619 // available Q to release pending nodes at least once before popping. 00620 ReleasePending(); 00621 } 00622 00623 /// Move the scheduler state forward until the specified node's dependents are 00624 /// ready and can be scheduled with no resource conflicts. 00625 void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) { 00626 if (DisableSchedCycles) 00627 return; 00628 00629 // FIXME: Nodes such as CopyFromReg probably should not advance the current 00630 // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node 00631 // has predecessors the cycle will be advanced when they are scheduled. 00632 // But given the crude nature of modeling latency though such nodes, we 00633 // currently need to treat these nodes like real instructions. 00634 // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return; 00635 00636 unsigned ReadyCycle = SU->getHeight(); 00637 00638 // Bump CurCycle to account for latency. We assume the latency of other 00639 // available instructions may be hidden by the stall (not a full pipe stall). 00640 // This updates the hazard recognizer's cycle before reserving resources for 00641 // this instruction. 
00642 AdvanceToCycle(ReadyCycle); 00643 00644 // Calls are scheduled in their preceding cycle, so don't conflict with 00645 // hazards from instructions after the call. EmitNode will reset the 00646 // scoreboard state before emitting the call. 00647 if (SU->isCall) 00648 return; 00649 00650 // FIXME: For resource conflicts in very long non-pipelined stages, we 00651 // should probably skip ahead here to avoid useless scoreboard checks. 00652 int Stalls = 0; 00653 while (true) { 00654 ScheduleHazardRecognizer::HazardType HT = 00655 HazardRec->getHazardType(SU, -Stalls); 00656 00657 if (HT == ScheduleHazardRecognizer::NoHazard) 00658 break; 00659 00660 ++Stalls; 00661 } 00662 AdvanceToCycle(CurCycle + Stalls); 00663 } 00664 00665 /// Record this SUnit in the HazardRecognizer. 00666 /// Does not update CurCycle. 00667 void ScheduleDAGRRList::EmitNode(SUnit *SU) { 00668 if (!HazardRec->isEnabled()) 00669 return; 00670 00671 // Check for phys reg copy. 00672 if (!SU->getNode()) 00673 return; 00674 00675 switch (SU->getNode()->getOpcode()) { 00676 default: 00677 assert(SU->getNode()->isMachineOpcode() && 00678 "This target-independent node should not be scheduled."); 00679 break; 00680 case ISD::MERGE_VALUES: 00681 case ISD::TokenFactor: 00682 case ISD::LIFETIME_START: 00683 case ISD::LIFETIME_END: 00684 case ISD::CopyToReg: 00685 case ISD::CopyFromReg: 00686 case ISD::EH_LABEL: 00687 // Noops don't affect the scoreboard state. Copies are likely to be 00688 // removed. 00689 return; 00690 case ISD::INLINEASM: 00691 // For inline asm, clear the pipeline state. 00692 HazardRec->Reset(); 00693 return; 00694 } 00695 if (SU->isCall) { 00696 // Calls are scheduled with their preceding instructions. For bottom-up 00697 // scheduling, clear the pipeline state before emitting. 00698 HazardRec->Reset(); 00699 } 00700 00701 HazardRec->EmitInstruction(SU); 00702 } 00703 00704 static void resetVRegCycle(SUnit *SU); 00705 00706 /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending 00707 /// count of its predecessors. If a predecessor pending count is zero, add it to 00708 /// the Available queue. 00709 void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { 00710 DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: "); 00711 DEBUG(SU->dump(this)); 00712 00713 #ifndef NDEBUG 00714 if (CurCycle < SU->getHeight()) 00715 DEBUG(dbgs() << " Height [" << SU->getHeight() 00716 << "] pipeline stall!\n"); 00717 #endif 00718 00719 // FIXME: Do not modify node height. It may interfere with 00720 // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the 00721 // node its ready cycle can aid heuristics, and after scheduling it can 00722 // indicate the scheduled cycle. 00723 SU->setHeightToAtLeast(CurCycle); 00724 00725 // Reserve resources for the scheduled instruction. 00726 EmitNode(SU); 00727 00728 Sequence.push_back(SU); 00729 00730 AvailableQueue->scheduledNode(SU); 00731 00732 // If HazardRec is disabled, and each inst counts as one cycle, then 00733 // advance CurCycle before ReleasePredecessors to avoid useless pushes to 00734 // PendingQueue for schedulers that implement HasReadyFilter. 00735 if (!HazardRec->isEnabled() && AvgIPC < 2) 00736 AdvanceToCycle(CurCycle + 1); 00737 00738 // Update liveness of predecessors before successors to avoid treating a 00739 // two-address node as a live range def. 00740 ReleasePredecessors(SU); 00741 00742 // Release all the implicit physical register defs that are live. 
00743   for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
00744        I != E; ++I) {
00745     // LiveRegDefs[I->getReg()] != SU when SU is a two-address node.
00746     if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) {
00747       assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
00748       --NumLiveRegs;
00749       LiveRegDefs[I->getReg()] = nullptr;
00750       LiveRegGens[I->getReg()] = nullptr;
00751       releaseInterferences(I->getReg());
00752     }
00753   }
00754   // Release the special call resource dependence, if this is the beginning
00755   // of a call.
00756   unsigned CallResource = TRI->getNumRegs();
00757   if (LiveRegDefs[CallResource] == SU)
00758     for (const SDNode *SUNode = SU->getNode(); SUNode;
00759          SUNode = SUNode->getGluedNode()) {
00760       if (SUNode->isMachineOpcode() &&
00761           SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
00762         assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
00763         --NumLiveRegs;
00764         LiveRegDefs[CallResource] = nullptr;
00765         LiveRegGens[CallResource] = nullptr;
00766         releaseInterferences(CallResource);
00767       }
00768     }
00769 
00770   resetVRegCycle(SU);
00771 
00772   SU->isScheduled = true;
00773 
00774   // Conditions under which the scheduler should eagerly advance the cycle:
00775   // (1) No available instructions
00776   // (2) All pipelines full, so available instructions must have hazards.
00777   //
00778   // If HazardRec is disabled, the cycle was pre-advanced before calling
00779   // ReleasePredecessors. In that case, IssueCount should remain 0.
00780   //
00781   // Check AvailableQueue after ReleasePredecessors in case of zero latency.
00782   if (HazardRec->isEnabled() || AvgIPC > 1) {
00783     if (SU->getNode() && SU->getNode()->isMachineOpcode())
00784       ++IssueCount;
00785     if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
00786         || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
00787       AdvanceToCycle(CurCycle + 1);
00788   }
00789 }
00790 
00791 /// CapturePred - This does the opposite of ReleasePred. Since SU is being
00792 /// unscheduled, increase the succ left count of its predecessors. Remove
00793 /// them from AvailableQueue if necessary.
00794 void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
00795   SUnit *PredSU = PredEdge->getSUnit();
00796   if (PredSU->isAvailable) {
00797     PredSU->isAvailable = false;
00798     if (!PredSU->isPending)
00799       AvailableQueue->remove(PredSU);
00800   }
00801 
00802   assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
00803   ++PredSU->NumSuccsLeft;
00804 }
00805 
00806 /// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and
00807 /// its predecessors' states to reflect the change.
00808 void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
00809   DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
00810   DEBUG(SU->dump(this));
00811 
00812   for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
00813        I != E; ++I) {
00814     CapturePred(&*I);
00815     if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]){
00816       assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
00817       assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
00818              "Physical register dependency violated?");
00819       --NumLiveRegs;
00820       LiveRegDefs[I->getReg()] = nullptr;
00821       LiveRegGens[I->getReg()] = nullptr;
00822       releaseInterferences(I->getReg());
00823     }
00824   }
00825 
00826   // Reclaim the special call resource dependence, if this is the beginning
00827   // of a call.
00828 unsigned CallResource = TRI->getNumRegs(); 00829 for (const SDNode *SUNode = SU->getNode(); SUNode; 00830 SUNode = SUNode->getGluedNode()) { 00831 if (SUNode->isMachineOpcode() && 00832 SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) { 00833 ++NumLiveRegs; 00834 LiveRegDefs[CallResource] = SU; 00835 LiveRegGens[CallResource] = CallSeqEndForStart[SU]; 00836 } 00837 } 00838 00839 // Release the special call resource dependence, if this is the end 00840 // of a call. 00841 if (LiveRegGens[CallResource] == SU) 00842 for (const SDNode *SUNode = SU->getNode(); SUNode; 00843 SUNode = SUNode->getGluedNode()) { 00844 if (SUNode->isMachineOpcode() && 00845 SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) { 00846 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); 00847 --NumLiveRegs; 00848 LiveRegDefs[CallResource] = nullptr; 00849 LiveRegGens[CallResource] = nullptr; 00850 releaseInterferences(CallResource); 00851 } 00852 } 00853 00854 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 00855 I != E; ++I) { 00856 if (I->isAssignedRegDep()) { 00857 if (!LiveRegDefs[I->getReg()]) 00858 ++NumLiveRegs; 00859 // This becomes the nearest def. Note that an earlier def may still be 00860 // pending if this is a two-address node. 00861 LiveRegDefs[I->getReg()] = SU; 00862 if (LiveRegGens[I->getReg()] == nullptr || 00863 I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight()) 00864 LiveRegGens[I->getReg()] = I->getSUnit(); 00865 } 00866 } 00867 if (SU->getHeight() < MinAvailableCycle) 00868 MinAvailableCycle = SU->getHeight(); 00869 00870 SU->setHeightDirty(); 00871 SU->isScheduled = false; 00872 SU->isAvailable = true; 00873 if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) { 00874 // Don't make available until backtracking is complete. 00875 SU->isPending = true; 00876 PendingQueue.push_back(SU); 00877 } 00878 else { 00879 AvailableQueue->push(SU); 00880 } 00881 AvailableQueue->unscheduledNode(SU); 00882 } 00883 00884 /// After backtracking, the hazard checker needs to be restored to a state 00885 /// corresponding the current cycle. 00886 void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() { 00887 HazardRec->Reset(); 00888 00889 unsigned LookAhead = std::min((unsigned)Sequence.size(), 00890 HazardRec->getMaxLookAhead()); 00891 if (LookAhead == 0) 00892 return; 00893 00894 std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead); 00895 unsigned HazardCycle = (*I)->getHeight(); 00896 for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) { 00897 SUnit *SU = *I; 00898 for (; SU->getHeight() > HazardCycle; ++HazardCycle) { 00899 HazardRec->RecedeCycle(); 00900 } 00901 EmitNode(SU); 00902 } 00903 } 00904 00905 /// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in 00906 /// BTCycle in order to schedule a specific node. 
00907 void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) { 00908 SUnit *OldSU = Sequence.back(); 00909 while (true) { 00910 Sequence.pop_back(); 00911 // FIXME: use ready cycle instead of height 00912 CurCycle = OldSU->getHeight(); 00913 UnscheduleNodeBottomUp(OldSU); 00914 AvailableQueue->setCurCycle(CurCycle); 00915 if (OldSU == BtSU) 00916 break; 00917 OldSU = Sequence.back(); 00918 } 00919 00920 assert(!SU->isSucc(OldSU) && "Something is wrong!"); 00921 00922 RestoreHazardCheckerBottomUp(); 00923 00924 ReleasePending(); 00925 00926 ++NumBacktracks; 00927 } 00928 00929 static bool isOperandOf(const SUnit *SU, SDNode *N) { 00930 for (const SDNode *SUNode = SU->getNode(); SUNode; 00931 SUNode = SUNode->getGluedNode()) { 00932 if (SUNode->isOperandOf(N)) 00933 return true; 00934 } 00935 return false; 00936 } 00937 00938 /// CopyAndMoveSuccessors - Clone the specified node and move its scheduled 00939 /// successors to the newly created node. 00940 SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { 00941 SDNode *N = SU->getNode(); 00942 if (!N) 00943 return nullptr; 00944 00945 if (SU->getNode()->getGluedNode()) 00946 return nullptr; 00947 00948 SUnit *NewSU; 00949 bool TryUnfold = false; 00950 for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { 00951 EVT VT = N->getValueType(i); 00952 if (VT == MVT::Glue) 00953 return nullptr; 00954 else if (VT == MVT::Other) 00955 TryUnfold = true; 00956 } 00957 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 00958 const SDValue &Op = N->getOperand(i); 00959 EVT VT = Op.getNode()->getValueType(Op.getResNo()); 00960 if (VT == MVT::Glue) 00961 return nullptr; 00962 } 00963 00964 if (TryUnfold) { 00965 SmallVector<SDNode*, 2> NewNodes; 00966 if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) 00967 return nullptr; 00968 00969 // unfolding an x86 DEC64m operation results in store, dec, load which 00970 // can't be handled here so quit 00971 if (NewNodes.size() == 3) 00972 return nullptr; 00973 00974 DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n"); 00975 assert(NewNodes.size() == 2 && "Expected a load folding node!"); 00976 00977 N = NewNodes[1]; 00978 SDNode *LoadNode = NewNodes[0]; 00979 unsigned NumVals = N->getNumValues(); 00980 unsigned OldNumVals = SU->getNode()->getNumValues(); 00981 for (unsigned i = 0; i != NumVals; ++i) 00982 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i)); 00983 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1), 00984 SDValue(LoadNode, 1)); 00985 00986 // LoadNode may already exist. This can happen when there is another 00987 // load from the same location and producing the same type of value 00988 // but it has different alignment or volatileness. 
00989 bool isNewLoad = true; 00990 SUnit *LoadSU; 00991 if (LoadNode->getNodeId() != -1) { 00992 LoadSU = &SUnits[LoadNode->getNodeId()]; 00993 isNewLoad = false; 00994 } else { 00995 LoadSU = CreateNewSUnit(LoadNode); 00996 LoadNode->setNodeId(LoadSU->NodeNum); 00997 00998 InitNumRegDefsLeft(LoadSU); 00999 computeLatency(LoadSU); 01000 } 01001 01002 SUnit *NewSU = CreateNewSUnit(N); 01003 assert(N->getNodeId() == -1 && "Node already inserted!"); 01004 N->setNodeId(NewSU->NodeNum); 01005 01006 const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); 01007 for (unsigned i = 0; i != MCID.getNumOperands(); ++i) { 01008 if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) { 01009 NewSU->isTwoAddress = true; 01010 break; 01011 } 01012 } 01013 if (MCID.isCommutable()) 01014 NewSU->isCommutable = true; 01015 01016 InitNumRegDefsLeft(NewSU); 01017 computeLatency(NewSU); 01018 01019 // Record all the edges to and from the old SU, by category. 01020 SmallVector<SDep, 4> ChainPreds; 01021 SmallVector<SDep, 4> ChainSuccs; 01022 SmallVector<SDep, 4> LoadPreds; 01023 SmallVector<SDep, 4> NodePreds; 01024 SmallVector<SDep, 4> NodeSuccs; 01025 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 01026 I != E; ++I) { 01027 if (I->isCtrl()) 01028 ChainPreds.push_back(*I); 01029 else if (isOperandOf(I->getSUnit(), LoadNode)) 01030 LoadPreds.push_back(*I); 01031 else 01032 NodePreds.push_back(*I); 01033 } 01034 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 01035 I != E; ++I) { 01036 if (I->isCtrl()) 01037 ChainSuccs.push_back(*I); 01038 else 01039 NodeSuccs.push_back(*I); 01040 } 01041 01042 // Now assign edges to the newly-created nodes. 01043 for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) { 01044 const SDep &Pred = ChainPreds[i]; 01045 RemovePred(SU, Pred); 01046 if (isNewLoad) 01047 AddPred(LoadSU, Pred); 01048 } 01049 for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) { 01050 const SDep &Pred = LoadPreds[i]; 01051 RemovePred(SU, Pred); 01052 if (isNewLoad) 01053 AddPred(LoadSU, Pred); 01054 } 01055 for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) { 01056 const SDep &Pred = NodePreds[i]; 01057 RemovePred(SU, Pred); 01058 AddPred(NewSU, Pred); 01059 } 01060 for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) { 01061 SDep D = NodeSuccs[i]; 01062 SUnit *SuccDep = D.getSUnit(); 01063 D.setSUnit(SU); 01064 RemovePred(SuccDep, D); 01065 D.setSUnit(NewSU); 01066 AddPred(SuccDep, D); 01067 // Balance register pressure. 01068 if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled 01069 && !D.isCtrl() && NewSU->NumRegDefsLeft > 0) 01070 --NewSU->NumRegDefsLeft; 01071 } 01072 for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) { 01073 SDep D = ChainSuccs[i]; 01074 SUnit *SuccDep = D.getSUnit(); 01075 D.setSUnit(SU); 01076 RemovePred(SuccDep, D); 01077 if (isNewLoad) { 01078 D.setSUnit(LoadSU); 01079 AddPred(SuccDep, D); 01080 } 01081 } 01082 01083 // Add a data dependency to reflect that NewSU reads the value defined 01084 // by LoadSU. 
01085 SDep D(LoadSU, SDep::Data, 0); 01086 D.setLatency(LoadSU->Latency); 01087 AddPred(NewSU, D); 01088 01089 if (isNewLoad) 01090 AvailableQueue->addNode(LoadSU); 01091 AvailableQueue->addNode(NewSU); 01092 01093 ++NumUnfolds; 01094 01095 if (NewSU->NumSuccsLeft == 0) { 01096 NewSU->isAvailable = true; 01097 return NewSU; 01098 } 01099 SU = NewSU; 01100 } 01101 01102 DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n"); 01103 NewSU = CreateClone(SU); 01104 01105 // New SUnit has the exact same predecessors. 01106 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 01107 I != E; ++I) 01108 if (!I->isArtificial()) 01109 AddPred(NewSU, *I); 01110 01111 // Only copy scheduled successors. Cut them from old node's successor 01112 // list and move them over. 01113 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; 01114 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 01115 I != E; ++I) { 01116 if (I->isArtificial()) 01117 continue; 01118 SUnit *SuccSU = I->getSUnit(); 01119 if (SuccSU->isScheduled) { 01120 SDep D = *I; 01121 D.setSUnit(NewSU); 01122 AddPred(SuccSU, D); 01123 D.setSUnit(SU); 01124 DelDeps.push_back(std::make_pair(SuccSU, D)); 01125 } 01126 } 01127 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) 01128 RemovePred(DelDeps[i].first, DelDeps[i].second); 01129 01130 AvailableQueue->updateNode(SU); 01131 AvailableQueue->addNode(NewSU); 01132 01133 ++NumDups; 01134 return NewSU; 01135 } 01136 01137 /// InsertCopiesAndMoveSuccs - Insert register copies and move all 01138 /// scheduled successors of the given SUnit to the last copy. 01139 void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, 01140 const TargetRegisterClass *DestRC, 01141 const TargetRegisterClass *SrcRC, 01142 SmallVectorImpl<SUnit*> &Copies) { 01143 SUnit *CopyFromSU = CreateNewSUnit(nullptr); 01144 CopyFromSU->CopySrcRC = SrcRC; 01145 CopyFromSU->CopyDstRC = DestRC; 01146 01147 SUnit *CopyToSU = CreateNewSUnit(nullptr); 01148 CopyToSU->CopySrcRC = DestRC; 01149 CopyToSU->CopyDstRC = SrcRC; 01150 01151 // Only copy scheduled successors. Cut them from old node's successor 01152 // list and move them over. 01153 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; 01154 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 01155 I != E; ++I) { 01156 if (I->isArtificial()) 01157 continue; 01158 SUnit *SuccSU = I->getSUnit(); 01159 if (SuccSU->isScheduled) { 01160 SDep D = *I; 01161 D.setSUnit(CopyToSU); 01162 AddPred(SuccSU, D); 01163 DelDeps.push_back(std::make_pair(SuccSU, *I)); 01164 } 01165 else { 01166 // Avoid scheduling the def-side copy before other successors. Otherwise 01167 // we could introduce another physreg interference on the copy and 01168 // continue inserting copies indefinitely. 
01169 AddPred(SuccSU, SDep(CopyFromSU, SDep::Artificial)); 01170 } 01171 } 01172 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) 01173 RemovePred(DelDeps[i].first, DelDeps[i].second); 01174 01175 SDep FromDep(SU, SDep::Data, Reg); 01176 FromDep.setLatency(SU->Latency); 01177 AddPred(CopyFromSU, FromDep); 01178 SDep ToDep(CopyFromSU, SDep::Data, 0); 01179 ToDep.setLatency(CopyFromSU->Latency); 01180 AddPred(CopyToSU, ToDep); 01181 01182 AvailableQueue->updateNode(SU); 01183 AvailableQueue->addNode(CopyFromSU); 01184 AvailableQueue->addNode(CopyToSU); 01185 Copies.push_back(CopyFromSU); 01186 Copies.push_back(CopyToSU); 01187 01188 ++NumPRCopies; 01189 } 01190 01191 /// getPhysicalRegisterVT - Returns the ValueType of the physical register 01192 /// definition of the specified node. 01193 /// FIXME: Move to SelectionDAG? 01194 static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, 01195 const TargetInstrInfo *TII) { 01196 const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); 01197 assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); 01198 unsigned NumRes = MCID.getNumDefs(); 01199 for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { 01200 if (Reg == *ImpDef) 01201 break; 01202 ++NumRes; 01203 } 01204 return N->getValueType(NumRes); 01205 } 01206 01207 /// CheckForLiveRegDef - Return true and update live register vector if the 01208 /// specified register def of the specified SUnit clobbers any "live" registers. 01209 static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, 01210 std::vector<SUnit*> &LiveRegDefs, 01211 SmallSet<unsigned, 4> &RegAdded, 01212 SmallVectorImpl<unsigned> &LRegs, 01213 const TargetRegisterInfo *TRI) { 01214 for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) { 01215 01216 // Check if Ref is live. 01217 if (!LiveRegDefs[*AliasI]) continue; 01218 01219 // Allow multiple uses of the same def. 01220 if (LiveRegDefs[*AliasI] == SU) continue; 01221 01222 // Add Reg to the set of interfering live regs. 01223 if (RegAdded.insert(*AliasI)) { 01224 LRegs.push_back(*AliasI); 01225 } 01226 } 01227 } 01228 01229 /// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered 01230 /// by RegMask, and add them to LRegs. 01231 static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask, 01232 std::vector<SUnit*> &LiveRegDefs, 01233 SmallSet<unsigned, 4> &RegAdded, 01234 SmallVectorImpl<unsigned> &LRegs) { 01235 // Look at all live registers. Skip Reg0 and the special CallResource. 01236 for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) { 01237 if (!LiveRegDefs[i]) continue; 01238 if (LiveRegDefs[i] == SU) continue; 01239 if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue; 01240 if (RegAdded.insert(i)) 01241 LRegs.push_back(i); 01242 } 01243 } 01244 01245 /// getNodeRegMask - Returns the register mask attached to an SDNode, if any. 01246 static const uint32_t *getNodeRegMask(const SDNode *N) { 01247 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) 01248 if (const RegisterMaskSDNode *Op = 01249 dyn_cast<RegisterMaskSDNode>(N->getOperand(i).getNode())) 01250 return Op->getRegMask(); 01251 return nullptr; 01252 } 01253 01254 /// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay 01255 /// scheduling of the given node to satisfy live physical register dependencies. 01256 /// If the specific node is the last one that's available to schedule, do 01257 /// whatever is necessary (i.e. backtracking or cloning) to make it possible. 
01258 bool ScheduleDAGRRList:: 01259 DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) { 01260 if (NumLiveRegs == 0) 01261 return false; 01262 01263 SmallSet<unsigned, 4> RegAdded; 01264 // If this node would clobber any "live" register, then it's not ready. 01265 // 01266 // If SU is the currently live definition of the same register that it uses, 01267 // then we are free to schedule it. 01268 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 01269 I != E; ++I) { 01270 if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU) 01271 CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs, 01272 RegAdded, LRegs, TRI); 01273 } 01274 01275 for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) { 01276 if (Node->getOpcode() == ISD::INLINEASM) { 01277 // Inline asm can clobber physical defs. 01278 unsigned NumOps = Node->getNumOperands(); 01279 if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue) 01280 --NumOps; // Ignore the glue operand. 01281 01282 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { 01283 unsigned Flags = 01284 cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); 01285 unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); 01286 01287 ++i; // Skip the ID value. 01288 if (InlineAsm::isRegDefKind(Flags) || 01289 InlineAsm::isRegDefEarlyClobberKind(Flags) || 01290 InlineAsm::isClobberKind(Flags)) { 01291 // Check for def of register or earlyclobber register. 01292 for (; NumVals; --NumVals, ++i) { 01293 unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); 01294 if (TargetRegisterInfo::isPhysicalRegister(Reg)) 01295 CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI); 01296 } 01297 } else 01298 i += NumVals; 01299 } 01300 continue; 01301 } 01302 01303 if (!Node->isMachineOpcode()) 01304 continue; 01305 // If we're in the middle of scheduling a call, don't begin scheduling 01306 // another call. Also, don't allow any physical registers to be live across 01307 // the call. 01308 if (Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) { 01309 // Check the special calling-sequence resource. 01310 unsigned CallResource = TRI->getNumRegs(); 01311 if (LiveRegDefs[CallResource]) { 01312 SDNode *Gen = LiveRegGens[CallResource]->getNode(); 01313 while (SDNode *Glued = Gen->getGluedNode()) 01314 Gen = Glued; 01315 if (!IsChainDependent(Gen, Node, 0, TII) && RegAdded.insert(CallResource)) 01316 LRegs.push_back(CallResource); 01317 } 01318 } 01319 if (const uint32_t *RegMask = getNodeRegMask(Node)) 01320 CheckForLiveRegDefMasked(SU, RegMask, LiveRegDefs, RegAdded, LRegs); 01321 01322 const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); 01323 if (!MCID.ImplicitDefs) 01324 continue; 01325 for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) 01326 CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); 01327 } 01328 01329 return !LRegs.empty(); 01330 } 01331 01332 void ScheduleDAGRRList::releaseInterferences(unsigned Reg) { 01333 // Add the nodes that aren't ready back onto the available list. 01334 for (unsigned i = Interferences.size(); i > 0; --i) { 01335 SUnit *SU = Interferences[i-1]; 01336 LRegsMapT::iterator LRegsPos = LRegsMap.find(SU); 01337 if (Reg) { 01338 SmallVectorImpl<unsigned> &LRegs = LRegsPos->second; 01339 if (std::find(LRegs.begin(), LRegs.end(), Reg) == LRegs.end()) 01340 continue; 01341 } 01342 SU->isPending = false; 01343 // The interfering node may no longer be available due to backtracking. 
01344 // Furthermore, it may have been made available again, in which case it is 01345 // now already in the AvailableQueue. 01346 if (SU->isAvailable && !SU->NodeQueueId) { 01347 DEBUG(dbgs() << " Repushing SU #" << SU->NodeNum << '\n'); 01348 AvailableQueue->push(SU); 01349 } 01350 if (i < Interferences.size()) 01351 Interferences[i-1] = Interferences.back(); 01352 Interferences.pop_back(); 01353 LRegsMap.erase(LRegsPos); 01354 } 01355 } 01356 01357 /// Return a node that can be scheduled in this cycle. Requirements: 01358 /// (1) Ready: latency has been satisfied 01359 /// (2) No Hazards: resources are available 01360 /// (3) No Interferences: may unschedule to break register interferences. 01361 SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { 01362 SUnit *CurSU = AvailableQueue->empty() ? nullptr : AvailableQueue->pop(); 01363 while (CurSU) { 01364 SmallVector<unsigned, 4> LRegs; 01365 if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) 01366 break; 01367 DEBUG(dbgs() << " Interfering reg " << 01368 (LRegs[0] == TRI->getNumRegs() ? "CallResource" 01369 : TRI->getName(LRegs[0])) 01370 << " SU #" << CurSU->NodeNum << '\n'); 01371 std::pair<LRegsMapT::iterator, bool> LRegsPair = 01372 LRegsMap.insert(std::make_pair(CurSU, LRegs)); 01373 if (LRegsPair.second) { 01374 CurSU->isPending = true; // This SU is not in AvailableQueue right now. 01375 Interferences.push_back(CurSU); 01376 } 01377 else { 01378 assert(CurSU->isPending && "Interferences are pending"); 01379 // Update the interference with current live regs. 01380 LRegsPair.first->second = LRegs; 01381 } 01382 CurSU = AvailableQueue->pop(); 01383 } 01384 if (CurSU) 01385 return CurSU; 01386 01387 // All candidates are delayed due to live physical reg dependencies. 01388 // Try backtracking, code duplication, or inserting cross class copies 01389 // to resolve it. 01390 for (unsigned i = 0, e = Interferences.size(); i != e; ++i) { 01391 SUnit *TrySU = Interferences[i]; 01392 SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU]; 01393 01394 // Try unscheduling up to the point where it's safe to schedule 01395 // this node. 01396 SUnit *BtSU = nullptr; 01397 unsigned LiveCycle = UINT_MAX; 01398 for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) { 01399 unsigned Reg = LRegs[j]; 01400 if (LiveRegGens[Reg]->getHeight() < LiveCycle) { 01401 BtSU = LiveRegGens[Reg]; 01402 LiveCycle = BtSU->getHeight(); 01403 } 01404 } 01405 if (!WillCreateCycle(TrySU, BtSU)) { 01406 // BacktrackBottomUp mutates Interferences! 01407 BacktrackBottomUp(TrySU, BtSU); 01408 01409 // Force the current node to be scheduled before the node that 01410 // requires the physical reg dep. 01411 if (BtSU->isAvailable) { 01412 BtSU->isAvailable = false; 01413 if (!BtSU->isPending) 01414 AvailableQueue->remove(BtSU); 01415 } 01416 DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum << ") to SU(" 01417 << TrySU->NodeNum << ")\n"); 01418 AddPred(TrySU, SDep(BtSU, SDep::Artificial)); 01419 01420 // If one or more successors has been unscheduled, then the current 01421 // node is no longer available. 01422 if (!TrySU->isAvailable) 01423 CurSU = AvailableQueue->pop(); 01424 else { 01425 AvailableQueue->remove(TrySU); 01426 CurSU = TrySU; 01427 } 01428 // Interferences has been mutated. We must break. 01429 break; 01430 } 01431 } 01432 01433 if (!CurSU) { 01434 // Can't backtrack. If it's too expensive to copy the value, then try 01435 // duplicate the nodes that produces these "too expensive to copy" 01436 // values to break the dependency. 
In case even that doesn't work,
01437     // insert cross class copies.
01438     // If it's not too expensive, i.e. cost != -1, issue copies.
01439     SUnit *TrySU = Interferences[0];
01440     SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
01441     assert(LRegs.size() == 1 && "Can't handle this yet!");
01442     unsigned Reg = LRegs[0];
01443     SUnit *LRDef = LiveRegDefs[Reg];
01444     EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
01445     const TargetRegisterClass *RC =
01446       TRI->getMinimalPhysRegClass(Reg, VT);
01447     const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
01448 
01449     // If cross copy register class is the same as RC, then it must be possible
01450     // to copy the value directly. Do not try to duplicate the def.
01451     // If cross copy register class is not the same as RC, then it's possible to
01452     // copy the value but it requires cross register class copies and it is
01453     // expensive.
01454     // If cross copy register class is null, then it's not possible to copy
01455     // the value at all.
01456     SUnit *NewDef = nullptr;
01457     if (DestRC != RC) {
01458       NewDef = CopyAndMoveSuccessors(LRDef);
01459       if (!DestRC && !NewDef)
01460         report_fatal_error("Can't handle live physical register dependency!");
01461     }
01462     if (!NewDef) {
01463       // Issue copies, these can be expensive cross register class copies.
01464       SmallVector<SUnit*, 2> Copies;
01465       InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
01466       DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
01467             << " to SU #" << Copies.front()->NodeNum << "\n");
01468       AddPred(TrySU, SDep(Copies.front(), SDep::Artificial));
01469       NewDef = Copies.back();
01470     }
01471 
01472     DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
01473           << " to SU #" << TrySU->NodeNum << "\n");
01474     LiveRegDefs[Reg] = NewDef;
01475     AddPred(NewDef, SDep(TrySU, SDep::Artificial));
01476     TrySU->isAvailable = false;
01477     CurSU = NewDef;
01478   }
01479   assert(CurSU && "Unable to resolve live physical register dependencies!");
01480   return CurSU;
01481 }
01482 
01483 /// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
01484 /// schedulers.
01485 void ScheduleDAGRRList::ListScheduleBottomUp() {
01486   // Release any predecessors of the special Exit node.
01487   ReleasePredecessors(&ExitSU);
01488 
01489   // Add root to Available queue.
01490   if (!SUnits.empty()) {
01491     SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
01492     assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
01493     RootSU->isAvailable = true;
01494     AvailableQueue->push(RootSU);
01495   }
01496 
01497   // While Available queue is not empty, grab the node with the highest
01498   // priority. If it is not ready put it back. Schedule the node.
01499   Sequence.reserve(SUnits.size());
01500   while (!AvailableQueue->empty() || !Interferences.empty()) {
01501     DEBUG(dbgs() << "\nExamining Available:\n";
01502           AvailableQueue->dump(this));
01503 
01504     // Pick the best node to schedule taking all constraints into
01505     // consideration.
01506     SUnit *SU = PickNodeToScheduleBottomUp();
01507 
01508     AdvancePastStalls(SU);
01509 
01510     ScheduleNodeBottomUp(SU);
01511 
01512     while (AvailableQueue->empty() && !PendingQueue.empty()) {
01513       // Advance the cycle to free resources. Skip ahead to the next ready SU.
01514       assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized");
01515       AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
01516     }
01517   }
01518 
01519   // Reverse the order if it is bottom up.
01520 std::reverse(Sequence.begin(), Sequence.end()); 01521 01522 #ifndef NDEBUG 01523 VerifyScheduledSequence(/*isBottomUp=*/true); 01524 #endif 01525 } 01526 01527 //===----------------------------------------------------------------------===// 01528 // RegReductionPriorityQueue Definition 01529 //===----------------------------------------------------------------------===// 01530 // 01531 // This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers 01532 // to reduce register pressure. 01533 // 01534 namespace { 01535 class RegReductionPQBase; 01536 01537 struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> { 01538 bool isReady(SUnit* SU, unsigned CurCycle) const { return true; } 01539 }; 01540 01541 #ifndef NDEBUG 01542 template<class SF> 01543 struct reverse_sort : public queue_sort { 01544 SF &SortFunc; 01545 reverse_sort(SF &sf) : SortFunc(sf) {} 01546 01547 bool operator()(SUnit* left, SUnit* right) const { 01548 // reverse left/right rather than simply !SortFunc(left, right) 01549 // to expose different paths in the comparison logic. 01550 return SortFunc(right, left); 01551 } 01552 }; 01553 #endif // NDEBUG 01554 01555 /// bu_ls_rr_sort - Priority function for bottom up register pressure 01556 // reduction scheduler. 01557 struct bu_ls_rr_sort : public queue_sort { 01558 enum { 01559 IsBottomUp = true, 01560 HasReadyFilter = false 01561 }; 01562 01563 RegReductionPQBase *SPQ; 01564 bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} 01565 01566 bool operator()(SUnit* left, SUnit* right) const; 01567 }; 01568 01569 // src_ls_rr_sort - Priority function for source order scheduler. 01570 struct src_ls_rr_sort : public queue_sort { 01571 enum { 01572 IsBottomUp = true, 01573 HasReadyFilter = false 01574 }; 01575 01576 RegReductionPQBase *SPQ; 01577 src_ls_rr_sort(RegReductionPQBase *spq) 01578 : SPQ(spq) {} 01579 01580 bool operator()(SUnit* left, SUnit* right) const; 01581 }; 01582 01583 // hybrid_ls_rr_sort - Priority function for hybrid scheduler. 01584 struct hybrid_ls_rr_sort : public queue_sort { 01585 enum { 01586 IsBottomUp = true, 01587 HasReadyFilter = false 01588 }; 01589 01590 RegReductionPQBase *SPQ; 01591 hybrid_ls_rr_sort(RegReductionPQBase *spq) 01592 : SPQ(spq) {} 01593 01594 bool isReady(SUnit *SU, unsigned CurCycle) const; 01595 01596 bool operator()(SUnit* left, SUnit* right) const; 01597 }; 01598 01599 // ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism) 01600 // scheduler. 01601 struct ilp_ls_rr_sort : public queue_sort { 01602 enum { 01603 IsBottomUp = true, 01604 HasReadyFilter = false 01605 }; 01606 01607 RegReductionPQBase *SPQ; 01608 ilp_ls_rr_sort(RegReductionPQBase *spq) 01609 : SPQ(spq) {} 01610 01611 bool isReady(SUnit *SU, unsigned CurCycle) const; 01612 01613 bool operator()(SUnit* left, SUnit* right) const; 01614 }; 01615 01616 class RegReductionPQBase : public SchedulingPriorityQueue { 01617 protected: 01618 std::vector<SUnit*> Queue; 01619 unsigned CurQueueId; 01620 bool TracksRegPressure; 01621 bool SrcOrder; 01622 01623 // SUnits - The SUnits for the current graph. 01624 std::vector<SUnit> *SUnits; 01625 01626 MachineFunction &MF; 01627 const TargetInstrInfo *TII; 01628 const TargetRegisterInfo *TRI; 01629 const TargetLowering *TLI; 01630 ScheduleDAGRRList *scheduleDAG; 01631 01632 // SethiUllmanNumbers - The SethiUllman number for each node. 01633 std::vector<unsigned> SethiUllmanNumbers; 01634 01635 /// RegPressure - Tracking current reg pressure per register class. 
01636 /// 01637 std::vector<unsigned> RegPressure; 01638 01639 /// RegLimit - Tracking the number of allocatable registers per register 01640 /// class. 01641 std::vector<unsigned> RegLimit; 01642 01643 public: 01644 RegReductionPQBase(MachineFunction &mf, 01645 bool hasReadyFilter, 01646 bool tracksrp, 01647 bool srcorder, 01648 const TargetInstrInfo *tii, 01649 const TargetRegisterInfo *tri, 01650 const TargetLowering *tli) 01651 : SchedulingPriorityQueue(hasReadyFilter), 01652 CurQueueId(0), TracksRegPressure(tracksrp), SrcOrder(srcorder), 01653 MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(nullptr) { 01654 if (TracksRegPressure) { 01655 unsigned NumRC = TRI->getNumRegClasses(); 01656 RegLimit.resize(NumRC); 01657 RegPressure.resize(NumRC); 01658 std::fill(RegLimit.begin(), RegLimit.end(), 0); 01659 std::fill(RegPressure.begin(), RegPressure.end(), 0); 01660 for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), 01661 E = TRI->regclass_end(); I != E; ++I) 01662 RegLimit[(*I)->getID()] = tri->getRegPressureLimit(*I, MF); 01663 } 01664 } 01665 01666 void setScheduleDAG(ScheduleDAGRRList *scheduleDag) { 01667 scheduleDAG = scheduleDag; 01668 } 01669 01670 ScheduleHazardRecognizer* getHazardRec() { 01671 return scheduleDAG->getHazardRec(); 01672 } 01673 01674 void initNodes(std::vector<SUnit> &sunits) override; 01675 01676 void addNode(const SUnit *SU) override; 01677 01678 void updateNode(const SUnit *SU) override; 01679 01680 void releaseState() override { 01681 SUnits = nullptr; 01682 SethiUllmanNumbers.clear(); 01683 std::fill(RegPressure.begin(), RegPressure.end(), 0); 01684 } 01685 01686 unsigned getNodePriority(const SUnit *SU) const; 01687 01688 unsigned getNodeOrdering(const SUnit *SU) const { 01689 if (!SU->getNode()) return 0; 01690 01691 return SU->getNode()->getIROrder(); 01692 } 01693 01694 bool empty() const override { return Queue.empty(); } 01695 01696 void push(SUnit *U) override { 01697 assert(!U->NodeQueueId && "Node in the queue already"); 01698 U->NodeQueueId = ++CurQueueId; 01699 Queue.push_back(U); 01700 } 01701 01702 void remove(SUnit *SU) override { 01703 assert(!Queue.empty() && "Queue is empty!"); 01704 assert(SU->NodeQueueId != 0 && "Not in queue!"); 01705 std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), 01706 SU); 01707 if (I != std::prev(Queue.end())) 01708 std::swap(*I, Queue.back()); 01709 Queue.pop_back(); 01710 SU->NodeQueueId = 0; 01711 } 01712 01713 bool tracksRegPressure() const override { return TracksRegPressure; } 01714 01715 void dumpRegPressure() const; 01716 01717 bool HighRegPressure(const SUnit *SU) const; 01718 01719 bool MayReduceRegPressure(SUnit *SU) const; 01720 01721 int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const; 01722 01723 void scheduledNode(SUnit *SU) override; 01724 01725 void unscheduledNode(SUnit *SU) override; 01726 01727 protected: 01728 bool canClobber(const SUnit *SU, const SUnit *Op); 01729 void AddPseudoTwoAddrDeps(); 01730 void PrescheduleNodesWithMultipleUses(); 01731 void CalculateSethiUllmanNumbers(); 01732 }; 01733 01734 template<class SF> 01735 static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) { 01736 std::vector<SUnit *>::iterator Best = Q.begin(); 01737 for (std::vector<SUnit *>::iterator I = std::next(Q.begin()), 01738 E = Q.end(); I != E; ++I) 01739 if (Picker(*Best, *I)) 01740 Best = I; 01741 SUnit *V = *Best; 01742 if (Best != std::prev(Q.end())) 01743 std::swap(*Best, Q.back()); 01744 Q.pop_back(); 01745 return V; 01746 } 01747 01748 
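// Note on popFromQueueImpl above: the picker functors defined below return
// true when their right operand should be scheduled with higher priority than
// the left one, so the linear scan keeps the highest-priority SUnit seen so
// far; the winner is then swapped to the back of the vector and popped in
// constant time.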
template<class SF> 01749 SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) { 01750 #ifndef NDEBUG 01751 if (DAG->StressSched) { 01752 reverse_sort<SF> RPicker(Picker); 01753 return popFromQueueImpl(Q, RPicker); 01754 } 01755 #endif 01756 (void)DAG; 01757 return popFromQueueImpl(Q, Picker); 01758 } 01759 01760 template<class SF> 01761 class RegReductionPriorityQueue : public RegReductionPQBase { 01762 SF Picker; 01763 01764 public: 01765 RegReductionPriorityQueue(MachineFunction &mf, 01766 bool tracksrp, 01767 bool srcorder, 01768 const TargetInstrInfo *tii, 01769 const TargetRegisterInfo *tri, 01770 const TargetLowering *tli) 01771 : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder, 01772 tii, tri, tli), 01773 Picker(this) {} 01774 01775 bool isBottomUp() const override { return SF::IsBottomUp; } 01776 01777 bool isReady(SUnit *U) const override { 01778 return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle()); 01779 } 01780 01781 SUnit *pop() override { 01782 if (Queue.empty()) return nullptr; 01783 01784 SUnit *V = popFromQueue(Queue, Picker, scheduleDAG); 01785 V->NodeQueueId = 0; 01786 return V; 01787 } 01788 01789 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 01790 void dump(ScheduleDAG *DAG) const override { 01791 // Emulate pop() without clobbering NodeQueueIds. 01792 std::vector<SUnit*> DumpQueue = Queue; 01793 SF DumpPicker = Picker; 01794 while (!DumpQueue.empty()) { 01795 SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG); 01796 dbgs() << "Height " << SU->getHeight() << ": "; 01797 SU->dump(DAG); 01798 } 01799 } 01800 #endif 01801 }; 01802 01803 typedef RegReductionPriorityQueue<bu_ls_rr_sort> 01804 BURegReductionPriorityQueue; 01805 01806 typedef RegReductionPriorityQueue<src_ls_rr_sort> 01807 SrcRegReductionPriorityQueue; 01808 01809 typedef RegReductionPriorityQueue<hybrid_ls_rr_sort> 01810 HybridBURRPriorityQueue; 01811 01812 typedef RegReductionPriorityQueue<ilp_ls_rr_sort> 01813 ILPBURRPriorityQueue; 01814 } // end anonymous namespace 01815 01816 //===----------------------------------------------------------------------===// 01817 // Static Node Priority for Register Pressure Reduction 01818 //===----------------------------------------------------------------------===// 01819 01820 // Check for special nodes that bypass scheduling heuristics. 01821 // Currently this pushes TokenFactor nodes down, but may be used for other 01822 // pseudo-ops as well. 01823 // 01824 // Return -1 to schedule right above left, 1 for left above right. 01825 // Return 0 if no bias exists. 01826 static int checkSpecialNodes(const SUnit *left, const SUnit *right) { 01827 bool LSchedLow = left->isScheduleLow; 01828 bool RSchedLow = right->isScheduleLow; 01829 if (LSchedLow != RSchedLow) 01830 return LSchedLow < RSchedLow ? 1 : -1; 01831 return 0; 01832 } 01833 01834 /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number. 01835 /// Smaller number is the higher priority. 
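/// As a rough illustration of the recurrence implemented below: a node with
/// no non-chain predecessors gets 1; a node whose non-chain predecessors
/// carry the numbers {1, 1} takes the maximum plus one extra for the tie,
/// i.e. 2; a node with predecessor numbers {2, 1} simply takes 2. The number
/// therefore approximates, in the classic Sethi-Ullman sense, how many
/// registers are needed to evaluate the subtree rooted at the node.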
01836 static unsigned 01837 CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) { 01838 unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum]; 01839 if (SethiUllmanNumber != 0) 01840 return SethiUllmanNumber; 01841 01842 unsigned Extra = 0; 01843 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 01844 I != E; ++I) { 01845 if (I->isCtrl()) continue; // ignore chain preds 01846 SUnit *PredSU = I->getSUnit(); 01847 unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers); 01848 if (PredSethiUllman > SethiUllmanNumber) { 01849 SethiUllmanNumber = PredSethiUllman; 01850 Extra = 0; 01851 } else if (PredSethiUllman == SethiUllmanNumber) 01852 ++Extra; 01853 } 01854 01855 SethiUllmanNumber += Extra; 01856 01857 if (SethiUllmanNumber == 0) 01858 SethiUllmanNumber = 1; 01859 01860 return SethiUllmanNumber; 01861 } 01862 01863 /// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all 01864 /// scheduling units. 01865 void RegReductionPQBase::CalculateSethiUllmanNumbers() { 01866 SethiUllmanNumbers.assign(SUnits->size(), 0); 01867 01868 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) 01869 CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers); 01870 } 01871 01872 void RegReductionPQBase::addNode(const SUnit *SU) { 01873 unsigned SUSize = SethiUllmanNumbers.size(); 01874 if (SUnits->size() > SUSize) 01875 SethiUllmanNumbers.resize(SUSize*2, 0); 01876 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); 01877 } 01878 01879 void RegReductionPQBase::updateNode(const SUnit *SU) { 01880 SethiUllmanNumbers[SU->NodeNum] = 0; 01881 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); 01882 } 01883 01884 // Lower priority means schedule further down. For bottom-up scheduling, lower 01885 // priority SUs are scheduled before higher priority SUs. 01886 unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const { 01887 assert(SU->NodeNum < SethiUllmanNumbers.size()); 01888 unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; 01889 if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) 01890 // CopyToReg should be close to its uses to facilitate coalescing and 01891 // avoid spilling. 01892 return 0; 01893 if (Opc == TargetOpcode::EXTRACT_SUBREG || 01894 Opc == TargetOpcode::SUBREG_TO_REG || 01895 Opc == TargetOpcode::INSERT_SUBREG) 01896 // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be 01897 // close to their uses to facilitate coalescing. 01898 return 0; 01899 if (SU->NumSuccs == 0 && SU->NumPreds != 0) 01900 // If SU does not have a register use, i.e. it doesn't produce a value 01901 // that would be consumed (e.g. store), then it terminates a chain of 01902 // computation. Give it a large SethiUllman number so it will be 01903 // scheduled right before its predecessors that it doesn't lengthen 01904 // their live ranges. 01905 return 0xffff; 01906 if (SU->NumPreds == 0 && SU->NumSuccs != 0) 01907 // If SU does not have a register def, schedule it close to its uses 01908 // because it does not lengthen any live ranges. 01909 return 0; 01910 #if 1 01911 return SethiUllmanNumbers[SU->NodeNum]; 01912 #else 01913 unsigned Priority = SethiUllmanNumbers[SU->NodeNum]; 01914 if (SU->isCallOp) { 01915 // FIXME: This assumes all of the defs are used as call operands. 01916 int NP = (int)Priority - SU->getNode()->getNumValues(); 01917 return (NP > 0) ? 
NP : 0; 01918 } 01919 return Priority; 01920 #endif 01921 } 01922 01923 //===----------------------------------------------------------------------===// 01924 // Register Pressure Tracking 01925 //===----------------------------------------------------------------------===// 01926 01927 void RegReductionPQBase::dumpRegPressure() const { 01928 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 01929 for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), 01930 E = TRI->regclass_end(); I != E; ++I) { 01931 const TargetRegisterClass *RC = *I; 01932 unsigned Id = RC->getID(); 01933 unsigned RP = RegPressure[Id]; 01934 if (!RP) continue; 01935 DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id] 01936 << '\n'); 01937 } 01938 #endif 01939 } 01940 01941 bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const { 01942 if (!TLI) 01943 return false; 01944 01945 for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); 01946 I != E; ++I) { 01947 if (I->isCtrl()) 01948 continue; 01949 SUnit *PredSU = I->getSUnit(); 01950 // NumRegDefsLeft is zero when enough uses of this node have been scheduled 01951 // to cover the number of registers defined (they are all live). 01952 if (PredSU->NumRegDefsLeft == 0) { 01953 continue; 01954 } 01955 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); 01956 RegDefPos.IsValid(); RegDefPos.Advance()) { 01957 unsigned RCId, Cost; 01958 GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF); 01959 01960 if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) 01961 return true; 01962 } 01963 } 01964 return false; 01965 } 01966 01967 bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const { 01968 const SDNode *N = SU->getNode(); 01969 01970 if (!N->isMachineOpcode() || !SU->NumSuccs) 01971 return false; 01972 01973 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 01974 for (unsigned i = 0; i != NumDefs; ++i) { 01975 MVT VT = N->getSimpleValueType(i); 01976 if (!N->hasAnyUseOfValue(i)) 01977 continue; 01978 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 01979 if (RegPressure[RCId] >= RegLimit[RCId]) 01980 return true; 01981 } 01982 return false; 01983 } 01984 01985 // Compute the register pressure contribution by this instruction by count up 01986 // for uses that are not live and down for defs. Only count register classes 01987 // that are already under high pressure. As a side effect, compute the number of 01988 // uses of registers that are already live. 01989 // 01990 // FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure 01991 // so could probably be factored. 01992 int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const { 01993 LiveUses = 0; 01994 int PDiff = 0; 01995 for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); 01996 I != E; ++I) { 01997 if (I->isCtrl()) 01998 continue; 01999 SUnit *PredSU = I->getSUnit(); 02000 // NumRegDefsLeft is zero when enough uses of this node have been scheduled 02001 // to cover the number of registers defined (they are all live). 
02002 if (PredSU->NumRegDefsLeft == 0) { 02003 if (PredSU->getNode()->isMachineOpcode()) 02004 ++LiveUses; 02005 continue; 02006 } 02007 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); 02008 RegDefPos.IsValid(); RegDefPos.Advance()) { 02009 MVT VT = RegDefPos.GetValue(); 02010 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 02011 if (RegPressure[RCId] >= RegLimit[RCId]) 02012 ++PDiff; 02013 } 02014 } 02015 const SDNode *N = SU->getNode(); 02016 02017 if (!N || !N->isMachineOpcode() || !SU->NumSuccs) 02018 return PDiff; 02019 02020 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 02021 for (unsigned i = 0; i != NumDefs; ++i) { 02022 MVT VT = N->getSimpleValueType(i); 02023 if (!N->hasAnyUseOfValue(i)) 02024 continue; 02025 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 02026 if (RegPressure[RCId] >= RegLimit[RCId]) 02027 --PDiff; 02028 } 02029 return PDiff; 02030 } 02031 02032 void RegReductionPQBase::scheduledNode(SUnit *SU) { 02033 if (!TracksRegPressure) 02034 return; 02035 02036 if (!SU->getNode()) 02037 return; 02038 02039 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 02040 I != E; ++I) { 02041 if (I->isCtrl()) 02042 continue; 02043 SUnit *PredSU = I->getSUnit(); 02044 // NumRegDefsLeft is zero when enough uses of this node have been scheduled 02045 // to cover the number of registers defined (they are all live). 02046 if (PredSU->NumRegDefsLeft == 0) { 02047 continue; 02048 } 02049 // FIXME: The ScheduleDAG currently loses information about which of a 02050 // node's values is consumed by each dependence. Consequently, if the node 02051 // defines multiple register classes, we don't know which to pressurize 02052 // here. Instead the following loop consumes the register defs in an 02053 // arbitrary order. At least it handles the common case of clustered loads 02054 // to the same class. For precise liveness, each SDep needs to indicate the 02055 // result number. But that tightly couples the ScheduleDAG with the 02056 // SelectionDAG making updates tricky. A simpler hack would be to attach a 02057 // value type or register class to SDep. 02058 // 02059 // The most important aspect of register tracking is balancing the increase 02060 // here with the reduction further below. Note that this SU may use multiple 02061 // defs in PredSU. The can't be determined here, but we've already 02062 // compensated by reducing NumRegDefsLeft in PredSU during 02063 // ScheduleDAGSDNodes::AddSchedEdges. 02064 --PredSU->NumRegDefsLeft; 02065 unsigned SkipRegDefs = PredSU->NumRegDefsLeft; 02066 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); 02067 RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) { 02068 if (SkipRegDefs) 02069 continue; 02070 02071 unsigned RCId, Cost; 02072 GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF); 02073 RegPressure[RCId] += Cost; 02074 break; 02075 } 02076 } 02077 02078 // We should have this assert, but there may be dead SDNodes that never 02079 // materialize as SUnits, so they don't appear to generate liveness. 
02080 //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses"); 02081 int SkipRegDefs = (int)SU->NumRegDefsLeft; 02082 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG); 02083 RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) { 02084 if (SkipRegDefs > 0) 02085 continue; 02086 unsigned RCId, Cost; 02087 GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF); 02088 if (RegPressure[RCId] < Cost) { 02089 // Register pressure tracking is imprecise. This can happen. But we try 02090 // hard not to let it happen because it likely results in poor scheduling. 02091 DEBUG(dbgs() << " SU(" << SU->NodeNum << ") has too many regdefs\n"); 02092 RegPressure[RCId] = 0; 02093 } 02094 else { 02095 RegPressure[RCId] -= Cost; 02096 } 02097 } 02098 dumpRegPressure(); 02099 } 02100 02101 void RegReductionPQBase::unscheduledNode(SUnit *SU) { 02102 if (!TracksRegPressure) 02103 return; 02104 02105 const SDNode *N = SU->getNode(); 02106 if (!N) return; 02107 02108 if (!N->isMachineOpcode()) { 02109 if (N->getOpcode() != ISD::CopyToReg) 02110 return; 02111 } else { 02112 unsigned Opc = N->getMachineOpcode(); 02113 if (Opc == TargetOpcode::EXTRACT_SUBREG || 02114 Opc == TargetOpcode::INSERT_SUBREG || 02115 Opc == TargetOpcode::SUBREG_TO_REG || 02116 Opc == TargetOpcode::REG_SEQUENCE || 02117 Opc == TargetOpcode::IMPLICIT_DEF) 02118 return; 02119 } 02120 02121 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 02122 I != E; ++I) { 02123 if (I->isCtrl()) 02124 continue; 02125 SUnit *PredSU = I->getSUnit(); 02126 // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only 02127 // counts data deps. 02128 if (PredSU->NumSuccsLeft != PredSU->Succs.size()) 02129 continue; 02130 const SDNode *PN = PredSU->getNode(); 02131 if (!PN->isMachineOpcode()) { 02132 if (PN->getOpcode() == ISD::CopyFromReg) { 02133 MVT VT = PN->getSimpleValueType(0); 02134 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 02135 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); 02136 } 02137 continue; 02138 } 02139 unsigned POpc = PN->getMachineOpcode(); 02140 if (POpc == TargetOpcode::IMPLICIT_DEF) 02141 continue; 02142 if (POpc == TargetOpcode::EXTRACT_SUBREG || 02143 POpc == TargetOpcode::INSERT_SUBREG || 02144 POpc == TargetOpcode::SUBREG_TO_REG) { 02145 MVT VT = PN->getSimpleValueType(0); 02146 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 02147 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); 02148 continue; 02149 } 02150 unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); 02151 for (unsigned i = 0; i != NumDefs; ++i) { 02152 MVT VT = PN->getSimpleValueType(i); 02153 if (!PN->hasAnyUseOfValue(i)) 02154 continue; 02155 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 02156 if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) 02157 // Register pressure tracking is imprecise. This can happen. 02158 RegPressure[RCId] = 0; 02159 else 02160 RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); 02161 } 02162 } 02163 02164 // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses() 02165 // may transfer data dependencies to CopyToReg. 
02166   if (SU->NumSuccs && N->isMachineOpcode()) {
02167     unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
02168     for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
02169       MVT VT = N->getSimpleValueType(i);
02170       if (VT == MVT::Glue || VT == MVT::Other)
02171         continue;
02172       if (!N->hasAnyUseOfValue(i))
02173         continue;
02174       unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
02175       RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
02176     }
02177   }
02178 
02179   dumpRegPressure();
02180 }
02181 
02182 //===----------------------------------------------------------------------===//
02183 //           Dynamic Node Priority for Register Pressure Reduction
02184 //===----------------------------------------------------------------------===//
02185 
02186 /// closestSucc - Returns the scheduled cycle of the successor which is
02187 /// closest to the current cycle.
02188 static unsigned closestSucc(const SUnit *SU) {
02189   unsigned MaxHeight = 0;
02190   for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
02191        I != E; ++I) {
02192     if (I->isCtrl()) continue;  // ignore chain succs
02193     unsigned Height = I->getSUnit()->getHeight();
02194     // If there are a bunch of CopyToRegs stacked up, they should be considered
02195     // to be at the same position.
02196     if (I->getSUnit()->getNode() &&
02197         I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
02198       Height = closestSucc(I->getSUnit())+1;
02199     if (Height > MaxHeight)
02200       MaxHeight = Height;
02201   }
02202   return MaxHeight;
02203 }
02204 
02205 /// calcMaxScratches - Returns a cost estimate of the worst-case requirement
02206 /// for scratch registers, i.e. the number of data dependencies.
02207 static unsigned calcMaxScratches(const SUnit *SU) {
02208   unsigned Scratches = 0;
02209   for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
02210        I != E; ++I) {
02211     if (I->isCtrl()) continue;  // ignore chain preds
02212     Scratches++;
02213   }
02214   return Scratches;
02215 }
02216 
02217 /// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
02218 /// CopyFromReg from a virtual register.
02219 static bool hasOnlyLiveInOpers(const SUnit *SU) {
02220   bool RetVal = false;
02221   for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
02222        I != E; ++I) {
02223     if (I->isCtrl()) continue;
02224     const SUnit *PredSU = I->getSUnit();
02225     if (PredSU->getNode() &&
02226         PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
02227       unsigned Reg =
02228         cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
02229       if (TargetRegisterInfo::isVirtualRegister(Reg)) {
02230         RetVal = true;
02231         continue;
02232       }
02233     }
02234     return false;
02235   }
02236   return RetVal;
02237 }
02238 
02239 /// hasOnlyLiveOutUses - Return true if SU has only value successors that are
02240 /// CopyToReg to a virtual register. This SU def is probably a liveout and
02241 /// it has no other use. It should be scheduled closer to the terminator.
02242 static bool hasOnlyLiveOutUses(const SUnit *SU) { 02243 bool RetVal = false; 02244 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 02245 I != E; ++I) { 02246 if (I->isCtrl()) continue; 02247 const SUnit *SuccSU = I->getSUnit(); 02248 if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) { 02249 unsigned Reg = 02250 cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg(); 02251 if (TargetRegisterInfo::isVirtualRegister(Reg)) { 02252 RetVal = true; 02253 continue; 02254 } 02255 } 02256 return false; 02257 } 02258 return RetVal; 02259 } 02260 02261 // Set isVRegCycle for a node with only live in opers and live out uses. Also 02262 // set isVRegCycle for its CopyFromReg operands. 02263 // 02264 // This is only relevant for single-block loops, in which case the VRegCycle 02265 // node is likely an induction variable in which the operand and target virtual 02266 // registers should be coalesced (e.g. pre/post increment values). Setting the 02267 // isVRegCycle flag helps the scheduler prioritize other uses of the same 02268 // CopyFromReg so that this node becomes the virtual register "kill". This 02269 // avoids interference between the values live in and out of the block and 02270 // eliminates a copy inside the loop. 02271 static void initVRegCycle(SUnit *SU) { 02272 if (DisableSchedVRegCycle) 02273 return; 02274 02275 if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU)) 02276 return; 02277 02278 DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n"); 02279 02280 SU->isVRegCycle = true; 02281 02282 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 02283 I != E; ++I) { 02284 if (I->isCtrl()) continue; 02285 I->getSUnit()->isVRegCycle = true; 02286 } 02287 } 02288 02289 // After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of 02290 // CopyFromReg operands. We should no longer penalize other uses of this VReg. 02291 static void resetVRegCycle(SUnit *SU) { 02292 if (!SU->isVRegCycle) 02293 return; 02294 02295 for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); 02296 I != E; ++I) { 02297 if (I->isCtrl()) continue; // ignore chain preds 02298 SUnit *PredSU = I->getSUnit(); 02299 if (PredSU->isVRegCycle) { 02300 assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg && 02301 "VRegCycle def must be CopyFromReg"); 02302 I->getSUnit()->isVRegCycle = 0; 02303 } 02304 } 02305 } 02306 02307 // Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This 02308 // means a node that defines the VRegCycle has not been scheduled yet. 02309 static bool hasVRegCycleUse(const SUnit *SU) { 02310 // If this SU also defines the VReg, don't hoist it as a "use". 02311 if (SU->isVRegCycle) 02312 return false; 02313 02314 for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); 02315 I != E; ++I) { 02316 if (I->isCtrl()) continue; // ignore chain preds 02317 if (I->getSUnit()->isVRegCycle && 02318 I->getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) { 02319 DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n"); 02320 return true; 02321 } 02322 } 02323 return false; 02324 } 02325 02326 // Check for either a dependence (latency) or resource (hazard) stall. 02327 // 02328 // Note: The ScheduleHazardRecognizer interface requires a non-const SU. 
02329 static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
02330   if ((int)SPQ->getCurCycle() < Height) return true;
02331   if (SPQ->getHazardRec()->getHazardType(SU, 0)
02332       != ScheduleHazardRecognizer::NoHazard)
02333     return true;
02334   return false;
02335 }
02336 
02337 // Return -1 if left has higher priority, 1 if right has higher priority.
02338 // Return 0 if latency-based priority is equivalent.
02339 static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
02340                             RegReductionPQBase *SPQ) {
02341   // Scheduling an instruction that uses a VReg whose postincrement has not yet
02342   // been scheduled will induce a copy. Model this as an extra cycle of latency.
02343   int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
02344   int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
02345   int LHeight = (int)left->getHeight() + LPenalty;
02346   int RHeight = (int)right->getHeight() + RPenalty;
02347 
02348   bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) &&
02349                 BUHasStall(left, LHeight, SPQ);
02350   bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) &&
02351                 BUHasStall(right, RHeight, SPQ);
02352 
02353   // If scheduling one of the nodes will cause a pipeline stall, delay it.
02354   // If scheduling both of them will cause stalls, sort them according to
02355   // their height.
02356   if (LStall) {
02357     if (!RStall)
02358       return 1;
02359     if (LHeight != RHeight)
02360       return LHeight > RHeight ? 1 : -1;
02361   } else if (RStall)
02362     return -1;
02363 
02364   // If either node is scheduling for latency, sort them by height/depth
02365   // and latency.
02366   if (!checkPref || (left->SchedulingPref == Sched::ILP ||
02367                      right->SchedulingPref == Sched::ILP)) {
02368     // If neither instruction stalls (!LStall && !RStall) and the
02369     // HazardRecognizer is enabled, it already groups instructions by cycle, so
02370     // height is covered and only depth matters. We also reach this point if
02371     // both stall but have the same height.
02372     if (!SPQ->getHazardRec()->isEnabled()) {
02373       if (LHeight != RHeight)
02374         return LHeight > RHeight ? 1 : -1;
02375     }
02376     int LDepth = left->getDepth() - LPenalty;
02377     int RDepth = right->getDepth() - RPenalty;
02378     if (LDepth != RDepth) {
02379       DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum
02380             << ") depth " << LDepth << " vs SU (" << right->NodeNum
02381             << ") depth " << RDepth << "\n");
02382       return LDepth < RDepth ? 1 : -1;
02383     }
02384     if (left->Latency != right->Latency)
02385       return left->Latency > right->Latency ? 1 : -1;
02386   }
02387   return 0;
02388 }
02389 
02390 static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
02391   // Schedule physical register definitions close to their use. This is
02392   // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
02393   // long as shortening physreg live ranges is generally good, we can defer
02394   // creating a subtarget hook.
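  // (In outline, the physreg check below is the first of several tie-breakers;
  // the full cascade is roughly: physreg def/use affinity, Sethi-Ullman
  // priority, source order when calls are involved, distance to the closest
  // successor, worst-case scratch register count, latency or raw height/depth,
  // and finally NodeQueueId as the last resort.)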
02395   if (!DisableSchedPhysRegJoin) {
02396     bool LHasPhysReg = left->hasPhysRegDefs;
02397     bool RHasPhysReg = right->hasPhysRegDefs;
02398     if (LHasPhysReg != RHasPhysReg) {
02399 #ifndef NDEBUG
02400       static const char *const PhysRegMsg[] = { " has no physreg",
02401                                                 " defines a physreg" };
02402 #endif
02403       DEBUG(dbgs() << "  SU (" << left->NodeNum << ") "
02404             << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") "
02405             << PhysRegMsg[RHasPhysReg] << "\n");
02406       return LHasPhysReg < RHasPhysReg;
02407     }
02408   }
02409 
02410   // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
02411   unsigned LPriority = SPQ->getNodePriority(left);
02412   unsigned RPriority = SPQ->getNodePriority(right);
02413 
02414   // Be really careful about hoisting call operands above previous calls.
02415   // Only allow it if it would reduce register pressure.
02416   if (left->isCall && right->isCallOp) {
02417     unsigned RNumVals = right->getNode()->getNumValues();
02418     RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
02419   }
02420   if (right->isCall && left->isCallOp) {
02421     unsigned LNumVals = left->getNode()->getNumValues();
02422     LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
02423   }
02424 
02425   if (LPriority != RPriority)
02426     return LPriority > RPriority;
02427 
02428   // If one or both of the nodes are calls and their Sethi-Ullman numbers are
02429   // the same, then keep source order.
02430   if (left->isCall || right->isCall) {
02431     unsigned LOrder = SPQ->getNodeOrdering(left);
02432     unsigned ROrder = SPQ->getNodeOrdering(right);
02433 
02434     // Prefer an ordering where the lower the non-zero order number, the higher
02435     // the preference.
02436     if ((LOrder || ROrder) && LOrder != ROrder)
02437       return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
02438   }
02439 
02440   // Try to schedule def + use closer together when Sethi-Ullman numbers are
02441   // the same. e.g.
02442   // t1 = op t2, c1
02443   // t3 = op t4, c2
02444   //
02445   // and the following instructions are both ready.
02446   // t2 = op c3
02447   // t4 = op c4
02448   //
02449   // Then schedule t2 = op first.
02450   // i.e.
02451   // t4 = op c4
02452   // t2 = op c3
02453   // t1 = op t2, c1
02454   // t3 = op t4, c2
02455   //
02456   // This creates more short live intervals.
02457   unsigned LDist = closestSucc(left);
02458   unsigned RDist = closestSucc(right);
02459   if (LDist != RDist)
02460     return LDist < RDist;
02461 
02462   // How many registers become live when the node is scheduled.
02463   unsigned LScratch = calcMaxScratches(left);
02464   unsigned RScratch = calcMaxScratches(right);
02465   if (LScratch != RScratch)
02466     return LScratch > RScratch;
02467 
02468   // Comparing latency against a call makes little sense unless the node
02469   // is register pressure-neutral.
02470   if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
02471     return (left->NodeQueueId > right->NodeQueueId);
02472 
02473   // Do not compare latencies when one or both of the nodes are calls.
02474 if (!DisableSchedCycles && 02475 !(left->isCall || right->isCall)) { 02476 int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ); 02477 if (result != 0) 02478 return result > 0; 02479 } 02480 else { 02481 if (left->getHeight() != right->getHeight()) 02482 return left->getHeight() > right->getHeight(); 02483 02484 if (left->getDepth() != right->getDepth()) 02485 return left->getDepth() < right->getDepth(); 02486 } 02487 02488 assert(left->NodeQueueId && right->NodeQueueId && 02489 "NodeQueueId cannot be zero"); 02490 return (left->NodeQueueId > right->NodeQueueId); 02491 } 02492 02493 // Bottom up 02494 bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { 02495 if (int res = checkSpecialNodes(left, right)) 02496 return res > 0; 02497 02498 return BURRSort(left, right, SPQ); 02499 } 02500 02501 // Source order, otherwise bottom up. 02502 bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { 02503 if (int res = checkSpecialNodes(left, right)) 02504 return res > 0; 02505 02506 unsigned LOrder = SPQ->getNodeOrdering(left); 02507 unsigned ROrder = SPQ->getNodeOrdering(right); 02508 02509 // Prefer an ordering where the lower the non-zero order number, the higher 02510 // the preference. 02511 if ((LOrder || ROrder) && LOrder != ROrder) 02512 return LOrder != 0 && (LOrder < ROrder || ROrder == 0); 02513 02514 return BURRSort(left, right, SPQ); 02515 } 02516 02517 // If the time between now and when the instruction will be ready can cover 02518 // the spill code, then avoid adding it to the ready queue. This gives long 02519 // stalls highest priority and allows hoisting across calls. It should also 02520 // speed up processing the available queue. 02521 bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { 02522 static const unsigned ReadyDelay = 3; 02523 02524 if (SPQ->MayReduceRegPressure(SU)) return true; 02525 02526 if (SU->getHeight() > (CurCycle + ReadyDelay)) return false; 02527 02528 if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay) 02529 != ScheduleHazardRecognizer::NoHazard) 02530 return false; 02531 02532 return true; 02533 } 02534 02535 // Return true if right should be scheduled with higher priority than left. 02536 bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { 02537 if (int res = checkSpecialNodes(left, right)) 02538 return res > 0; 02539 02540 if (left->isCall || right->isCall) 02541 // No way to compute latency of calls. 02542 return BURRSort(left, right, SPQ); 02543 02544 bool LHigh = SPQ->HighRegPressure(left); 02545 bool RHigh = SPQ->HighRegPressure(right); 02546 // Avoid causing spills. If register pressure is high, schedule for 02547 // register pressure reduction. 02548 if (LHigh && !RHigh) { 02549 DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU(" 02550 << right->NodeNum << ")\n"); 02551 return true; 02552 } 02553 else if (!LHigh && RHigh) { 02554 DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU(" 02555 << left->NodeNum << ")\n"); 02556 return false; 02557 } 02558 if (!LHigh && !RHigh) { 02559 int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ); 02560 if (result != 0) 02561 return result > 0; 02562 } 02563 return BURRSort(left, right, SPQ); 02564 } 02565 02566 // Schedule as many instructions in each cycle as possible. So don't make an 02567 // instruction available unless it is ready in the current cycle. 
02568 bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { 02569 if (SU->getHeight() > CurCycle) return false; 02570 02571 if (SPQ->getHazardRec()->getHazardType(SU, 0) 02572 != ScheduleHazardRecognizer::NoHazard) 02573 return false; 02574 02575 return true; 02576 } 02577 02578 static bool canEnableCoalescing(SUnit *SU) { 02579 unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; 02580 if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) 02581 // CopyToReg should be close to its uses to facilitate coalescing and 02582 // avoid spilling. 02583 return true; 02584 02585 if (Opc == TargetOpcode::EXTRACT_SUBREG || 02586 Opc == TargetOpcode::SUBREG_TO_REG || 02587 Opc == TargetOpcode::INSERT_SUBREG) 02588 // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be 02589 // close to their uses to facilitate coalescing. 02590 return true; 02591 02592 if (SU->NumPreds == 0 && SU->NumSuccs != 0) 02593 // If SU does not have a register def, schedule it close to its uses 02594 // because it does not lengthen any live ranges. 02595 return true; 02596 02597 return false; 02598 } 02599 02600 // list-ilp is currently an experimental scheduler that allows various 02601 // heuristics to be enabled prior to the normal register reduction logic. 02602 bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { 02603 if (int res = checkSpecialNodes(left, right)) 02604 return res > 0; 02605 02606 if (left->isCall || right->isCall) 02607 // No way to compute latency of calls. 02608 return BURRSort(left, right, SPQ); 02609 02610 unsigned LLiveUses = 0, RLiveUses = 0; 02611 int LPDiff = 0, RPDiff = 0; 02612 if (!DisableSchedRegPressure || !DisableSchedLiveUses) { 02613 LPDiff = SPQ->RegPressureDiff(left, LLiveUses); 02614 RPDiff = SPQ->RegPressureDiff(right, RLiveUses); 02615 } 02616 if (!DisableSchedRegPressure && LPDiff != RPDiff) { 02617 DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff 02618 << " != SU(" << right->NodeNum << "): " << RPDiff << "\n"); 02619 return LPDiff > RPDiff; 02620 } 02621 02622 if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) { 02623 bool LReduce = canEnableCoalescing(left); 02624 bool RReduce = canEnableCoalescing(right); 02625 if (LReduce && !RReduce) return false; 02626 if (RReduce && !LReduce) return true; 02627 } 02628 02629 if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) { 02630 DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses 02631 << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n"); 02632 return LLiveUses < RLiveUses; 02633 } 02634 02635 if (!DisableSchedStalls) { 02636 bool LStall = BUHasStall(left, left->getHeight(), SPQ); 02637 bool RStall = BUHasStall(right, right->getHeight(), SPQ); 02638 if (LStall != RStall) 02639 return left->getHeight() > right->getHeight(); 02640 } 02641 02642 if (!DisableSchedCriticalPath) { 02643 int spread = (int)left->getDepth() - (int)right->getDepth(); 02644 if (std::abs(spread) > MaxReorderWindow) { 02645 DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): " 02646 << left->getDepth() << " != SU(" << right->NodeNum << "): " 02647 << right->getDepth() << "\n"); 02648 return left->getDepth() < right->getDepth(); 02649 } 02650 } 02651 02652 if (!DisableSchedHeight && left->getHeight() != right->getHeight()) { 02653 int spread = (int)left->getHeight() - (int)right->getHeight(); 02654 if (std::abs(spread) > MaxReorderWindow) 02655 return left->getHeight() > right->getHeight(); 02656 } 02657 02658 return BURRSort(left, right, SPQ); 02659 } 
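// The ilp_ls_rr_sort comparison above falls through a similar cascade,
// roughly: register pressure difference, coalescing opportunities when
// pressure is already high, number of already-live uses, stalls, critical
// path (depth) and height limited by MaxReorderWindow, and finally the
// standard BURRSort tie-breaking. Each stage can be switched off with the
// corresponding Disable* option defined at the top of this file.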
02660 02661 void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) { 02662 SUnits = &sunits; 02663 // Add pseudo dependency edges for two-address nodes. 02664 if (!Disable2AddrHack) 02665 AddPseudoTwoAddrDeps(); 02666 // Reroute edges to nodes with multiple uses. 02667 if (!TracksRegPressure && !SrcOrder) 02668 PrescheduleNodesWithMultipleUses(); 02669 // Calculate node priorities. 02670 CalculateSethiUllmanNumbers(); 02671 02672 // For single block loops, mark nodes that look like canonical IV increments. 02673 if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB)) { 02674 for (unsigned i = 0, e = sunits.size(); i != e; ++i) { 02675 initVRegCycle(&sunits[i]); 02676 } 02677 } 02678 } 02679 02680 //===----------------------------------------------------------------------===// 02681 // Preschedule for Register Pressure 02682 //===----------------------------------------------------------------------===// 02683 02684 bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) { 02685 if (SU->isTwoAddress) { 02686 unsigned Opc = SU->getNode()->getMachineOpcode(); 02687 const MCInstrDesc &MCID = TII->get(Opc); 02688 unsigned NumRes = MCID.getNumDefs(); 02689 unsigned NumOps = MCID.getNumOperands() - NumRes; 02690 for (unsigned i = 0; i != NumOps; ++i) { 02691 if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) { 02692 SDNode *DU = SU->getNode()->getOperand(i).getNode(); 02693 if (DU->getNodeId() != -1 && 02694 Op->OrigNode == &(*SUnits)[DU->getNodeId()]) 02695 return true; 02696 } 02697 } 02698 } 02699 return false; 02700 } 02701 02702 /// canClobberReachingPhysRegUse - True if SU would clobber one of it's 02703 /// successor's explicit physregs whose definition can reach DepSU. 02704 /// i.e. DepSU should not be scheduled above SU. 02705 static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, 02706 ScheduleDAGRRList *scheduleDAG, 02707 const TargetInstrInfo *TII, 02708 const TargetRegisterInfo *TRI) { 02709 const uint16_t *ImpDefs 02710 = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs(); 02711 const uint32_t *RegMask = getNodeRegMask(SU->getNode()); 02712 if(!ImpDefs && !RegMask) 02713 return false; 02714 02715 for (SUnit::const_succ_iterator SI = SU->Succs.begin(), SE = SU->Succs.end(); 02716 SI != SE; ++SI) { 02717 SUnit *SuccSU = SI->getSUnit(); 02718 for (SUnit::const_pred_iterator PI = SuccSU->Preds.begin(), 02719 PE = SuccSU->Preds.end(); PI != PE; ++PI) { 02720 if (!PI->isAssignedRegDep()) 02721 continue; 02722 02723 if (RegMask && MachineOperand::clobbersPhysReg(RegMask, PI->getReg()) && 02724 scheduleDAG->IsReachable(DepSU, PI->getSUnit())) 02725 return true; 02726 02727 if (ImpDefs) 02728 for (const uint16_t *ImpDef = ImpDefs; *ImpDef; ++ImpDef) 02729 // Return true if SU clobbers this physical register use and the 02730 // definition of the register reaches from DepSU. IsReachable queries 02731 // a topological forward sort of the DAG (following the successors). 02732 if (TRI->regsOverlap(*ImpDef, PI->getReg()) && 02733 scheduleDAG->IsReachable(DepSU, PI->getSUnit())) 02734 return true; 02735 } 02736 } 02737 return false; 02738 } 02739 02740 /// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's 02741 /// physical register defs. 
02742 static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, 02743 const TargetInstrInfo *TII, 02744 const TargetRegisterInfo *TRI) { 02745 SDNode *N = SuccSU->getNode(); 02746 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 02747 const uint16_t *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); 02748 assert(ImpDefs && "Caller should check hasPhysRegDefs"); 02749 for (const SDNode *SUNode = SU->getNode(); SUNode; 02750 SUNode = SUNode->getGluedNode()) { 02751 if (!SUNode->isMachineOpcode()) 02752 continue; 02753 const uint16_t *SUImpDefs = 02754 TII->get(SUNode->getMachineOpcode()).getImplicitDefs(); 02755 const uint32_t *SURegMask = getNodeRegMask(SUNode); 02756 if (!SUImpDefs && !SURegMask) 02757 continue; 02758 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { 02759 EVT VT = N->getValueType(i); 02760 if (VT == MVT::Glue || VT == MVT::Other) 02761 continue; 02762 if (!N->hasAnyUseOfValue(i)) 02763 continue; 02764 unsigned Reg = ImpDefs[i - NumDefs]; 02765 if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg)) 02766 return true; 02767 if (!SUImpDefs) 02768 continue; 02769 for (;*SUImpDefs; ++SUImpDefs) { 02770 unsigned SUReg = *SUImpDefs; 02771 if (TRI->regsOverlap(Reg, SUReg)) 02772 return true; 02773 } 02774 } 02775 } 02776 return false; 02777 } 02778 02779 /// PrescheduleNodesWithMultipleUses - Nodes with multiple uses 02780 /// are not handled well by the general register pressure reduction 02781 /// heuristics. When presented with code like this: 02782 /// 02783 /// N 02784 /// / | 02785 /// / | 02786 /// U store 02787 /// | 02788 /// ... 02789 /// 02790 /// the heuristics tend to push the store up, but since the 02791 /// operand of the store has another use (U), this would increase 02792 /// the length of that other use (the U->N edge). 02793 /// 02794 /// This function transforms code like the above to route U's 02795 /// dependence through the store when possible, like this: 02796 /// 02797 /// N 02798 /// || 02799 /// || 02800 /// store 02801 /// | 02802 /// U 02803 /// | 02804 /// ... 02805 /// 02806 /// This results in the store being scheduled immediately 02807 /// after N, which shortens the U->N live range, reducing 02808 /// register pressure. 02809 /// 02810 void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { 02811 // Visit all the nodes in topological order, working top-down. 02812 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { 02813 SUnit *SU = &(*SUnits)[i]; 02814 // For now, only look at nodes with no data successors, such as stores. 02815 // These are especially important, due to the heuristics in 02816 // getNodePriority for nodes with no data successors. 02817 if (SU->NumSuccs != 0) 02818 continue; 02819 // For now, only look at nodes with exactly one data predecessor. 02820 if (SU->NumPreds != 1) 02821 continue; 02822 // Avoid prescheduling copies to virtual registers, which don't behave 02823 // like other nodes from the perspective of scheduling heuristics. 02824 if (SDNode *N = SU->getNode()) 02825 if (N->getOpcode() == ISD::CopyToReg && 02826 TargetRegisterInfo::isVirtualRegister 02827 (cast<RegisterSDNode>(N->getOperand(1))->getReg())) 02828 continue; 02829 02830 // Locate the single data predecessor. 
02831 SUnit *PredSU = nullptr; 02832 for (SUnit::const_pred_iterator II = SU->Preds.begin(), 02833 EE = SU->Preds.end(); II != EE; ++II) 02834 if (!II->isCtrl()) { 02835 PredSU = II->getSUnit(); 02836 break; 02837 } 02838 assert(PredSU); 02839 02840 // Don't rewrite edges that carry physregs, because that requires additional 02841 // support infrastructure. 02842 if (PredSU->hasPhysRegDefs) 02843 continue; 02844 // Short-circuit the case where SU is PredSU's only data successor. 02845 if (PredSU->NumSuccs == 1) 02846 continue; 02847 // Avoid prescheduling to copies from virtual registers, which don't behave 02848 // like other nodes from the perspective of scheduling heuristics. 02849 if (SDNode *N = SU->getNode()) 02850 if (N->getOpcode() == ISD::CopyFromReg && 02851 TargetRegisterInfo::isVirtualRegister 02852 (cast<RegisterSDNode>(N->getOperand(1))->getReg())) 02853 continue; 02854 02855 // Perform checks on the successors of PredSU. 02856 for (SUnit::const_succ_iterator II = PredSU->Succs.begin(), 02857 EE = PredSU->Succs.end(); II != EE; ++II) { 02858 SUnit *PredSuccSU = II->getSUnit(); 02859 if (PredSuccSU == SU) continue; 02860 // If PredSU has another successor with no data successors, for 02861 // now don't attempt to choose either over the other. 02862 if (PredSuccSU->NumSuccs == 0) 02863 goto outer_loop_continue; 02864 // Don't break physical register dependencies. 02865 if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs) 02866 if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI)) 02867 goto outer_loop_continue; 02868 // Don't introduce graph cycles. 02869 if (scheduleDAG->IsReachable(SU, PredSuccSU)) 02870 goto outer_loop_continue; 02871 } 02872 02873 // Ok, the transformation is safe and the heuristics suggest it is 02874 // profitable. Update the graph. 02875 DEBUG(dbgs() << " Prescheduling SU #" << SU->NodeNum 02876 << " next to PredSU #" << PredSU->NodeNum 02877 << " to guide scheduling in the presence of multiple uses\n"); 02878 for (unsigned i = 0; i != PredSU->Succs.size(); ++i) { 02879 SDep Edge = PredSU->Succs[i]; 02880 assert(!Edge.isAssignedRegDep()); 02881 SUnit *SuccSU = Edge.getSUnit(); 02882 if (SuccSU != SU) { 02883 Edge.setSUnit(PredSU); 02884 scheduleDAG->RemovePred(SuccSU, Edge); 02885 scheduleDAG->AddPred(SU, Edge); 02886 Edge.setSUnit(SU); 02887 scheduleDAG->AddPred(SuccSU, Edge); 02888 --i; 02889 } 02890 } 02891 outer_loop_continue:; 02892 } 02893 } 02894 02895 /// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses 02896 /// it as a def&use operand. Add a pseudo control edge from it to the other 02897 /// node (if it won't create a cycle) so the two-address one will be scheduled 02898 /// first (lower in the schedule). If both nodes are two-address, favor the 02899 /// one that has a CopyToReg use (more likely to be a loop induction update). 02900 /// If both are two-address, but one is commutable while the other is not 02901 /// commutable, favor the one that's not commutable. 
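/// For illustration (hypothetical two-address instructions): if "a = ADD a, b"
/// ties its result to its first operand, and another node "c = SUB a, d" also
/// reads 'a', an artificial edge is added making the SUB a predecessor of the
/// ADD. The ADD is then scheduled first in the bottom-up order, i.e. it ends
/// up below the SUB in the final program order, keeping the other use of 'a'
/// above the instruction that overwrites 'a' once the two-address constraint
/// is enforced.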
02902 void RegReductionPQBase::AddPseudoTwoAddrDeps() {
02903   for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
02904     SUnit *SU = &(*SUnits)[i];
02905     if (!SU->isTwoAddress)
02906       continue;
02907 
02908     SDNode *Node = SU->getNode();
02909     if (!Node || !Node->isMachineOpcode() || SU->getNode()->getGluedNode())
02910       continue;
02911 
02912     bool isLiveOut = hasOnlyLiveOutUses(SU);
02913     unsigned Opc = Node->getMachineOpcode();
02914     const MCInstrDesc &MCID = TII->get(Opc);
02915     unsigned NumRes = MCID.getNumDefs();
02916     unsigned NumOps = MCID.getNumOperands() - NumRes;
02917     for (unsigned j = 0; j != NumOps; ++j) {
02918       if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
02919         continue;
02920       SDNode *DU = SU->getNode()->getOperand(j).getNode();
02921       if (DU->getNodeId() == -1)
02922         continue;
02923       const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
02924       if (!DUSU) continue;
02925       for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
02926            E = DUSU->Succs.end(); I != E; ++I) {
02927         if (I->isCtrl()) continue;
02928         SUnit *SuccSU = I->getSUnit();
02929         if (SuccSU == SU)
02930           continue;
02931         // Be conservative. Ignore if nodes aren't at roughly the same
02932         // depth and height.
02933         if (SuccSU->getHeight() < SU->getHeight() &&
02934             (SU->getHeight() - SuccSU->getHeight()) > 1)
02935           continue;
02936         // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
02937         // constrains whatever is using the copy, instead of the copy
02938         // itself. In the case that the copy is coalesced, this
02939         // preserves the intent of the pseudo two-address heuristics.
02940         while (SuccSU->Succs.size() == 1 &&
02941                SuccSU->getNode()->isMachineOpcode() &&
02942                SuccSU->getNode()->getMachineOpcode() ==
02943                  TargetOpcode::COPY_TO_REGCLASS)
02944           SuccSU = SuccSU->Succs.front().getSUnit();
02945         // Don't constrain non-instruction nodes.
02946         if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
02947           continue;
02948         // Don't constrain nodes with physical register defs if the
02949         // predecessor can clobber them.
02950         if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
02951           if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
02952             continue;
02953         }
02954         // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
02955         // these may be coalesced away. We want them close to their uses.
02956 unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode(); 02957 if (SuccOpc == TargetOpcode::EXTRACT_SUBREG || 02958 SuccOpc == TargetOpcode::INSERT_SUBREG || 02959 SuccOpc == TargetOpcode::SUBREG_TO_REG) 02960 continue; 02961 if (!canClobberReachingPhysRegUse(SuccSU, SU, scheduleDAG, TII, TRI) && 02962 (!canClobber(SuccSU, DUSU) || 02963 (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) || 02964 (!SU->isCommutable && SuccSU->isCommutable)) && 02965 !scheduleDAG->IsReachable(SuccSU, SU)) { 02966 DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #" 02967 << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n"); 02968 scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Artificial)); 02969 } 02970 } 02971 } 02972 } 02973 } 02974 02975 //===----------------------------------------------------------------------===// 02976 // Public Constructor Functions 02977 //===----------------------------------------------------------------------===// 02978 02979 llvm::ScheduleDAGSDNodes * 02980 llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, 02981 CodeGenOpt::Level OptLevel) { 02982 const TargetMachine &TM = IS->TM; 02983 const TargetInstrInfo *TII = TM.getSubtargetImpl()->getInstrInfo(); 02984 const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo(); 02985 02986 BURegReductionPriorityQueue *PQ = 02987 new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, nullptr); 02988 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); 02989 PQ->setScheduleDAG(SD); 02990 return SD; 02991 } 02992 02993 llvm::ScheduleDAGSDNodes * 02994 llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, 02995 CodeGenOpt::Level OptLevel) { 02996 const TargetMachine &TM = IS->TM; 02997 const TargetInstrInfo *TII = TM.getSubtargetImpl()->getInstrInfo(); 02998 const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo(); 02999 03000 SrcRegReductionPriorityQueue *PQ = 03001 new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, nullptr); 03002 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); 03003 PQ->setScheduleDAG(SD); 03004 return SD; 03005 } 03006 03007 llvm::ScheduleDAGSDNodes * 03008 llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, 03009 CodeGenOpt::Level OptLevel) { 03010 const TargetMachine &TM = IS->TM; 03011 const TargetInstrInfo *TII = TM.getSubtargetImpl()->getInstrInfo(); 03012 const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo(); 03013 const TargetLowering *TLI = IS->getTargetLowering(); 03014 03015 HybridBURRPriorityQueue *PQ = 03016 new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI); 03017 03018 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); 03019 PQ->setScheduleDAG(SD); 03020 return SD; 03021 } 03022 03023 llvm::ScheduleDAGSDNodes * 03024 llvm::createILPListDAGScheduler(SelectionDAGISel *IS, 03025 CodeGenOpt::Level OptLevel) { 03026 const TargetMachine &TM = IS->TM; 03027 const TargetInstrInfo *TII = TM.getSubtargetImpl()->getInstrInfo(); 03028 const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo(); 03029 const TargetLowering *TLI = IS->getTargetLowering(); 03030 03031 ILPBURRPriorityQueue *PQ = 03032 new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI); 03033 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); 03034 PQ->setScheduleDAG(SD); 03035 return SD; 03036 }
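// Usage note (illustrative, assuming a standard llc build): the factory
// functions above are what the -pre-RA-sched=<name> option and a target's
// scheduling preference (e.g. Sched::RegPressure, Sched::Source, Sched::Hybrid,
// Sched::ILP) ultimately select. For example,
//   llc -pre-RA-sched=list-ilp input.ll -o input.s
// builds an ILPBURRPriorityQueue-backed ScheduleDAGRRList through
// createILPListDAGScheduler.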