xref: /freebsd/contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp (revision 78cd75393ec79565c63927bf200f06f839a1dc05)
1 //===- VLIWMachineScheduler.cpp - VLIW-Focused Scheduling Pass ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // MachineScheduler schedules machine instructions after phi elimination. It
10 // preserves LiveIntervals so it can be invoked before register allocation.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/CodeGen/VLIWMachineScheduler.h"
15 #include "llvm/ADT/SmallVector.h"
16 #include "llvm/CodeGen/DFAPacketizer.h"
17 #include "llvm/CodeGen/MachineBasicBlock.h"
18 #include "llvm/CodeGen/MachineFunction.h"
19 #include "llvm/CodeGen/MachineInstr.h"
20 #include "llvm/CodeGen/MachineLoopInfo.h"
21 #include "llvm/CodeGen/RegisterClassInfo.h"
22 #include "llvm/CodeGen/RegisterPressure.h"
23 #include "llvm/CodeGen/ScheduleDAG.h"
24 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
25 #include "llvm/CodeGen/TargetInstrInfo.h"
26 #include "llvm/CodeGen/TargetOpcodes.h"
27 #include "llvm/CodeGen/TargetRegisterInfo.h"
28 #include "llvm/CodeGen/TargetSchedule.h"
29 #include "llvm/CodeGen/TargetSubtargetInfo.h"
30 #include "llvm/Support/CommandLine.h"
31 #include "llvm/Support/Debug.h"
32 #include "llvm/Support/raw_ostream.h"
33 #include <algorithm>
34 #include <cassert>
35 #include <iomanip>
36 #include <limits>
37 #include <memory>
38 #include <sstream>
39 
40 using namespace llvm;
41 
42 #define DEBUG_TYPE "machine-scheduler"
43 
44 static cl::opt<bool> IgnoreBBRegPressure("ignore-bb-reg-pressure", cl::Hidden,
45                                          cl::init(false));
46 
47 static cl::opt<bool> UseNewerCandidate("use-newer-candidate", cl::Hidden,
48                                        cl::init(true));
49 
50 static cl::opt<unsigned> SchedDebugVerboseLevel("misched-verbose-level",
51                                                 cl::Hidden, cl::init(1));
52 
53 // Check if the scheduler should penalize instructions that are available to
54 // early due to a zero-latency dependence.
55 static cl::opt<bool> CheckEarlyAvail("check-early-avail", cl::Hidden,
56                                      cl::init(true));
57 
58 // This value is used to determine if a register class is a high pressure set.
59 // We compute the maximum number of registers needed and divided by the total
60 // available. Then, we compare the result to this value.
61 static cl::opt<float> RPThreshold("vliw-misched-reg-pressure", cl::Hidden,
62                                   cl::init(0.75f),
63                                   cl::desc("High register pressure threhold."));
64 
65 VLIWResourceModel::VLIWResourceModel(const TargetSubtargetInfo &STI,
66                                      const TargetSchedModel *SM)
67     : TII(STI.getInstrInfo()), SchedModel(SM) {
68   ResourcesModel = createPacketizer(STI);
69 
70   // This hard requirement could be relaxed,
71   // but for now do not let it proceed.
72   assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");
73 
74   Packet.reserve(SchedModel->getIssueWidth());
75   Packet.clear();
76   ResourcesModel->clearResources();
77 }
78 
79 void VLIWResourceModel::reset() {
80   Packet.clear();
81   ResourcesModel->clearResources();
82 }
83 
84 VLIWResourceModel::~VLIWResourceModel() { delete ResourcesModel; }
85 
86 /// Return true if there is a dependence between SUd and SUu.
87 bool VLIWResourceModel::hasDependence(const SUnit *SUd, const SUnit *SUu) {
88   if (SUd->Succs.size() == 0)
89     return false;
90 
91   for (const auto &S : SUd->Succs) {
92     // Since we do not add pseudos to packets, might as well
93     // ignore order dependencies.
94     if (S.isCtrl())
95       continue;
96 
97     if (S.getSUnit() == SUu && S.getLatency() > 0)
98       return true;
99   }
100   return false;
101 }
102 
103 /// Check if scheduling of this SU is possible
104 /// in the current packet.
105 /// It is _not_ precise (statefull), it is more like
106 /// another heuristic. Many corner cases are figured
107 /// empirically.
108 bool VLIWResourceModel::isResourceAvailable(SUnit *SU, bool IsTop) {
109   if (!SU || !SU->getInstr())
110     return false;
111 
112   // First see if the pipeline could receive this instruction
113   // in the current cycle.
114   switch (SU->getInstr()->getOpcode()) {
115   default:
116     if (!ResourcesModel->canReserveResources(*SU->getInstr()))
117       return false;
118     break;
119   case TargetOpcode::EXTRACT_SUBREG:
120   case TargetOpcode::INSERT_SUBREG:
121   case TargetOpcode::SUBREG_TO_REG:
122   case TargetOpcode::REG_SEQUENCE:
123   case TargetOpcode::IMPLICIT_DEF:
124   case TargetOpcode::COPY:
125   case TargetOpcode::INLINEASM:
126   case TargetOpcode::INLINEASM_BR:
127     break;
128   }
129 
130   // Now see if there are no other dependencies to instructions already
131   // in the packet.
132   if (IsTop) {
133     for (unsigned i = 0, e = Packet.size(); i != e; ++i)
134       if (hasDependence(Packet[i], SU))
135         return false;
136   } else {
137     for (unsigned i = 0, e = Packet.size(); i != e; ++i)
138       if (hasDependence(SU, Packet[i]))
139         return false;
140   }
141   return true;
142 }
143 
144 /// Keep track of available resources.
145 bool VLIWResourceModel::reserveResources(SUnit *SU, bool IsTop) {
146   bool startNewCycle = false;
147   // Artificially reset state.
148   if (!SU) {
149     reset();
150     TotalPackets++;
151     return false;
152   }
153   // If this SU does not fit in the packet or the packet is now full
154   // start a new one.
155   if (!isResourceAvailable(SU, IsTop) ||
156       Packet.size() >= SchedModel->getIssueWidth()) {
157     reset();
158     TotalPackets++;
159     startNewCycle = true;
160   }
161 
162   switch (SU->getInstr()->getOpcode()) {
163   default:
164     ResourcesModel->reserveResources(*SU->getInstr());
165     break;
166   case TargetOpcode::EXTRACT_SUBREG:
167   case TargetOpcode::INSERT_SUBREG:
168   case TargetOpcode::SUBREG_TO_REG:
169   case TargetOpcode::REG_SEQUENCE:
170   case TargetOpcode::IMPLICIT_DEF:
171   case TargetOpcode::KILL:
172   case TargetOpcode::CFI_INSTRUCTION:
173   case TargetOpcode::EH_LABEL:
174   case TargetOpcode::COPY:
175   case TargetOpcode::INLINEASM:
176   case TargetOpcode::INLINEASM_BR:
177     break;
178   }
179   Packet.push_back(SU);
180 
181 #ifndef NDEBUG
182   LLVM_DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n");
183   for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
184     LLVM_DEBUG(dbgs() << "\t[" << i << "] SU(");
185     LLVM_DEBUG(dbgs() << Packet[i]->NodeNum << ")\t");
186     LLVM_DEBUG(Packet[i]->getInstr()->dump());
187   }
188 #endif
189 
190   return startNewCycle;
191 }
192 
193 DFAPacketizer *
194 VLIWResourceModel::createPacketizer(const TargetSubtargetInfo &STI) const {
195   return STI.getInstrInfo()->CreateTargetScheduleState(STI);
196 }
197 
198 /// schedule - Called back from MachineScheduler::runOnMachineFunction
199 /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
200 /// only includes instructions that have DAG nodes, not scheduling boundaries.
201 void VLIWMachineScheduler::schedule() {
202   LLVM_DEBUG(dbgs() << "********** MI Converging Scheduling VLIW "
203                     << printMBBReference(*BB) << " " << BB->getName()
204                     << " in_func " << BB->getParent()->getName()
205                     << " at loop depth " << MLI->getLoopDepth(BB) << " \n");
206 
207   buildDAGWithRegPressure();
208 
209   Topo.InitDAGTopologicalSorting();
210 
211   // Postprocess the DAG to add platform-specific artificial dependencies.
212   postProcessDAG();
213 
214   SmallVector<SUnit *, 8> TopRoots, BotRoots;
215   findRootsAndBiasEdges(TopRoots, BotRoots);
216 
217   // Initialize the strategy before modifying the DAG.
218   SchedImpl->initialize(this);
219 
220   LLVM_DEBUG({
221     unsigned maxH = 0;
222     for (const SUnit &SU : SUnits)
223       if (SU.getHeight() > maxH)
224         maxH = SU.getHeight();
225     dbgs() << "Max Height " << maxH << "\n";
226   });
227   LLVM_DEBUG({
228     unsigned maxD = 0;
229     for (const SUnit &SU : SUnits)
230       if (SU.getDepth() > maxD)
231         maxD = SU.getDepth();
232     dbgs() << "Max Depth " << maxD << "\n";
233   });
234   LLVM_DEBUG(dump());
235   if (ViewMISchedDAGs)
236     viewGraph();
237 
238   initQueues(TopRoots, BotRoots);
239 
240   bool IsTopNode = false;
241   while (true) {
242     LLVM_DEBUG(
243         dbgs() << "** VLIWMachineScheduler::schedule picking next node\n");
244     SUnit *SU = SchedImpl->pickNode(IsTopNode);
245     if (!SU)
246       break;
247 
248     if (!checkSchedLimit())
249       break;
250 
251     scheduleMI(SU, IsTopNode);
252 
253     // Notify the scheduling strategy after updating the DAG.
254     SchedImpl->schedNode(SU, IsTopNode);
255 
256     updateQueues(SU, IsTopNode);
257   }
258   assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
259 
260   placeDebugValues();
261 
262   LLVM_DEBUG({
263     dbgs() << "*** Final schedule for "
264            << printMBBReference(*begin()->getParent()) << " ***\n";
265     dumpSchedule();
266     dbgs() << '\n';
267   });
268 }
269 
270 void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) {
271   DAG = static_cast<VLIWMachineScheduler *>(dag);
272   SchedModel = DAG->getSchedModel();
273 
274   Top.init(DAG, SchedModel);
275   Bot.init(DAG, SchedModel);
276 
277   // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
278   // are disabled, then these HazardRecs will be disabled.
279   const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries();
280   const TargetSubtargetInfo &STI = DAG->MF.getSubtarget();
281   const TargetInstrInfo *TII = STI.getInstrInfo();
282   delete Top.HazardRec;
283   delete Bot.HazardRec;
284   Top.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);
285   Bot.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);
286 
287   delete Top.ResourceModel;
288   delete Bot.ResourceModel;
289   Top.ResourceModel = createVLIWResourceModel(STI, DAG->getSchedModel());
290   Bot.ResourceModel = createVLIWResourceModel(STI, DAG->getSchedModel());
291 
292   const std::vector<unsigned> &MaxPressure =
293       DAG->getRegPressure().MaxSetPressure;
294   HighPressureSets.assign(MaxPressure.size(), false);
295   for (unsigned i = 0, e = MaxPressure.size(); i < e; ++i) {
296     unsigned Limit = DAG->getRegClassInfo()->getRegPressureSetLimit(i);
297     HighPressureSets[i] =
298         ((float)MaxPressure[i] > ((float)Limit * RPThreshold));
299   }
300 
301   assert((!ForceTopDown || !ForceBottomUp) &&
302          "-misched-topdown incompatible with -misched-bottomup");
303 }
304 
305 VLIWResourceModel *ConvergingVLIWScheduler::createVLIWResourceModel(
306     const TargetSubtargetInfo &STI, const TargetSchedModel *SchedModel) const {
307   return new VLIWResourceModel(STI, SchedModel);
308 }
309 
310 void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) {
311   for (const SDep &PI : SU->Preds) {
312     unsigned PredReadyCycle = PI.getSUnit()->TopReadyCycle;
313     unsigned MinLatency = PI.getLatency();
314 #ifndef NDEBUG
315     Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
316 #endif
317     if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
318       SU->TopReadyCycle = PredReadyCycle + MinLatency;
319   }
320 
321   if (!SU->isScheduled)
322     Top.releaseNode(SU, SU->TopReadyCycle);
323 }
324 
325 void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) {
326   assert(SU->getInstr() && "Scheduled SUnit must have instr");
327 
328   for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E;
329        ++I) {
330     unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
331     unsigned MinLatency = I->getLatency();
332 #ifndef NDEBUG
333     Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
334 #endif
335     if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
336       SU->BotReadyCycle = SuccReadyCycle + MinLatency;
337   }
338 
339   if (!SU->isScheduled)
340     Bot.releaseNode(SU, SU->BotReadyCycle);
341 }
342 
343 ConvergingVLIWScheduler::VLIWSchedBoundary::~VLIWSchedBoundary() {
344   delete ResourceModel;
345   delete HazardRec;
346 }
347 
348 /// Does this SU have a hazard within the current instruction group.
349 ///
350 /// The scheduler supports two modes of hazard recognition. The first is the
351 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
352 /// supports highly complicated in-order reservation tables
353 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
354 ///
355 /// The second is a streamlined mechanism that checks for hazards based on
356 /// simple counters that the scheduler itself maintains. It explicitly checks
357 /// for instruction dispatch limitations, including the number of micro-ops that
358 /// can dispatch per cycle.
359 ///
360 /// TODO: Also check whether the SU must start a new group.
361 bool ConvergingVLIWScheduler::VLIWSchedBoundary::checkHazard(SUnit *SU) {
362   if (HazardRec->isEnabled())
363     return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
364 
365   unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
366   if (IssueCount + uops > SchedModel->getIssueWidth())
367     return true;
368 
369   return false;
370 }
371 
372 void ConvergingVLIWScheduler::VLIWSchedBoundary::releaseNode(
373     SUnit *SU, unsigned ReadyCycle) {
374   if (ReadyCycle < MinReadyCycle)
375     MinReadyCycle = ReadyCycle;
376 
377   // Check for interlocks first. For the purpose of other heuristics, an
378   // instruction that cannot issue appears as if it's not in the ReadyQueue.
379   if (ReadyCycle > CurrCycle || checkHazard(SU))
380 
381     Pending.push(SU);
382   else
383     Available.push(SU);
384 }
385 
386 /// Move the boundary of scheduled code by one cycle.
387 void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpCycle() {
388   unsigned Width = SchedModel->getIssueWidth();
389   IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;
390 
391   assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
392          "MinReadyCycle uninitialized");
393   unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle);
394 
395   if (!HazardRec->isEnabled()) {
396     // Bypass HazardRec virtual calls.
397     CurrCycle = NextCycle;
398   } else {
399     // Bypass getHazardType calls in case of long latency.
400     for (; CurrCycle != NextCycle; ++CurrCycle) {
401       if (isTop())
402         HazardRec->AdvanceCycle();
403       else
404         HazardRec->RecedeCycle();
405     }
406   }
407   CheckPending = true;
408 
409   LLVM_DEBUG(dbgs() << "*** Next cycle " << Available.getName() << " cycle "
410                     << CurrCycle << '\n');
411 }
412 
413 /// Move the boundary of scheduled code by one SUnit.
414 void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpNode(SUnit *SU) {
415   bool startNewCycle = false;
416 
417   // Update the reservation table.
418   if (HazardRec->isEnabled()) {
419     if (!isTop() && SU->isCall) {
420       // Calls are scheduled with their preceding instructions. For bottom-up
421       // scheduling, clear the pipeline state before emitting.
422       HazardRec->Reset();
423     }
424     HazardRec->EmitInstruction(SU);
425   }
426 
427   // Update DFA model.
428   startNewCycle = ResourceModel->reserveResources(SU, isTop());
429 
430   // Check the instruction group dispatch limit.
431   // TODO: Check if this SU must end a dispatch group.
432   IssueCount += SchedModel->getNumMicroOps(SU->getInstr());
433   if (startNewCycle) {
434     LLVM_DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
435     bumpCycle();
436   } else
437     LLVM_DEBUG(dbgs() << "*** IssueCount " << IssueCount << " at cycle "
438                       << CurrCycle << '\n');
439 }
440 
441 /// Release pending ready nodes in to the available queue. This makes them
442 /// visible to heuristics.
443 void ConvergingVLIWScheduler::VLIWSchedBoundary::releasePending() {
444   // If the available queue is empty, it is safe to reset MinReadyCycle.
445   if (Available.empty())
446     MinReadyCycle = std::numeric_limits<unsigned>::max();
447 
448   // Check to see if any of the pending instructions are ready to issue.  If
449   // so, add them to the available queue.
450   for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
451     SUnit *SU = *(Pending.begin() + i);
452     unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
453 
454     if (ReadyCycle < MinReadyCycle)
455       MinReadyCycle = ReadyCycle;
456 
457     if (ReadyCycle > CurrCycle)
458       continue;
459 
460     if (checkHazard(SU))
461       continue;
462 
463     Available.push(SU);
464     Pending.remove(Pending.begin() + i);
465     --i;
466     --e;
467   }
468   CheckPending = false;
469 }
470 
471 /// Remove SU from the ready set for this boundary.
472 void ConvergingVLIWScheduler::VLIWSchedBoundary::removeReady(SUnit *SU) {
473   if (Available.isInQueue(SU))
474     Available.remove(Available.find(SU));
475   else {
476     assert(Pending.isInQueue(SU) && "bad ready count");
477     Pending.remove(Pending.find(SU));
478   }
479 }
480 
481 /// If this queue only has one ready candidate, return it. As a side effect,
482 /// advance the cycle until at least one node is ready. If multiple instructions
483 /// are ready, return NULL.
484 SUnit *ConvergingVLIWScheduler::VLIWSchedBoundary::pickOnlyChoice() {
485   if (CheckPending)
486     releasePending();
487 
488   auto AdvanceCycle = [this]() {
489     if (Available.empty())
490       return true;
491     if (Available.size() == 1 && Pending.size() > 0)
492       return !ResourceModel->isResourceAvailable(*Available.begin(), isTop()) ||
493              getWeakLeft(*Available.begin(), isTop()) != 0;
494     return false;
495   };
496   for (unsigned i = 0; AdvanceCycle(); ++i) {
497     assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
498            "permanent hazard");
499     (void)i;
500     ResourceModel->reserveResources(nullptr, isTop());
501     bumpCycle();
502     releasePending();
503   }
504   if (Available.size() == 1)
505     return *Available.begin();
506   return nullptr;
507 }
508 
509 #ifndef NDEBUG
510 void ConvergingVLIWScheduler::traceCandidate(const char *Label,
511                                              const ReadyQueue &Q, SUnit *SU,
512                                              int Cost, PressureChange P) {
513   dbgs() << Label << " " << Q.getName() << " ";
514   if (P.isValid())
515     dbgs() << DAG->TRI->getRegPressureSetName(P.getPSet()) << ":"
516            << P.getUnitInc() << " ";
517   else
518     dbgs() << "     ";
519   dbgs() << "cost(" << Cost << ")\t";
520   DAG->dumpNode(*SU);
521 }
522 
523 // Very detailed queue dump, to be used with higher verbosity levels.
524 void ConvergingVLIWScheduler::readyQueueVerboseDump(
525     const RegPressureTracker &RPTracker, SchedCandidate &Candidate,
526     ReadyQueue &Q) {
527   RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);
528 
529   dbgs() << ">>> " << Q.getName() << "\n";
530   for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
531     RegPressureDelta RPDelta;
532     TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
533                                     DAG->getRegionCriticalPSets(),
534                                     DAG->getRegPressure().MaxSetPressure);
535     std::stringstream dbgstr;
536     dbgstr << "SU(" << std::setw(3) << (*I)->NodeNum << ")";
537     dbgs() << dbgstr.str();
538     SchedulingCost(Q, *I, Candidate, RPDelta, true);
539     dbgs() << "\t";
540     (*I)->getInstr()->dump();
541   }
542   dbgs() << "\n";
543 }
544 #endif
545 
546 /// isSingleUnscheduledPred - If SU2 is the only unscheduled predecessor
547 /// of SU, return true (we may have duplicates)
548 static inline bool isSingleUnscheduledPred(SUnit *SU, SUnit *SU2) {
549   if (SU->NumPredsLeft == 0)
550     return false;
551 
552   for (auto &Pred : SU->Preds) {
553     // We found an available, but not scheduled, predecessor.
554     if (!Pred.getSUnit()->isScheduled && (Pred.getSUnit() != SU2))
555       return false;
556   }
557 
558   return true;
559 }
560 
561 /// isSingleUnscheduledSucc - If SU2 is the only unscheduled successor
562 /// of SU, return true (we may have duplicates)
563 static inline bool isSingleUnscheduledSucc(SUnit *SU, SUnit *SU2) {
564   if (SU->NumSuccsLeft == 0)
565     return false;
566 
567   for (auto &Succ : SU->Succs) {
568     // We found an available, but not scheduled, successor.
569     if (!Succ.getSUnit()->isScheduled && (Succ.getSUnit() != SU2))
570       return false;
571   }
572   return true;
573 }
574 
575 /// Check if the instruction changes the register pressure of a register in the
576 /// high pressure set. The function returns a negative value if the pressure
577 /// decreases and a positive value is the pressure increases. If the instruction
578 /// doesn't use a high pressure register or doesn't change the register
579 /// pressure, then return 0.
580 int ConvergingVLIWScheduler::pressureChange(const SUnit *SU, bool isBotUp) {
581   PressureDiff &PD = DAG->getPressureDiff(SU);
582   for (const auto &P : PD) {
583     if (!P.isValid())
584       continue;
585     // The pressure differences are computed bottom-up, so the comparison for
586     // an increase is positive in the bottom direction, but negative in the
587     //  top-down direction.
588     if (HighPressureSets[P.getPSet()])
589       return (isBotUp ? P.getUnitInc() : -P.getUnitInc());
590   }
591   return 0;
592 }
593 
594 /// Single point to compute overall scheduling cost.
595 /// TODO: More heuristics will be used soon.
596 int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
597                                             SchedCandidate &Candidate,
598                                             RegPressureDelta &Delta,
599                                             bool verbose) {
600   // Initial trivial priority.
601   int ResCount = 1;
602 
603   // Do not waste time on a node that is already scheduled.
604   if (!SU || SU->isScheduled)
605     return ResCount;
606 
607   LLVM_DEBUG(if (verbose) dbgs()
608              << ((Q.getID() == TopQID) ? "(top|" : "(bot|"));
609   // Forced priority is high.
610   if (SU->isScheduleHigh) {
611     ResCount += PriorityOne;
612     LLVM_DEBUG(dbgs() << "H|");
613   }
614 
615   unsigned IsAvailableAmt = 0;
616   // Critical path first.
617   if (Q.getID() == TopQID) {
618     if (Top.isLatencyBound(SU)) {
619       LLVM_DEBUG(if (verbose) dbgs() << "LB|");
620       ResCount += (SU->getHeight() * ScaleTwo);
621     }
622 
623     LLVM_DEBUG(if (verbose) {
624       std::stringstream dbgstr;
625       dbgstr << "h" << std::setw(3) << SU->getHeight() << "|";
626       dbgs() << dbgstr.str();
627     });
628 
629     // If resources are available for it, multiply the
630     // chance of scheduling.
631     if (Top.ResourceModel->isResourceAvailable(SU, true)) {
632       IsAvailableAmt = (PriorityTwo + PriorityThree);
633       ResCount += IsAvailableAmt;
634       LLVM_DEBUG(if (verbose) dbgs() << "A|");
635     } else
636       LLVM_DEBUG(if (verbose) dbgs() << " |");
637   } else {
638     if (Bot.isLatencyBound(SU)) {
639       LLVM_DEBUG(if (verbose) dbgs() << "LB|");
640       ResCount += (SU->getDepth() * ScaleTwo);
641     }
642 
643     LLVM_DEBUG(if (verbose) {
644       std::stringstream dbgstr;
645       dbgstr << "d" << std::setw(3) << SU->getDepth() << "|";
646       dbgs() << dbgstr.str();
647     });
648 
649     // If resources are available for it, multiply the
650     // chance of scheduling.
651     if (Bot.ResourceModel->isResourceAvailable(SU, false)) {
652       IsAvailableAmt = (PriorityTwo + PriorityThree);
653       ResCount += IsAvailableAmt;
654       LLVM_DEBUG(if (verbose) dbgs() << "A|");
655     } else
656       LLVM_DEBUG(if (verbose) dbgs() << " |");
657   }
658 
659   unsigned NumNodesBlocking = 0;
660   if (Q.getID() == TopQID) {
661     // How many SUs does it block from scheduling?
662     // Look at all of the successors of this node.
663     // Count the number of nodes that
664     // this node is the sole unscheduled node for.
665     if (Top.isLatencyBound(SU))
666       for (const SDep &SI : SU->Succs)
667         if (isSingleUnscheduledPred(SI.getSUnit(), SU))
668           ++NumNodesBlocking;
669   } else {
670     // How many unscheduled predecessors block this node?
671     if (Bot.isLatencyBound(SU))
672       for (const SDep &PI : SU->Preds)
673         if (isSingleUnscheduledSucc(PI.getSUnit(), SU))
674           ++NumNodesBlocking;
675   }
676   ResCount += (NumNodesBlocking * ScaleTwo);
677 
678   LLVM_DEBUG(if (verbose) {
679     std::stringstream dbgstr;
680     dbgstr << "blk " << std::setw(2) << NumNodesBlocking << ")|";
681     dbgs() << dbgstr.str();
682   });
683 
684   // Factor in reg pressure as a heuristic.
685   if (!IgnoreBBRegPressure) {
686     // Decrease priority by the amount that register pressure exceeds the limit.
687     ResCount -= (Delta.Excess.getUnitInc() * PriorityOne);
688     // Decrease priority if register pressure exceeds the limit.
689     ResCount -= (Delta.CriticalMax.getUnitInc() * PriorityOne);
690     // Decrease priority slightly if register pressure would increase over the
691     // current maximum.
692     ResCount -= (Delta.CurrentMax.getUnitInc() * PriorityTwo);
693     // If there are register pressure issues, then we remove the value added for
694     // the instruction being available. The rationale is that we really don't
695     // want to schedule an instruction that causes a spill.
696     if (IsAvailableAmt && pressureChange(SU, Q.getID() != TopQID) > 0 &&
697         (Delta.Excess.getUnitInc() || Delta.CriticalMax.getUnitInc() ||
698          Delta.CurrentMax.getUnitInc()))
699       ResCount -= IsAvailableAmt;
700     LLVM_DEBUG(if (verbose) {
701       dbgs() << "RP " << Delta.Excess.getUnitInc() << "/"
702              << Delta.CriticalMax.getUnitInc() << "/"
703              << Delta.CurrentMax.getUnitInc() << ")|";
704     });
705   }
706 
707   // Give preference to a zero latency instruction if the dependent
708   // instruction is in the current packet.
709   if (Q.getID() == TopQID && getWeakLeft(SU, true) == 0) {
710     for (const SDep &PI : SU->Preds) {
711       if (!PI.getSUnit()->getInstr()->isPseudo() && PI.isAssignedRegDep() &&
712           PI.getLatency() == 0 &&
713           Top.ResourceModel->isInPacket(PI.getSUnit())) {
714         ResCount += PriorityThree;
715         LLVM_DEBUG(if (verbose) dbgs() << "Z|");
716       }
717     }
718   } else if (Q.getID() == BotQID && getWeakLeft(SU, false) == 0) {
719     for (const SDep &SI : SU->Succs) {
720       if (!SI.getSUnit()->getInstr()->isPseudo() && SI.isAssignedRegDep() &&
721           SI.getLatency() == 0 &&
722           Bot.ResourceModel->isInPacket(SI.getSUnit())) {
723         ResCount += PriorityThree;
724         LLVM_DEBUG(if (verbose) dbgs() << "Z|");
725       }
726     }
727   }
728 
729   // If the instruction has a non-zero latency dependence with an instruction in
730   // the current packet, then it should not be scheduled yet. The case occurs
731   // when the dependent instruction is scheduled in a new packet, so the
732   // scheduler updates the current cycle and pending instructions become
733   // available.
734   if (CheckEarlyAvail) {
735     if (Q.getID() == TopQID) {
736       for (const auto &PI : SU->Preds) {
737         if (PI.getLatency() > 0 &&
738             Top.ResourceModel->isInPacket(PI.getSUnit())) {
739           ResCount -= PriorityOne;
740           LLVM_DEBUG(if (verbose) dbgs() << "D|");
741         }
742       }
743     } else {
744       for (const auto &SI : SU->Succs) {
745         if (SI.getLatency() > 0 &&
746             Bot.ResourceModel->isInPacket(SI.getSUnit())) {
747           ResCount -= PriorityOne;
748           LLVM_DEBUG(if (verbose) dbgs() << "D|");
749         }
750       }
751     }
752   }
753 
754   LLVM_DEBUG(if (verbose) {
755     std::stringstream dbgstr;
756     dbgstr << "Total " << std::setw(4) << ResCount << ")";
757     dbgs() << dbgstr.str();
758   });
759 
760   return ResCount;
761 }
762 
763 /// Pick the best candidate from the top queue.
764 ///
765 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
766 /// DAG building. To adjust for the current scheduling location we need to
767 /// maintain the number of vreg uses remaining to be top-scheduled.
768 ConvergingVLIWScheduler::CandResult
769 ConvergingVLIWScheduler::pickNodeFromQueue(VLIWSchedBoundary &Zone,
770                                            const RegPressureTracker &RPTracker,
771                                            SchedCandidate &Candidate) {
772   ReadyQueue &Q = Zone.Available;
773   LLVM_DEBUG(if (SchedDebugVerboseLevel > 1)
774                  readyQueueVerboseDump(RPTracker, Candidate, Q);
775              else Q.dump(););
776 
777   // getMaxPressureDelta temporarily modifies the tracker.
778   RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);
779 
780   // BestSU remains NULL if no top candidates beat the best existing candidate.
781   CandResult FoundCandidate = NoCand;
782   for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
783     RegPressureDelta RPDelta;
784     TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
785                                     DAG->getRegionCriticalPSets(),
786                                     DAG->getRegPressure().MaxSetPressure);
787 
788     int CurrentCost = SchedulingCost(Q, *I, Candidate, RPDelta, false);
789 
790     // Initialize the candidate if needed.
791     if (!Candidate.SU) {
792       LLVM_DEBUG(traceCandidate("DCAND", Q, *I, CurrentCost));
793       Candidate.SU = *I;
794       Candidate.RPDelta = RPDelta;
795       Candidate.SCost = CurrentCost;
796       FoundCandidate = NodeOrder;
797       continue;
798     }
799 
800     // Choose node order for negative cost candidates. There is no good
801     // candidate in this case.
802     if (CurrentCost < 0 && Candidate.SCost < 0) {
803       if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) ||
804           (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) {
805         LLVM_DEBUG(traceCandidate("NCAND", Q, *I, CurrentCost));
806         Candidate.SU = *I;
807         Candidate.RPDelta = RPDelta;
808         Candidate.SCost = CurrentCost;
809         FoundCandidate = NodeOrder;
810       }
811       continue;
812     }
813 
814     // Best cost.
815     if (CurrentCost > Candidate.SCost) {
816       LLVM_DEBUG(traceCandidate("CCAND", Q, *I, CurrentCost));
817       Candidate.SU = *I;
818       Candidate.RPDelta = RPDelta;
819       Candidate.SCost = CurrentCost;
820       FoundCandidate = BestCost;
821       continue;
822     }
823 
824     // Choose an instruction that does not depend on an artificial edge.
825     unsigned CurrWeak = getWeakLeft(*I, (Q.getID() == TopQID));
826     unsigned CandWeak = getWeakLeft(Candidate.SU, (Q.getID() == TopQID));
827     if (CurrWeak != CandWeak) {
828       if (CurrWeak < CandWeak) {
829         LLVM_DEBUG(traceCandidate("WCAND", Q, *I, CurrentCost));
830         Candidate.SU = *I;
831         Candidate.RPDelta = RPDelta;
832         Candidate.SCost = CurrentCost;
833         FoundCandidate = Weak;
834       }
835       continue;
836     }
837 
838     if (CurrentCost == Candidate.SCost && Zone.isLatencyBound(*I)) {
839       unsigned CurrSize, CandSize;
840       if (Q.getID() == TopQID) {
841         CurrSize = (*I)->Succs.size();
842         CandSize = Candidate.SU->Succs.size();
843       } else {
844         CurrSize = (*I)->Preds.size();
845         CandSize = Candidate.SU->Preds.size();
846       }
847       if (CurrSize > CandSize) {
848         LLVM_DEBUG(traceCandidate("SPCAND", Q, *I, CurrentCost));
849         Candidate.SU = *I;
850         Candidate.RPDelta = RPDelta;
851         Candidate.SCost = CurrentCost;
852         FoundCandidate = BestCost;
853       }
854       // Keep the old candidate if it's a better candidate. That is, don't use
855       // the subsequent tie breaker.
856       if (CurrSize != CandSize)
857         continue;
858     }
859 
860     // Tie breaker.
861     // To avoid scheduling indeterminism, we need a tie breaker
862     // for the case when cost is identical for two nodes.
863     if (UseNewerCandidate && CurrentCost == Candidate.SCost) {
864       if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) ||
865           (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) {
866         LLVM_DEBUG(traceCandidate("TCAND", Q, *I, CurrentCost));
867         Candidate.SU = *I;
868         Candidate.RPDelta = RPDelta;
869         Candidate.SCost = CurrentCost;
870         FoundCandidate = NodeOrder;
871         continue;
872       }
873     }
874 
875     // Fall through to original instruction order.
876     // Only consider node order if Candidate was chosen from this Q.
877     if (FoundCandidate == NoCand)
878       continue;
879   }
880   return FoundCandidate;
881 }
882 
883 /// Pick the best candidate node from either the top or bottom queue.
884 SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) {
885   // Schedule as far as possible in the direction of no choice. This is most
886   // efficient, but also provides the best heuristics for CriticalPSets.
887   if (SUnit *SU = Bot.pickOnlyChoice()) {
888     LLVM_DEBUG(dbgs() << "Picked only Bottom\n");
889     IsTopNode = false;
890     return SU;
891   }
892   if (SUnit *SU = Top.pickOnlyChoice()) {
893     LLVM_DEBUG(dbgs() << "Picked only Top\n");
894     IsTopNode = true;
895     return SU;
896   }
897   SchedCandidate BotCand;
898   // Prefer bottom scheduling when heuristics are silent.
899   CandResult BotResult =
900       pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
901   assert(BotResult != NoCand && "failed to find the first candidate");
902 
903   // If either Q has a single candidate that provides the least increase in
904   // Excess pressure, we can immediately schedule from that Q.
905   //
906   // RegionCriticalPSets summarizes the pressure within the scheduled region and
907   // affects picking from either Q. If scheduling in one direction must
908   // increase pressure for one of the excess PSets, then schedule in that
909   // direction first to provide more freedom in the other direction.
910   if (BotResult == SingleExcess || BotResult == SingleCritical) {
911     LLVM_DEBUG(dbgs() << "Prefered Bottom Node\n");
912     IsTopNode = false;
913     return BotCand.SU;
914   }
915   // Check if the top Q has a better candidate.
916   SchedCandidate TopCand;
917   CandResult TopResult =
918       pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
919   assert(TopResult != NoCand && "failed to find the first candidate");
920 
921   if (TopResult == SingleExcess || TopResult == SingleCritical) {
922     LLVM_DEBUG(dbgs() << "Prefered Top Node\n");
923     IsTopNode = true;
924     return TopCand.SU;
925   }
926   // If either Q has a single candidate that minimizes pressure above the
927   // original region's pressure pick it.
928   if (BotResult == SingleMax) {
929     LLVM_DEBUG(dbgs() << "Prefered Bottom Node SingleMax\n");
930     IsTopNode = false;
931     return BotCand.SU;
932   }
933   if (TopResult == SingleMax) {
934     LLVM_DEBUG(dbgs() << "Prefered Top Node SingleMax\n");
935     IsTopNode = true;
936     return TopCand.SU;
937   }
938   if (TopCand.SCost > BotCand.SCost) {
939     LLVM_DEBUG(dbgs() << "Prefered Top Node Cost\n");
940     IsTopNode = true;
941     return TopCand.SU;
942   }
943   // Otherwise prefer the bottom candidate in node order.
944   LLVM_DEBUG(dbgs() << "Prefered Bottom in Node order\n");
945   IsTopNode = false;
946   return BotCand.SU;
947 }
948 
949 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
950 SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) {
951   if (DAG->top() == DAG->bottom()) {
952     assert(Top.Available.empty() && Top.Pending.empty() &&
953            Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
954     return nullptr;
955   }
956   SUnit *SU;
957   if (ForceTopDown) {
958     SU = Top.pickOnlyChoice();
959     if (!SU) {
960       SchedCandidate TopCand;
961       CandResult TopResult =
962           pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
963       assert(TopResult != NoCand && "failed to find the first candidate");
964       (void)TopResult;
965       SU = TopCand.SU;
966     }
967     IsTopNode = true;
968   } else if (ForceBottomUp) {
969     SU = Bot.pickOnlyChoice();
970     if (!SU) {
971       SchedCandidate BotCand;
972       CandResult BotResult =
973           pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
974       assert(BotResult != NoCand && "failed to find the first candidate");
975       (void)BotResult;
976       SU = BotCand.SU;
977     }
978     IsTopNode = false;
979   } else {
980     SU = pickNodeBidrectional(IsTopNode);
981   }
982   if (SU->isTopReady())
983     Top.removeReady(SU);
984   if (SU->isBottomReady())
985     Bot.removeReady(SU);
986 
987   LLVM_DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
988                     << " Scheduling instruction in cycle "
989                     << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << " ("
990                     << reportPackets() << ")\n";
991              DAG->dumpNode(*SU));
992   return SU;
993 }
994 
995 /// Update the scheduler's state after scheduling a node. This is the same node
996 /// that was just returned by pickNode(). However, VLIWMachineScheduler needs
997 /// to update it's state based on the current cycle before MachineSchedStrategy
998 /// does.
999 void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) {
1000   if (IsTopNode) {
1001     Top.bumpNode(SU);
1002     SU->TopReadyCycle = Top.CurrCycle;
1003   } else {
1004     Bot.bumpNode(SU);
1005     SU->BotReadyCycle = Bot.CurrCycle;
1006   }
1007 }
1008