xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp (revision 62ff619dcc3540659a319be71c9a489f1659e14a)
1 //===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This contains a MachineSchedStrategy implementation for maximizing wave
11 /// occupancy on GCN hardware.
12 //===----------------------------------------------------------------------===//
13 
14 #include "GCNSchedStrategy.h"
15 #include "SIMachineFunctionInfo.h"
16 
17 #define DEBUG_TYPE "machine-scheduler"
18 
19 using namespace llvm;
20 
21 GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
22     const MachineSchedContext *C) :
23     GenericScheduler(C), TargetOccupancy(0), HasClusteredNodes(false),
24     HasExcessPressure(false), MF(nullptr) { }
25 
26 void GCNMaxOccupancySchedStrategy::initialize(ScheduleDAGMI *DAG) {
27   GenericScheduler::initialize(DAG);
28 
29   MF = &DAG->MF;
30 
31   const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
32 
33   // FIXME: This is also necessary, because some passes that run after
34   // scheduling and before regalloc increase register pressure.
35   const unsigned ErrorMargin = 3;
36 
37   SGPRExcessLimit =
38       Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
39   VGPRExcessLimit =
40       Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);
41 
42   SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
43   // Set the initial TargetOccupnacy to the maximum occupancy that we can
44   // achieve for this function. This effectively sets a lower bound on the
45   // 'Critical' register limits in the scheduler.
46   TargetOccupancy = MFI.getOccupancy();
47   SGPRCriticalLimit =
48       std::min(ST.getMaxNumSGPRs(TargetOccupancy, true), SGPRExcessLimit);
49   VGPRCriticalLimit =
50       std::min(ST.getMaxNumVGPRs(TargetOccupancy), VGPRExcessLimit);
51 
52   // Subtract error margin from register limits and avoid overflow.
53   SGPRCriticalLimit =
54       std::min(SGPRCriticalLimit - ErrorMargin, SGPRCriticalLimit);
55   VGPRCriticalLimit =
56       std::min(VGPRCriticalLimit - ErrorMargin, VGPRCriticalLimit);
57   SGPRExcessLimit = std::min(SGPRExcessLimit - ErrorMargin, SGPRExcessLimit);
58   VGPRExcessLimit = std::min(VGPRExcessLimit - ErrorMargin, VGPRExcessLimit);
59 }
60 
61 void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
62                                      bool AtTop, const RegPressureTracker &RPTracker,
63                                      const SIRegisterInfo *SRI,
64                                      unsigned SGPRPressure,
65                                      unsigned VGPRPressure) {
66 
67   Cand.SU = SU;
68   Cand.AtTop = AtTop;
69 
70   // getDownwardPressure() and getUpwardPressure() make temporary changes to
71   // the tracker, so we need to pass those function a non-const copy.
72   RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
73 
74   Pressure.clear();
75   MaxPressure.clear();
76 
77   if (AtTop)
78     TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
79   else {
80     // FIXME: I think for bottom up scheduling, the register pressure is cached
81     // and can be retrieved by DAG->getPressureDif(SU).
82     TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
83   }
84 
85   unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
86   unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
87 
88   // If two instructions increase the pressure of different register sets
89   // by the same amount, the generic scheduler will prefer to schedule the
90   // instruction that increases the set with the least amount of registers,
91   // which in our case would be SGPRs.  This is rarely what we want, so
92   // when we report excess/critical register pressure, we do it either
93   // only for VGPRs or only for SGPRs.
94 
95   // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
96   const unsigned MaxVGPRPressureInc = 16;
97   bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
98   bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;
99 
100 
101   // FIXME: We have to enter REG-EXCESS before we reach the actual threshold
102   // to increase the likelihood we don't go over the limits.  We should improve
103   // the analysis to look through dependencies to find the path with the least
104   // register pressure.
105 
106   // We only need to update the RPDelta for instructions that increase register
107   // pressure. Instructions that decrease or keep reg pressure the same will be
108   // marked as RegExcess in tryCandidate() when they are compared with
109   // instructions that increase the register pressure.
110   if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
111     HasExcessPressure = true;
112     Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
113     Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
114   }
115 
116   if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
117     HasExcessPressure = true;
118     Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
119     Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
120   }
121 
122   // Register pressure is considered 'CRITICAL' if it is approaching a value
123   // that would reduce the wave occupancy for the execution unit.  When
124   // register pressure is 'CRITICAL', increasing SGPR and VGPR pressure both
125   // has the same cost, so we don't need to prefer one over the other.
126 
127   int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
128   int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;
129 
130   if (SGPRDelta >= 0 || VGPRDelta >= 0) {
131     HasExcessPressure = true;
132     if (SGPRDelta > VGPRDelta) {
133       Cand.RPDelta.CriticalMax =
134         PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
135       Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
136     } else {
137       Cand.RPDelta.CriticalMax =
138         PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
139       Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
140     }
141   }
142 }
143 
144 // This function is mostly cut and pasted from
145 // GenericScheduler::pickNodeFromQueue()
146 void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
147                                          const CandPolicy &ZonePolicy,
148                                          const RegPressureTracker &RPTracker,
149                                          SchedCandidate &Cand) {
150   const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
151   ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
152   unsigned SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
153   unsigned VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
154   ReadyQueue &Q = Zone.Available;
155   for (SUnit *SU : Q) {
156 
157     SchedCandidate TryCand(ZonePolicy);
158     initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI,
159                   SGPRPressure, VGPRPressure);
160     // Pass SchedBoundary only when comparing nodes from the same boundary.
161     SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
162     GenericScheduler::tryCandidate(Cand, TryCand, ZoneArg);
163     if (TryCand.Reason != NoCand) {
164       // Initialize resource delta if needed in case future heuristics query it.
165       if (TryCand.ResDelta == SchedResourceDelta())
166         TryCand.initResourceDelta(Zone.DAG, SchedModel);
167       Cand.setBest(TryCand);
168       LLVM_DEBUG(traceCandidate(Cand));
169     }
170   }
171 }
172 
173 // This function is mostly cut and pasted from
174 // GenericScheduler::pickNodeBidirectional()
175 SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
176   // Schedule as far as possible in the direction of no choice. This is most
177   // efficient, but also provides the best heuristics for CriticalPSets.
178   if (SUnit *SU = Bot.pickOnlyChoice()) {
179     IsTopNode = false;
180     return SU;
181   }
182   if (SUnit *SU = Top.pickOnlyChoice()) {
183     IsTopNode = true;
184     return SU;
185   }
186   // Set the bottom-up policy based on the state of the current bottom zone and
187   // the instructions outside the zone, including the top zone.
188   CandPolicy BotPolicy;
189   setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
190   // Set the top-down policy based on the state of the current top zone and
191   // the instructions outside the zone, including the bottom zone.
192   CandPolicy TopPolicy;
193   setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
194 
195   // See if BotCand is still valid (because we previously scheduled from Top).
196   LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
197   if (!BotCand.isValid() || BotCand.SU->isScheduled ||
198       BotCand.Policy != BotPolicy) {
199     BotCand.reset(CandPolicy());
200     pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
201     assert(BotCand.Reason != NoCand && "failed to find the first candidate");
202   } else {
203     LLVM_DEBUG(traceCandidate(BotCand));
204 #ifndef NDEBUG
205     if (VerifyScheduling) {
206       SchedCandidate TCand;
207       TCand.reset(CandPolicy());
208       pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
209       assert(TCand.SU == BotCand.SU &&
210              "Last pick result should correspond to re-picking right now");
211     }
212 #endif
213   }
214 
215   // Check if the top Q has a better candidate.
216   LLVM_DEBUG(dbgs() << "Picking from Top:\n");
217   if (!TopCand.isValid() || TopCand.SU->isScheduled ||
218       TopCand.Policy != TopPolicy) {
219     TopCand.reset(CandPolicy());
220     pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
221     assert(TopCand.Reason != NoCand && "failed to find the first candidate");
222   } else {
223     LLVM_DEBUG(traceCandidate(TopCand));
224 #ifndef NDEBUG
225     if (VerifyScheduling) {
226       SchedCandidate TCand;
227       TCand.reset(CandPolicy());
228       pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
229       assert(TCand.SU == TopCand.SU &&
230            "Last pick result should correspond to re-picking right now");
231     }
232 #endif
233   }
234 
235   // Pick best from BotCand and TopCand.
236   LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand);
237              dbgs() << "Bot Cand: "; traceCandidate(BotCand););
238   SchedCandidate Cand = BotCand;
239   TopCand.Reason = NoCand;
240   GenericScheduler::tryCandidate(Cand, TopCand, nullptr);
241   if (TopCand.Reason != NoCand) {
242     Cand.setBest(TopCand);
243   }
244   LLVM_DEBUG(dbgs() << "Picking: "; traceCandidate(Cand););
245 
246   IsTopNode = Cand.AtTop;
247   return Cand.SU;
248 }
249 
250 // This function is mostly cut and pasted from
251 // GenericScheduler::pickNode()
252 SUnit *GCNMaxOccupancySchedStrategy::pickNode(bool &IsTopNode) {
253   if (DAG->top() == DAG->bottom()) {
254     assert(Top.Available.empty() && Top.Pending.empty() &&
255            Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
256     return nullptr;
257   }
258   SUnit *SU;
259   do {
260     if (RegionPolicy.OnlyTopDown) {
261       SU = Top.pickOnlyChoice();
262       if (!SU) {
263         CandPolicy NoPolicy;
264         TopCand.reset(NoPolicy);
265         pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
266         assert(TopCand.Reason != NoCand && "failed to find a candidate");
267         SU = TopCand.SU;
268       }
269       IsTopNode = true;
270     } else if (RegionPolicy.OnlyBottomUp) {
271       SU = Bot.pickOnlyChoice();
272       if (!SU) {
273         CandPolicy NoPolicy;
274         BotCand.reset(NoPolicy);
275         pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
276         assert(BotCand.Reason != NoCand && "failed to find a candidate");
277         SU = BotCand.SU;
278       }
279       IsTopNode = false;
280     } else {
281       SU = pickNodeBidirectional(IsTopNode);
282     }
283   } while (SU->isScheduled);
284 
285   if (SU->isTopReady())
286     Top.removeReady(SU);
287   if (SU->isBottomReady())
288     Bot.removeReady(SU);
289 
290   if (!HasClusteredNodes && SU->getInstr()->mayLoadOrStore()) {
291     for (SDep &Dep : SU->Preds) {
292       if (Dep.isCluster()) {
293         HasClusteredNodes = true;
294         break;
295       }
296     }
297   }
298 
299   LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
300                     << *SU->getInstr());
301   return SU;
302 }
303 
304 GCNScheduleDAGMILive::GCNScheduleDAGMILive(MachineSchedContext *C,
305                         std::unique_ptr<MachineSchedStrategy> S) :
306   ScheduleDAGMILive(C, std::move(S)),
307   ST(MF.getSubtarget<GCNSubtarget>()),
308   MFI(*MF.getInfo<SIMachineFunctionInfo>()),
309   StartingOccupancy(MFI.getOccupancy()),
310   MinOccupancy(StartingOccupancy), Stage(Collect), RegionIdx(0) {
311 
312   LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
313 }
314 
315 void GCNScheduleDAGMILive::schedule() {
316   if (Stage == Collect) {
317     // Just record regions at the first pass.
318     Regions.push_back(std::make_pair(RegionBegin, RegionEnd));
319     return;
320   }
321 
322   std::vector<MachineInstr*> Unsched;
323   Unsched.reserve(NumRegionInstrs);
324   for (auto &I : *this) {
325     Unsched.push_back(&I);
326   }
327 
328   GCNRegPressure PressureBefore;
329   if (LIS) {
330     PressureBefore = Pressure[RegionIdx];
331 
332     LLVM_DEBUG(dbgs() << "Pressure before scheduling:\nRegion live-ins:";
333                GCNRPTracker::printLiveRegs(dbgs(), LiveIns[RegionIdx], MRI);
334                dbgs() << "Region live-in pressure:  ";
335                llvm::getRegPressure(MRI, LiveIns[RegionIdx]).print(dbgs());
336                dbgs() << "Region register pressure: ";
337                PressureBefore.print(dbgs()));
338   }
339 
340   GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl;
341   // Set HasClusteredNodes to true for late stages where we have already
342   // collected it. That way pickNode() will not scan SDep's when not needed.
343   S.HasClusteredNodes = Stage > InitialSchedule;
344   S.HasExcessPressure = false;
345   ScheduleDAGMILive::schedule();
346   Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd);
347   RescheduleRegions[RegionIdx] = false;
348   if (Stage == InitialSchedule && S.HasClusteredNodes)
349     RegionsWithClusters[RegionIdx] = true;
350   if (S.HasExcessPressure)
351     RegionsWithHighRP[RegionIdx] = true;
352 
353   if (!LIS)
354     return;
355 
356   // Check the results of scheduling.
357   auto PressureAfter = getRealRegPressure();
358 
359   LLVM_DEBUG(dbgs() << "Pressure after scheduling: ";
360              PressureAfter.print(dbgs()));
361 
362   if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit &&
363       PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) <= S.VGPRCriticalLimit) {
364     Pressure[RegionIdx] = PressureAfter;
365     LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n");
366     return;
367   }
368 
369   unsigned WavesAfter =
370       std::min(S.TargetOccupancy, PressureAfter.getOccupancy(ST));
371   unsigned WavesBefore =
372       std::min(S.TargetOccupancy, PressureBefore.getOccupancy(ST));
373   LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore
374                     << ", after " << WavesAfter << ".\n");
375 
376   // We may not be able to keep the current target occupancy because of the just
377   // scheduled region. We might still be able to revert scheduling if the
378   // occupancy before was higher, or if the current schedule has register
379   // pressure higher than the excess limits which could lead to more spilling.
380   unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);
381   // Allow memory bound functions to drop to 4 waves if not limited by an
382   // attribute.
383   if (WavesAfter < WavesBefore && WavesAfter < MinOccupancy &&
384       WavesAfter >= MFI.getMinAllowedOccupancy()) {
385     LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "
386                       << MFI.getMinAllowedOccupancy() << " waves\n");
387     NewOccupancy = WavesAfter;
388   }
389 
390   if (NewOccupancy < MinOccupancy) {
391     MinOccupancy = NewOccupancy;
392     MFI.limitOccupancy(MinOccupancy);
393     LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
394                       << MinOccupancy << ".\n");
395   }
396 
397   unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
398   unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
399   if (PressureAfter.getVGPRNum(false) > MaxVGPRs ||
400       PressureAfter.getAGPRNum() > MaxVGPRs ||
401       PressureAfter.getSGPRNum() > MaxSGPRs) {
402     RescheduleRegions[RegionIdx] = true;
403     RegionsWithHighRP[RegionIdx] = true;
404   }
405 
406   // If this condition is true, then either the occupancy before and after
407   // scheduling is the same, or we are allowing the occupancy to drop because
408   // the function is memory bound. Even if we are OK with the current occupancy,
409   // we still need to verify that we will not introduce any extra chance of
410   // spilling.
411   if (WavesAfter >= MinOccupancy) {
412     if (Stage == UnclusteredReschedule &&
413         !PressureAfter.less(ST, PressureBefore)) {
414       LLVM_DEBUG(dbgs() << "Unclustered reschedule did not help.\n");
415     } else if (WavesAfter > MFI.getMinWavesPerEU() ||
416         PressureAfter.less(ST, PressureBefore) ||
417         !RescheduleRegions[RegionIdx]) {
418       Pressure[RegionIdx] = PressureAfter;
419       if (!RegionsWithClusters[RegionIdx] &&
420           (Stage + 1) == UnclusteredReschedule)
421         RescheduleRegions[RegionIdx] = false;
422       return;
423     } else {
424       LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
425     }
426   }
427 
428   LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
429   RescheduleRegions[RegionIdx] = RegionsWithClusters[RegionIdx] ||
430                                  (Stage + 1) != UnclusteredReschedule;
431   RegionEnd = RegionBegin;
432   for (MachineInstr *MI : Unsched) {
433     if (MI->isDebugInstr())
434       continue;
435 
436     if (MI->getIterator() != RegionEnd) {
437       BB->remove(MI);
438       BB->insert(RegionEnd, MI);
439       if (!MI->isDebugInstr())
440         LIS->handleMove(*MI, true);
441     }
442     // Reset read-undef flags and update them later.
443     for (auto &Op : MI->operands())
444       if (Op.isReg() && Op.isDef())
445         Op.setIsUndef(false);
446     RegisterOperands RegOpers;
447     RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
448     if (!MI->isDebugInstr()) {
449       if (ShouldTrackLaneMasks) {
450         // Adjust liveness and add missing dead+read-undef flags.
451         SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
452         RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
453       } else {
454         // Adjust for missing dead-def flags.
455         RegOpers.detectDeadDefs(*MI, *LIS);
456       }
457     }
458     RegionEnd = MI->getIterator();
459     ++RegionEnd;
460     LLVM_DEBUG(dbgs() << "Scheduling " << *MI);
461   }
462   RegionBegin = Unsched.front()->getIterator();
463   Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd);
464 
465   placeDebugValues();
466 }
467 
468 GCNRegPressure GCNScheduleDAGMILive::getRealRegPressure() const {
469   GCNDownwardRPTracker RPTracker(*LIS);
470   RPTracker.advance(begin(), end(), &LiveIns[RegionIdx]);
471   return RPTracker.moveMaxPressure();
472 }
473 
474 void GCNScheduleDAGMILive::computeBlockPressure(const MachineBasicBlock *MBB) {
475   GCNDownwardRPTracker RPTracker(*LIS);
476 
477   // If the block has the only successor then live-ins of that successor are
478   // live-outs of the current block. We can reuse calculated live set if the
479   // successor will be sent to scheduling past current block.
480   const MachineBasicBlock *OnlySucc = nullptr;
481   if (MBB->succ_size() == 1 && !(*MBB->succ_begin())->empty()) {
482     SlotIndexes *Ind = LIS->getSlotIndexes();
483     if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(*MBB->succ_begin()))
484       OnlySucc = *MBB->succ_begin();
485   }
486 
487   // Scheduler sends regions from the end of the block upwards.
488   size_t CurRegion = RegionIdx;
489   for (size_t E = Regions.size(); CurRegion != E; ++CurRegion)
490     if (Regions[CurRegion].first->getParent() != MBB)
491       break;
492   --CurRegion;
493 
494   auto I = MBB->begin();
495   auto LiveInIt = MBBLiveIns.find(MBB);
496   if (LiveInIt != MBBLiveIns.end()) {
497     auto LiveIn = std::move(LiveInIt->second);
498     RPTracker.reset(*MBB->begin(), &LiveIn);
499     MBBLiveIns.erase(LiveInIt);
500   } else {
501     auto &Rgn = Regions[CurRegion];
502     I = Rgn.first;
503     auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
504     auto LRS = BBLiveInMap.lookup(NonDbgMI);
505 #ifdef EXPENSIVE_CHECKS
506     assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS));
507 #endif
508     RPTracker.reset(*I, &LRS);
509   }
510 
511   for ( ; ; ) {
512     I = RPTracker.getNext();
513 
514     if (Regions[CurRegion].first == I) {
515       LiveIns[CurRegion] = RPTracker.getLiveRegs();
516       RPTracker.clearMaxPressure();
517     }
518 
519     if (Regions[CurRegion].second == I) {
520       Pressure[CurRegion] = RPTracker.moveMaxPressure();
521       if (CurRegion-- == RegionIdx)
522         break;
523     }
524     RPTracker.advanceToNext();
525     RPTracker.advanceBeforeNext();
526   }
527 
528   if (OnlySucc) {
529     if (I != MBB->end()) {
530       RPTracker.advanceToNext();
531       RPTracker.advance(MBB->end());
532     }
533     RPTracker.reset(*OnlySucc->begin(), &RPTracker.getLiveRegs());
534     RPTracker.advanceBeforeNext();
535     MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs();
536   }
537 }
538 
539 DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
540 GCNScheduleDAGMILive::getBBLiveInMap() const {
541   assert(!Regions.empty());
542   std::vector<MachineInstr *> BBStarters;
543   BBStarters.reserve(Regions.size());
544   auto I = Regions.rbegin(), E = Regions.rend();
545   auto *BB = I->first->getParent();
546   do {
547     auto *MI = &*skipDebugInstructionsForward(I->first, I->second);
548     BBStarters.push_back(MI);
549     do {
550       ++I;
551     } while (I != E && I->first->getParent() == BB);
552   } while (I != E);
553   return getLiveRegMap(BBStarters, false /*After*/, *LIS);
554 }
555 
556 void GCNScheduleDAGMILive::finalizeSchedule() {
557   LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");
558 
559   LiveIns.resize(Regions.size());
560   Pressure.resize(Regions.size());
561   RescheduleRegions.resize(Regions.size());
562   RegionsWithClusters.resize(Regions.size());
563   RegionsWithHighRP.resize(Regions.size());
564   RescheduleRegions.set();
565   RegionsWithClusters.reset();
566   RegionsWithHighRP.reset();
567 
568   if (!Regions.empty())
569     BBLiveInMap = getBBLiveInMap();
570 
571   std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;
572 
573   do {
574     Stage++;
575     RegionIdx = 0;
576     MachineBasicBlock *MBB = nullptr;
577 
578     if (Stage > InitialSchedule) {
579       if (!LIS)
580         break;
581 
582       // Retry function scheduling if we found resulting occupancy and it is
583       // lower than used for first pass scheduling. This will give more freedom
584       // to schedule low register pressure blocks.
585       // Code is partially copied from MachineSchedulerBase::scheduleRegions().
586 
587       if (Stage == UnclusteredReschedule) {
588         if (RescheduleRegions.none())
589           continue;
590         LLVM_DEBUG(dbgs() <<
591           "Retrying function scheduling without clustering.\n");
592       }
593 
594       if (Stage == ClusteredLowOccupancyReschedule) {
595         if (StartingOccupancy <= MinOccupancy)
596           break;
597 
598         LLVM_DEBUG(
599             dbgs()
600             << "Retrying function scheduling with lowest recorded occupancy "
601             << MinOccupancy << ".\n");
602       }
603     }
604 
605     if (Stage == UnclusteredReschedule)
606       SavedMutations.swap(Mutations);
607 
608     for (auto Region : Regions) {
609       if ((Stage == UnclusteredReschedule && !RescheduleRegions[RegionIdx]) ||
610           (Stage == ClusteredLowOccupancyReschedule &&
611            !RegionsWithClusters[RegionIdx] && !RegionsWithHighRP[RegionIdx])) {
612 
613         ++RegionIdx;
614         continue;
615       }
616 
617       RegionBegin = Region.first;
618       RegionEnd = Region.second;
619 
620       if (RegionBegin->getParent() != MBB) {
621         if (MBB) finishBlock();
622         MBB = RegionBegin->getParent();
623         startBlock(MBB);
624         if (Stage == InitialSchedule)
625           computeBlockPressure(MBB);
626       }
627 
628       unsigned NumRegionInstrs = std::distance(begin(), end());
629       enterRegion(MBB, begin(), end(), NumRegionInstrs);
630 
631       // Skip empty scheduling regions (0 or 1 schedulable instructions).
632       if (begin() == end() || begin() == std::prev(end())) {
633         exitRegion();
634         continue;
635       }
636 
637       LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
638       LLVM_DEBUG(dbgs() << MF.getName() << ":" << printMBBReference(*MBB) << " "
639                         << MBB->getName() << "\n  From: " << *begin()
640                         << "    To: ";
641                  if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
642                  else dbgs() << "End";
643                  dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
644 
645       schedule();
646 
647       exitRegion();
648       ++RegionIdx;
649     }
650     finishBlock();
651 
652     if (Stage == UnclusteredReschedule)
653       SavedMutations.swap(Mutations);
654   } while (Stage != LastStage);
655 }
656