xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp (revision bdd1243df58e60e85101c09001d9812a789b6bc4)
10b57cec5SDimitry Andric //===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric /// \file
100b57cec5SDimitry Andric /// This contains a MachineSchedStrategy implementation for maximizing wave
110b57cec5SDimitry Andric /// occupancy on GCN hardware.
12972a253aSDimitry Andric ///
13972a253aSDimitry Andric /// This pass will apply multiple scheduling stages to the same function.
14972a253aSDimitry Andric /// Regions are first recorded in GCNScheduleDAGMILive::schedule. The actual
15972a253aSDimitry Andric /// entry point for the scheduling of those regions is
16972a253aSDimitry Andric /// GCNScheduleDAGMILive::runSchedStages.
17972a253aSDimitry Andric 
18972a253aSDimitry Andric /// Generally, the reason for having multiple scheduling stages is to account
19972a253aSDimitry Andric /// for the kernel-wide effect of register usage on occupancy.  Usually, only a
20972a253aSDimitry Andric /// few scheduling regions will have register pressure high enough to limit
21972a253aSDimitry Andric /// occupancy for the kernel, so constraints can be relaxed to improve ILP in
22972a253aSDimitry Andric /// other regions.
23972a253aSDimitry Andric ///
240b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
250b57cec5SDimitry Andric 
260b57cec5SDimitry Andric #include "GCNSchedStrategy.h"
27*bdd1243dSDimitry Andric #include "AMDGPUIGroupLP.h"
280b57cec5SDimitry Andric #include "SIMachineFunctionInfo.h"
2981ad6265SDimitry Andric #include "llvm/CodeGen/RegisterClassInfo.h"
300b57cec5SDimitry Andric 
310b57cec5SDimitry Andric #define DEBUG_TYPE "machine-scheduler"
320b57cec5SDimitry Andric 
330b57cec5SDimitry Andric using namespace llvm;
340b57cec5SDimitry Andric 
35*bdd1243dSDimitry Andric static cl::opt<bool>
36*bdd1243dSDimitry Andric     DisableUnclusterHighRP("amdgpu-disable-unclustred-high-rp-reschedule",
37*bdd1243dSDimitry Andric                            cl::Hidden,
38*bdd1243dSDimitry Andric                            cl::desc("Disable unclustred high register pressure "
39*bdd1243dSDimitry Andric                                     "reduction scheduling stage."),
40*bdd1243dSDimitry Andric                            cl::init(false));
41*bdd1243dSDimitry Andric static cl::opt<unsigned> ScheduleMetricBias(
42*bdd1243dSDimitry Andric     "amdgpu-schedule-metric-bias", cl::Hidden,
43*bdd1243dSDimitry Andric     cl::desc(
44*bdd1243dSDimitry Andric         "Sets the bias which adds weight to occupancy vs latency. Set it to "
45*bdd1243dSDimitry Andric         "100 to chase the occupancy only."),
46*bdd1243dSDimitry Andric     cl::init(10));
470b57cec5SDimitry Andric 
48*bdd1243dSDimitry Andric const unsigned ScheduleMetrics::ScaleFactor = 100;
49*bdd1243dSDimitry Andric 
50*bdd1243dSDimitry Andric GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C)
51*bdd1243dSDimitry Andric     : GenericScheduler(C), TargetOccupancy(0), MF(nullptr),
52*bdd1243dSDimitry Andric       HasHighPressure(false) {}
53*bdd1243dSDimitry Andric 
54*bdd1243dSDimitry Andric void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
550b57cec5SDimitry Andric   GenericScheduler::initialize(DAG);
560b57cec5SDimitry Andric 
570b57cec5SDimitry Andric   MF = &DAG->MF;
580b57cec5SDimitry Andric 
590b57cec5SDimitry Andric   const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
600b57cec5SDimitry Andric 
61349cc55cSDimitry Andric   SGPRExcessLimit =
62349cc55cSDimitry Andric       Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
63349cc55cSDimitry Andric   VGPRExcessLimit =
64349cc55cSDimitry Andric       Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);
650b57cec5SDimitry Andric 
66349cc55cSDimitry Andric   SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
67349cc55cSDimitry Andric   // Set the initial TargetOccupnacy to the maximum occupancy that we can
68349cc55cSDimitry Andric   // achieve for this function. This effectively sets a lower bound on the
69349cc55cSDimitry Andric   // 'Critical' register limits in the scheduler.
70349cc55cSDimitry Andric   TargetOccupancy = MFI.getOccupancy();
71349cc55cSDimitry Andric   SGPRCriticalLimit =
72349cc55cSDimitry Andric       std::min(ST.getMaxNumSGPRs(TargetOccupancy, true), SGPRExcessLimit);
73*bdd1243dSDimitry Andric 
74*bdd1243dSDimitry Andric   if (!KnownExcessRP) {
75349cc55cSDimitry Andric     VGPRCriticalLimit =
76349cc55cSDimitry Andric         std::min(ST.getMaxNumVGPRs(TargetOccupancy), VGPRExcessLimit);
77*bdd1243dSDimitry Andric   } else {
78*bdd1243dSDimitry Andric     // This is similar to ST.getMaxNumVGPRs(TargetOccupancy) result except
79*bdd1243dSDimitry Andric     // returns a reasonably small number for targets with lots of VGPRs, such
80*bdd1243dSDimitry Andric     // as GFX10 and GFX11.
81*bdd1243dSDimitry Andric     LLVM_DEBUG(dbgs() << "Region is known to spill, use alternative "
82*bdd1243dSDimitry Andric                          "VGPRCriticalLimit calculation method.\n");
83349cc55cSDimitry Andric 
84*bdd1243dSDimitry Andric     unsigned Granule = AMDGPU::IsaInfo::getVGPRAllocGranule(&ST);
85*bdd1243dSDimitry Andric     unsigned Addressable = AMDGPU::IsaInfo::getAddressableNumVGPRs(&ST);
86*bdd1243dSDimitry Andric     unsigned VGPRBudget = alignDown(Addressable / TargetOccupancy, Granule);
87*bdd1243dSDimitry Andric     VGPRBudget = std::max(VGPRBudget, Granule);
88*bdd1243dSDimitry Andric     VGPRCriticalLimit = std::min(VGPRBudget, VGPRExcessLimit);
890b57cec5SDimitry Andric   }
900b57cec5SDimitry Andric 
91*bdd1243dSDimitry Andric   // Subtract error margin and bias from register limits and avoid overflow.
92*bdd1243dSDimitry Andric   SGPRCriticalLimit -= std::min(SGPRLimitBias + ErrorMargin, SGPRCriticalLimit);
93*bdd1243dSDimitry Andric   VGPRCriticalLimit -= std::min(VGPRLimitBias + ErrorMargin, VGPRCriticalLimit);
94*bdd1243dSDimitry Andric   SGPRExcessLimit -= std::min(SGPRLimitBias + ErrorMargin, SGPRExcessLimit);
95*bdd1243dSDimitry Andric   VGPRExcessLimit -= std::min(VGPRLimitBias + ErrorMargin, VGPRExcessLimit);
96*bdd1243dSDimitry Andric 
97*bdd1243dSDimitry Andric   LLVM_DEBUG(dbgs() << "VGPRCriticalLimit = " << VGPRCriticalLimit
98*bdd1243dSDimitry Andric                     << ", VGPRExcessLimit = " << VGPRExcessLimit
99*bdd1243dSDimitry Andric                     << ", SGPRCriticalLimit = " << SGPRCriticalLimit
100*bdd1243dSDimitry Andric                     << ", SGPRExcessLimit = " << SGPRExcessLimit << "\n\n");
101*bdd1243dSDimitry Andric }
102*bdd1243dSDimitry Andric 
103*bdd1243dSDimitry Andric void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
104*bdd1243dSDimitry Andric                                      bool AtTop,
105*bdd1243dSDimitry Andric                                      const RegPressureTracker &RPTracker,
1060b57cec5SDimitry Andric                                      const SIRegisterInfo *SRI,
1070b57cec5SDimitry Andric                                      unsigned SGPRPressure,
1080b57cec5SDimitry Andric                                      unsigned VGPRPressure) {
1090b57cec5SDimitry Andric   Cand.SU = SU;
1100b57cec5SDimitry Andric   Cand.AtTop = AtTop;
1110b57cec5SDimitry Andric 
112*bdd1243dSDimitry Andric   if (!DAG->isTrackingPressure())
113*bdd1243dSDimitry Andric     return;
114*bdd1243dSDimitry Andric 
1150b57cec5SDimitry Andric   // getDownwardPressure() and getUpwardPressure() make temporary changes to
1160b57cec5SDimitry Andric   // the tracker, so we need to pass those function a non-const copy.
1170b57cec5SDimitry Andric   RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
1180b57cec5SDimitry Andric 
1198bcb0991SDimitry Andric   Pressure.clear();
1208bcb0991SDimitry Andric   MaxPressure.clear();
1210b57cec5SDimitry Andric 
1220b57cec5SDimitry Andric   if (AtTop)
1230b57cec5SDimitry Andric     TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
1240b57cec5SDimitry Andric   else {
1250b57cec5SDimitry Andric     // FIXME: I think for bottom up scheduling, the register pressure is cached
1260b57cec5SDimitry Andric     // and can be retrieved by DAG->getPressureDif(SU).
1270b57cec5SDimitry Andric     TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
1280b57cec5SDimitry Andric   }
1290b57cec5SDimitry Andric 
1305ffd83dbSDimitry Andric   unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
1315ffd83dbSDimitry Andric   unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
1320b57cec5SDimitry Andric 
1330b57cec5SDimitry Andric   // If two instructions increase the pressure of different register sets
1340b57cec5SDimitry Andric   // by the same amount, the generic scheduler will prefer to schedule the
1350b57cec5SDimitry Andric   // instruction that increases the set with the least amount of registers,
1360b57cec5SDimitry Andric   // which in our case would be SGPRs.  This is rarely what we want, so
1370b57cec5SDimitry Andric   // when we report excess/critical register pressure, we do it either
1380b57cec5SDimitry Andric   // only for VGPRs or only for SGPRs.
1390b57cec5SDimitry Andric 
1400b57cec5SDimitry Andric   // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
1410b57cec5SDimitry Andric   const unsigned MaxVGPRPressureInc = 16;
1420b57cec5SDimitry Andric   bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
1430b57cec5SDimitry Andric   bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;
1440b57cec5SDimitry Andric 
1450b57cec5SDimitry Andric 
1460b57cec5SDimitry Andric   // FIXME: We have to enter REG-EXCESS before we reach the actual threshold
1470b57cec5SDimitry Andric   // to increase the likelihood we don't go over the limits.  We should improve
1480b57cec5SDimitry Andric   // the analysis to look through dependencies to find the path with the least
1490b57cec5SDimitry Andric   // register pressure.
1500b57cec5SDimitry Andric 
1518bcb0991SDimitry Andric   // We only need to update the RPDelta for instructions that increase register
1528bcb0991SDimitry Andric   // pressure. Instructions that decrease or keep reg pressure the same will be
1538bcb0991SDimitry Andric   // marked as RegExcess in tryCandidate() when they are compared with
1548bcb0991SDimitry Andric   // instructions that increase the register pressure.
1550b57cec5SDimitry Andric   if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
156*bdd1243dSDimitry Andric     HasHighPressure = true;
1575ffd83dbSDimitry Andric     Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
1580b57cec5SDimitry Andric     Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
1590b57cec5SDimitry Andric   }
1600b57cec5SDimitry Andric 
1610b57cec5SDimitry Andric   if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
162*bdd1243dSDimitry Andric     HasHighPressure = true;
1635ffd83dbSDimitry Andric     Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
1640b57cec5SDimitry Andric     Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
1650b57cec5SDimitry Andric   }
1660b57cec5SDimitry Andric 
1670b57cec5SDimitry Andric   // Register pressure is considered 'CRITICAL' if it is approaching a value
1680b57cec5SDimitry Andric   // that would reduce the wave occupancy for the execution unit.  When
169349cc55cSDimitry Andric   // register pressure is 'CRITICAL', increasing SGPR and VGPR pressure both
1700b57cec5SDimitry Andric   // has the same cost, so we don't need to prefer one over the other.
1710b57cec5SDimitry Andric 
1720b57cec5SDimitry Andric   int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
1730b57cec5SDimitry Andric   int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;
1740b57cec5SDimitry Andric 
1750b57cec5SDimitry Andric   if (SGPRDelta >= 0 || VGPRDelta >= 0) {
176*bdd1243dSDimitry Andric     HasHighPressure = true;
1770b57cec5SDimitry Andric     if (SGPRDelta > VGPRDelta) {
1785ffd83dbSDimitry Andric       Cand.RPDelta.CriticalMax =
1795ffd83dbSDimitry Andric         PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
1800b57cec5SDimitry Andric       Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
1810b57cec5SDimitry Andric     } else {
1825ffd83dbSDimitry Andric       Cand.RPDelta.CriticalMax =
1835ffd83dbSDimitry Andric         PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
1840b57cec5SDimitry Andric       Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
1850b57cec5SDimitry Andric     }
1860b57cec5SDimitry Andric   }
1870b57cec5SDimitry Andric }
1880b57cec5SDimitry Andric 
1890b57cec5SDimitry Andric // This function is mostly cut and pasted from
1900b57cec5SDimitry Andric // GenericScheduler::pickNodeFromQueue()
191*bdd1243dSDimitry Andric void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
1920b57cec5SDimitry Andric                                          const CandPolicy &ZonePolicy,
1930b57cec5SDimitry Andric                                          const RegPressureTracker &RPTracker,
1940b57cec5SDimitry Andric                                          SchedCandidate &Cand) {
1950b57cec5SDimitry Andric   const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
1960b57cec5SDimitry Andric   ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
197*bdd1243dSDimitry Andric   unsigned SGPRPressure = 0;
198*bdd1243dSDimitry Andric   unsigned VGPRPressure = 0;
199*bdd1243dSDimitry Andric   if (DAG->isTrackingPressure()) {
200*bdd1243dSDimitry Andric     SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
201*bdd1243dSDimitry Andric     VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
202*bdd1243dSDimitry Andric   }
2030b57cec5SDimitry Andric   ReadyQueue &Q = Zone.Available;
2040b57cec5SDimitry Andric   for (SUnit *SU : Q) {
2050b57cec5SDimitry Andric 
2060b57cec5SDimitry Andric     SchedCandidate TryCand(ZonePolicy);
2070b57cec5SDimitry Andric     initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI,
2080b57cec5SDimitry Andric                   SGPRPressure, VGPRPressure);
2090b57cec5SDimitry Andric     // Pass SchedBoundary only when comparing nodes from the same boundary.
2100b57cec5SDimitry Andric     SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
211*bdd1243dSDimitry Andric     tryCandidate(Cand, TryCand, ZoneArg);
2120b57cec5SDimitry Andric     if (TryCand.Reason != NoCand) {
2130b57cec5SDimitry Andric       // Initialize resource delta if needed in case future heuristics query it.
2140b57cec5SDimitry Andric       if (TryCand.ResDelta == SchedResourceDelta())
2150b57cec5SDimitry Andric         TryCand.initResourceDelta(Zone.DAG, SchedModel);
2160b57cec5SDimitry Andric       Cand.setBest(TryCand);
2178bcb0991SDimitry Andric       LLVM_DEBUG(traceCandidate(Cand));
2180b57cec5SDimitry Andric     }
2190b57cec5SDimitry Andric   }
2200b57cec5SDimitry Andric }
2210b57cec5SDimitry Andric 
2220b57cec5SDimitry Andric // This function is mostly cut and pasted from
2230b57cec5SDimitry Andric // GenericScheduler::pickNodeBidirectional()
224*bdd1243dSDimitry Andric SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
2250b57cec5SDimitry Andric   // Schedule as far as possible in the direction of no choice. This is most
2260b57cec5SDimitry Andric   // efficient, but also provides the best heuristics for CriticalPSets.
2270b57cec5SDimitry Andric   if (SUnit *SU = Bot.pickOnlyChoice()) {
2280b57cec5SDimitry Andric     IsTopNode = false;
2290b57cec5SDimitry Andric     return SU;
2300b57cec5SDimitry Andric   }
2310b57cec5SDimitry Andric   if (SUnit *SU = Top.pickOnlyChoice()) {
2320b57cec5SDimitry Andric     IsTopNode = true;
2330b57cec5SDimitry Andric     return SU;
2340b57cec5SDimitry Andric   }
2350b57cec5SDimitry Andric   // Set the bottom-up policy based on the state of the current bottom zone and
2360b57cec5SDimitry Andric   // the instructions outside the zone, including the top zone.
2370b57cec5SDimitry Andric   CandPolicy BotPolicy;
2380b57cec5SDimitry Andric   setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
2390b57cec5SDimitry Andric   // Set the top-down policy based on the state of the current top zone and
2400b57cec5SDimitry Andric   // the instructions outside the zone, including the bottom zone.
2410b57cec5SDimitry Andric   CandPolicy TopPolicy;
2420b57cec5SDimitry Andric   setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
2430b57cec5SDimitry Andric 
2440b57cec5SDimitry Andric   // See if BotCand is still valid (because we previously scheduled from Top).
2450b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
2460b57cec5SDimitry Andric   if (!BotCand.isValid() || BotCand.SU->isScheduled ||
2470b57cec5SDimitry Andric       BotCand.Policy != BotPolicy) {
2480b57cec5SDimitry Andric     BotCand.reset(CandPolicy());
2490b57cec5SDimitry Andric     pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
2500b57cec5SDimitry Andric     assert(BotCand.Reason != NoCand && "failed to find the first candidate");
2510b57cec5SDimitry Andric   } else {
2520b57cec5SDimitry Andric     LLVM_DEBUG(traceCandidate(BotCand));
2538bcb0991SDimitry Andric #ifndef NDEBUG
2548bcb0991SDimitry Andric     if (VerifyScheduling) {
2558bcb0991SDimitry Andric       SchedCandidate TCand;
2568bcb0991SDimitry Andric       TCand.reset(CandPolicy());
2578bcb0991SDimitry Andric       pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
2588bcb0991SDimitry Andric       assert(TCand.SU == BotCand.SU &&
2598bcb0991SDimitry Andric              "Last pick result should correspond to re-picking right now");
2608bcb0991SDimitry Andric     }
2618bcb0991SDimitry Andric #endif
2620b57cec5SDimitry Andric   }
2630b57cec5SDimitry Andric 
2640b57cec5SDimitry Andric   // Check if the top Q has a better candidate.
2650b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Picking from Top:\n");
2660b57cec5SDimitry Andric   if (!TopCand.isValid() || TopCand.SU->isScheduled ||
2670b57cec5SDimitry Andric       TopCand.Policy != TopPolicy) {
2680b57cec5SDimitry Andric     TopCand.reset(CandPolicy());
2690b57cec5SDimitry Andric     pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
2700b57cec5SDimitry Andric     assert(TopCand.Reason != NoCand && "failed to find the first candidate");
2710b57cec5SDimitry Andric   } else {
2720b57cec5SDimitry Andric     LLVM_DEBUG(traceCandidate(TopCand));
2738bcb0991SDimitry Andric #ifndef NDEBUG
2748bcb0991SDimitry Andric     if (VerifyScheduling) {
2758bcb0991SDimitry Andric       SchedCandidate TCand;
2768bcb0991SDimitry Andric       TCand.reset(CandPolicy());
2778bcb0991SDimitry Andric       pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
2788bcb0991SDimitry Andric       assert(TCand.SU == TopCand.SU &&
2798bcb0991SDimitry Andric            "Last pick result should correspond to re-picking right now");
2808bcb0991SDimitry Andric     }
2818bcb0991SDimitry Andric #endif
2820b57cec5SDimitry Andric   }
2830b57cec5SDimitry Andric 
2840b57cec5SDimitry Andric   // Pick best from BotCand and TopCand.
2850b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand);
2860b57cec5SDimitry Andric              dbgs() << "Bot Cand: "; traceCandidate(BotCand););
2875ffd83dbSDimitry Andric   SchedCandidate Cand = BotCand;
2880b57cec5SDimitry Andric   TopCand.Reason = NoCand;
289*bdd1243dSDimitry Andric   tryCandidate(Cand, TopCand, nullptr);
2900b57cec5SDimitry Andric   if (TopCand.Reason != NoCand) {
2910b57cec5SDimitry Andric     Cand.setBest(TopCand);
2920b57cec5SDimitry Andric   }
2930b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Picking: "; traceCandidate(Cand););
2940b57cec5SDimitry Andric 
2950b57cec5SDimitry Andric   IsTopNode = Cand.AtTop;
2960b57cec5SDimitry Andric   return Cand.SU;
2970b57cec5SDimitry Andric }
2980b57cec5SDimitry Andric 
2990b57cec5SDimitry Andric // This function is mostly cut and pasted from
3000b57cec5SDimitry Andric // GenericScheduler::pickNode()
301*bdd1243dSDimitry Andric SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
3020b57cec5SDimitry Andric   if (DAG->top() == DAG->bottom()) {
3030b57cec5SDimitry Andric     assert(Top.Available.empty() && Top.Pending.empty() &&
3040b57cec5SDimitry Andric            Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
3050b57cec5SDimitry Andric     return nullptr;
3060b57cec5SDimitry Andric   }
3070b57cec5SDimitry Andric   SUnit *SU;
3080b57cec5SDimitry Andric   do {
3090b57cec5SDimitry Andric     if (RegionPolicy.OnlyTopDown) {
3100b57cec5SDimitry Andric       SU = Top.pickOnlyChoice();
3110b57cec5SDimitry Andric       if (!SU) {
3120b57cec5SDimitry Andric         CandPolicy NoPolicy;
3130b57cec5SDimitry Andric         TopCand.reset(NoPolicy);
3140b57cec5SDimitry Andric         pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
3150b57cec5SDimitry Andric         assert(TopCand.Reason != NoCand && "failed to find a candidate");
3160b57cec5SDimitry Andric         SU = TopCand.SU;
3170b57cec5SDimitry Andric       }
3180b57cec5SDimitry Andric       IsTopNode = true;
3190b57cec5SDimitry Andric     } else if (RegionPolicy.OnlyBottomUp) {
3200b57cec5SDimitry Andric       SU = Bot.pickOnlyChoice();
3210b57cec5SDimitry Andric       if (!SU) {
3220b57cec5SDimitry Andric         CandPolicy NoPolicy;
3230b57cec5SDimitry Andric         BotCand.reset(NoPolicy);
3240b57cec5SDimitry Andric         pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
3250b57cec5SDimitry Andric         assert(BotCand.Reason != NoCand && "failed to find a candidate");
3260b57cec5SDimitry Andric         SU = BotCand.SU;
3270b57cec5SDimitry Andric       }
3280b57cec5SDimitry Andric       IsTopNode = false;
3290b57cec5SDimitry Andric     } else {
3300b57cec5SDimitry Andric       SU = pickNodeBidirectional(IsTopNode);
3310b57cec5SDimitry Andric     }
3320b57cec5SDimitry Andric   } while (SU->isScheduled);
3330b57cec5SDimitry Andric 
3340b57cec5SDimitry Andric   if (SU->isTopReady())
3350b57cec5SDimitry Andric     Top.removeReady(SU);
3360b57cec5SDimitry Andric   if (SU->isBottomReady())
3370b57cec5SDimitry Andric     Bot.removeReady(SU);
3380b57cec5SDimitry Andric 
3390b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
3400b57cec5SDimitry Andric                     << *SU->getInstr());
3410b57cec5SDimitry Andric   return SU;
3420b57cec5SDimitry Andric }
3430b57cec5SDimitry Andric 
344*bdd1243dSDimitry Andric GCNSchedStageID GCNSchedStrategy::getCurrentStage() {
345*bdd1243dSDimitry Andric   assert(CurrentStage && CurrentStage != SchedStages.end());
346*bdd1243dSDimitry Andric   return *CurrentStage;
347*bdd1243dSDimitry Andric }
348*bdd1243dSDimitry Andric 
349*bdd1243dSDimitry Andric bool GCNSchedStrategy::advanceStage() {
350*bdd1243dSDimitry Andric   assert(CurrentStage != SchedStages.end());
351*bdd1243dSDimitry Andric   if (!CurrentStage)
352*bdd1243dSDimitry Andric     CurrentStage = SchedStages.begin();
353*bdd1243dSDimitry Andric   else
354*bdd1243dSDimitry Andric     CurrentStage++;
355*bdd1243dSDimitry Andric 
356*bdd1243dSDimitry Andric   return CurrentStage != SchedStages.end();
357*bdd1243dSDimitry Andric }
358*bdd1243dSDimitry Andric 
359*bdd1243dSDimitry Andric bool GCNSchedStrategy::hasNextStage() const {
360*bdd1243dSDimitry Andric   assert(CurrentStage);
361*bdd1243dSDimitry Andric   return std::next(CurrentStage) != SchedStages.end();
362*bdd1243dSDimitry Andric }
363*bdd1243dSDimitry Andric 
364*bdd1243dSDimitry Andric GCNSchedStageID GCNSchedStrategy::getNextStage() const {
365*bdd1243dSDimitry Andric   assert(CurrentStage && std::next(CurrentStage) != SchedStages.end());
366*bdd1243dSDimitry Andric   return *std::next(CurrentStage);
367*bdd1243dSDimitry Andric }
368*bdd1243dSDimitry Andric 
369*bdd1243dSDimitry Andric GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
370*bdd1243dSDimitry Andric     const MachineSchedContext *C)
371*bdd1243dSDimitry Andric     : GCNSchedStrategy(C) {
372*bdd1243dSDimitry Andric   SchedStages.push_back(GCNSchedStageID::OccInitialSchedule);
373*bdd1243dSDimitry Andric   SchedStages.push_back(GCNSchedStageID::UnclusteredHighRPReschedule);
374*bdd1243dSDimitry Andric   SchedStages.push_back(GCNSchedStageID::ClusteredLowOccupancyReschedule);
375*bdd1243dSDimitry Andric   SchedStages.push_back(GCNSchedStageID::PreRARematerialize);
376*bdd1243dSDimitry Andric }
377*bdd1243dSDimitry Andric 
378*bdd1243dSDimitry Andric GCNMaxILPSchedStrategy::GCNMaxILPSchedStrategy(const MachineSchedContext *C)
379*bdd1243dSDimitry Andric     : GCNSchedStrategy(C) {
380*bdd1243dSDimitry Andric   SchedStages.push_back(GCNSchedStageID::ILPInitialSchedule);
381*bdd1243dSDimitry Andric }
382*bdd1243dSDimitry Andric 
383*bdd1243dSDimitry Andric bool GCNMaxILPSchedStrategy::tryCandidate(SchedCandidate &Cand,
384*bdd1243dSDimitry Andric                                           SchedCandidate &TryCand,
385*bdd1243dSDimitry Andric                                           SchedBoundary *Zone) const {
386*bdd1243dSDimitry Andric   // Initialize the candidate if needed.
387*bdd1243dSDimitry Andric   if (!Cand.isValid()) {
388*bdd1243dSDimitry Andric     TryCand.Reason = NodeOrder;
389*bdd1243dSDimitry Andric     return true;
390*bdd1243dSDimitry Andric   }
391*bdd1243dSDimitry Andric 
392*bdd1243dSDimitry Andric   // Avoid spilling by exceeding the register limit.
393*bdd1243dSDimitry Andric   if (DAG->isTrackingPressure() &&
394*bdd1243dSDimitry Andric       tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
395*bdd1243dSDimitry Andric                   RegExcess, TRI, DAG->MF))
396*bdd1243dSDimitry Andric     return TryCand.Reason != NoCand;
397*bdd1243dSDimitry Andric 
398*bdd1243dSDimitry Andric   // Bias PhysReg Defs and copies to their uses and defined respectively.
399*bdd1243dSDimitry Andric   if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
400*bdd1243dSDimitry Andric                  biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
401*bdd1243dSDimitry Andric     return TryCand.Reason != NoCand;
402*bdd1243dSDimitry Andric 
403*bdd1243dSDimitry Andric   bool SameBoundary = Zone != nullptr;
404*bdd1243dSDimitry Andric   if (SameBoundary) {
405*bdd1243dSDimitry Andric     // Prioritize instructions that read unbuffered resources by stall cycles.
406*bdd1243dSDimitry Andric     if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
407*bdd1243dSDimitry Andric                 Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
408*bdd1243dSDimitry Andric       return TryCand.Reason != NoCand;
409*bdd1243dSDimitry Andric 
410*bdd1243dSDimitry Andric     // Avoid critical resource consumption and balance the schedule.
411*bdd1243dSDimitry Andric     TryCand.initResourceDelta(DAG, SchedModel);
412*bdd1243dSDimitry Andric     if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
413*bdd1243dSDimitry Andric                 TryCand, Cand, ResourceReduce))
414*bdd1243dSDimitry Andric       return TryCand.Reason != NoCand;
415*bdd1243dSDimitry Andric     if (tryGreater(TryCand.ResDelta.DemandedResources,
416*bdd1243dSDimitry Andric                    Cand.ResDelta.DemandedResources, TryCand, Cand,
417*bdd1243dSDimitry Andric                    ResourceDemand))
418*bdd1243dSDimitry Andric       return TryCand.Reason != NoCand;
419*bdd1243dSDimitry Andric 
420*bdd1243dSDimitry Andric     // Unconditionally try to reduce latency.
421*bdd1243dSDimitry Andric     if (tryLatency(TryCand, Cand, *Zone))
422*bdd1243dSDimitry Andric       return TryCand.Reason != NoCand;
423*bdd1243dSDimitry Andric 
424*bdd1243dSDimitry Andric     // Weak edges are for clustering and other constraints.
425*bdd1243dSDimitry Andric     if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
426*bdd1243dSDimitry Andric                 getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak))
427*bdd1243dSDimitry Andric       return TryCand.Reason != NoCand;
428*bdd1243dSDimitry Andric   }
429*bdd1243dSDimitry Andric 
430*bdd1243dSDimitry Andric   // Keep clustered nodes together to encourage downstream peephole
431*bdd1243dSDimitry Andric   // optimizations which may reduce resource requirements.
432*bdd1243dSDimitry Andric   //
433*bdd1243dSDimitry Andric   // This is a best effort to set things up for a post-RA pass. Optimizations
434*bdd1243dSDimitry Andric   // like generating loads of multiple registers should ideally be done within
435*bdd1243dSDimitry Andric   // the scheduler pass by combining the loads during DAG postprocessing.
436*bdd1243dSDimitry Andric   const SUnit *CandNextClusterSU =
437*bdd1243dSDimitry Andric       Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
438*bdd1243dSDimitry Andric   const SUnit *TryCandNextClusterSU =
439*bdd1243dSDimitry Andric       TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
440*bdd1243dSDimitry Andric   if (tryGreater(TryCand.SU == TryCandNextClusterSU,
441*bdd1243dSDimitry Andric                  Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster))
442*bdd1243dSDimitry Andric     return TryCand.Reason != NoCand;
443*bdd1243dSDimitry Andric 
444*bdd1243dSDimitry Andric   // Avoid increasing the max critical pressure in the scheduled region.
445*bdd1243dSDimitry Andric   if (DAG->isTrackingPressure() &&
446*bdd1243dSDimitry Andric       tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,
447*bdd1243dSDimitry Andric                   TryCand, Cand, RegCritical, TRI, DAG->MF))
448*bdd1243dSDimitry Andric     return TryCand.Reason != NoCand;
449*bdd1243dSDimitry Andric 
450*bdd1243dSDimitry Andric   // Avoid increasing the max pressure of the entire region.
451*bdd1243dSDimitry Andric   if (DAG->isTrackingPressure() &&
452*bdd1243dSDimitry Andric       tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand,
453*bdd1243dSDimitry Andric                   Cand, RegMax, TRI, DAG->MF))
454*bdd1243dSDimitry Andric     return TryCand.Reason != NoCand;
455*bdd1243dSDimitry Andric 
456*bdd1243dSDimitry Andric   if (SameBoundary) {
457*bdd1243dSDimitry Andric     // Fall through to original instruction order.
458*bdd1243dSDimitry Andric     if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) ||
459*bdd1243dSDimitry Andric         (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
460*bdd1243dSDimitry Andric       TryCand.Reason = NodeOrder;
461*bdd1243dSDimitry Andric       return true;
462*bdd1243dSDimitry Andric     }
463*bdd1243dSDimitry Andric   }
464*bdd1243dSDimitry Andric   return false;
465*bdd1243dSDimitry Andric }
466*bdd1243dSDimitry Andric 
467972a253aSDimitry Andric GCNScheduleDAGMILive::GCNScheduleDAGMILive(
468972a253aSDimitry Andric     MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S)
469972a253aSDimitry Andric     : ScheduleDAGMILive(C, std::move(S)), ST(MF.getSubtarget<GCNSubtarget>()),
4700b57cec5SDimitry Andric       MFI(*MF.getInfo<SIMachineFunctionInfo>()),
471972a253aSDimitry Andric       StartingOccupancy(MFI.getOccupancy()), MinOccupancy(StartingOccupancy) {
4720b57cec5SDimitry Andric 
4730b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
4740b57cec5SDimitry Andric }
4750b57cec5SDimitry Andric 
476*bdd1243dSDimitry Andric std::unique_ptr<GCNSchedStage>
477*bdd1243dSDimitry Andric GCNScheduleDAGMILive::createSchedStage(GCNSchedStageID SchedStageID) {
478*bdd1243dSDimitry Andric   switch (SchedStageID) {
479*bdd1243dSDimitry Andric   case GCNSchedStageID::OccInitialSchedule:
480*bdd1243dSDimitry Andric     return std::make_unique<OccInitialScheduleStage>(SchedStageID, *this);
481*bdd1243dSDimitry Andric   case GCNSchedStageID::UnclusteredHighRPReschedule:
482*bdd1243dSDimitry Andric     return std::make_unique<UnclusteredHighRPStage>(SchedStageID, *this);
483*bdd1243dSDimitry Andric   case GCNSchedStageID::ClusteredLowOccupancyReschedule:
484*bdd1243dSDimitry Andric     return std::make_unique<ClusteredLowOccStage>(SchedStageID, *this);
485*bdd1243dSDimitry Andric   case GCNSchedStageID::PreRARematerialize:
486*bdd1243dSDimitry Andric     return std::make_unique<PreRARematStage>(SchedStageID, *this);
487*bdd1243dSDimitry Andric   case GCNSchedStageID::ILPInitialSchedule:
488*bdd1243dSDimitry Andric     return std::make_unique<ILPInitialScheduleStage>(SchedStageID, *this);
489*bdd1243dSDimitry Andric   }
490*bdd1243dSDimitry Andric 
491*bdd1243dSDimitry Andric   llvm_unreachable("Unknown SchedStageID.");
492*bdd1243dSDimitry Andric }
493*bdd1243dSDimitry Andric 
4940b57cec5SDimitry Andric void GCNScheduleDAGMILive::schedule() {
495972a253aSDimitry Andric   // Collect all scheduling regions. The actual scheduling is performed in
496972a253aSDimitry Andric   // GCNScheduleDAGMILive::finalizeSchedule.
497*bdd1243dSDimitry Andric   Regions.push_back(std::pair(RegionBegin, RegionEnd));
4980b57cec5SDimitry Andric }
4990b57cec5SDimitry Andric 
500972a253aSDimitry Andric GCNRegPressure
501972a253aSDimitry Andric GCNScheduleDAGMILive::getRealRegPressure(unsigned RegionIdx) const {
5020b57cec5SDimitry Andric   GCNDownwardRPTracker RPTracker(*LIS);
5030b57cec5SDimitry Andric   RPTracker.advance(begin(), end(), &LiveIns[RegionIdx]);
5040b57cec5SDimitry Andric   return RPTracker.moveMaxPressure();
5050b57cec5SDimitry Andric }
5060b57cec5SDimitry Andric 
507972a253aSDimitry Andric void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
508972a253aSDimitry Andric                                                 const MachineBasicBlock *MBB) {
5090b57cec5SDimitry Andric   GCNDownwardRPTracker RPTracker(*LIS);
5100b57cec5SDimitry Andric 
5110b57cec5SDimitry Andric   // If the block has the only successor then live-ins of that successor are
5120b57cec5SDimitry Andric   // live-outs of the current block. We can reuse calculated live set if the
5130b57cec5SDimitry Andric   // successor will be sent to scheduling past current block.
5140b57cec5SDimitry Andric   const MachineBasicBlock *OnlySucc = nullptr;
5150b57cec5SDimitry Andric   if (MBB->succ_size() == 1 && !(*MBB->succ_begin())->empty()) {
5160b57cec5SDimitry Andric     SlotIndexes *Ind = LIS->getSlotIndexes();
5170b57cec5SDimitry Andric     if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(*MBB->succ_begin()))
5180b57cec5SDimitry Andric       OnlySucc = *MBB->succ_begin();
5190b57cec5SDimitry Andric   }
5200b57cec5SDimitry Andric 
5210b57cec5SDimitry Andric   // Scheduler sends regions from the end of the block upwards.
5220b57cec5SDimitry Andric   size_t CurRegion = RegionIdx;
5230b57cec5SDimitry Andric   for (size_t E = Regions.size(); CurRegion != E; ++CurRegion)
5240b57cec5SDimitry Andric     if (Regions[CurRegion].first->getParent() != MBB)
5250b57cec5SDimitry Andric       break;
5260b57cec5SDimitry Andric   --CurRegion;
5270b57cec5SDimitry Andric 
5280b57cec5SDimitry Andric   auto I = MBB->begin();
5290b57cec5SDimitry Andric   auto LiveInIt = MBBLiveIns.find(MBB);
53081ad6265SDimitry Andric   auto &Rgn = Regions[CurRegion];
53181ad6265SDimitry Andric   auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
5320b57cec5SDimitry Andric   if (LiveInIt != MBBLiveIns.end()) {
5330b57cec5SDimitry Andric     auto LiveIn = std::move(LiveInIt->second);
5340b57cec5SDimitry Andric     RPTracker.reset(*MBB->begin(), &LiveIn);
5350b57cec5SDimitry Andric     MBBLiveIns.erase(LiveInIt);
5360b57cec5SDimitry Andric   } else {
5370b57cec5SDimitry Andric     I = Rgn.first;
5380b57cec5SDimitry Andric     auto LRS = BBLiveInMap.lookup(NonDbgMI);
539fe6060f1SDimitry Andric #ifdef EXPENSIVE_CHECKS
5400b57cec5SDimitry Andric     assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS));
541fe6060f1SDimitry Andric #endif
5420b57cec5SDimitry Andric     RPTracker.reset(*I, &LRS);
5430b57cec5SDimitry Andric   }
5440b57cec5SDimitry Andric 
5450b57cec5SDimitry Andric   for (;;) {
5460b57cec5SDimitry Andric     I = RPTracker.getNext();
5470b57cec5SDimitry Andric 
54881ad6265SDimitry Andric     if (Regions[CurRegion].first == I || NonDbgMI == I) {
5490b57cec5SDimitry Andric       LiveIns[CurRegion] = RPTracker.getLiveRegs();
5500b57cec5SDimitry Andric       RPTracker.clearMaxPressure();
5510b57cec5SDimitry Andric     }
5520b57cec5SDimitry Andric 
5530b57cec5SDimitry Andric     if (Regions[CurRegion].second == I) {
5540b57cec5SDimitry Andric       Pressure[CurRegion] = RPTracker.moveMaxPressure();
5550b57cec5SDimitry Andric       if (CurRegion-- == RegionIdx)
5560b57cec5SDimitry Andric         break;
5570b57cec5SDimitry Andric     }
5580b57cec5SDimitry Andric     RPTracker.advanceToNext();
5590b57cec5SDimitry Andric     RPTracker.advanceBeforeNext();
5600b57cec5SDimitry Andric   }
5610b57cec5SDimitry Andric 
5620b57cec5SDimitry Andric   if (OnlySucc) {
5630b57cec5SDimitry Andric     if (I != MBB->end()) {
5640b57cec5SDimitry Andric       RPTracker.advanceToNext();
5650b57cec5SDimitry Andric       RPTracker.advance(MBB->end());
5660b57cec5SDimitry Andric     }
5670b57cec5SDimitry Andric     RPTracker.advanceBeforeNext();
5680b57cec5SDimitry Andric     MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs();
5690b57cec5SDimitry Andric   }
5700b57cec5SDimitry Andric }
5710b57cec5SDimitry Andric 
5720b57cec5SDimitry Andric DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
5730b57cec5SDimitry Andric GCNScheduleDAGMILive::getBBLiveInMap() const {
5740b57cec5SDimitry Andric   assert(!Regions.empty());
5750b57cec5SDimitry Andric   std::vector<MachineInstr *> BBStarters;
5760b57cec5SDimitry Andric   BBStarters.reserve(Regions.size());
5770b57cec5SDimitry Andric   auto I = Regions.rbegin(), E = Regions.rend();
5780b57cec5SDimitry Andric   auto *BB = I->first->getParent();
5790b57cec5SDimitry Andric   do {
5800b57cec5SDimitry Andric     auto *MI = &*skipDebugInstructionsForward(I->first, I->second);
5810b57cec5SDimitry Andric     BBStarters.push_back(MI);
5820b57cec5SDimitry Andric     do {
5830b57cec5SDimitry Andric       ++I;
5840b57cec5SDimitry Andric     } while (I != E && I->first->getParent() == BB);
5850b57cec5SDimitry Andric   } while (I != E);
5860b57cec5SDimitry Andric   return getLiveRegMap(BBStarters, false /*After*/, *LIS);
5870b57cec5SDimitry Andric }
5880b57cec5SDimitry Andric 
5890b57cec5SDimitry Andric void GCNScheduleDAGMILive::finalizeSchedule() {
590972a253aSDimitry Andric   // Start actual scheduling here. This function is called by the base
591972a253aSDimitry Andric   // MachineScheduler after all regions have been recorded by
592972a253aSDimitry Andric   // GCNScheduleDAGMILive::schedule().
5930b57cec5SDimitry Andric   LiveIns.resize(Regions.size());
5940b57cec5SDimitry Andric   Pressure.resize(Regions.size());
5955ffd83dbSDimitry Andric   RescheduleRegions.resize(Regions.size());
596fe6060f1SDimitry Andric   RegionsWithHighRP.resize(Regions.size());
597*bdd1243dSDimitry Andric   RegionsWithExcessRP.resize(Regions.size());
59881ad6265SDimitry Andric   RegionsWithMinOcc.resize(Regions.size());
599*bdd1243dSDimitry Andric   RegionsWithIGLPInstrs.resize(Regions.size());
6005ffd83dbSDimitry Andric   RescheduleRegions.set();
601fe6060f1SDimitry Andric   RegionsWithHighRP.reset();
602*bdd1243dSDimitry Andric   RegionsWithExcessRP.reset();
60381ad6265SDimitry Andric   RegionsWithMinOcc.reset();
604*bdd1243dSDimitry Andric   RegionsWithIGLPInstrs.reset();
6050b57cec5SDimitry Andric 
606972a253aSDimitry Andric   runSchedStages();
607972a253aSDimitry Andric }
608972a253aSDimitry Andric 
609972a253aSDimitry Andric void GCNScheduleDAGMILive::runSchedStages() {
610972a253aSDimitry Andric   LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");
611972a253aSDimitry Andric 
6120b57cec5SDimitry Andric   if (!Regions.empty())
6130b57cec5SDimitry Andric     BBLiveInMap = getBBLiveInMap();
6140b57cec5SDimitry Andric 
615*bdd1243dSDimitry Andric   GCNSchedStrategy &S = static_cast<GCNSchedStrategy &>(*SchedImpl);
616*bdd1243dSDimitry Andric   while (S.advanceStage()) {
617*bdd1243dSDimitry Andric     auto Stage = createSchedStage(S.getCurrentStage());
618972a253aSDimitry Andric     if (!Stage->initGCNSchedStage())
6195ffd83dbSDimitry Andric       continue;
620972a253aSDimitry Andric 
621972a253aSDimitry Andric     for (auto Region : Regions) {
622972a253aSDimitry Andric       RegionBegin = Region.first;
623972a253aSDimitry Andric       RegionEnd = Region.second;
624972a253aSDimitry Andric       // Setup for scheduling the region and check whether it should be skipped.
625972a253aSDimitry Andric       if (!Stage->initGCNRegion()) {
626972a253aSDimitry Andric         Stage->advanceRegion();
627972a253aSDimitry Andric         exitRegion();
628972a253aSDimitry Andric         continue;
6295ffd83dbSDimitry Andric       }
6305ffd83dbSDimitry Andric 
631972a253aSDimitry Andric       ScheduleDAGMILive::schedule();
632972a253aSDimitry Andric       Stage->finalizeGCNRegion();
633972a253aSDimitry Andric     }
634972a253aSDimitry Andric 
635972a253aSDimitry Andric     Stage->finalizeGCNSchedStage();
636972a253aSDimitry Andric   }
637972a253aSDimitry Andric }
638972a253aSDimitry Andric 
639972a253aSDimitry Andric #ifndef NDEBUG
640972a253aSDimitry Andric raw_ostream &llvm::operator<<(raw_ostream &OS, const GCNSchedStageID &StageID) {
641972a253aSDimitry Andric   switch (StageID) {
642*bdd1243dSDimitry Andric   case GCNSchedStageID::OccInitialSchedule:
643*bdd1243dSDimitry Andric     OS << "Max Occupancy Initial Schedule";
6440b57cec5SDimitry Andric     break;
645*bdd1243dSDimitry Andric   case GCNSchedStageID::UnclusteredHighRPReschedule:
646*bdd1243dSDimitry Andric     OS << "Unclustered High Register Pressure Reschedule";
647972a253aSDimitry Andric     break;
648972a253aSDimitry Andric   case GCNSchedStageID::ClusteredLowOccupancyReschedule:
649972a253aSDimitry Andric     OS << "Clustered Low Occupancy Reschedule";
650972a253aSDimitry Andric     break;
651972a253aSDimitry Andric   case GCNSchedStageID::PreRARematerialize:
652972a253aSDimitry Andric     OS << "Pre-RA Rematerialize";
653972a253aSDimitry Andric     break;
654*bdd1243dSDimitry Andric   case GCNSchedStageID::ILPInitialSchedule:
655*bdd1243dSDimitry Andric     OS << "Max ILP Initial Schedule";
656*bdd1243dSDimitry Andric     break;
657972a253aSDimitry Andric   }
658*bdd1243dSDimitry Andric 
659972a253aSDimitry Andric   return OS;
660972a253aSDimitry Andric }
661972a253aSDimitry Andric #endif
662972a253aSDimitry Andric 
663972a253aSDimitry Andric GCNSchedStage::GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
664*bdd1243dSDimitry Andric     : DAG(DAG), S(static_cast<GCNSchedStrategy &>(*DAG.SchedImpl)), MF(DAG.MF),
665*bdd1243dSDimitry Andric       MFI(DAG.MFI), ST(DAG.ST), StageID(StageID) {}
666972a253aSDimitry Andric 
667972a253aSDimitry Andric bool GCNSchedStage::initGCNSchedStage() {
668972a253aSDimitry Andric   if (!DAG.LIS)
669972a253aSDimitry Andric     return false;
670972a253aSDimitry Andric 
671972a253aSDimitry Andric   LLVM_DEBUG(dbgs() << "Starting scheduling stage: " << StageID << "\n");
672972a253aSDimitry Andric   return true;
673972a253aSDimitry Andric }
674972a253aSDimitry Andric 
675*bdd1243dSDimitry Andric bool UnclusteredHighRPStage::initGCNSchedStage() {
676*bdd1243dSDimitry Andric   if (DisableUnclusterHighRP)
677*bdd1243dSDimitry Andric     return false;
678*bdd1243dSDimitry Andric 
679972a253aSDimitry Andric   if (!GCNSchedStage::initGCNSchedStage())
680972a253aSDimitry Andric     return false;
681972a253aSDimitry Andric 
682*bdd1243dSDimitry Andric   if (DAG.RegionsWithHighRP.none() && DAG.RegionsWithExcessRP.none())
683972a253aSDimitry Andric     return false;
684972a253aSDimitry Andric 
685972a253aSDimitry Andric   SavedMutations.swap(DAG.Mutations);
686*bdd1243dSDimitry Andric   DAG.addMutation(createIGroupLPDAGMutation());
687972a253aSDimitry Andric 
688*bdd1243dSDimitry Andric   InitialOccupancy = DAG.MinOccupancy;
689*bdd1243dSDimitry Andric   // Aggressivly try to reduce register pressure in the unclustered high RP
690*bdd1243dSDimitry Andric   // stage. Temporarily increase occupancy target in the region.
691*bdd1243dSDimitry Andric   S.SGPRLimitBias = S.HighRPSGPRBias;
692*bdd1243dSDimitry Andric   S.VGPRLimitBias = S.HighRPVGPRBias;
693*bdd1243dSDimitry Andric   if (MFI.getMaxWavesPerEU() > DAG.MinOccupancy)
694*bdd1243dSDimitry Andric     MFI.increaseOccupancy(MF, ++DAG.MinOccupancy);
695*bdd1243dSDimitry Andric 
696*bdd1243dSDimitry Andric   LLVM_DEBUG(
697*bdd1243dSDimitry Andric       dbgs()
698*bdd1243dSDimitry Andric       << "Retrying function scheduling without clustering. "
699*bdd1243dSDimitry Andric          "Aggressivly try to reduce register pressure to achieve occupancy "
700*bdd1243dSDimitry Andric       << DAG.MinOccupancy << ".\n");
701*bdd1243dSDimitry Andric 
702972a253aSDimitry Andric   return true;
703972a253aSDimitry Andric }
704972a253aSDimitry Andric 
705972a253aSDimitry Andric bool ClusteredLowOccStage::initGCNSchedStage() {
706972a253aSDimitry Andric   if (!GCNSchedStage::initGCNSchedStage())
707972a253aSDimitry Andric     return false;
708972a253aSDimitry Andric 
709972a253aSDimitry Andric   // Don't bother trying to improve ILP in lower RP regions if occupancy has not
710972a253aSDimitry Andric   // been dropped. All regions will have already been scheduled with the ideal
711972a253aSDimitry Andric   // occupancy targets.
712972a253aSDimitry Andric   if (DAG.StartingOccupancy <= DAG.MinOccupancy)
713972a253aSDimitry Andric     return false;
7140b57cec5SDimitry Andric 
7150b57cec5SDimitry Andric   LLVM_DEBUG(
716972a253aSDimitry Andric       dbgs() << "Retrying function scheduling with lowest recorded occupancy "
717972a253aSDimitry Andric              << DAG.MinOccupancy << ".\n");
718972a253aSDimitry Andric   return true;
7190b57cec5SDimitry Andric }
72081ad6265SDimitry Andric 
721972a253aSDimitry Andric bool PreRARematStage::initGCNSchedStage() {
722972a253aSDimitry Andric   if (!GCNSchedStage::initGCNSchedStage())
723972a253aSDimitry Andric     return false;
72481ad6265SDimitry Andric 
725972a253aSDimitry Andric   if (DAG.RegionsWithMinOcc.none() || DAG.Regions.size() == 1)
726972a253aSDimitry Andric     return false;
727972a253aSDimitry Andric 
72881ad6265SDimitry Andric   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
72981ad6265SDimitry Andric   // Check maximum occupancy
73081ad6265SDimitry Andric   if (ST.computeOccupancy(MF.getFunction(), MFI.getLDSSize()) ==
731972a253aSDimitry Andric       DAG.MinOccupancy)
732972a253aSDimitry Andric     return false;
73381ad6265SDimitry Andric 
73481ad6265SDimitry Andric   // FIXME: This pass will invalidate cached MBBLiveIns for regions
73581ad6265SDimitry Andric   // inbetween the defs and region we sinked the def to. Cached pressure
73681ad6265SDimitry Andric   // for regions where a def is sinked from will also be invalidated. Will
73781ad6265SDimitry Andric   // need to be fixed if there is another pass after this pass.
738*bdd1243dSDimitry Andric   assert(!S.hasNextStage());
73981ad6265SDimitry Andric 
74081ad6265SDimitry Andric   collectRematerializableInstructions();
74181ad6265SDimitry Andric   if (RematerializableInsts.empty() || !sinkTriviallyRematInsts(ST, TII))
742972a253aSDimitry Andric     return false;
74381ad6265SDimitry Andric 
74481ad6265SDimitry Andric   LLVM_DEBUG(
74581ad6265SDimitry Andric       dbgs() << "Retrying function scheduling with improved occupancy of "
746972a253aSDimitry Andric              << DAG.MinOccupancy << " from rematerializing\n");
747972a253aSDimitry Andric   return true;
7485ffd83dbSDimitry Andric }
7495ffd83dbSDimitry Andric 
750972a253aSDimitry Andric void GCNSchedStage::finalizeGCNSchedStage() {
751972a253aSDimitry Andric   DAG.finishBlock();
752972a253aSDimitry Andric   LLVM_DEBUG(dbgs() << "Ending scheduling stage: " << StageID << "\n");
753e8d8bef9SDimitry Andric }
7545ffd83dbSDimitry Andric 
755*bdd1243dSDimitry Andric void UnclusteredHighRPStage::finalizeGCNSchedStage() {
756972a253aSDimitry Andric   SavedMutations.swap(DAG.Mutations);
757*bdd1243dSDimitry Andric   S.SGPRLimitBias = S.VGPRLimitBias = 0;
758*bdd1243dSDimitry Andric   if (DAG.MinOccupancy > InitialOccupancy) {
759*bdd1243dSDimitry Andric     for (unsigned IDX = 0; IDX < DAG.Pressure.size(); ++IDX)
760*bdd1243dSDimitry Andric       DAG.RegionsWithMinOcc[IDX] =
761*bdd1243dSDimitry Andric           DAG.Pressure[IDX].getOccupancy(DAG.ST) == DAG.MinOccupancy;
762*bdd1243dSDimitry Andric 
763*bdd1243dSDimitry Andric     LLVM_DEBUG(dbgs() << StageID
764*bdd1243dSDimitry Andric                       << " stage successfully increased occupancy to "
765*bdd1243dSDimitry Andric                       << DAG.MinOccupancy << '\n');
766*bdd1243dSDimitry Andric   }
7670b57cec5SDimitry Andric 
768972a253aSDimitry Andric   GCNSchedStage::finalizeGCNSchedStage();
7690b57cec5SDimitry Andric }
7700b57cec5SDimitry Andric 
771972a253aSDimitry Andric bool GCNSchedStage::initGCNRegion() {
772972a253aSDimitry Andric   // Check whether this new region is also a new block.
773972a253aSDimitry Andric   if (DAG.RegionBegin->getParent() != CurrentMBB)
774972a253aSDimitry Andric     setupNewBlock();
775972a253aSDimitry Andric 
776972a253aSDimitry Andric   unsigned NumRegionInstrs = std::distance(DAG.begin(), DAG.end());
777972a253aSDimitry Andric   DAG.enterRegion(CurrentMBB, DAG.begin(), DAG.end(), NumRegionInstrs);
7780b57cec5SDimitry Andric 
7790b57cec5SDimitry Andric   // Skip empty scheduling regions (0 or 1 schedulable instructions).
780972a253aSDimitry Andric   if (DAG.begin() == DAG.end() || DAG.begin() == std::prev(DAG.end()))
781972a253aSDimitry Andric     return false;
7820b57cec5SDimitry Andric 
7830b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
784972a253aSDimitry Andric   LLVM_DEBUG(dbgs() << MF.getName() << ":" << printMBBReference(*CurrentMBB)
785972a253aSDimitry Andric                     << " " << CurrentMBB->getName()
786972a253aSDimitry Andric                     << "\n  From: " << *DAG.begin() << "    To: ";
787972a253aSDimitry Andric              if (DAG.RegionEnd != CurrentMBB->end()) dbgs() << *DAG.RegionEnd;
7880b57cec5SDimitry Andric              else dbgs() << "End";
7890b57cec5SDimitry Andric              dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
7900b57cec5SDimitry Andric 
791972a253aSDimitry Andric   // Save original instruction order before scheduling for possible revert.
792972a253aSDimitry Andric   Unsched.clear();
793972a253aSDimitry Andric   Unsched.reserve(DAG.NumRegionInstrs);
794*bdd1243dSDimitry Andric   if (StageID == GCNSchedStageID::OccInitialSchedule ||
795*bdd1243dSDimitry Andric       StageID == GCNSchedStageID::ILPInitialSchedule) {
796*bdd1243dSDimitry Andric     for (auto &I : DAG) {
797*bdd1243dSDimitry Andric       Unsched.push_back(&I);
798*bdd1243dSDimitry Andric       if (I.getOpcode() == AMDGPU::SCHED_GROUP_BARRIER ||
799*bdd1243dSDimitry Andric           I.getOpcode() == AMDGPU::IGLP_OPT)
800*bdd1243dSDimitry Andric         DAG.RegionsWithIGLPInstrs[RegionIdx] = true;
801*bdd1243dSDimitry Andric     }
802*bdd1243dSDimitry Andric   } else {
803972a253aSDimitry Andric     for (auto &I : DAG)
804972a253aSDimitry Andric       Unsched.push_back(&I);
805*bdd1243dSDimitry Andric   }
8060b57cec5SDimitry Andric 
807972a253aSDimitry Andric   PressureBefore = DAG.Pressure[RegionIdx];
8080b57cec5SDimitry Andric 
809972a253aSDimitry Andric   LLVM_DEBUG(
810*bdd1243dSDimitry Andric       dbgs() << "Pressure before scheduling:\nRegion live-ins:"
811*bdd1243dSDimitry Andric              << print(DAG.LiveIns[RegionIdx], DAG.MRI)
812*bdd1243dSDimitry Andric              << "Region live-in pressure:  "
813*bdd1243dSDimitry Andric              << print(llvm::getRegPressure(DAG.MRI, DAG.LiveIns[RegionIdx]))
814*bdd1243dSDimitry Andric              << "Region register pressure: " << print(PressureBefore));
815972a253aSDimitry Andric 
816*bdd1243dSDimitry Andric   S.HasHighPressure = false;
817*bdd1243dSDimitry Andric   S.KnownExcessRP = isRegionWithExcessRP();
818*bdd1243dSDimitry Andric 
819*bdd1243dSDimitry Andric   if (DAG.RegionsWithIGLPInstrs[RegionIdx] &&
820*bdd1243dSDimitry Andric       StageID != GCNSchedStageID::UnclusteredHighRPReschedule) {
821*bdd1243dSDimitry Andric     SavedMutations.clear();
822*bdd1243dSDimitry Andric     SavedMutations.swap(DAG.Mutations);
823*bdd1243dSDimitry Andric     DAG.addMutation(createIGroupLPDAGMutation());
824*bdd1243dSDimitry Andric   }
825972a253aSDimitry Andric 
826972a253aSDimitry Andric   return true;
8270b57cec5SDimitry Andric }
82881ad6265SDimitry Andric 
829*bdd1243dSDimitry Andric bool UnclusteredHighRPStage::initGCNRegion() {
830*bdd1243dSDimitry Andric   // Only reschedule regions with the minimum occupancy or regions that may have
831*bdd1243dSDimitry Andric   // spilling (excess register pressure).
832*bdd1243dSDimitry Andric   if ((!DAG.RegionsWithMinOcc[RegionIdx] ||
833*bdd1243dSDimitry Andric        DAG.MinOccupancy <= InitialOccupancy) &&
834*bdd1243dSDimitry Andric       !DAG.RegionsWithExcessRP[RegionIdx])
835972a253aSDimitry Andric     return false;
836972a253aSDimitry Andric 
837972a253aSDimitry Andric   return GCNSchedStage::initGCNRegion();
838972a253aSDimitry Andric }
839972a253aSDimitry Andric 
840972a253aSDimitry Andric bool ClusteredLowOccStage::initGCNRegion() {
841*bdd1243dSDimitry Andric   // We may need to reschedule this region if it wasn't rescheduled in the last
842*bdd1243dSDimitry Andric   // stage, or if we found it was testing critical register pressure limits in
843*bdd1243dSDimitry Andric   // the unclustered reschedule stage. The later is because we may not have been
844*bdd1243dSDimitry Andric   // able to raise the min occupancy in the previous stage so the region may be
845*bdd1243dSDimitry Andric   // overly constrained even if it was already rescheduled.
846*bdd1243dSDimitry Andric   if (!DAG.RegionsWithHighRP[RegionIdx])
847972a253aSDimitry Andric     return false;
848972a253aSDimitry Andric 
849972a253aSDimitry Andric   return GCNSchedStage::initGCNRegion();
850972a253aSDimitry Andric }
851972a253aSDimitry Andric 
852972a253aSDimitry Andric bool PreRARematStage::initGCNRegion() {
853972a253aSDimitry Andric   if (!DAG.RescheduleRegions[RegionIdx])
854972a253aSDimitry Andric     return false;
855972a253aSDimitry Andric 
856972a253aSDimitry Andric   return GCNSchedStage::initGCNRegion();
857972a253aSDimitry Andric }
858972a253aSDimitry Andric 
859972a253aSDimitry Andric void GCNSchedStage::setupNewBlock() {
860972a253aSDimitry Andric   if (CurrentMBB)
861972a253aSDimitry Andric     DAG.finishBlock();
862972a253aSDimitry Andric 
863972a253aSDimitry Andric   CurrentMBB = DAG.RegionBegin->getParent();
864972a253aSDimitry Andric   DAG.startBlock(CurrentMBB);
865972a253aSDimitry Andric   // Get real RP for the region if it hasn't be calculated before. After the
866972a253aSDimitry Andric   // initial schedule stage real RP will be collected after scheduling.
867*bdd1243dSDimitry Andric   if (StageID == GCNSchedStageID::OccInitialSchedule)
868972a253aSDimitry Andric     DAG.computeBlockPressure(RegionIdx, CurrentMBB);
869972a253aSDimitry Andric }
870972a253aSDimitry Andric 
871972a253aSDimitry Andric void GCNSchedStage::finalizeGCNRegion() {
872*bdd1243dSDimitry Andric   DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);
873972a253aSDimitry Andric   DAG.RescheduleRegions[RegionIdx] = false;
874*bdd1243dSDimitry Andric   if (S.HasHighPressure)
875972a253aSDimitry Andric     DAG.RegionsWithHighRP[RegionIdx] = true;
876972a253aSDimitry Andric 
877972a253aSDimitry Andric   // Revert scheduling if we have dropped occupancy or there is some other
878972a253aSDimitry Andric   // reason that the original schedule is better.
879972a253aSDimitry Andric   checkScheduling();
880972a253aSDimitry Andric 
881*bdd1243dSDimitry Andric   if (DAG.RegionsWithIGLPInstrs[RegionIdx] &&
882*bdd1243dSDimitry Andric       StageID != GCNSchedStageID::UnclusteredHighRPReschedule)
883*bdd1243dSDimitry Andric     SavedMutations.swap(DAG.Mutations);
884*bdd1243dSDimitry Andric 
885972a253aSDimitry Andric   DAG.exitRegion();
886972a253aSDimitry Andric   RegionIdx++;
887972a253aSDimitry Andric }
888972a253aSDimitry Andric 
889972a253aSDimitry Andric void GCNSchedStage::checkScheduling() {
890972a253aSDimitry Andric   // Check the results of scheduling.
891972a253aSDimitry Andric   PressureAfter = DAG.getRealRegPressure(RegionIdx);
892*bdd1243dSDimitry Andric   LLVM_DEBUG(dbgs() << "Pressure after scheduling: " << print(PressureAfter));
893*bdd1243dSDimitry Andric   LLVM_DEBUG(dbgs() << "Region: " << RegionIdx << ".\n");
894972a253aSDimitry Andric 
895972a253aSDimitry Andric   if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit &&
896972a253aSDimitry Andric       PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) <= S.VGPRCriticalLimit) {
897972a253aSDimitry Andric     DAG.Pressure[RegionIdx] = PressureAfter;
898972a253aSDimitry Andric     DAG.RegionsWithMinOcc[RegionIdx] =
899972a253aSDimitry Andric         PressureAfter.getOccupancy(ST) == DAG.MinOccupancy;
900972a253aSDimitry Andric 
901972a253aSDimitry Andric     // Early out if we have achieve the occupancy target.
902972a253aSDimitry Andric     LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n");
903972a253aSDimitry Andric     return;
904972a253aSDimitry Andric   }
905972a253aSDimitry Andric 
906*bdd1243dSDimitry Andric   unsigned TargetOccupancy =
907*bdd1243dSDimitry Andric       std::min(S.getTargetOccupancy(), ST.getOccupancyWithLocalMemSize(MF));
908972a253aSDimitry Andric   unsigned WavesAfter =
909*bdd1243dSDimitry Andric       std::min(TargetOccupancy, PressureAfter.getOccupancy(ST));
910972a253aSDimitry Andric   unsigned WavesBefore =
911*bdd1243dSDimitry Andric       std::min(TargetOccupancy, PressureBefore.getOccupancy(ST));
912972a253aSDimitry Andric   LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore
913972a253aSDimitry Andric                     << ", after " << WavesAfter << ".\n");
914972a253aSDimitry Andric 
915972a253aSDimitry Andric   // We may not be able to keep the current target occupancy because of the just
916972a253aSDimitry Andric   // scheduled region. We might still be able to revert scheduling if the
917972a253aSDimitry Andric   // occupancy before was higher, or if the current schedule has register
918972a253aSDimitry Andric   // pressure higher than the excess limits which could lead to more spilling.
919972a253aSDimitry Andric   unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);
920972a253aSDimitry Andric 
921972a253aSDimitry Andric   // Allow memory bound functions to drop to 4 waves if not limited by an
922972a253aSDimitry Andric   // attribute.
923972a253aSDimitry Andric   if (WavesAfter < WavesBefore && WavesAfter < DAG.MinOccupancy &&
924972a253aSDimitry Andric       WavesAfter >= MFI.getMinAllowedOccupancy()) {
925972a253aSDimitry Andric     LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "
926972a253aSDimitry Andric                       << MFI.getMinAllowedOccupancy() << " waves\n");
927972a253aSDimitry Andric     NewOccupancy = WavesAfter;
928972a253aSDimitry Andric   }
929972a253aSDimitry Andric 
930972a253aSDimitry Andric   if (NewOccupancy < DAG.MinOccupancy) {
931972a253aSDimitry Andric     DAG.MinOccupancy = NewOccupancy;
932972a253aSDimitry Andric     MFI.limitOccupancy(DAG.MinOccupancy);
933972a253aSDimitry Andric     DAG.RegionsWithMinOcc.reset();
934972a253aSDimitry Andric     LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
935972a253aSDimitry Andric                       << DAG.MinOccupancy << ".\n");
936972a253aSDimitry Andric   }
937972a253aSDimitry Andric 
938972a253aSDimitry Andric   unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
939972a253aSDimitry Andric   unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
940972a253aSDimitry Andric   if (PressureAfter.getVGPRNum(false) > MaxVGPRs ||
941972a253aSDimitry Andric       PressureAfter.getAGPRNum() > MaxVGPRs ||
942972a253aSDimitry Andric       PressureAfter.getSGPRNum() > MaxSGPRs) {
943972a253aSDimitry Andric     DAG.RescheduleRegions[RegionIdx] = true;
944972a253aSDimitry Andric     DAG.RegionsWithHighRP[RegionIdx] = true;
945*bdd1243dSDimitry Andric     DAG.RegionsWithExcessRP[RegionIdx] = true;
946972a253aSDimitry Andric   }
947972a253aSDimitry Andric 
948972a253aSDimitry Andric   // Revert if this region's schedule would cause a drop in occupancy or
949972a253aSDimitry Andric   // spilling.
950972a253aSDimitry Andric   if (shouldRevertScheduling(WavesAfter)) {
951972a253aSDimitry Andric     revertScheduling();
952972a253aSDimitry Andric   } else {
953972a253aSDimitry Andric     DAG.Pressure[RegionIdx] = PressureAfter;
954972a253aSDimitry Andric     DAG.RegionsWithMinOcc[RegionIdx] =
955972a253aSDimitry Andric         PressureAfter.getOccupancy(ST) == DAG.MinOccupancy;
956972a253aSDimitry Andric   }
957972a253aSDimitry Andric }
958972a253aSDimitry Andric 
959*bdd1243dSDimitry Andric unsigned
960*bdd1243dSDimitry Andric GCNSchedStage::computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle,
961*bdd1243dSDimitry Andric                                       DenseMap<unsigned, unsigned> &ReadyCycles,
962*bdd1243dSDimitry Andric                                       const TargetSchedModel &SM) {
963*bdd1243dSDimitry Andric   unsigned ReadyCycle = CurrCycle;
964*bdd1243dSDimitry Andric   for (auto &D : SU.Preds) {
965*bdd1243dSDimitry Andric     if (D.isAssignedRegDep()) {
966*bdd1243dSDimitry Andric       MachineInstr *DefMI = D.getSUnit()->getInstr();
967*bdd1243dSDimitry Andric       unsigned Latency = SM.computeInstrLatency(DefMI);
968*bdd1243dSDimitry Andric       unsigned DefReady = ReadyCycles[DAG.getSUnit(DefMI)->NodeNum];
969*bdd1243dSDimitry Andric       ReadyCycle = std::max(ReadyCycle, DefReady + Latency);
970*bdd1243dSDimitry Andric     }
971*bdd1243dSDimitry Andric   }
972*bdd1243dSDimitry Andric   ReadyCycles[SU.NodeNum] = ReadyCycle;
973*bdd1243dSDimitry Andric   return ReadyCycle;
974*bdd1243dSDimitry Andric }
975*bdd1243dSDimitry Andric 
976*bdd1243dSDimitry Andric #ifndef NDEBUG
977*bdd1243dSDimitry Andric struct EarlierIssuingCycle {
978*bdd1243dSDimitry Andric   bool operator()(std::pair<MachineInstr *, unsigned> A,
979*bdd1243dSDimitry Andric                   std::pair<MachineInstr *, unsigned> B) const {
980*bdd1243dSDimitry Andric     return A.second < B.second;
981*bdd1243dSDimitry Andric   }
982*bdd1243dSDimitry Andric };
983*bdd1243dSDimitry Andric 
984*bdd1243dSDimitry Andric static void printScheduleModel(std::set<std::pair<MachineInstr *, unsigned>,
985*bdd1243dSDimitry Andric                                         EarlierIssuingCycle> &ReadyCycles) {
986*bdd1243dSDimitry Andric   if (ReadyCycles.empty())
987*bdd1243dSDimitry Andric     return;
988*bdd1243dSDimitry Andric   unsigned BBNum = ReadyCycles.begin()->first->getParent()->getNumber();
989*bdd1243dSDimitry Andric   dbgs() << "\n################## Schedule time ReadyCycles for MBB : " << BBNum
990*bdd1243dSDimitry Andric          << " ##################\n# Cycle #\t\t\tInstruction          "
991*bdd1243dSDimitry Andric             "             "
992*bdd1243dSDimitry Andric             "                            \n";
993*bdd1243dSDimitry Andric   unsigned IPrev = 1;
994*bdd1243dSDimitry Andric   for (auto &I : ReadyCycles) {
995*bdd1243dSDimitry Andric     if (I.second > IPrev + 1)
996*bdd1243dSDimitry Andric       dbgs() << "****************************** BUBBLE OF " << I.second - IPrev
997*bdd1243dSDimitry Andric              << " CYCLES DETECTED ******************************\n\n";
998*bdd1243dSDimitry Andric     dbgs() << "[ " << I.second << " ]  :  " << *I.first << "\n";
999*bdd1243dSDimitry Andric     IPrev = I.second;
1000*bdd1243dSDimitry Andric   }
1001*bdd1243dSDimitry Andric }
1002*bdd1243dSDimitry Andric #endif
1003*bdd1243dSDimitry Andric 
1004*bdd1243dSDimitry Andric ScheduleMetrics
1005*bdd1243dSDimitry Andric GCNSchedStage::getScheduleMetrics(const std::vector<SUnit> &InputSchedule) {
1006*bdd1243dSDimitry Andric #ifndef NDEBUG
1007*bdd1243dSDimitry Andric   std::set<std::pair<MachineInstr *, unsigned>, EarlierIssuingCycle>
1008*bdd1243dSDimitry Andric       ReadyCyclesSorted;
1009*bdd1243dSDimitry Andric #endif
1010*bdd1243dSDimitry Andric   const TargetSchedModel &SM = ST.getInstrInfo()->getSchedModel();
1011*bdd1243dSDimitry Andric   unsigned SumBubbles = 0;
1012*bdd1243dSDimitry Andric   DenseMap<unsigned, unsigned> ReadyCycles;
1013*bdd1243dSDimitry Andric   unsigned CurrCycle = 0;
1014*bdd1243dSDimitry Andric   for (auto &SU : InputSchedule) {
1015*bdd1243dSDimitry Andric     unsigned ReadyCycle =
1016*bdd1243dSDimitry Andric         computeSUnitReadyCycle(SU, CurrCycle, ReadyCycles, SM);
1017*bdd1243dSDimitry Andric     SumBubbles += ReadyCycle - CurrCycle;
1018*bdd1243dSDimitry Andric #ifndef NDEBUG
1019*bdd1243dSDimitry Andric     ReadyCyclesSorted.insert(std::make_pair(SU.getInstr(), ReadyCycle));
1020*bdd1243dSDimitry Andric #endif
1021*bdd1243dSDimitry Andric     CurrCycle = ++ReadyCycle;
1022*bdd1243dSDimitry Andric   }
1023*bdd1243dSDimitry Andric #ifndef NDEBUG
1024*bdd1243dSDimitry Andric   LLVM_DEBUG(
1025*bdd1243dSDimitry Andric       printScheduleModel(ReadyCyclesSorted);
1026*bdd1243dSDimitry Andric       dbgs() << "\n\t"
1027*bdd1243dSDimitry Andric              << "Metric: "
1028*bdd1243dSDimitry Andric              << (SumBubbles
1029*bdd1243dSDimitry Andric                      ? (SumBubbles * ScheduleMetrics::ScaleFactor) / CurrCycle
1030*bdd1243dSDimitry Andric                      : 1)
1031*bdd1243dSDimitry Andric              << "\n\n");
1032*bdd1243dSDimitry Andric #endif
1033*bdd1243dSDimitry Andric 
1034*bdd1243dSDimitry Andric   return ScheduleMetrics(CurrCycle, SumBubbles);
1035*bdd1243dSDimitry Andric }
1036*bdd1243dSDimitry Andric 
1037*bdd1243dSDimitry Andric ScheduleMetrics
1038*bdd1243dSDimitry Andric GCNSchedStage::getScheduleMetrics(const GCNScheduleDAGMILive &DAG) {
1039*bdd1243dSDimitry Andric #ifndef NDEBUG
1040*bdd1243dSDimitry Andric   std::set<std::pair<MachineInstr *, unsigned>, EarlierIssuingCycle>
1041*bdd1243dSDimitry Andric       ReadyCyclesSorted;
1042*bdd1243dSDimitry Andric #endif
1043*bdd1243dSDimitry Andric   const TargetSchedModel &SM = ST.getInstrInfo()->getSchedModel();
1044*bdd1243dSDimitry Andric   unsigned SumBubbles = 0;
1045*bdd1243dSDimitry Andric   DenseMap<unsigned, unsigned> ReadyCycles;
1046*bdd1243dSDimitry Andric   unsigned CurrCycle = 0;
1047*bdd1243dSDimitry Andric   for (auto &MI : DAG) {
1048*bdd1243dSDimitry Andric     SUnit *SU = DAG.getSUnit(&MI);
1049*bdd1243dSDimitry Andric     if (!SU)
1050*bdd1243dSDimitry Andric       continue;
1051*bdd1243dSDimitry Andric     unsigned ReadyCycle =
1052*bdd1243dSDimitry Andric         computeSUnitReadyCycle(*SU, CurrCycle, ReadyCycles, SM);
1053*bdd1243dSDimitry Andric     SumBubbles += ReadyCycle - CurrCycle;
1054*bdd1243dSDimitry Andric #ifndef NDEBUG
1055*bdd1243dSDimitry Andric     ReadyCyclesSorted.insert(std::make_pair(SU->getInstr(), ReadyCycle));
1056*bdd1243dSDimitry Andric #endif
1057*bdd1243dSDimitry Andric     CurrCycle = ++ReadyCycle;
1058*bdd1243dSDimitry Andric   }
1059*bdd1243dSDimitry Andric #ifndef NDEBUG
1060*bdd1243dSDimitry Andric   LLVM_DEBUG(
1061*bdd1243dSDimitry Andric       printScheduleModel(ReadyCyclesSorted);
1062*bdd1243dSDimitry Andric       dbgs() << "\n\t"
1063*bdd1243dSDimitry Andric              << "Metric: "
1064*bdd1243dSDimitry Andric              << (SumBubbles
1065*bdd1243dSDimitry Andric                      ? (SumBubbles * ScheduleMetrics::ScaleFactor) / CurrCycle
1066*bdd1243dSDimitry Andric                      : 1)
1067*bdd1243dSDimitry Andric              << "\n\n");
1068*bdd1243dSDimitry Andric #endif
1069*bdd1243dSDimitry Andric 
1070*bdd1243dSDimitry Andric   return ScheduleMetrics(CurrCycle, SumBubbles);
1071*bdd1243dSDimitry Andric }
1072*bdd1243dSDimitry Andric 
1073972a253aSDimitry Andric bool GCNSchedStage::shouldRevertScheduling(unsigned WavesAfter) {
1074972a253aSDimitry Andric   if (WavesAfter < DAG.MinOccupancy)
1075972a253aSDimitry Andric     return true;
1076972a253aSDimitry Andric 
1077972a253aSDimitry Andric   return false;
1078972a253aSDimitry Andric }
1079972a253aSDimitry Andric 
1080*bdd1243dSDimitry Andric bool OccInitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
1081*bdd1243dSDimitry Andric   if (PressureAfter == PressureBefore)
1082*bdd1243dSDimitry Andric     return false;
1083*bdd1243dSDimitry Andric 
1084972a253aSDimitry Andric   if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
1085972a253aSDimitry Andric     return true;
1086972a253aSDimitry Andric 
1087972a253aSDimitry Andric   if (mayCauseSpilling(WavesAfter))
1088972a253aSDimitry Andric     return true;
1089972a253aSDimitry Andric 
1090972a253aSDimitry Andric   return false;
1091972a253aSDimitry Andric }
1092972a253aSDimitry Andric 
1093*bdd1243dSDimitry Andric bool UnclusteredHighRPStage::shouldRevertScheduling(unsigned WavesAfter) {
1094*bdd1243dSDimitry Andric   // If RP is not reduced in the unclustred reschedule stage, revert to the
1095*bdd1243dSDimitry Andric   // old schedule.
1096*bdd1243dSDimitry Andric   if ((WavesAfter <= PressureBefore.getOccupancy(ST) &&
1097*bdd1243dSDimitry Andric        mayCauseSpilling(WavesAfter)) ||
1098*bdd1243dSDimitry Andric       GCNSchedStage::shouldRevertScheduling(WavesAfter)) {
1099972a253aSDimitry Andric     LLVM_DEBUG(dbgs() << "Unclustered reschedule did not help.\n");
1100972a253aSDimitry Andric     return true;
1101972a253aSDimitry Andric   }
1102972a253aSDimitry Andric 
1103*bdd1243dSDimitry Andric   LLVM_DEBUG(
1104*bdd1243dSDimitry Andric       dbgs()
1105*bdd1243dSDimitry Andric       << "\n\t      *** In shouldRevertScheduling ***\n"
1106*bdd1243dSDimitry Andric       << "      *********** BEFORE UnclusteredHighRPStage ***********\n");
1107*bdd1243dSDimitry Andric   ScheduleMetrics MBefore =
1108*bdd1243dSDimitry Andric       getScheduleMetrics(DAG.SUnits);
1109*bdd1243dSDimitry Andric   LLVM_DEBUG(
1110*bdd1243dSDimitry Andric       dbgs()
1111*bdd1243dSDimitry Andric       << "\n      *********** AFTER UnclusteredHighRPStage ***********\n");
1112*bdd1243dSDimitry Andric   ScheduleMetrics MAfter = getScheduleMetrics(DAG);
1113*bdd1243dSDimitry Andric   unsigned OldMetric = MBefore.getMetric();
1114*bdd1243dSDimitry Andric   unsigned NewMetric = MAfter.getMetric();
1115*bdd1243dSDimitry Andric   unsigned WavesBefore =
1116*bdd1243dSDimitry Andric       std::min(S.getTargetOccupancy(), PressureBefore.getOccupancy(ST));
1117*bdd1243dSDimitry Andric   unsigned Profit =
1118*bdd1243dSDimitry Andric       ((WavesAfter * ScheduleMetrics::ScaleFactor) / WavesBefore *
1119*bdd1243dSDimitry Andric        ((OldMetric + ScheduleMetricBias) * ScheduleMetrics::ScaleFactor) /
1120*bdd1243dSDimitry Andric        NewMetric) /
1121*bdd1243dSDimitry Andric       ScheduleMetrics::ScaleFactor;
1122*bdd1243dSDimitry Andric   LLVM_DEBUG(dbgs() << "\tMetric before " << MBefore << "\tMetric after "
1123*bdd1243dSDimitry Andric                     << MAfter << "Profit: " << Profit << "\n");
1124*bdd1243dSDimitry Andric   return Profit < ScheduleMetrics::ScaleFactor;
1125972a253aSDimitry Andric }
1126972a253aSDimitry Andric 
1127972a253aSDimitry Andric bool ClusteredLowOccStage::shouldRevertScheduling(unsigned WavesAfter) {
1128*bdd1243dSDimitry Andric   if (PressureAfter == PressureBefore)
1129*bdd1243dSDimitry Andric     return false;
1130*bdd1243dSDimitry Andric 
1131972a253aSDimitry Andric   if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
1132972a253aSDimitry Andric     return true;
1133972a253aSDimitry Andric 
1134972a253aSDimitry Andric   if (mayCauseSpilling(WavesAfter))
1135972a253aSDimitry Andric     return true;
1136972a253aSDimitry Andric 
1137972a253aSDimitry Andric   return false;
1138972a253aSDimitry Andric }
1139972a253aSDimitry Andric 
1140972a253aSDimitry Andric bool PreRARematStage::shouldRevertScheduling(unsigned WavesAfter) {
1141972a253aSDimitry Andric   if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
1142972a253aSDimitry Andric     return true;
1143972a253aSDimitry Andric 
1144972a253aSDimitry Andric   if (mayCauseSpilling(WavesAfter))
1145972a253aSDimitry Andric     return true;
1146972a253aSDimitry Andric 
1147972a253aSDimitry Andric   return false;
1148972a253aSDimitry Andric }
1149972a253aSDimitry Andric 
1150*bdd1243dSDimitry Andric bool ILPInitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
1151*bdd1243dSDimitry Andric   if (mayCauseSpilling(WavesAfter))
1152*bdd1243dSDimitry Andric     return true;
1153*bdd1243dSDimitry Andric 
1154*bdd1243dSDimitry Andric   return false;
1155*bdd1243dSDimitry Andric }
1156*bdd1243dSDimitry Andric 
1157972a253aSDimitry Andric bool GCNSchedStage::mayCauseSpilling(unsigned WavesAfter) {
1158972a253aSDimitry Andric   if (WavesAfter <= MFI.getMinWavesPerEU() &&
1159972a253aSDimitry Andric       !PressureAfter.less(ST, PressureBefore) &&
1160*bdd1243dSDimitry Andric       isRegionWithExcessRP()) {
1161972a253aSDimitry Andric     LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
1162972a253aSDimitry Andric     return true;
1163972a253aSDimitry Andric   }
1164972a253aSDimitry Andric 
1165972a253aSDimitry Andric   return false;
1166972a253aSDimitry Andric }
1167972a253aSDimitry Andric 
1168972a253aSDimitry Andric void GCNSchedStage::revertScheduling() {
1169972a253aSDimitry Andric   DAG.RegionsWithMinOcc[RegionIdx] =
1170972a253aSDimitry Andric       PressureBefore.getOccupancy(ST) == DAG.MinOccupancy;
1171972a253aSDimitry Andric   LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
1172972a253aSDimitry Andric   DAG.RescheduleRegions[RegionIdx] =
1173*bdd1243dSDimitry Andric       S.hasNextStage() &&
1174*bdd1243dSDimitry Andric       S.getNextStage() != GCNSchedStageID::UnclusteredHighRPReschedule;
1175972a253aSDimitry Andric   DAG.RegionEnd = DAG.RegionBegin;
1176972a253aSDimitry Andric   int SkippedDebugInstr = 0;
1177972a253aSDimitry Andric   for (MachineInstr *MI : Unsched) {
1178972a253aSDimitry Andric     if (MI->isDebugInstr()) {
1179972a253aSDimitry Andric       ++SkippedDebugInstr;
1180972a253aSDimitry Andric       continue;
1181972a253aSDimitry Andric     }
1182972a253aSDimitry Andric 
1183972a253aSDimitry Andric     if (MI->getIterator() != DAG.RegionEnd) {
1184972a253aSDimitry Andric       DAG.BB->remove(MI);
1185972a253aSDimitry Andric       DAG.BB->insert(DAG.RegionEnd, MI);
1186972a253aSDimitry Andric       if (!MI->isDebugInstr())
1187972a253aSDimitry Andric         DAG.LIS->handleMove(*MI, true);
1188972a253aSDimitry Andric     }
1189972a253aSDimitry Andric 
1190972a253aSDimitry Andric     // Reset read-undef flags and update them later.
1191972a253aSDimitry Andric     for (auto &Op : MI->operands())
1192972a253aSDimitry Andric       if (Op.isReg() && Op.isDef())
1193972a253aSDimitry Andric         Op.setIsUndef(false);
1194972a253aSDimitry Andric     RegisterOperands RegOpers;
1195972a253aSDimitry Andric     RegOpers.collect(*MI, *DAG.TRI, DAG.MRI, DAG.ShouldTrackLaneMasks, false);
1196972a253aSDimitry Andric     if (!MI->isDebugInstr()) {
1197972a253aSDimitry Andric       if (DAG.ShouldTrackLaneMasks) {
1198972a253aSDimitry Andric         // Adjust liveness and add missing dead+read-undef flags.
1199972a253aSDimitry Andric         SlotIndex SlotIdx = DAG.LIS->getInstructionIndex(*MI).getRegSlot();
1200972a253aSDimitry Andric         RegOpers.adjustLaneLiveness(*DAG.LIS, DAG.MRI, SlotIdx, MI);
1201972a253aSDimitry Andric       } else {
1202972a253aSDimitry Andric         // Adjust for missing dead-def flags.
1203972a253aSDimitry Andric         RegOpers.detectDeadDefs(*MI, *DAG.LIS);
1204972a253aSDimitry Andric       }
1205972a253aSDimitry Andric     }
1206972a253aSDimitry Andric     DAG.RegionEnd = MI->getIterator();
1207972a253aSDimitry Andric     ++DAG.RegionEnd;
1208972a253aSDimitry Andric     LLVM_DEBUG(dbgs() << "Scheduling " << *MI);
1209972a253aSDimitry Andric   }
1210972a253aSDimitry Andric 
1211972a253aSDimitry Andric   // After reverting schedule, debug instrs will now be at the end of the block
1212972a253aSDimitry Andric   // and RegionEnd will point to the first debug instr. Increment RegionEnd
1213972a253aSDimitry Andric   // pass debug instrs to the actual end of the scheduling region.
1214972a253aSDimitry Andric   while (SkippedDebugInstr-- > 0)
1215972a253aSDimitry Andric     ++DAG.RegionEnd;
1216972a253aSDimitry Andric 
1217972a253aSDimitry Andric   // If Unsched.front() instruction is a debug instruction, this will actually
1218972a253aSDimitry Andric   // shrink the region since we moved all debug instructions to the end of the
1219972a253aSDimitry Andric   // block. Find the first instruction that is not a debug instruction.
1220972a253aSDimitry Andric   DAG.RegionBegin = Unsched.front()->getIterator();
1221972a253aSDimitry Andric   if (DAG.RegionBegin->isDebugInstr()) {
1222972a253aSDimitry Andric     for (MachineInstr *MI : Unsched) {
1223972a253aSDimitry Andric       if (MI->isDebugInstr())
1224972a253aSDimitry Andric         continue;
1225972a253aSDimitry Andric       DAG.RegionBegin = MI->getIterator();
1226972a253aSDimitry Andric       break;
1227972a253aSDimitry Andric     }
1228972a253aSDimitry Andric   }
1229972a253aSDimitry Andric 
1230972a253aSDimitry Andric   // Then move the debug instructions back into their correct place and set
1231972a253aSDimitry Andric   // RegionBegin and RegionEnd if needed.
1232972a253aSDimitry Andric   DAG.placeDebugValues();
1233972a253aSDimitry Andric 
1234*bdd1243dSDimitry Andric   DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);
1235972a253aSDimitry Andric }
1236972a253aSDimitry Andric 
1237972a253aSDimitry Andric void PreRARematStage::collectRematerializableInstructions() {
1238972a253aSDimitry Andric   const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(DAG.TRI);
1239972a253aSDimitry Andric   for (unsigned I = 0, E = DAG.MRI.getNumVirtRegs(); I != E; ++I) {
124081ad6265SDimitry Andric     Register Reg = Register::index2VirtReg(I);
1241972a253aSDimitry Andric     if (!DAG.LIS->hasInterval(Reg))
124281ad6265SDimitry Andric       continue;
124381ad6265SDimitry Andric 
124481ad6265SDimitry Andric     // TODO: Handle AGPR and SGPR rematerialization
1245972a253aSDimitry Andric     if (!SRI->isVGPRClass(DAG.MRI.getRegClass(Reg)) ||
1246972a253aSDimitry Andric         !DAG.MRI.hasOneDef(Reg) || !DAG.MRI.hasOneNonDBGUse(Reg))
124781ad6265SDimitry Andric       continue;
124881ad6265SDimitry Andric 
1249972a253aSDimitry Andric     MachineOperand *Op = DAG.MRI.getOneDef(Reg);
125081ad6265SDimitry Andric     MachineInstr *Def = Op->getParent();
1251fcaf7f86SDimitry Andric     if (Op->getSubReg() != 0 || !isTriviallyReMaterializable(*Def))
125281ad6265SDimitry Andric       continue;
125381ad6265SDimitry Andric 
1254972a253aSDimitry Andric     MachineInstr *UseI = &*DAG.MRI.use_instr_nodbg_begin(Reg);
125581ad6265SDimitry Andric     if (Def->getParent() == UseI->getParent())
125681ad6265SDimitry Andric       continue;
125781ad6265SDimitry Andric 
125881ad6265SDimitry Andric     // We are only collecting defs that are defined in another block and are
125981ad6265SDimitry Andric     // live-through or used inside regions at MinOccupancy. This means that the
126081ad6265SDimitry Andric     // register must be in the live-in set for the region.
126181ad6265SDimitry Andric     bool AddedToRematList = false;
1262972a253aSDimitry Andric     for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
1263972a253aSDimitry Andric       auto It = DAG.LiveIns[I].find(Reg);
1264972a253aSDimitry Andric       if (It != DAG.LiveIns[I].end() && !It->second.none()) {
1265972a253aSDimitry Andric         if (DAG.RegionsWithMinOcc[I]) {
126681ad6265SDimitry Andric           RematerializableInsts[I][Def] = UseI;
126781ad6265SDimitry Andric           AddedToRematList = true;
126881ad6265SDimitry Andric         }
126981ad6265SDimitry Andric 
127081ad6265SDimitry Andric         // Collect regions with rematerializable reg as live-in to avoid
127181ad6265SDimitry Andric         // searching later when updating RP.
127281ad6265SDimitry Andric         RematDefToLiveInRegions[Def].push_back(I);
127381ad6265SDimitry Andric       }
127481ad6265SDimitry Andric     }
127581ad6265SDimitry Andric     if (!AddedToRematList)
127681ad6265SDimitry Andric       RematDefToLiveInRegions.erase(Def);
127781ad6265SDimitry Andric   }
127881ad6265SDimitry Andric }
127981ad6265SDimitry Andric 
1280972a253aSDimitry Andric bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
128181ad6265SDimitry Andric                                               const TargetInstrInfo *TII) {
128281ad6265SDimitry Andric   // Temporary copies of cached variables we will be modifying and replacing if
128381ad6265SDimitry Andric   // sinking succeeds.
128481ad6265SDimitry Andric   SmallVector<
128581ad6265SDimitry Andric       std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>, 32>
128681ad6265SDimitry Andric       NewRegions;
128781ad6265SDimitry Andric   DenseMap<unsigned, GCNRPTracker::LiveRegSet> NewLiveIns;
128881ad6265SDimitry Andric   DenseMap<unsigned, GCNRegPressure> NewPressure;
128981ad6265SDimitry Andric   BitVector NewRescheduleRegions;
1290972a253aSDimitry Andric   LiveIntervals *LIS = DAG.LIS;
129181ad6265SDimitry Andric 
1292972a253aSDimitry Andric   NewRegions.resize(DAG.Regions.size());
1293972a253aSDimitry Andric   NewRescheduleRegions.resize(DAG.Regions.size());
129481ad6265SDimitry Andric 
129581ad6265SDimitry Andric   // Collect only regions that has a rematerializable def as a live-in.
129681ad6265SDimitry Andric   SmallSet<unsigned, 16> ImpactedRegions;
129781ad6265SDimitry Andric   for (const auto &It : RematDefToLiveInRegions)
129881ad6265SDimitry Andric     ImpactedRegions.insert(It.second.begin(), It.second.end());
129981ad6265SDimitry Andric 
130081ad6265SDimitry Andric   // Make copies of register pressure and live-ins cache that will be updated
130181ad6265SDimitry Andric   // as we rematerialize.
130281ad6265SDimitry Andric   for (auto Idx : ImpactedRegions) {
1303972a253aSDimitry Andric     NewPressure[Idx] = DAG.Pressure[Idx];
1304972a253aSDimitry Andric     NewLiveIns[Idx] = DAG.LiveIns[Idx];
130581ad6265SDimitry Andric   }
1306972a253aSDimitry Andric   NewRegions = DAG.Regions;
130781ad6265SDimitry Andric   NewRescheduleRegions.reset();
130881ad6265SDimitry Andric 
130981ad6265SDimitry Andric   DenseMap<MachineInstr *, MachineInstr *> InsertedMIToOldDef;
131081ad6265SDimitry Andric   bool Improved = false;
131181ad6265SDimitry Andric   for (auto I : ImpactedRegions) {
1312972a253aSDimitry Andric     if (!DAG.RegionsWithMinOcc[I])
131381ad6265SDimitry Andric       continue;
131481ad6265SDimitry Andric 
131581ad6265SDimitry Andric     Improved = false;
131681ad6265SDimitry Andric     int VGPRUsage = NewPressure[I].getVGPRNum(ST.hasGFX90AInsts());
131781ad6265SDimitry Andric     int SGPRUsage = NewPressure[I].getSGPRNum();
131881ad6265SDimitry Andric 
131981ad6265SDimitry Andric     // TODO: Handle occupancy drop due to AGPR and SGPR.
132081ad6265SDimitry Andric     // Check if cause of occupancy drop is due to VGPR usage and not SGPR.
1321972a253aSDimitry Andric     if (ST.getOccupancyWithNumSGPRs(SGPRUsage) == DAG.MinOccupancy)
132281ad6265SDimitry Andric       break;
132381ad6265SDimitry Andric 
132481ad6265SDimitry Andric     // The occupancy of this region could have been improved by a previous
132581ad6265SDimitry Andric     // iteration's sinking of defs.
1326972a253aSDimitry Andric     if (NewPressure[I].getOccupancy(ST) > DAG.MinOccupancy) {
132781ad6265SDimitry Andric       NewRescheduleRegions[I] = true;
132881ad6265SDimitry Andric       Improved = true;
132981ad6265SDimitry Andric       continue;
133081ad6265SDimitry Andric     }
133181ad6265SDimitry Andric 
133281ad6265SDimitry Andric     // First check if we have enough trivially rematerializable instructions to
133381ad6265SDimitry Andric     // improve occupancy. Optimistically assume all instructions we are able to
133481ad6265SDimitry Andric     // sink decreased RP.
133581ad6265SDimitry Andric     int TotalSinkableRegs = 0;
133681ad6265SDimitry Andric     for (const auto &It : RematerializableInsts[I]) {
133781ad6265SDimitry Andric       MachineInstr *Def = It.first;
133881ad6265SDimitry Andric       Register DefReg = Def->getOperand(0).getReg();
133981ad6265SDimitry Andric       TotalSinkableRegs +=
134081ad6265SDimitry Andric           SIRegisterInfo::getNumCoveredRegs(NewLiveIns[I][DefReg]);
134181ad6265SDimitry Andric     }
134281ad6265SDimitry Andric     int VGPRsAfterSink = VGPRUsage - TotalSinkableRegs;
134381ad6265SDimitry Andric     unsigned OptimisticOccupancy = ST.getOccupancyWithNumVGPRs(VGPRsAfterSink);
134481ad6265SDimitry Andric     // If in the most optimistic scenario, we cannot improve occupancy, then do
134581ad6265SDimitry Andric     // not attempt to sink any instructions.
1346972a253aSDimitry Andric     if (OptimisticOccupancy <= DAG.MinOccupancy)
134781ad6265SDimitry Andric       break;
134881ad6265SDimitry Andric 
134981ad6265SDimitry Andric     unsigned ImproveOccupancy = 0;
135081ad6265SDimitry Andric     SmallVector<MachineInstr *, 4> SinkedDefs;
135181ad6265SDimitry Andric     for (auto &It : RematerializableInsts[I]) {
135281ad6265SDimitry Andric       MachineInstr *Def = It.first;
135381ad6265SDimitry Andric       MachineBasicBlock::iterator InsertPos =
135481ad6265SDimitry Andric           MachineBasicBlock::iterator(It.second);
135581ad6265SDimitry Andric       Register Reg = Def->getOperand(0).getReg();
135681ad6265SDimitry Andric       // Rematerialize MI to its use block. Since we are only rematerializing
135781ad6265SDimitry Andric       // instructions that do not have any virtual reg uses, we do not need to
135881ad6265SDimitry Andric       // call LiveRangeEdit::allUsesAvailableAt() and
135981ad6265SDimitry Andric       // LiveRangeEdit::canRematerializeAt().
136081ad6265SDimitry Andric       TII->reMaterialize(*InsertPos->getParent(), InsertPos, Reg,
1361972a253aSDimitry Andric                          Def->getOperand(0).getSubReg(), *Def, *DAG.TRI);
1362*bdd1243dSDimitry Andric       MachineInstr *NewMI = &*std::prev(InsertPos);
136381ad6265SDimitry Andric       LIS->InsertMachineInstrInMaps(*NewMI);
136481ad6265SDimitry Andric       LIS->removeInterval(Reg);
136581ad6265SDimitry Andric       LIS->createAndComputeVirtRegInterval(Reg);
136681ad6265SDimitry Andric       InsertedMIToOldDef[NewMI] = Def;
136781ad6265SDimitry Andric 
136881ad6265SDimitry Andric       // Update region boundaries in scheduling region we sinked from since we
136981ad6265SDimitry Andric       // may sink an instruction that was at the beginning or end of its region
1370972a253aSDimitry Andric       DAG.updateRegionBoundaries(NewRegions, Def, /*NewMI =*/nullptr,
137181ad6265SDimitry Andric                                  /*Removing =*/true);
137281ad6265SDimitry Andric 
137381ad6265SDimitry Andric       // Update region boundaries in region we sinked to.
1374972a253aSDimitry Andric       DAG.updateRegionBoundaries(NewRegions, InsertPos, NewMI);
137581ad6265SDimitry Andric 
137681ad6265SDimitry Andric       LaneBitmask PrevMask = NewLiveIns[I][Reg];
137781ad6265SDimitry Andric       // FIXME: Also update cached pressure for where the def was sinked from.
137881ad6265SDimitry Andric       // Update RP for all regions that has this reg as a live-in and remove
137981ad6265SDimitry Andric       // the reg from all regions as a live-in.
138081ad6265SDimitry Andric       for (auto Idx : RematDefToLiveInRegions[Def]) {
138181ad6265SDimitry Andric         NewLiveIns[Idx].erase(Reg);
1382972a253aSDimitry Andric         if (InsertPos->getParent() != DAG.Regions[Idx].first->getParent()) {
138381ad6265SDimitry Andric           // Def is live-through and not used in this block.
1384972a253aSDimitry Andric           NewPressure[Idx].inc(Reg, PrevMask, LaneBitmask::getNone(), DAG.MRI);
138581ad6265SDimitry Andric         } else {
138681ad6265SDimitry Andric           // Def is used and rematerialized into this block.
138781ad6265SDimitry Andric           GCNDownwardRPTracker RPT(*LIS);
138881ad6265SDimitry Andric           auto *NonDbgMI = &*skipDebugInstructionsForward(
138981ad6265SDimitry Andric               NewRegions[Idx].first, NewRegions[Idx].second);
139081ad6265SDimitry Andric           RPT.reset(*NonDbgMI, &NewLiveIns[Idx]);
139181ad6265SDimitry Andric           RPT.advance(NewRegions[Idx].second);
139281ad6265SDimitry Andric           NewPressure[Idx] = RPT.moveMaxPressure();
139381ad6265SDimitry Andric         }
139481ad6265SDimitry Andric       }
139581ad6265SDimitry Andric 
139681ad6265SDimitry Andric       SinkedDefs.push_back(Def);
139781ad6265SDimitry Andric       ImproveOccupancy = NewPressure[I].getOccupancy(ST);
1398972a253aSDimitry Andric       if (ImproveOccupancy > DAG.MinOccupancy)
139981ad6265SDimitry Andric         break;
140081ad6265SDimitry Andric     }
140181ad6265SDimitry Andric 
140281ad6265SDimitry Andric     // Remove defs we just sinked from all regions' list of sinkable defs
140381ad6265SDimitry Andric     for (auto &Def : SinkedDefs)
140481ad6265SDimitry Andric       for (auto TrackedIdx : RematDefToLiveInRegions[Def])
140581ad6265SDimitry Andric         RematerializableInsts[TrackedIdx].erase(Def);
140681ad6265SDimitry Andric 
1407972a253aSDimitry Andric     if (ImproveOccupancy <= DAG.MinOccupancy)
140881ad6265SDimitry Andric       break;
140981ad6265SDimitry Andric 
141081ad6265SDimitry Andric     NewRescheduleRegions[I] = true;
141181ad6265SDimitry Andric     Improved = true;
141281ad6265SDimitry Andric   }
141381ad6265SDimitry Andric 
141481ad6265SDimitry Andric   if (!Improved) {
141581ad6265SDimitry Andric     // Occupancy was not improved for all regions that were at MinOccupancy.
141681ad6265SDimitry Andric     // Undo sinking and remove newly rematerialized instructions.
141781ad6265SDimitry Andric     for (auto &Entry : InsertedMIToOldDef) {
141881ad6265SDimitry Andric       MachineInstr *MI = Entry.first;
141981ad6265SDimitry Andric       MachineInstr *OldMI = Entry.second;
142081ad6265SDimitry Andric       Register Reg = MI->getOperand(0).getReg();
142181ad6265SDimitry Andric       LIS->RemoveMachineInstrFromMaps(*MI);
142281ad6265SDimitry Andric       MI->eraseFromParent();
142381ad6265SDimitry Andric       OldMI->clearRegisterDeads(Reg);
142481ad6265SDimitry Andric       LIS->removeInterval(Reg);
142581ad6265SDimitry Andric       LIS->createAndComputeVirtRegInterval(Reg);
142681ad6265SDimitry Andric     }
142781ad6265SDimitry Andric     return false;
142881ad6265SDimitry Andric   }
142981ad6265SDimitry Andric 
143081ad6265SDimitry Andric   // Occupancy was improved for all regions.
143181ad6265SDimitry Andric   for (auto &Entry : InsertedMIToOldDef) {
143281ad6265SDimitry Andric     MachineInstr *MI = Entry.first;
143381ad6265SDimitry Andric     MachineInstr *OldMI = Entry.second;
143481ad6265SDimitry Andric 
143581ad6265SDimitry Andric     // Remove OldMI from BBLiveInMap since we are sinking it from its MBB.
1436972a253aSDimitry Andric     DAG.BBLiveInMap.erase(OldMI);
143781ad6265SDimitry Andric 
143881ad6265SDimitry Andric     // Remove OldMI and update LIS
143981ad6265SDimitry Andric     Register Reg = MI->getOperand(0).getReg();
144081ad6265SDimitry Andric     LIS->RemoveMachineInstrFromMaps(*OldMI);
144181ad6265SDimitry Andric     OldMI->eraseFromParent();
144281ad6265SDimitry Andric     LIS->removeInterval(Reg);
144381ad6265SDimitry Andric     LIS->createAndComputeVirtRegInterval(Reg);
144481ad6265SDimitry Andric   }
144581ad6265SDimitry Andric 
144681ad6265SDimitry Andric   // Update live-ins, register pressure, and regions caches.
144781ad6265SDimitry Andric   for (auto Idx : ImpactedRegions) {
1448972a253aSDimitry Andric     DAG.LiveIns[Idx] = NewLiveIns[Idx];
1449972a253aSDimitry Andric     DAG.Pressure[Idx] = NewPressure[Idx];
1450972a253aSDimitry Andric     DAG.MBBLiveIns.erase(DAG.Regions[Idx].first->getParent());
145181ad6265SDimitry Andric   }
1452972a253aSDimitry Andric   DAG.Regions = NewRegions;
1453972a253aSDimitry Andric   DAG.RescheduleRegions = NewRescheduleRegions;
145481ad6265SDimitry Andric 
145581ad6265SDimitry Andric   SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
1456972a253aSDimitry Andric   MFI.increaseOccupancy(MF, ++DAG.MinOccupancy);
145781ad6265SDimitry Andric 
145881ad6265SDimitry Andric   return true;
145981ad6265SDimitry Andric }
146081ad6265SDimitry Andric 
146181ad6265SDimitry Andric // Copied from MachineLICM
1462972a253aSDimitry Andric bool PreRARematStage::isTriviallyReMaterializable(const MachineInstr &MI) {
1463972a253aSDimitry Andric   if (!DAG.TII->isTriviallyReMaterializable(MI))
146481ad6265SDimitry Andric     return false;
146581ad6265SDimitry Andric 
146681ad6265SDimitry Andric   for (const MachineOperand &MO : MI.operands())
146781ad6265SDimitry Andric     if (MO.isReg() && MO.isUse() && MO.getReg().isVirtual())
146881ad6265SDimitry Andric       return false;
146981ad6265SDimitry Andric 
147081ad6265SDimitry Andric   return true;
147181ad6265SDimitry Andric }
147281ad6265SDimitry Andric 
147381ad6265SDimitry Andric // When removing, we will have to check both beginning and ending of the region.
147481ad6265SDimitry Andric // When inserting, we will only have to check if we are inserting NewMI in front
147581ad6265SDimitry Andric // of a scheduling region and do not need to check the ending since we will only
147681ad6265SDimitry Andric // ever be inserting before an already existing MI.
147781ad6265SDimitry Andric void GCNScheduleDAGMILive::updateRegionBoundaries(
147881ad6265SDimitry Andric     SmallVectorImpl<std::pair<MachineBasicBlock::iterator,
147981ad6265SDimitry Andric                               MachineBasicBlock::iterator>> &RegionBoundaries,
148081ad6265SDimitry Andric     MachineBasicBlock::iterator MI, MachineInstr *NewMI, bool Removing) {
148181ad6265SDimitry Andric   unsigned I = 0, E = RegionBoundaries.size();
148281ad6265SDimitry Andric   // Search for first region of the block where MI is located
148381ad6265SDimitry Andric   while (I != E && MI->getParent() != RegionBoundaries[I].first->getParent())
148481ad6265SDimitry Andric     ++I;
148581ad6265SDimitry Andric 
148681ad6265SDimitry Andric   for (; I != E; ++I) {
148781ad6265SDimitry Andric     if (MI->getParent() != RegionBoundaries[I].first->getParent())
148881ad6265SDimitry Andric       return;
148981ad6265SDimitry Andric 
149081ad6265SDimitry Andric     if (Removing && MI == RegionBoundaries[I].first &&
149181ad6265SDimitry Andric         MI == RegionBoundaries[I].second) {
149281ad6265SDimitry Andric       // MI is in a region with size 1, after removing, the region will be
149381ad6265SDimitry Andric       // size 0, set RegionBegin and RegionEnd to pass end of block iterator.
149481ad6265SDimitry Andric       RegionBoundaries[I] =
1495*bdd1243dSDimitry Andric           std::pair(MI->getParent()->end(), MI->getParent()->end());
149681ad6265SDimitry Andric       return;
149781ad6265SDimitry Andric     }
149881ad6265SDimitry Andric     if (MI == RegionBoundaries[I].first) {
149981ad6265SDimitry Andric       if (Removing)
150081ad6265SDimitry Andric         RegionBoundaries[I] =
1501*bdd1243dSDimitry Andric             std::pair(std::next(MI), RegionBoundaries[I].second);
150281ad6265SDimitry Andric       else
150381ad6265SDimitry Andric         // Inserted NewMI in front of region, set new RegionBegin to NewMI
1504*bdd1243dSDimitry Andric         RegionBoundaries[I] = std::pair(MachineBasicBlock::iterator(NewMI),
150581ad6265SDimitry Andric                                         RegionBoundaries[I].second);
150681ad6265SDimitry Andric       return;
150781ad6265SDimitry Andric     }
150881ad6265SDimitry Andric     if (Removing && MI == RegionBoundaries[I].second) {
1509*bdd1243dSDimitry Andric       RegionBoundaries[I] = std::pair(RegionBoundaries[I].first, std::prev(MI));
151081ad6265SDimitry Andric       return;
151181ad6265SDimitry Andric     }
151281ad6265SDimitry Andric   }
151381ad6265SDimitry Andric }
1514*bdd1243dSDimitry Andric 
1515*bdd1243dSDimitry Andric static bool hasIGLPInstrs(ScheduleDAGInstrs *DAG) {
1516*bdd1243dSDimitry Andric   return std::any_of(
1517*bdd1243dSDimitry Andric       DAG->begin(), DAG->end(), [](MachineBasicBlock::iterator MI) {
1518*bdd1243dSDimitry Andric         unsigned Opc = MI->getOpcode();
1519*bdd1243dSDimitry Andric         return Opc == AMDGPU::SCHED_GROUP_BARRIER || Opc == AMDGPU::IGLP_OPT;
1520*bdd1243dSDimitry Andric       });
1521*bdd1243dSDimitry Andric }
1522*bdd1243dSDimitry Andric 
1523*bdd1243dSDimitry Andric GCNPostScheduleDAGMILive::GCNPostScheduleDAGMILive(
1524*bdd1243dSDimitry Andric     MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S,
1525*bdd1243dSDimitry Andric     bool RemoveKillFlags)
1526*bdd1243dSDimitry Andric     : ScheduleDAGMI(C, std::move(S), RemoveKillFlags) {}
1527*bdd1243dSDimitry Andric 
1528*bdd1243dSDimitry Andric void GCNPostScheduleDAGMILive::schedule() {
1529*bdd1243dSDimitry Andric   HasIGLPInstrs = hasIGLPInstrs(this);
1530*bdd1243dSDimitry Andric   if (HasIGLPInstrs) {
1531*bdd1243dSDimitry Andric     SavedMutations.clear();
1532*bdd1243dSDimitry Andric     SavedMutations.swap(Mutations);
1533*bdd1243dSDimitry Andric     addMutation(createIGroupLPDAGMutation());
1534*bdd1243dSDimitry Andric   }
1535*bdd1243dSDimitry Andric 
1536*bdd1243dSDimitry Andric   ScheduleDAGMI::schedule();
1537*bdd1243dSDimitry Andric }
1538*bdd1243dSDimitry Andric 
1539*bdd1243dSDimitry Andric void GCNPostScheduleDAGMILive::finalizeSchedule() {
1540*bdd1243dSDimitry Andric   if (HasIGLPInstrs)
1541*bdd1243dSDimitry Andric     SavedMutations.swap(Mutations);
1542*bdd1243dSDimitry Andric 
1543*bdd1243dSDimitry Andric   ScheduleDAGMI::finalizeSchedule();
1544*bdd1243dSDimitry Andric }
1545