//===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This contains a MachineSchedStrategy implementation for maximizing wave
/// occupancy on GCN hardware.
///
/// This pass will apply multiple scheduling stages to the same function.
/// Regions are first recorded in GCNScheduleDAGMILive::schedule. The actual
/// entry point for the scheduling of those regions is
/// GCNScheduleDAGMILive::runSchedStages.
///
/// Generally, the reason for having multiple scheduling stages is to account
/// for the kernel-wide effect of register usage on occupancy. Usually, only a
/// few scheduling regions will have register pressure high enough to limit
/// occupancy for the kernel, so constraints can be relaxed to improve ILP in
/// other regions.
///
//===----------------------------------------------------------------------===//

#include "GCNSchedStrategy.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"

#define DEBUG_TYPE "machine-scheduler"

using namespace llvm;

GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
    const MachineSchedContext *C)
    : GenericScheduler(C), TargetOccupancy(0), MF(nullptr),
      HasClusteredNodes(false), HasExcessPressure(false) {}

void GCNMaxOccupancySchedStrategy::initialize(ScheduleDAGMI *DAG) {
  GenericScheduler::initialize(DAG);

  MF = &DAG->MF;

  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();

  // FIXME: This is also necessary, because some passes that run after
  // scheduling and before regalloc increase register pressure.
  const unsigned ErrorMargin = 3;

  SGPRExcessLimit =
      Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
  VGPRExcessLimit =
      Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);

  SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
  // Set the initial TargetOccupancy to the maximum occupancy that we can
  // achieve for this function. This effectively sets a lower bound on the
  // 'Critical' register limits in the scheduler.
  TargetOccupancy = MFI.getOccupancy();
  SGPRCriticalLimit =
      std::min(ST.getMaxNumSGPRs(TargetOccupancy, true), SGPRExcessLimit);
  VGPRCriticalLimit =
      std::min(ST.getMaxNumVGPRs(TargetOccupancy), VGPRExcessLimit);

  // Subtract error margin from register limits and avoid overflow.
  SGPRCriticalLimit =
      std::min(SGPRCriticalLimit - ErrorMargin, SGPRCriticalLimit);
  VGPRCriticalLimit =
      std::min(VGPRCriticalLimit - ErrorMargin, VGPRCriticalLimit);
  SGPRExcessLimit = std::min(SGPRExcessLimit - ErrorMargin, SGPRExcessLimit);
  VGPRExcessLimit = std::min(VGPRExcessLimit - ErrorMargin, VGPRExcessLimit);
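  // The limits are unsigned, so a limit smaller than ErrorMargin would wrap
  // around on subtraction; taking std::min with the original value discards
  // the wrapped result and leaves such a limit unchanged.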
}

void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
                                     bool AtTop, const RegPressureTracker &RPTracker,
                                     const SIRegisterInfo *SRI,
                                     unsigned SGPRPressure,
                                     unsigned VGPRPressure) {

  Cand.SU = SU;
  Cand.AtTop = AtTop;

  // getDownwardPressure() and getUpwardPressure() make temporary changes to
  // the tracker, so we need to pass those functions a non-const copy.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);

  Pressure.clear();
  MaxPressure.clear();

  if (AtTop)
    TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
  else {
    // FIXME: I think for bottom-up scheduling, the register pressure is cached
    // and can be retrieved by DAG->getPressureDiff(SU).
    TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
  }

  unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
  unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];

  // If two instructions increase the pressure of different register sets
  // by the same amount, the generic scheduler will prefer to schedule the
  // instruction that increases the set with the fewest registers, which in
  // our case would be SGPRs. This is rarely what we want, so when we report
  // excess/critical register pressure, we do it either only for VGPRs or
  // only for SGPRs.

  // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
  const unsigned MaxVGPRPressureInc = 16;
  bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
  bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;
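  // Note that VGPR tracking starts MaxVGPRPressureInc units before the excess
  // limit is actually reached, presumably as headroom for a single instruction
  // (e.g. one defining a wide VGPR tuple) raising pressure by many units.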

  // FIXME: We have to enter REG-EXCESS before we reach the actual threshold
  // to increase the likelihood we don't go over the limits.  We should improve
  // the analysis to look through dependencies to find the path with the least
  // register pressure.

  // We only need to update the RPDelta for instructions that increase register
  // pressure. Instructions that decrease or keep reg pressure the same will be
  // marked as RegExcess in tryCandidate() when they are compared with
  // instructions that increase the register pressure.
  if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
    HasExcessPressure = true;
    Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
    Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
  }

  if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
    HasExcessPressure = true;
    Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
    Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
  }

  // Register pressure is considered 'CRITICAL' if it is approaching a value
  // that would reduce the wave occupancy for the execution unit. When
  // register pressure is 'CRITICAL', increasing SGPR pressure and increasing
  // VGPR pressure have the same cost, so we don't need to prefer one over the
  // other.

  int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
  int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;

  if (SGPRDelta >= 0 || VGPRDelta >= 0) {
    HasExcessPressure = true;
    if (SGPRDelta > VGPRDelta) {
      Cand.RPDelta.CriticalMax =
          PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
      Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
    } else {
      Cand.RPDelta.CriticalMax =
          PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
      Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
    }
  }
}

// This function is mostly cut and pasted from
// GenericScheduler::pickNodeFromQueue()
void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
                                         const CandPolicy &ZonePolicy,
                                         const RegPressureTracker &RPTracker,
                                         SchedCandidate &Cand) {
  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
  ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
  unsigned SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
  unsigned VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
  ReadyQueue &Q = Zone.Available;
  for (SUnit *SU : Q) {

    SchedCandidate TryCand(ZonePolicy);
    initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI,
                  SGPRPressure, VGPRPressure);
    // Pass SchedBoundary only when comparing nodes from the same boundary.
    SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
    GenericScheduler::tryCandidate(Cand, TryCand, ZoneArg);
    if (TryCand.Reason != NoCand) {
      // Initialize resource delta if needed in case future heuristics query it.
      if (TryCand.ResDelta == SchedResourceDelta())
        TryCand.initResourceDelta(Zone.DAG, SchedModel);
      Cand.setBest(TryCand);
      LLVM_DEBUG(traceCandidate(Cand));
    }
  }
}

// This function is mostly cut and pasted from
// GenericScheduler::pickNodeBidirectional()
SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
  if (SUnit *SU = Bot.pickOnlyChoice()) {
    IsTopNode = false;
    return SU;
  }
  if (SUnit *SU = Top.pickOnlyChoice()) {
    IsTopNode = true;
    return SU;
  }
  // Set the bottom-up policy based on the state of the current bottom zone and
  // the instructions outside the zone, including the top zone.
  CandPolicy BotPolicy;
  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
  // Set the top-down policy based on the state of the current top zone and
  // the instructions outside the zone, including the bottom zone.
  CandPolicy TopPolicy;
  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);

  // See if BotCand is still valid (because we previously scheduled from Top).
  LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
      BotCand.Policy != BotPolicy) {
    BotCand.reset(CandPolicy());
    pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
    assert(BotCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    LLVM_DEBUG(traceCandidate(BotCand));
#ifndef NDEBUG
    if (VerifyScheduling) {
      SchedCandidate TCand;
      TCand.reset(CandPolicy());
      pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
      assert(TCand.SU == BotCand.SU &&
             "Last pick result should correspond to re-picking right now");
    }
#endif
  }

  // Check if the top Q has a better candidate.
  LLVM_DEBUG(dbgs() << "Picking from Top:\n");
  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
      TopCand.Policy != TopPolicy) {
    TopCand.reset(CandPolicy());
    pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
    assert(TopCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    LLVM_DEBUG(traceCandidate(TopCand));
#ifndef NDEBUG
    if (VerifyScheduling) {
      SchedCandidate TCand;
      TCand.reset(CandPolicy());
      pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
      assert(TCand.SU == TopCand.SU &&
             "Last pick result should correspond to re-picking right now");
    }
#endif
  }

  // Pick best from BotCand and TopCand.
  LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand);
             dbgs() << "Bot Cand: "; traceCandidate(BotCand););
  SchedCandidate Cand = BotCand;
  TopCand.Reason = NoCand;
  GenericScheduler::tryCandidate(Cand, TopCand, nullptr);
  if (TopCand.Reason != NoCand) {
    Cand.setBest(TopCand);
  }
  LLVM_DEBUG(dbgs() << "Picking: "; traceCandidate(Cand););

  IsTopNode = Cand.AtTop;
  return Cand.SU;
}

// This function is mostly cut and pasted from
// GenericScheduler::pickNode()
SUnit *GCNMaxOccupancySchedStrategy::pickNode(bool &IsTopNode) {
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return nullptr;
  }
  SUnit *SU;
  do {
    if (RegionPolicy.OnlyTopDown) {
      SU = Top.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        TopCand.reset(NoPolicy);
        pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
        assert(TopCand.Reason != NoCand && "failed to find a candidate");
        SU = TopCand.SU;
      }
      IsTopNode = true;
    } else if (RegionPolicy.OnlyBottomUp) {
      SU = Bot.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        BotCand.reset(NoPolicy);
        pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
        assert(BotCand.Reason != NoCand && "failed to find a candidate");
        SU = BotCand.SU;
      }
      IsTopNode = false;
    } else {
      SU = pickNodeBidirectional(IsTopNode);
    }
  } while (SU->isScheduled);

  if (SU->isTopReady())
    Top.removeReady(SU);
  if (SU->isBottomReady())
    Bot.removeReady(SU);

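  // Record whether any cluster edges reach this node; this feeds
  // RegionsWithClusters, which decides which regions the later unclustered
  // reschedule stage revisits.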
  if (!HasClusteredNodes && SU->getInstr()->mayLoadOrStore()) {
    for (SDep &Dep : SU->Preds) {
      if (Dep.isCluster()) {
        HasClusteredNodes = true;
        break;
      }
    }
  }

  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
                    << *SU->getInstr());
  return SU;
}

GCNScheduleDAGMILive::GCNScheduleDAGMILive(
    MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S)
    : ScheduleDAGMILive(C, std::move(S)), ST(MF.getSubtarget<GCNSubtarget>()),
      MFI(*MF.getInfo<SIMachineFunctionInfo>()),
      StartingOccupancy(MFI.getOccupancy()), MinOccupancy(StartingOccupancy) {

  LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
}

void GCNScheduleDAGMILive::schedule() {
  // Collect all scheduling regions. The actual scheduling is performed in
  // GCNScheduleDAGMILive::finalizeSchedule.
  Regions.push_back(std::make_pair(RegionBegin, RegionEnd));
}

GCNRegPressure
GCNScheduleDAGMILive::getRealRegPressure(unsigned RegionIdx) const {
  GCNDownwardRPTracker RPTracker(*LIS);
  RPTracker.advance(begin(), end(), &LiveIns[RegionIdx]);
  return RPTracker.moveMaxPressure();
}

void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
                                                const MachineBasicBlock *MBB) {
  GCNDownwardRPTracker RPTracker(*LIS);

  // If the block has only one successor, then the live-ins of that successor
  // are the live-outs of the current block. We can reuse the calculated live
  // set if the successor will be scheduled after the current block.
  const MachineBasicBlock *OnlySucc = nullptr;
  if (MBB->succ_size() == 1 && !(*MBB->succ_begin())->empty()) {
    SlotIndexes *Ind = LIS->getSlotIndexes();
    if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(*MBB->succ_begin()))
      OnlySucc = *MBB->succ_begin();
  }

  // Scheduler sends regions from the end of the block upwards.
  size_t CurRegion = RegionIdx;
  for (size_t E = Regions.size(); CurRegion != E; ++CurRegion)
    if (Regions[CurRegion].first->getParent() != MBB)
      break;
  --CurRegion;
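  // Since regions within a block are recorded bottom-up (see the comment
  // above), after the decrement CurRegion indexes the region nearest the top
  // of this block; it walks back down toward RegionIdx as the downward
  // tracker advances below.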

  auto I = MBB->begin();
  auto LiveInIt = MBBLiveIns.find(MBB);
  auto &Rgn = Regions[CurRegion];
  auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
  if (LiveInIt != MBBLiveIns.end()) {
    auto LiveIn = std::move(LiveInIt->second);
    RPTracker.reset(*MBB->begin(), &LiveIn);
    MBBLiveIns.erase(LiveInIt);
  } else {
    I = Rgn.first;
    auto LRS = BBLiveInMap.lookup(NonDbgMI);
#ifdef EXPENSIVE_CHECKS
    assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS));
#endif
    RPTracker.reset(*I, &LRS);
  }

  for (;;) {
    I = RPTracker.getNext();

    if (Regions[CurRegion].first == I || NonDbgMI == I) {
      LiveIns[CurRegion] = RPTracker.getLiveRegs();
      RPTracker.clearMaxPressure();
    }

    if (Regions[CurRegion].second == I) {
      Pressure[CurRegion] = RPTracker.moveMaxPressure();
      if (CurRegion-- == RegionIdx)
        break;
    }
    RPTracker.advanceToNext();
    RPTracker.advanceBeforeNext();
  }

  if (OnlySucc) {
    if (I != MBB->end()) {
      RPTracker.advanceToNext();
      RPTracker.advance(MBB->end());
    }
    RPTracker.reset(*OnlySucc->begin(), &RPTracker.getLiveRegs());
    RPTracker.advanceBeforeNext();
    MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs();
  }
}

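// Compute the live register sets before the first non-debug instruction of
// each recorded region's block; computeBlockPressure() uses these to seed its
// downward tracker.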
DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
GCNScheduleDAGMILive::getBBLiveInMap() const {
  assert(!Regions.empty());
  std::vector<MachineInstr *> BBStarters;
  BBStarters.reserve(Regions.size());
  auto I = Regions.rbegin(), E = Regions.rend();
  auto *BB = I->first->getParent();
  do {
    auto *MI = &*skipDebugInstructionsForward(I->first, I->second);
    BBStarters.push_back(MI);
    do {
      ++I;
    } while (I != E && I->first->getParent() == BB);
  } while (I != E);
  return getLiveRegMap(BBStarters, /*After=*/false, *LIS);
}

void GCNScheduleDAGMILive::finalizeSchedule() {
  // Start actual scheduling here. This function is called by the base
  // MachineScheduler after all regions have been recorded by
  // GCNScheduleDAGMILive::schedule().
  LiveIns.resize(Regions.size());
  Pressure.resize(Regions.size());
  RescheduleRegions.resize(Regions.size());
  RegionsWithClusters.resize(Regions.size());
  RegionsWithHighRP.resize(Regions.size());
  RegionsWithMinOcc.resize(Regions.size());
  RescheduleRegions.set();
  RegionsWithClusters.reset();
  RegionsWithHighRP.reset();
  RegionsWithMinOcc.reset();

  runSchedStages();
}

void GCNScheduleDAGMILive::runSchedStages() {
  LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");
  InitialScheduleStage S0(GCNSchedStageID::InitialSchedule, *this);
  UnclusteredRescheduleStage S1(GCNSchedStageID::UnclusteredReschedule, *this);
  ClusteredLowOccStage S2(GCNSchedStageID::ClusteredLowOccupancyReschedule,
                          *this);
  PreRARematStage S3(GCNSchedStageID::PreRARematerialize, *this);
  GCNSchedStage *SchedStages[] = {&S0, &S1, &S2, &S3};
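  // The stages run in this order; each one may bail out entirely or per
  // region via the initGCNSchedStage/initGCNRegion overrides below.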

  if (!Regions.empty())
    BBLiveInMap = getBBLiveInMap();

  for (auto *Stage : SchedStages) {
    if (!Stage->initGCNSchedStage())
      continue;

    for (auto Region : Regions) {
      RegionBegin = Region.first;
      RegionEnd = Region.second;
      // Set up for scheduling the region and check whether it should be
      // skipped.
      if (!Stage->initGCNRegion()) {
        Stage->advanceRegion();
        exitRegion();
        continue;
      }

      ScheduleDAGMILive::schedule();
      Stage->finalizeGCNRegion();
    }

    Stage->finalizeGCNSchedStage();
  }
}

#ifndef NDEBUG
raw_ostream &llvm::operator<<(raw_ostream &OS, const GCNSchedStageID &StageID) {
  switch (StageID) {
  case GCNSchedStageID::InitialSchedule:
    OS << "Initial Schedule";
    break;
  case GCNSchedStageID::UnclusteredReschedule:
    OS << "Unclustered Reschedule";
    break;
  case GCNSchedStageID::ClusteredLowOccupancyReschedule:
    OS << "Clustered Low Occupancy Reschedule";
    break;
  case GCNSchedStageID::PreRARematerialize:
    OS << "Pre-RA Rematerialize";
    break;
  }
  return OS;
}
#endif

GCNSchedStage::GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
    : DAG(DAG), S(static_cast<GCNMaxOccupancySchedStrategy &>(*DAG.SchedImpl)),
      MF(DAG.MF), MFI(DAG.MFI), ST(DAG.ST), StageID(StageID) {}

bool GCNSchedStage::initGCNSchedStage() {
  if (!DAG.LIS)
    return false;

  LLVM_DEBUG(dbgs() << "Starting scheduling stage: " << StageID << "\n");
  return true;
}

bool UnclusteredRescheduleStage::initGCNSchedStage() {
  if (!GCNSchedStage::initGCNSchedStage())
    return false;

  if (DAG.RescheduleRegions.none())
    return false;

  SavedMutations.swap(DAG.Mutations);

  LLVM_DEBUG(dbgs() << "Retrying function scheduling without clustering.\n");
  return true;
}

bool ClusteredLowOccStage::initGCNSchedStage() {
  if (!GCNSchedStage::initGCNSchedStage())
    return false;

  // Don't bother trying to improve ILP in lower RP regions if occupancy has
  // not dropped. All regions will have already been scheduled with the ideal
  // occupancy targets.
  if (DAG.StartingOccupancy <= DAG.MinOccupancy)
    return false;

  LLVM_DEBUG(
      dbgs() << "Retrying function scheduling with lowest recorded occupancy "
             << DAG.MinOccupancy << ".\n");
  return true;
}

bool PreRARematStage::initGCNSchedStage() {
  if (!GCNSchedStage::initGCNSchedStage())
    return false;

  if (DAG.RegionsWithMinOcc.none() || DAG.Regions.size() == 1)
    return false;

  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  // Rematerialization cannot help if the function is already at the maximum
  // occupancy allowed by its workgroup and LDS constraints.
  if (ST.computeOccupancy(MF.getFunction(), MFI.getLDSSize()) ==
      DAG.MinOccupancy)
    return false;

  // FIXME: This pass will invalidate cached MBBLiveIns for regions
  // in between the defs and the regions we sink the defs to. Cached pressure
  // for regions where a def is sunk from will also be invalidated. This will
  // need to be fixed if there is another pass after this pass.

  collectRematerializableInstructions();
  if (RematerializableInsts.empty() || !sinkTriviallyRematInsts(ST, TII))
    return false;

  LLVM_DEBUG(
      dbgs() << "Retrying function scheduling with improved occupancy of "
             << DAG.MinOccupancy << " from rematerializing\n");
  return true;
}

void GCNSchedStage::finalizeGCNSchedStage() {
  DAG.finishBlock();
  LLVM_DEBUG(dbgs() << "Ending scheduling stage: " << StageID << "\n");
}

void UnclusteredRescheduleStage::finalizeGCNSchedStage() {
  SavedMutations.swap(DAG.Mutations);

  GCNSchedStage::finalizeGCNSchedStage();
}

bool GCNSchedStage::initGCNRegion() {
  // Check whether this new region is also a new block.
  if (DAG.RegionBegin->getParent() != CurrentMBB)
    setupNewBlock();

  unsigned NumRegionInstrs = std::distance(DAG.begin(), DAG.end());
  DAG.enterRegion(CurrentMBB, DAG.begin(), DAG.end(), NumRegionInstrs);

  // Skip empty scheduling regions (0 or 1 schedulable instructions).
  if (DAG.begin() == DAG.end() || DAG.begin() == std::prev(DAG.end()))
    return false;

  LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
  LLVM_DEBUG(dbgs() << MF.getName() << ":" << printMBBReference(*CurrentMBB)
                    << " " << CurrentMBB->getName()
                    << "\n  From: " << *DAG.begin() << "    To: ";
             if (DAG.RegionEnd != CurrentMBB->end()) dbgs() << *DAG.RegionEnd;
             else dbgs() << "End";
             dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');

  // Save the original instruction order before scheduling for possible revert.
  Unsched.clear();
  Unsched.reserve(DAG.NumRegionInstrs);
  for (auto &I : DAG)
    Unsched.push_back(&I);

  PressureBefore = DAG.Pressure[RegionIdx];

  LLVM_DEBUG(
      dbgs() << "Pressure before scheduling:\nRegion live-ins:";
      GCNRPTracker::printLiveRegs(dbgs(), DAG.LiveIns[RegionIdx], DAG.MRI);
      dbgs() << "Region live-in pressure:  ";
      llvm::getRegPressure(DAG.MRI, DAG.LiveIns[RegionIdx]).print(dbgs());
      dbgs() << "Region register pressure: "; PressureBefore.print(dbgs()));

  // Set HasClusteredNodes to true for late stages where we have already
  // collected it, so that pickNode() does not scan SDeps when it is not
  // needed.
  S.HasClusteredNodes = StageID > GCNSchedStageID::InitialSchedule;
  S.HasExcessPressure = false;

  return true;
}

bool UnclusteredRescheduleStage::initGCNRegion() {
  if (!DAG.RescheduleRegions[RegionIdx])
    return false;

  return GCNSchedStage::initGCNRegion();
}

bool ClusteredLowOccStage::initGCNRegion() {
  // We may need to reschedule this region if it doesn't have clusters, so it
  // wasn't rescheduled in the last stage, or if we found it was testing
  // critical register pressure limits in the unclustered reschedule stage. The
  // latter is because we may not have been able to raise the min occupancy in
  // the previous stage, so the region may be overly constrained even if it was
  // already rescheduled.
  if (!DAG.RegionsWithClusters[RegionIdx] && !DAG.RegionsWithHighRP[RegionIdx])
    return false;

  return GCNSchedStage::initGCNRegion();
}

bool PreRARematStage::initGCNRegion() {
  if (!DAG.RescheduleRegions[RegionIdx])
    return false;

  return GCNSchedStage::initGCNRegion();
}

void GCNSchedStage::setupNewBlock() {
  if (CurrentMBB)
    DAG.finishBlock();

  CurrentMBB = DAG.RegionBegin->getParent();
  DAG.startBlock(CurrentMBB);
  // Get the real RP for the region if it hasn't been calculated before. After
  // the initial schedule stage, real RP will be collected after scheduling.
  if (StageID == GCNSchedStageID::InitialSchedule)
    DAG.computeBlockPressure(RegionIdx, CurrentMBB);
}

void GCNSchedStage::finalizeGCNRegion() {
  DAG.Regions[RegionIdx] = std::make_pair(DAG.RegionBegin, DAG.RegionEnd);
  DAG.RescheduleRegions[RegionIdx] = false;
  if (S.HasExcessPressure)
    DAG.RegionsWithHighRP[RegionIdx] = true;

  // Revert scheduling if we have dropped occupancy or there is some other
  // reason that the original schedule is better.
  checkScheduling();

  DAG.exitRegion();
  RegionIdx++;
}

void InitialScheduleStage::finalizeGCNRegion() {
  // Record which regions have clustered nodes for the next unclustered
  // reschedule stage.
  assert(nextStage(StageID) == GCNSchedStageID::UnclusteredReschedule);
  if (S.HasClusteredNodes)
    DAG.RegionsWithClusters[RegionIdx] = true;

  GCNSchedStage::finalizeGCNRegion();
}

void GCNSchedStage::checkScheduling() {
  // Check the results of scheduling.
  PressureAfter = DAG.getRealRegPressure(RegionIdx);
  LLVM_DEBUG(dbgs() << "Pressure after scheduling: ";
             PressureAfter.print(dbgs()));

  if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit &&
      PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) <= S.VGPRCriticalLimit) {
    DAG.Pressure[RegionIdx] = PressureAfter;
    DAG.RegionsWithMinOcc[RegionIdx] =
        PressureAfter.getOccupancy(ST) == DAG.MinOccupancy;

    // Early out if we have achieved the occupancy target.
    LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n");
    return;
  }

  unsigned WavesAfter =
      std::min(S.getTargetOccupancy(), PressureAfter.getOccupancy(ST));
  unsigned WavesBefore =
      std::min(S.getTargetOccupancy(), PressureBefore.getOccupancy(ST));
  LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore
                    << ", after " << WavesAfter << ".\n");

  // We may not be able to keep the current target occupancy because of the
  // just-scheduled region. We might still be able to revert scheduling if the
  // occupancy before was higher, or if the current schedule has register
  // pressure higher than the excess limits, which could lead to more spilling.
  unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);

  // Allow memory bound functions to drop to 4 waves if not limited by an
  // attribute.
  if (WavesAfter < WavesBefore && WavesAfter < DAG.MinOccupancy &&
      WavesAfter >= MFI.getMinAllowedOccupancy()) {
    LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "
                      << MFI.getMinAllowedOccupancy() << " waves\n");
    NewOccupancy = WavesAfter;
  }

  if (NewOccupancy < DAG.MinOccupancy) {
    DAG.MinOccupancy = NewOccupancy;
    MFI.limitOccupancy(DAG.MinOccupancy);
    DAG.RegionsWithMinOcc.reset();
    LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
                      << DAG.MinOccupancy << ".\n");
  }

  unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
  unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
  if (PressureAfter.getVGPRNum(false) > MaxVGPRs ||
      PressureAfter.getAGPRNum() > MaxVGPRs ||
      PressureAfter.getSGPRNum() > MaxSGPRs) {
    DAG.RescheduleRegions[RegionIdx] = true;
    DAG.RegionsWithHighRP[RegionIdx] = true;
  }

  // Revert if this region's schedule would cause a drop in occupancy or
  // spilling.
  if (shouldRevertScheduling(WavesAfter)) {
    revertScheduling();
  } else {
    DAG.Pressure[RegionIdx] = PressureAfter;
    DAG.RegionsWithMinOcc[RegionIdx] =
        PressureAfter.getOccupancy(ST) == DAG.MinOccupancy;
  }
}

bool GCNSchedStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (WavesAfter < DAG.MinOccupancy)
    return true;

  return false;
}

bool InitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
    return true;

  if (mayCauseSpilling(WavesAfter))
    return true;

  assert(nextStage(StageID) == GCNSchedStageID::UnclusteredReschedule);
  // Don't reschedule the region in the next stage if it doesn't have clusters.
  if (!DAG.RegionsWithClusters[RegionIdx])
    DAG.RescheduleRegions[RegionIdx] = false;

  return false;
}

bool UnclusteredRescheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
    return true;

  // If RP is not reduced in the unclustered reschedule stage, revert to the
  // old schedule.
  if (!PressureAfter.less(ST, PressureBefore)) {
    LLVM_DEBUG(dbgs() << "Unclustered reschedule did not help.\n");
    return true;
  }

  return false;
}

bool ClusteredLowOccStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
    return true;

  if (mayCauseSpilling(WavesAfter))
    return true;

  return false;
}

bool PreRARematStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
    return true;

  if (mayCauseSpilling(WavesAfter))
    return true;

  return false;
}

bool GCNSchedStage::mayCauseSpilling(unsigned WavesAfter) {
  if (WavesAfter <= MFI.getMinWavesPerEU() &&
      !PressureAfter.less(ST, PressureBefore) &&
      DAG.RescheduleRegions[RegionIdx]) {
    LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
    return true;
  }

  return false;
}

void GCNSchedStage::revertScheduling() {
  DAG.RegionsWithMinOcc[RegionIdx] =
      PressureBefore.getOccupancy(ST) == DAG.MinOccupancy;
  LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
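  // Mark the region for rescheduling unless the next stage is the unclustered
  // reschedule and this region has no clustered nodes, in which case another
  // unclustered attempt could not produce a different schedule.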
  DAG.RescheduleRegions[RegionIdx] =
      DAG.RegionsWithClusters[RegionIdx] ||
      (nextStage(StageID)) != GCNSchedStageID::UnclusteredReschedule;
  DAG.RegionEnd = DAG.RegionBegin;
  int SkippedDebugInstr = 0;
  for (MachineInstr *MI : Unsched) {
    if (MI->isDebugInstr()) {
      ++SkippedDebugInstr;
      continue;
    }

    if (MI->getIterator() != DAG.RegionEnd) {
      DAG.BB->remove(MI);
      DAG.BB->insert(DAG.RegionEnd, MI);
      if (!MI->isDebugInstr())
        DAG.LIS->handleMove(*MI, true);
    }

    // Reset read-undef flags and update them later.
    for (auto &Op : MI->operands())
      if (Op.isReg() && Op.isDef())
        Op.setIsUndef(false);
    RegisterOperands RegOpers;
    RegOpers.collect(*MI, *DAG.TRI, DAG.MRI, DAG.ShouldTrackLaneMasks, false);
    if (!MI->isDebugInstr()) {
      if (DAG.ShouldTrackLaneMasks) {
        // Adjust liveness and add missing dead+read-undef flags.
        SlotIndex SlotIdx = DAG.LIS->getInstructionIndex(*MI).getRegSlot();
        RegOpers.adjustLaneLiveness(*DAG.LIS, DAG.MRI, SlotIdx, MI);
      } else {
        // Adjust for missing dead-def flags.
        RegOpers.detectDeadDefs(*MI, *DAG.LIS);
      }
    }
    DAG.RegionEnd = MI->getIterator();
    ++DAG.RegionEnd;
    LLVM_DEBUG(dbgs() << "Scheduling " << *MI);
  }

  // After reverting the schedule, debug instrs will now be at the end of the
  // block and RegionEnd will point to the first debug instr. Increment
  // RegionEnd past debug instrs to the actual end of the scheduling region.
  while (SkippedDebugInstr-- > 0)
    ++DAG.RegionEnd;

  // If the Unsched.front() instruction is a debug instruction, this will
  // actually shrink the region since we moved all debug instructions to the
  // end of the block. Find the first instruction that is not a debug
  // instruction.
  DAG.RegionBegin = Unsched.front()->getIterator();
  if (DAG.RegionBegin->isDebugInstr()) {
    for (MachineInstr *MI : Unsched) {
      if (MI->isDebugInstr())
        continue;
      DAG.RegionBegin = MI->getIterator();
      break;
    }
  }

  // Then move the debug instructions back into their correct place and set
  // RegionBegin and RegionEnd if needed.
  DAG.placeDebugValues();

  DAG.Regions[RegionIdx] = std::make_pair(DAG.RegionBegin, DAG.RegionEnd);
}

void PreRARematStage::collectRematerializableInstructions() {
  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(DAG.TRI);
  for (unsigned I = 0, E = DAG.MRI.getNumVirtRegs(); I != E; ++I) {
    Register Reg = Register::index2VirtReg(I);
    if (!DAG.LIS->hasInterval(Reg))
      continue;

    // TODO: Handle AGPR and SGPR rematerialization
    if (!SRI->isVGPRClass(DAG.MRI.getRegClass(Reg)) ||
        !DAG.MRI.hasOneDef(Reg) || !DAG.MRI.hasOneNonDBGUse(Reg))
      continue;

    MachineOperand *Op = DAG.MRI.getOneDef(Reg);
    MachineInstr *Def = Op->getParent();
    if (Op->getSubReg() != 0 || !isTriviallyReMaterializable(*Def))
      continue;

    MachineInstr *UseI = &*DAG.MRI.use_instr_nodbg_begin(Reg);
    if (Def->getParent() == UseI->getParent())
      continue;

    // We are only collecting defs that are defined in another block and are
    // live-through or used inside regions at MinOccupancy. This means that the
    // register must be in the live-in set for the region.
    bool AddedToRematList = false;
    for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
      auto It = DAG.LiveIns[I].find(Reg);
      if (It != DAG.LiveIns[I].end() && !It->second.none()) {
        if (DAG.RegionsWithMinOcc[I]) {
          RematerializableInsts[I][Def] = UseI;
          AddedToRematList = true;
        }

        // Collect regions with the rematerializable reg as a live-in to avoid
        // searching later when updating RP.
        RematDefToLiveInRegions[Def].push_back(I);
      }
    }
    if (!AddedToRematList)
      RematDefToLiveInRegions.erase(Def);
  }
}
92581ad6265SDimitry Andric 
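// A condensed view of the filter above (hypothetical helper, kept out of the
// build via #if 0): a register is a remat candidate only if it is a VGPR
// with exactly one def and one non-debug use in a different block, the def
// writes the whole register, and the defining instruction is trivially
// rematerializable (checked separately by isTriviallyReMaterializable below).
#if 0
static bool isRematCandidate(Register Reg, const MachineRegisterInfo &MRI,
                             const SIRegisterInfo &SRI, LiveIntervals &LIS) {
  if (!LIS.hasInterval(Reg) || !SRI.isVGPRClass(MRI.getRegClass(Reg)) ||
      !MRI.hasOneDef(Reg) || !MRI.hasOneNonDBGUse(Reg))
    return false;
  const MachineOperand *Op = MRI.getOneDef(Reg);
  const MachineInstr *Def = Op->getParent();
  const MachineInstr *UseI = &*MRI.use_instr_nodbg_begin(Reg);
  // Subregister defs and same-block uses are not handled.
  return Op->getSubReg() == 0 && Def->getParent() != UseI->getParent();
}
#endif
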
926*972a253aSDimitry Andric bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
92781ad6265SDimitry Andric                                               const TargetInstrInfo *TII) {
92881ad6265SDimitry Andric   // Temporary copies of cached variables we will be modifying and replacing if
92981ad6265SDimitry Andric   // sinking succeeds.
93081ad6265SDimitry Andric   SmallVector<
93181ad6265SDimitry Andric       std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>, 32>
93281ad6265SDimitry Andric       NewRegions;
93381ad6265SDimitry Andric   DenseMap<unsigned, GCNRPTracker::LiveRegSet> NewLiveIns;
93481ad6265SDimitry Andric   DenseMap<unsigned, GCNRegPressure> NewPressure;
93581ad6265SDimitry Andric   BitVector NewRescheduleRegions;
936*972a253aSDimitry Andric   LiveIntervals *LIS = DAG.LIS;
93781ad6265SDimitry Andric 
938*972a253aSDimitry Andric   NewRegions.resize(DAG.Regions.size());
939*972a253aSDimitry Andric   NewRescheduleRegions.resize(DAG.Regions.size());
94081ad6265SDimitry Andric 
94181ad6265SDimitry Andric   // Collect only regions that have a rematerializable def as a live-in.
94281ad6265SDimitry Andric   SmallSet<unsigned, 16> ImpactedRegions;
94381ad6265SDimitry Andric   for (const auto &It : RematDefToLiveInRegions)
94481ad6265SDimitry Andric     ImpactedRegions.insert(It.second.begin(), It.second.end());
94581ad6265SDimitry Andric 
94681ad6265SDimitry Andric   // Make copies of the register pressure and live-ins caches that will be
94781ad6265SDimitry Andric   // updated as we rematerialize.
94881ad6265SDimitry Andric   for (auto Idx : ImpactedRegions) {
949*972a253aSDimitry Andric     NewPressure[Idx] = DAG.Pressure[Idx];
950*972a253aSDimitry Andric     NewLiveIns[Idx] = DAG.LiveIns[Idx];
95181ad6265SDimitry Andric   }
952*972a253aSDimitry Andric   NewRegions = DAG.Regions;
95381ad6265SDimitry Andric   NewRescheduleRegions.reset();
95481ad6265SDimitry Andric 
95581ad6265SDimitry Andric   DenseMap<MachineInstr *, MachineInstr *> InsertedMIToOldDef;
95681ad6265SDimitry Andric   bool Improved = false;
95781ad6265SDimitry Andric   for (auto I : ImpactedRegions) {
958*972a253aSDimitry Andric     if (!DAG.RegionsWithMinOcc[I])
95981ad6265SDimitry Andric       continue;
96081ad6265SDimitry Andric 
96181ad6265SDimitry Andric     Improved = false;
96281ad6265SDimitry Andric     int VGPRUsage = NewPressure[I].getVGPRNum(ST.hasGFX90AInsts());
96381ad6265SDimitry Andric     int SGPRUsage = NewPressure[I].getSGPRNum();
96481ad6265SDimitry Andric 
96581ad6265SDimitry Andric     // TODO: Handle occupancy drop due to AGPR and SGPR.
96681ad6265SDimitry Andric     // Check if the occupancy drop is caused by VGPR usage rather than SGPR.
967*972a253aSDimitry Andric     if (ST.getOccupancyWithNumSGPRs(SGPRUsage) == DAG.MinOccupancy)
96881ad6265SDimitry Andric       break;
96981ad6265SDimitry Andric 
97081ad6265SDimitry Andric     // The occupancy of this region could have been improved by a previous
97181ad6265SDimitry Andric     // iteration's sinking of defs.
972*972a253aSDimitry Andric     if (NewPressure[I].getOccupancy(ST) > DAG.MinOccupancy) {
97381ad6265SDimitry Andric       NewRescheduleRegions[I] = true;
97481ad6265SDimitry Andric       Improved = true;
97581ad6265SDimitry Andric       continue;
97681ad6265SDimitry Andric     }
97781ad6265SDimitry Andric 
97881ad6265SDimitry Andric     // First check if we have enough trivially rematerializable instructions to
97981ad6265SDimitry Andric     // improve occupancy. Optimistically assume that every instruction we are
98081ad6265SDimitry Andric     // able to sink decreases RP.
98181ad6265SDimitry Andric     int TotalSinkableRegs = 0;
98281ad6265SDimitry Andric     for (const auto &It : RematerializableInsts[I]) {
98381ad6265SDimitry Andric       MachineInstr *Def = It.first;
98481ad6265SDimitry Andric       Register DefReg = Def->getOperand(0).getReg();
98581ad6265SDimitry Andric       TotalSinkableRegs +=
98681ad6265SDimitry Andric           SIRegisterInfo::getNumCoveredRegs(NewLiveIns[I][DefReg]);
98781ad6265SDimitry Andric     }
98881ad6265SDimitry Andric     int VGPRsAfterSink = VGPRUsage - TotalSinkableRegs;
98981ad6265SDimitry Andric     unsigned OptimisticOccupancy = ST.getOccupancyWithNumVGPRs(VGPRsAfterSink);
99081ad6265SDimitry Andric     // If we cannot improve occupancy even in the most optimistic scenario,
99181ad6265SDimitry Andric     // do not attempt to sink any instructions.
992*972a253aSDimitry Andric     if (OptimisticOccupancy <= DAG.MinOccupancy)
99381ad6265SDimitry Andric       break;
99481ad6265SDimitry Andric 
99581ad6265SDimitry Andric     unsigned ImproveOccupancy = 0;
99681ad6265SDimitry Andric     SmallVector<MachineInstr *, 4> SinkedDefs;
99781ad6265SDimitry Andric     for (auto &It : RematerializableInsts[I]) {
99881ad6265SDimitry Andric       MachineInstr *Def = It.first;
99981ad6265SDimitry Andric       MachineBasicBlock::iterator InsertPos =
100081ad6265SDimitry Andric           MachineBasicBlock::iterator(It.second);
100181ad6265SDimitry Andric       Register Reg = Def->getOperand(0).getReg();
100281ad6265SDimitry Andric       // Rematerialize MI to its use block. Since we are only rematerializing
100381ad6265SDimitry Andric       // instructions that do not have any virtual reg uses, we do not need to
100481ad6265SDimitry Andric       // call LiveRangeEdit::allUsesAvailableAt() and
100581ad6265SDimitry Andric       // LiveRangeEdit::canRematerializeAt().
100681ad6265SDimitry Andric       TII->reMaterialize(*InsertPos->getParent(), InsertPos, Reg,
1007*972a253aSDimitry Andric                          Def->getOperand(0).getSubReg(), *Def, *DAG.TRI);
100881ad6265SDimitry Andric       MachineInstr *NewMI = &*(--InsertPos);
100981ad6265SDimitry Andric       LIS->InsertMachineInstrInMaps(*NewMI);
101081ad6265SDimitry Andric       LIS->removeInterval(Reg);
101181ad6265SDimitry Andric       LIS->createAndComputeVirtRegInterval(Reg);
101281ad6265SDimitry Andric       InsertedMIToOldDef[NewMI] = Def;
101381ad6265SDimitry Andric 
101481ad6265SDimitry Andric       // Update region boundaries in the scheduling region we sank from, since
101581ad6265SDimitry Andric       // we may sink an instruction at the beginning or end of its region.
1016*972a253aSDimitry Andric       DAG.updateRegionBoundaries(NewRegions, Def, /*NewMI =*/nullptr,
101781ad6265SDimitry Andric                                  /*Removing =*/true);
101881ad6265SDimitry Andric 
101981ad6265SDimitry Andric       // Update region boundaries in the region we sank to.
1020*972a253aSDimitry Andric       DAG.updateRegionBoundaries(NewRegions, InsertPos, NewMI);
102181ad6265SDimitry Andric 
102281ad6265SDimitry Andric       LaneBitmask PrevMask = NewLiveIns[I][Reg];
102381ad6265SDimitry Andric       // FIXME: Also update cached pressure for where the def was sunk from.
102481ad6265SDimitry Andric       // Update RP for all regions that have this reg as a live-in and remove
102581ad6265SDimitry Andric       // the reg from all regions as a live-in.
102681ad6265SDimitry Andric       for (auto Idx : RematDefToLiveInRegions[Def]) {
102781ad6265SDimitry Andric         NewLiveIns[Idx].erase(Reg);
1028*972a253aSDimitry Andric         if (InsertPos->getParent() != DAG.Regions[Idx].first->getParent()) {
102981ad6265SDimitry Andric           // Def is live-through and not used in this block.
1030*972a253aSDimitry Andric           NewPressure[Idx].inc(Reg, PrevMask, LaneBitmask::getNone(), DAG.MRI);
103181ad6265SDimitry Andric         } else {
103281ad6265SDimitry Andric           // Def is used and rematerialized into this block.
103381ad6265SDimitry Andric           GCNDownwardRPTracker RPT(*LIS);
103481ad6265SDimitry Andric           auto *NonDbgMI = &*skipDebugInstructionsForward(
103581ad6265SDimitry Andric               NewRegions[Idx].first, NewRegions[Idx].second);
103681ad6265SDimitry Andric           RPT.reset(*NonDbgMI, &NewLiveIns[Idx]);
103781ad6265SDimitry Andric           RPT.advance(NewRegions[Idx].second);
103881ad6265SDimitry Andric           NewPressure[Idx] = RPT.moveMaxPressure();
103981ad6265SDimitry Andric         }
104081ad6265SDimitry Andric       }
104181ad6265SDimitry Andric 
104281ad6265SDimitry Andric       SinkedDefs.push_back(Def);
104381ad6265SDimitry Andric       ImproveOccupancy = NewPressure[I].getOccupancy(ST);
1044*972a253aSDimitry Andric       if (ImproveOccupancy > DAG.MinOccupancy)
104581ad6265SDimitry Andric         break;
104681ad6265SDimitry Andric     }
104781ad6265SDimitry Andric 
104881ad6265SDimitry Andric     // Remove defs we just sank from all regions' lists of sinkable defs.
104981ad6265SDimitry Andric     for (auto &Def : SinkedDefs)
105081ad6265SDimitry Andric       for (auto TrackedIdx : RematDefToLiveInRegions[Def])
105181ad6265SDimitry Andric         RematerializableInsts[TrackedIdx].erase(Def);
105281ad6265SDimitry Andric 
1053*972a253aSDimitry Andric     if (ImproveOccupancy <= DAG.MinOccupancy)
105481ad6265SDimitry Andric       break;
105581ad6265SDimitry Andric 
105681ad6265SDimitry Andric     NewRescheduleRegions[I] = true;
105781ad6265SDimitry Andric     Improved = true;
105881ad6265SDimitry Andric   }
105981ad6265SDimitry Andric 
106081ad6265SDimitry Andric   if (!Improved) {
106181ad6265SDimitry Andric     // Occupancy was not improved for all regions that were at MinOccupancy.
106281ad6265SDimitry Andric     // Undo sinking and remove newly rematerialized instructions.
106381ad6265SDimitry Andric     for (auto &Entry : InsertedMIToOldDef) {
106481ad6265SDimitry Andric       MachineInstr *MI = Entry.first;
106581ad6265SDimitry Andric       MachineInstr *OldMI = Entry.second;
106681ad6265SDimitry Andric       Register Reg = MI->getOperand(0).getReg();
106781ad6265SDimitry Andric       LIS->RemoveMachineInstrFromMaps(*MI);
106881ad6265SDimitry Andric       MI->eraseFromParent();
106981ad6265SDimitry Andric       OldMI->clearRegisterDeads(Reg);
107081ad6265SDimitry Andric       LIS->removeInterval(Reg);
107181ad6265SDimitry Andric       LIS->createAndComputeVirtRegInterval(Reg);
107281ad6265SDimitry Andric     }
107381ad6265SDimitry Andric     return false;
107481ad6265SDimitry Andric   }
107581ad6265SDimitry Andric 
107681ad6265SDimitry Andric   // Occupancy was improved for all regions.
107781ad6265SDimitry Andric   for (auto &Entry : InsertedMIToOldDef) {
107881ad6265SDimitry Andric     MachineInstr *MI = Entry.first;
107981ad6265SDimitry Andric     MachineInstr *OldMI = Entry.second;
108081ad6265SDimitry Andric 
108181ad6265SDimitry Andric     // Remove OldMI from BBLiveInMap since we are sinking it from its MBB.
1082*972a253aSDimitry Andric     DAG.BBLiveInMap.erase(OldMI);
108381ad6265SDimitry Andric 
108481ad6265SDimitry Andric     // Remove OldMI and update LIS.
108581ad6265SDimitry Andric     Register Reg = MI->getOperand(0).getReg();
108681ad6265SDimitry Andric     LIS->RemoveMachineInstrFromMaps(*OldMI);
108781ad6265SDimitry Andric     OldMI->eraseFromParent();
108881ad6265SDimitry Andric     LIS->removeInterval(Reg);
108981ad6265SDimitry Andric     LIS->createAndComputeVirtRegInterval(Reg);
109081ad6265SDimitry Andric   }
109181ad6265SDimitry Andric 
109281ad6265SDimitry Andric   // Update live-ins, register pressure, and regions caches.
109381ad6265SDimitry Andric   for (auto Idx : ImpactedRegions) {
1094*972a253aSDimitry Andric     DAG.LiveIns[Idx] = NewLiveIns[Idx];
1095*972a253aSDimitry Andric     DAG.Pressure[Idx] = NewPressure[Idx];
1096*972a253aSDimitry Andric     DAG.MBBLiveIns.erase(DAG.Regions[Idx].first->getParent());
109781ad6265SDimitry Andric   }
1098*972a253aSDimitry Andric   DAG.Regions = NewRegions;
1099*972a253aSDimitry Andric   DAG.RescheduleRegions = NewRescheduleRegions;
110081ad6265SDimitry Andric 
110181ad6265SDimitry Andric   SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
1102*972a253aSDimitry Andric   MFI.increaseOccupancy(MF, ++DAG.MinOccupancy);
110381ad6265SDimitry Andric 
110481ad6265SDimitry Andric   return true;
110581ad6265SDimitry Andric }
110681ad6265SDimitry Andric 
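// The optimistic feasibility check above, in isolation (hypothetical helper,
// kept out of the build via #if 0): every covered register of every sinkable
// def is assumed to vanish from the region's VGPR pressure, and sinking is
// attempted only if even this best case beats the current minimum occupancy.
#if 0
static bool sinkingMayHelp(int VGPRUsage, int TotalSinkableRegs,
                           unsigned MinOccupancy, const GCNSubtarget &ST) {
  // Best case: all sinkable registers stop being live in the region.
  int VGPRsAfterSink = VGPRUsage - TotalSinkableRegs;
  unsigned OptimisticOccupancy = ST.getOccupancyWithNumVGPRs(VGPRsAfterSink);
  return OptimisticOccupancy > MinOccupancy;
}
#endif
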
110781ad6265SDimitry Andric // Copied from MachineLICM
1108*972a253aSDimitry Andric bool PreRARematStage::isTriviallyReMaterializable(const MachineInstr &MI) {
1109*972a253aSDimitry Andric   if (!DAG.TII->isTriviallyReMaterializable(MI))
111081ad6265SDimitry Andric     return false;
111181ad6265SDimitry Andric 
111281ad6265SDimitry Andric   for (const MachineOperand &MO : MI.operands())
111381ad6265SDimitry Andric     if (MO.isReg() && MO.isUse() && MO.getReg().isVirtual())
111481ad6265SDimitry Andric       return false;
111581ad6265SDimitry Andric 
111681ad6265SDimitry Andric   return true;
111781ad6265SDimitry Andric }
111881ad6265SDimitry Andric 
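// For example (illustrative): a V_MOV_B32 of an immediate has no virtual
// register uses, so it passes the operand scan in isTriviallyReMaterializable
// above; an instruction reading a virtual register is rejected there even if
// TII considers its opcode trivially rematerializable.
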
111981ad6265SDimitry Andric // When removing, we have to check both the beginning and the end of the region.
112081ad6265SDimitry Andric // When inserting, we only have to check whether we are inserting NewMI in front
112181ad6265SDimitry Andric // of a scheduling region; we never need to check the end, since we only ever
112281ad6265SDimitry Andric // insert before an already existing MI.
112381ad6265SDimitry Andric void GCNScheduleDAGMILive::updateRegionBoundaries(
112481ad6265SDimitry Andric     SmallVectorImpl<std::pair<MachineBasicBlock::iterator,
112581ad6265SDimitry Andric                               MachineBasicBlock::iterator>> &RegionBoundaries,
112681ad6265SDimitry Andric     MachineBasicBlock::iterator MI, MachineInstr *NewMI, bool Removing) {
112781ad6265SDimitry Andric   unsigned I = 0, E = RegionBoundaries.size();
112881ad6265SDimitry Andric   // Search for the first region of the block where MI is located.
112981ad6265SDimitry Andric   while (I != E && MI->getParent() != RegionBoundaries[I].first->getParent())
113081ad6265SDimitry Andric     ++I;
113181ad6265SDimitry Andric 
113281ad6265SDimitry Andric   for (; I != E; ++I) {
113381ad6265SDimitry Andric     if (MI->getParent() != RegionBoundaries[I].first->getParent())
113481ad6265SDimitry Andric       return;
113581ad6265SDimitry Andric 
113681ad6265SDimitry Andric     if (Removing && MI == RegionBoundaries[I].first &&
113781ad6265SDimitry Andric         MI == RegionBoundaries[I].second) {
113881ad6265SDimitry Andric       // MI is in a region of size 1; after removing it, the region will have
113981ad6265SDimitry Andric       // size 0. Set RegionBegin and RegionEnd to the end-of-block iterator.
114081ad6265SDimitry Andric       RegionBoundaries[I] =
114181ad6265SDimitry Andric           std::make_pair(MI->getParent()->end(), MI->getParent()->end());
114281ad6265SDimitry Andric       return;
114381ad6265SDimitry Andric     }
114481ad6265SDimitry Andric     if (MI == RegionBoundaries[I].first) {
114581ad6265SDimitry Andric       if (Removing)
114681ad6265SDimitry Andric         RegionBoundaries[I] =
114781ad6265SDimitry Andric             std::make_pair(std::next(MI), RegionBoundaries[I].second);
114881ad6265SDimitry Andric       else
114981ad6265SDimitry Andric         // Inserted NewMI in front of the region; set new RegionBegin to NewMI.
115081ad6265SDimitry Andric         RegionBoundaries[I] = std::make_pair(MachineBasicBlock::iterator(NewMI),
115181ad6265SDimitry Andric                                              RegionBoundaries[I].second);
115281ad6265SDimitry Andric       return;
115381ad6265SDimitry Andric     }
115481ad6265SDimitry Andric     if (Removing && MI == RegionBoundaries[I].second) {
115581ad6265SDimitry Andric       RegionBoundaries[I] =
115681ad6265SDimitry Andric           std::make_pair(RegionBoundaries[I].first, std::prev(MI));
115781ad6265SDimitry Andric       return;
115881ad6265SDimitry Andric     }
115981ad6265SDimitry Andric   }
116081ad6265SDimitry Andric }
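
// Illustrative summary (mirrors the cases above): if MI is both boundaries of
// a size-1 region, removal collapses the region to the end-of-block iterator;
// if MI is RegionBegin, removal advances it to std::next(MI), while inserting
// NewMI just before it makes NewMI the new RegionBegin; if MI is the end
// boundary, removal pulls it back to std::prev(MI).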
1161