xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp (revision 0b57cec536236d46e3dba9bd041533462f33dbb7)
1*0b57cec5SDimitry Andric //===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===//
2*0b57cec5SDimitry Andric //
3*0b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*0b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*0b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*0b57cec5SDimitry Andric //
7*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
8*0b57cec5SDimitry Andric //
9*0b57cec5SDimitry Andric /// \file
10*0b57cec5SDimitry Andric /// This contains a MachineSchedStrategy implementation for maximizing wave
11*0b57cec5SDimitry Andric /// occupancy on GCN hardware.
12*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
13*0b57cec5SDimitry Andric 
14*0b57cec5SDimitry Andric #include "GCNSchedStrategy.h"
15*0b57cec5SDimitry Andric #include "AMDGPUSubtarget.h"
16*0b57cec5SDimitry Andric #include "SIInstrInfo.h"
17*0b57cec5SDimitry Andric #include "SIMachineFunctionInfo.h"
18*0b57cec5SDimitry Andric #include "SIRegisterInfo.h"
19*0b57cec5SDimitry Andric #include "llvm/CodeGen/RegisterClassInfo.h"
20*0b57cec5SDimitry Andric #include "llvm/Support/MathExtras.h"
21*0b57cec5SDimitry Andric 
22*0b57cec5SDimitry Andric #define DEBUG_TYPE "machine-scheduler"
23*0b57cec5SDimitry Andric 
24*0b57cec5SDimitry Andric using namespace llvm;
25*0b57cec5SDimitry Andric 
26*0b57cec5SDimitry Andric GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
27*0b57cec5SDimitry Andric     const MachineSchedContext *C) :
28*0b57cec5SDimitry Andric     GenericScheduler(C), TargetOccupancy(0), MF(nullptr) { }
29*0b57cec5SDimitry Andric 
30*0b57cec5SDimitry Andric void GCNMaxOccupancySchedStrategy::initialize(ScheduleDAGMI *DAG) {
31*0b57cec5SDimitry Andric   GenericScheduler::initialize(DAG);
32*0b57cec5SDimitry Andric 
33*0b57cec5SDimitry Andric   const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
34*0b57cec5SDimitry Andric 
35*0b57cec5SDimitry Andric   MF = &DAG->MF;
36*0b57cec5SDimitry Andric 
37*0b57cec5SDimitry Andric   const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
38*0b57cec5SDimitry Andric 
39*0b57cec5SDimitry Andric   // FIXME: This is also necessary, because some passes that run after
40*0b57cec5SDimitry Andric   // scheduling and before regalloc increase register pressure.
41*0b57cec5SDimitry Andric   const int ErrorMargin = 3;
42*0b57cec5SDimitry Andric 
43*0b57cec5SDimitry Andric   SGPRExcessLimit = Context->RegClassInfo
44*0b57cec5SDimitry Andric     ->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass) - ErrorMargin;
45*0b57cec5SDimitry Andric   VGPRExcessLimit = Context->RegClassInfo
46*0b57cec5SDimitry Andric     ->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass) - ErrorMargin;
47*0b57cec5SDimitry Andric   if (TargetOccupancy) {
48*0b57cec5SDimitry Andric     SGPRCriticalLimit = ST.getMaxNumSGPRs(TargetOccupancy, true);
49*0b57cec5SDimitry Andric     VGPRCriticalLimit = ST.getMaxNumVGPRs(TargetOccupancy);
50*0b57cec5SDimitry Andric   } else {
51*0b57cec5SDimitry Andric     SGPRCriticalLimit = SRI->getRegPressureSetLimit(DAG->MF,
52*0b57cec5SDimitry Andric                                                     SRI->getSGPRPressureSet());
53*0b57cec5SDimitry Andric     VGPRCriticalLimit = SRI->getRegPressureSetLimit(DAG->MF,
54*0b57cec5SDimitry Andric                                                     SRI->getVGPRPressureSet());
55*0b57cec5SDimitry Andric   }
56*0b57cec5SDimitry Andric 
57*0b57cec5SDimitry Andric   SGPRCriticalLimit -= ErrorMargin;
58*0b57cec5SDimitry Andric   VGPRCriticalLimit -= ErrorMargin;
59*0b57cec5SDimitry Andric }
60*0b57cec5SDimitry Andric 
61*0b57cec5SDimitry Andric void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
62*0b57cec5SDimitry Andric                                      bool AtTop, const RegPressureTracker &RPTracker,
63*0b57cec5SDimitry Andric                                      const SIRegisterInfo *SRI,
64*0b57cec5SDimitry Andric                                      unsigned SGPRPressure,
65*0b57cec5SDimitry Andric                                      unsigned VGPRPressure) {
66*0b57cec5SDimitry Andric 
67*0b57cec5SDimitry Andric   Cand.SU = SU;
68*0b57cec5SDimitry Andric   Cand.AtTop = AtTop;
69*0b57cec5SDimitry Andric 
70*0b57cec5SDimitry Andric   // getDownwardPressure() and getUpwardPressure() make temporary changes to
71*0b57cec5SDimitry Andric   // the tracker, so we need to pass those function a non-const copy.
72*0b57cec5SDimitry Andric   RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
73*0b57cec5SDimitry Andric 
74*0b57cec5SDimitry Andric   std::vector<unsigned> Pressure;
75*0b57cec5SDimitry Andric   std::vector<unsigned> MaxPressure;
76*0b57cec5SDimitry Andric 
77*0b57cec5SDimitry Andric   if (AtTop)
78*0b57cec5SDimitry Andric     TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
79*0b57cec5SDimitry Andric   else {
80*0b57cec5SDimitry Andric     // FIXME: I think for bottom up scheduling, the register pressure is cached
81*0b57cec5SDimitry Andric     // and can be retrieved by DAG->getPressureDif(SU).
82*0b57cec5SDimitry Andric     TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
83*0b57cec5SDimitry Andric   }
84*0b57cec5SDimitry Andric 
85*0b57cec5SDimitry Andric   unsigned NewSGPRPressure = Pressure[SRI->getSGPRPressureSet()];
86*0b57cec5SDimitry Andric   unsigned NewVGPRPressure = Pressure[SRI->getVGPRPressureSet()];
87*0b57cec5SDimitry Andric 
88*0b57cec5SDimitry Andric   // If two instructions increase the pressure of different register sets
89*0b57cec5SDimitry Andric   // by the same amount, the generic scheduler will prefer to schedule the
90*0b57cec5SDimitry Andric   // instruction that increases the set with the least amount of registers,
91*0b57cec5SDimitry Andric   // which in our case would be SGPRs.  This is rarely what we want, so
92*0b57cec5SDimitry Andric   // when we report excess/critical register pressure, we do it either
93*0b57cec5SDimitry Andric   // only for VGPRs or only for SGPRs.
94*0b57cec5SDimitry Andric 
95*0b57cec5SDimitry Andric   // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
96*0b57cec5SDimitry Andric   const unsigned MaxVGPRPressureInc = 16;
97*0b57cec5SDimitry Andric   bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
98*0b57cec5SDimitry Andric   bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;
99*0b57cec5SDimitry Andric 
100*0b57cec5SDimitry Andric 
101*0b57cec5SDimitry Andric   // FIXME: We have to enter REG-EXCESS before we reach the actual threshold
102*0b57cec5SDimitry Andric   // to increase the likelihood we don't go over the limits.  We should improve
103*0b57cec5SDimitry Andric   // the analysis to look through dependencies to find the path with the least
104*0b57cec5SDimitry Andric   // register pressure.
105*0b57cec5SDimitry Andric 
106*0b57cec5SDimitry Andric   // We only need to update the RPDelata for instructions that increase
107*0b57cec5SDimitry Andric   // register pressure.  Instructions that decrease or keep reg pressure
108*0b57cec5SDimitry Andric   // the same will be marked as RegExcess in tryCandidate() when they
109*0b57cec5SDimitry Andric   // are compared with instructions that increase the register pressure.
110*0b57cec5SDimitry Andric   if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
111*0b57cec5SDimitry Andric     Cand.RPDelta.Excess = PressureChange(SRI->getVGPRPressureSet());
112*0b57cec5SDimitry Andric     Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
113*0b57cec5SDimitry Andric   }
114*0b57cec5SDimitry Andric 
115*0b57cec5SDimitry Andric   if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
116*0b57cec5SDimitry Andric     Cand.RPDelta.Excess = PressureChange(SRI->getSGPRPressureSet());
117*0b57cec5SDimitry Andric     Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
118*0b57cec5SDimitry Andric   }
119*0b57cec5SDimitry Andric 
120*0b57cec5SDimitry Andric   // Register pressure is considered 'CRITICAL' if it is approaching a value
121*0b57cec5SDimitry Andric   // that would reduce the wave occupancy for the execution unit.  When
122*0b57cec5SDimitry Andric   // register pressure is 'CRITICAL', increading SGPR and VGPR pressure both
123*0b57cec5SDimitry Andric   // has the same cost, so we don't need to prefer one over the other.
124*0b57cec5SDimitry Andric 
125*0b57cec5SDimitry Andric   int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
126*0b57cec5SDimitry Andric   int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;
127*0b57cec5SDimitry Andric 
128*0b57cec5SDimitry Andric   if (SGPRDelta >= 0 || VGPRDelta >= 0) {
129*0b57cec5SDimitry Andric     if (SGPRDelta > VGPRDelta) {
130*0b57cec5SDimitry Andric       Cand.RPDelta.CriticalMax = PressureChange(SRI->getSGPRPressureSet());
131*0b57cec5SDimitry Andric       Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
132*0b57cec5SDimitry Andric     } else {
133*0b57cec5SDimitry Andric       Cand.RPDelta.CriticalMax = PressureChange(SRI->getVGPRPressureSet());
134*0b57cec5SDimitry Andric       Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
135*0b57cec5SDimitry Andric     }
136*0b57cec5SDimitry Andric   }
137*0b57cec5SDimitry Andric }
138*0b57cec5SDimitry Andric 
139*0b57cec5SDimitry Andric // This function is mostly cut and pasted from
140*0b57cec5SDimitry Andric // GenericScheduler::pickNodeFromQueue()
141*0b57cec5SDimitry Andric void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
142*0b57cec5SDimitry Andric                                          const CandPolicy &ZonePolicy,
143*0b57cec5SDimitry Andric                                          const RegPressureTracker &RPTracker,
144*0b57cec5SDimitry Andric                                          SchedCandidate &Cand) {
145*0b57cec5SDimitry Andric   const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
146*0b57cec5SDimitry Andric   ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
147*0b57cec5SDimitry Andric   unsigned SGPRPressure = Pressure[SRI->getSGPRPressureSet()];
148*0b57cec5SDimitry Andric   unsigned VGPRPressure = Pressure[SRI->getVGPRPressureSet()];
149*0b57cec5SDimitry Andric   ReadyQueue &Q = Zone.Available;
150*0b57cec5SDimitry Andric   for (SUnit *SU : Q) {
151*0b57cec5SDimitry Andric 
152*0b57cec5SDimitry Andric     SchedCandidate TryCand(ZonePolicy);
153*0b57cec5SDimitry Andric     initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI,
154*0b57cec5SDimitry Andric                   SGPRPressure, VGPRPressure);
155*0b57cec5SDimitry Andric     // Pass SchedBoundary only when comparing nodes from the same boundary.
156*0b57cec5SDimitry Andric     SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
157*0b57cec5SDimitry Andric     GenericScheduler::tryCandidate(Cand, TryCand, ZoneArg);
158*0b57cec5SDimitry Andric     if (TryCand.Reason != NoCand) {
159*0b57cec5SDimitry Andric       // Initialize resource delta if needed in case future heuristics query it.
160*0b57cec5SDimitry Andric       if (TryCand.ResDelta == SchedResourceDelta())
161*0b57cec5SDimitry Andric         TryCand.initResourceDelta(Zone.DAG, SchedModel);
162*0b57cec5SDimitry Andric       Cand.setBest(TryCand);
163*0b57cec5SDimitry Andric     }
164*0b57cec5SDimitry Andric   }
165*0b57cec5SDimitry Andric }
166*0b57cec5SDimitry Andric 
167*0b57cec5SDimitry Andric // This function is mostly cut and pasted from
168*0b57cec5SDimitry Andric // GenericScheduler::pickNodeBidirectional()
169*0b57cec5SDimitry Andric SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
170*0b57cec5SDimitry Andric   // Schedule as far as possible in the direction of no choice. This is most
171*0b57cec5SDimitry Andric   // efficient, but also provides the best heuristics for CriticalPSets.
172*0b57cec5SDimitry Andric   if (SUnit *SU = Bot.pickOnlyChoice()) {
173*0b57cec5SDimitry Andric     IsTopNode = false;
174*0b57cec5SDimitry Andric     return SU;
175*0b57cec5SDimitry Andric   }
176*0b57cec5SDimitry Andric   if (SUnit *SU = Top.pickOnlyChoice()) {
177*0b57cec5SDimitry Andric     IsTopNode = true;
178*0b57cec5SDimitry Andric     return SU;
179*0b57cec5SDimitry Andric   }
180*0b57cec5SDimitry Andric   // Set the bottom-up policy based on the state of the current bottom zone and
181*0b57cec5SDimitry Andric   // the instructions outside the zone, including the top zone.
182*0b57cec5SDimitry Andric   CandPolicy BotPolicy;
183*0b57cec5SDimitry Andric   setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
184*0b57cec5SDimitry Andric   // Set the top-down policy based on the state of the current top zone and
185*0b57cec5SDimitry Andric   // the instructions outside the zone, including the bottom zone.
186*0b57cec5SDimitry Andric   CandPolicy TopPolicy;
187*0b57cec5SDimitry Andric   setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
188*0b57cec5SDimitry Andric 
189*0b57cec5SDimitry Andric   // See if BotCand is still valid (because we previously scheduled from Top).
190*0b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
191*0b57cec5SDimitry Andric   if (!BotCand.isValid() || BotCand.SU->isScheduled ||
192*0b57cec5SDimitry Andric       BotCand.Policy != BotPolicy) {
193*0b57cec5SDimitry Andric     BotCand.reset(CandPolicy());
194*0b57cec5SDimitry Andric     pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
195*0b57cec5SDimitry Andric     assert(BotCand.Reason != NoCand && "failed to find the first candidate");
196*0b57cec5SDimitry Andric   } else {
197*0b57cec5SDimitry Andric     LLVM_DEBUG(traceCandidate(BotCand));
198*0b57cec5SDimitry Andric   }
199*0b57cec5SDimitry Andric 
200*0b57cec5SDimitry Andric   // Check if the top Q has a better candidate.
201*0b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Picking from Top:\n");
202*0b57cec5SDimitry Andric   if (!TopCand.isValid() || TopCand.SU->isScheduled ||
203*0b57cec5SDimitry Andric       TopCand.Policy != TopPolicy) {
204*0b57cec5SDimitry Andric     TopCand.reset(CandPolicy());
205*0b57cec5SDimitry Andric     pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
206*0b57cec5SDimitry Andric     assert(TopCand.Reason != NoCand && "failed to find the first candidate");
207*0b57cec5SDimitry Andric   } else {
208*0b57cec5SDimitry Andric     LLVM_DEBUG(traceCandidate(TopCand));
209*0b57cec5SDimitry Andric   }
210*0b57cec5SDimitry Andric 
211*0b57cec5SDimitry Andric   // Pick best from BotCand and TopCand.
212*0b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand);
213*0b57cec5SDimitry Andric              dbgs() << "Bot Cand: "; traceCandidate(BotCand););
214*0b57cec5SDimitry Andric   SchedCandidate Cand;
215*0b57cec5SDimitry Andric   if (TopCand.Reason == BotCand.Reason) {
216*0b57cec5SDimitry Andric     Cand = BotCand;
217*0b57cec5SDimitry Andric     GenericSchedulerBase::CandReason TopReason = TopCand.Reason;
218*0b57cec5SDimitry Andric     TopCand.Reason = NoCand;
219*0b57cec5SDimitry Andric     GenericScheduler::tryCandidate(Cand, TopCand, nullptr);
220*0b57cec5SDimitry Andric     if (TopCand.Reason != NoCand) {
221*0b57cec5SDimitry Andric       Cand.setBest(TopCand);
222*0b57cec5SDimitry Andric     } else {
223*0b57cec5SDimitry Andric       TopCand.Reason = TopReason;
224*0b57cec5SDimitry Andric     }
225*0b57cec5SDimitry Andric   } else {
226*0b57cec5SDimitry Andric     if (TopCand.Reason == RegExcess && TopCand.RPDelta.Excess.getUnitInc() <= 0) {
227*0b57cec5SDimitry Andric       Cand = TopCand;
228*0b57cec5SDimitry Andric     } else if (BotCand.Reason == RegExcess && BotCand.RPDelta.Excess.getUnitInc() <= 0) {
229*0b57cec5SDimitry Andric       Cand = BotCand;
230*0b57cec5SDimitry Andric     } else if (TopCand.Reason == RegCritical && TopCand.RPDelta.CriticalMax.getUnitInc() <= 0) {
231*0b57cec5SDimitry Andric       Cand = TopCand;
232*0b57cec5SDimitry Andric     } else if (BotCand.Reason == RegCritical && BotCand.RPDelta.CriticalMax.getUnitInc() <= 0) {
233*0b57cec5SDimitry Andric       Cand = BotCand;
234*0b57cec5SDimitry Andric     } else {
235*0b57cec5SDimitry Andric       if (BotCand.Reason > TopCand.Reason) {
236*0b57cec5SDimitry Andric         Cand = TopCand;
237*0b57cec5SDimitry Andric       } else {
238*0b57cec5SDimitry Andric         Cand = BotCand;
239*0b57cec5SDimitry Andric       }
240*0b57cec5SDimitry Andric     }
241*0b57cec5SDimitry Andric   }
242*0b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Picking: "; traceCandidate(Cand););
243*0b57cec5SDimitry Andric 
244*0b57cec5SDimitry Andric   IsTopNode = Cand.AtTop;
245*0b57cec5SDimitry Andric   return Cand.SU;
246*0b57cec5SDimitry Andric }
247*0b57cec5SDimitry Andric 
248*0b57cec5SDimitry Andric // This function is mostly cut and pasted from
249*0b57cec5SDimitry Andric // GenericScheduler::pickNode()
250*0b57cec5SDimitry Andric SUnit *GCNMaxOccupancySchedStrategy::pickNode(bool &IsTopNode) {
251*0b57cec5SDimitry Andric   if (DAG->top() == DAG->bottom()) {
252*0b57cec5SDimitry Andric     assert(Top.Available.empty() && Top.Pending.empty() &&
253*0b57cec5SDimitry Andric            Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
254*0b57cec5SDimitry Andric     return nullptr;
255*0b57cec5SDimitry Andric   }
256*0b57cec5SDimitry Andric   SUnit *SU;
257*0b57cec5SDimitry Andric   do {
258*0b57cec5SDimitry Andric     if (RegionPolicy.OnlyTopDown) {
259*0b57cec5SDimitry Andric       SU = Top.pickOnlyChoice();
260*0b57cec5SDimitry Andric       if (!SU) {
261*0b57cec5SDimitry Andric         CandPolicy NoPolicy;
262*0b57cec5SDimitry Andric         TopCand.reset(NoPolicy);
263*0b57cec5SDimitry Andric         pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
264*0b57cec5SDimitry Andric         assert(TopCand.Reason != NoCand && "failed to find a candidate");
265*0b57cec5SDimitry Andric         SU = TopCand.SU;
266*0b57cec5SDimitry Andric       }
267*0b57cec5SDimitry Andric       IsTopNode = true;
268*0b57cec5SDimitry Andric     } else if (RegionPolicy.OnlyBottomUp) {
269*0b57cec5SDimitry Andric       SU = Bot.pickOnlyChoice();
270*0b57cec5SDimitry Andric       if (!SU) {
271*0b57cec5SDimitry Andric         CandPolicy NoPolicy;
272*0b57cec5SDimitry Andric         BotCand.reset(NoPolicy);
273*0b57cec5SDimitry Andric         pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
274*0b57cec5SDimitry Andric         assert(BotCand.Reason != NoCand && "failed to find a candidate");
275*0b57cec5SDimitry Andric         SU = BotCand.SU;
276*0b57cec5SDimitry Andric       }
277*0b57cec5SDimitry Andric       IsTopNode = false;
278*0b57cec5SDimitry Andric     } else {
279*0b57cec5SDimitry Andric       SU = pickNodeBidirectional(IsTopNode);
280*0b57cec5SDimitry Andric     }
281*0b57cec5SDimitry Andric   } while (SU->isScheduled);
282*0b57cec5SDimitry Andric 
283*0b57cec5SDimitry Andric   if (SU->isTopReady())
284*0b57cec5SDimitry Andric     Top.removeReady(SU);
285*0b57cec5SDimitry Andric   if (SU->isBottomReady())
286*0b57cec5SDimitry Andric     Bot.removeReady(SU);
287*0b57cec5SDimitry Andric 
288*0b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
289*0b57cec5SDimitry Andric                     << *SU->getInstr());
290*0b57cec5SDimitry Andric   return SU;
291*0b57cec5SDimitry Andric }
292*0b57cec5SDimitry Andric 
293*0b57cec5SDimitry Andric GCNScheduleDAGMILive::GCNScheduleDAGMILive(MachineSchedContext *C,
294*0b57cec5SDimitry Andric                         std::unique_ptr<MachineSchedStrategy> S) :
295*0b57cec5SDimitry Andric   ScheduleDAGMILive(C, std::move(S)),
296*0b57cec5SDimitry Andric   ST(MF.getSubtarget<GCNSubtarget>()),
297*0b57cec5SDimitry Andric   MFI(*MF.getInfo<SIMachineFunctionInfo>()),
298*0b57cec5SDimitry Andric   StartingOccupancy(MFI.getOccupancy()),
299*0b57cec5SDimitry Andric   MinOccupancy(StartingOccupancy), Stage(0), RegionIdx(0) {
300*0b57cec5SDimitry Andric 
301*0b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
302*0b57cec5SDimitry Andric }
303*0b57cec5SDimitry Andric 
304*0b57cec5SDimitry Andric void GCNScheduleDAGMILive::schedule() {
305*0b57cec5SDimitry Andric   if (Stage == 0) {
306*0b57cec5SDimitry Andric     // Just record regions at the first pass.
307*0b57cec5SDimitry Andric     Regions.push_back(std::make_pair(RegionBegin, RegionEnd));
308*0b57cec5SDimitry Andric     return;
309*0b57cec5SDimitry Andric   }
310*0b57cec5SDimitry Andric 
311*0b57cec5SDimitry Andric   std::vector<MachineInstr*> Unsched;
312*0b57cec5SDimitry Andric   Unsched.reserve(NumRegionInstrs);
313*0b57cec5SDimitry Andric   for (auto &I : *this) {
314*0b57cec5SDimitry Andric     Unsched.push_back(&I);
315*0b57cec5SDimitry Andric   }
316*0b57cec5SDimitry Andric 
317*0b57cec5SDimitry Andric   GCNRegPressure PressureBefore;
318*0b57cec5SDimitry Andric   if (LIS) {
319*0b57cec5SDimitry Andric     PressureBefore = Pressure[RegionIdx];
320*0b57cec5SDimitry Andric 
321*0b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Pressure before scheduling:\nRegion live-ins:";
322*0b57cec5SDimitry Andric                GCNRPTracker::printLiveRegs(dbgs(), LiveIns[RegionIdx], MRI);
323*0b57cec5SDimitry Andric                dbgs() << "Region live-in pressure:  ";
324*0b57cec5SDimitry Andric                llvm::getRegPressure(MRI, LiveIns[RegionIdx]).print(dbgs());
325*0b57cec5SDimitry Andric                dbgs() << "Region register pressure: ";
326*0b57cec5SDimitry Andric                PressureBefore.print(dbgs()));
327*0b57cec5SDimitry Andric   }
328*0b57cec5SDimitry Andric 
329*0b57cec5SDimitry Andric   ScheduleDAGMILive::schedule();
330*0b57cec5SDimitry Andric   Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd);
331*0b57cec5SDimitry Andric 
332*0b57cec5SDimitry Andric   if (!LIS)
333*0b57cec5SDimitry Andric     return;
334*0b57cec5SDimitry Andric 
335*0b57cec5SDimitry Andric   // Check the results of scheduling.
336*0b57cec5SDimitry Andric   GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl;
337*0b57cec5SDimitry Andric   auto PressureAfter = getRealRegPressure();
338*0b57cec5SDimitry Andric 
339*0b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Pressure after scheduling: ";
340*0b57cec5SDimitry Andric              PressureAfter.print(dbgs()));
341*0b57cec5SDimitry Andric 
342*0b57cec5SDimitry Andric   if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit &&
343*0b57cec5SDimitry Andric       PressureAfter.getVGPRNum() <= S.VGPRCriticalLimit) {
344*0b57cec5SDimitry Andric     Pressure[RegionIdx] = PressureAfter;
345*0b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n");
346*0b57cec5SDimitry Andric     return;
347*0b57cec5SDimitry Andric   }
348*0b57cec5SDimitry Andric   unsigned Occ = MFI.getOccupancy();
349*0b57cec5SDimitry Andric   unsigned WavesAfter = std::min(Occ, PressureAfter.getOccupancy(ST));
350*0b57cec5SDimitry Andric   unsigned WavesBefore = std::min(Occ, PressureBefore.getOccupancy(ST));
351*0b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore
352*0b57cec5SDimitry Andric                     << ", after " << WavesAfter << ".\n");
353*0b57cec5SDimitry Andric 
354*0b57cec5SDimitry Andric   // We could not keep current target occupancy because of the just scheduled
355*0b57cec5SDimitry Andric   // region. Record new occupancy for next scheduling cycle.
356*0b57cec5SDimitry Andric   unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);
357*0b57cec5SDimitry Andric   // Allow memory bound functions to drop to 4 waves if not limited by an
358*0b57cec5SDimitry Andric   // attribute.
359*0b57cec5SDimitry Andric   if (WavesAfter < WavesBefore && WavesAfter < MinOccupancy &&
360*0b57cec5SDimitry Andric       WavesAfter >= MFI.getMinAllowedOccupancy()) {
361*0b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "
362*0b57cec5SDimitry Andric                       << MFI.getMinAllowedOccupancy() << " waves\n");
363*0b57cec5SDimitry Andric     NewOccupancy = WavesAfter;
364*0b57cec5SDimitry Andric   }
365*0b57cec5SDimitry Andric   if (NewOccupancy < MinOccupancy) {
366*0b57cec5SDimitry Andric     MinOccupancy = NewOccupancy;
367*0b57cec5SDimitry Andric     MFI.limitOccupancy(MinOccupancy);
368*0b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
369*0b57cec5SDimitry Andric                       << MinOccupancy << ".\n");
370*0b57cec5SDimitry Andric   }
371*0b57cec5SDimitry Andric 
372*0b57cec5SDimitry Andric   if (WavesAfter >= MinOccupancy) {
373*0b57cec5SDimitry Andric     Pressure[RegionIdx] = PressureAfter;
374*0b57cec5SDimitry Andric     return;
375*0b57cec5SDimitry Andric   }
376*0b57cec5SDimitry Andric 
377*0b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
378*0b57cec5SDimitry Andric   RegionEnd = RegionBegin;
379*0b57cec5SDimitry Andric   for (MachineInstr *MI : Unsched) {
380*0b57cec5SDimitry Andric     if (MI->isDebugInstr())
381*0b57cec5SDimitry Andric       continue;
382*0b57cec5SDimitry Andric 
383*0b57cec5SDimitry Andric     if (MI->getIterator() != RegionEnd) {
384*0b57cec5SDimitry Andric       BB->remove(MI);
385*0b57cec5SDimitry Andric       BB->insert(RegionEnd, MI);
386*0b57cec5SDimitry Andric       if (!MI->isDebugInstr())
387*0b57cec5SDimitry Andric         LIS->handleMove(*MI, true);
388*0b57cec5SDimitry Andric     }
389*0b57cec5SDimitry Andric     // Reset read-undef flags and update them later.
390*0b57cec5SDimitry Andric     for (auto &Op : MI->operands())
391*0b57cec5SDimitry Andric       if (Op.isReg() && Op.isDef())
392*0b57cec5SDimitry Andric         Op.setIsUndef(false);
393*0b57cec5SDimitry Andric     RegisterOperands RegOpers;
394*0b57cec5SDimitry Andric     RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
395*0b57cec5SDimitry Andric     if (!MI->isDebugInstr()) {
396*0b57cec5SDimitry Andric       if (ShouldTrackLaneMasks) {
397*0b57cec5SDimitry Andric         // Adjust liveness and add missing dead+read-undef flags.
398*0b57cec5SDimitry Andric         SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
399*0b57cec5SDimitry Andric         RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
400*0b57cec5SDimitry Andric       } else {
401*0b57cec5SDimitry Andric         // Adjust for missing dead-def flags.
402*0b57cec5SDimitry Andric         RegOpers.detectDeadDefs(*MI, *LIS);
403*0b57cec5SDimitry Andric       }
404*0b57cec5SDimitry Andric     }
405*0b57cec5SDimitry Andric     RegionEnd = MI->getIterator();
406*0b57cec5SDimitry Andric     ++RegionEnd;
407*0b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Scheduling " << *MI);
408*0b57cec5SDimitry Andric   }
409*0b57cec5SDimitry Andric   RegionBegin = Unsched.front()->getIterator();
410*0b57cec5SDimitry Andric   Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd);
411*0b57cec5SDimitry Andric 
412*0b57cec5SDimitry Andric   placeDebugValues();
413*0b57cec5SDimitry Andric }
414*0b57cec5SDimitry Andric 
415*0b57cec5SDimitry Andric GCNRegPressure GCNScheduleDAGMILive::getRealRegPressure() const {
416*0b57cec5SDimitry Andric   GCNDownwardRPTracker RPTracker(*LIS);
417*0b57cec5SDimitry Andric   RPTracker.advance(begin(), end(), &LiveIns[RegionIdx]);
418*0b57cec5SDimitry Andric   return RPTracker.moveMaxPressure();
419*0b57cec5SDimitry Andric }
420*0b57cec5SDimitry Andric 
421*0b57cec5SDimitry Andric void GCNScheduleDAGMILive::computeBlockPressure(const MachineBasicBlock *MBB) {
422*0b57cec5SDimitry Andric   GCNDownwardRPTracker RPTracker(*LIS);
423*0b57cec5SDimitry Andric 
424*0b57cec5SDimitry Andric   // If the block has the only successor then live-ins of that successor are
425*0b57cec5SDimitry Andric   // live-outs of the current block. We can reuse calculated live set if the
426*0b57cec5SDimitry Andric   // successor will be sent to scheduling past current block.
427*0b57cec5SDimitry Andric   const MachineBasicBlock *OnlySucc = nullptr;
428*0b57cec5SDimitry Andric   if (MBB->succ_size() == 1 && !(*MBB->succ_begin())->empty()) {
429*0b57cec5SDimitry Andric     SlotIndexes *Ind = LIS->getSlotIndexes();
430*0b57cec5SDimitry Andric     if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(*MBB->succ_begin()))
431*0b57cec5SDimitry Andric       OnlySucc = *MBB->succ_begin();
432*0b57cec5SDimitry Andric   }
433*0b57cec5SDimitry Andric 
434*0b57cec5SDimitry Andric   // Scheduler sends regions from the end of the block upwards.
435*0b57cec5SDimitry Andric   size_t CurRegion = RegionIdx;
436*0b57cec5SDimitry Andric   for (size_t E = Regions.size(); CurRegion != E; ++CurRegion)
437*0b57cec5SDimitry Andric     if (Regions[CurRegion].first->getParent() != MBB)
438*0b57cec5SDimitry Andric       break;
439*0b57cec5SDimitry Andric   --CurRegion;
440*0b57cec5SDimitry Andric 
441*0b57cec5SDimitry Andric   auto I = MBB->begin();
442*0b57cec5SDimitry Andric   auto LiveInIt = MBBLiveIns.find(MBB);
443*0b57cec5SDimitry Andric   if (LiveInIt != MBBLiveIns.end()) {
444*0b57cec5SDimitry Andric     auto LiveIn = std::move(LiveInIt->second);
445*0b57cec5SDimitry Andric     RPTracker.reset(*MBB->begin(), &LiveIn);
446*0b57cec5SDimitry Andric     MBBLiveIns.erase(LiveInIt);
447*0b57cec5SDimitry Andric   } else {
448*0b57cec5SDimitry Andric     auto &Rgn = Regions[CurRegion];
449*0b57cec5SDimitry Andric     I = Rgn.first;
450*0b57cec5SDimitry Andric     auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
451*0b57cec5SDimitry Andric     auto LRS = BBLiveInMap.lookup(NonDbgMI);
452*0b57cec5SDimitry Andric     assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS));
453*0b57cec5SDimitry Andric     RPTracker.reset(*I, &LRS);
454*0b57cec5SDimitry Andric   }
455*0b57cec5SDimitry Andric 
456*0b57cec5SDimitry Andric   for ( ; ; ) {
457*0b57cec5SDimitry Andric     I = RPTracker.getNext();
458*0b57cec5SDimitry Andric 
459*0b57cec5SDimitry Andric     if (Regions[CurRegion].first == I) {
460*0b57cec5SDimitry Andric       LiveIns[CurRegion] = RPTracker.getLiveRegs();
461*0b57cec5SDimitry Andric       RPTracker.clearMaxPressure();
462*0b57cec5SDimitry Andric     }
463*0b57cec5SDimitry Andric 
464*0b57cec5SDimitry Andric     if (Regions[CurRegion].second == I) {
465*0b57cec5SDimitry Andric       Pressure[CurRegion] = RPTracker.moveMaxPressure();
466*0b57cec5SDimitry Andric       if (CurRegion-- == RegionIdx)
467*0b57cec5SDimitry Andric         break;
468*0b57cec5SDimitry Andric     }
469*0b57cec5SDimitry Andric     RPTracker.advanceToNext();
470*0b57cec5SDimitry Andric     RPTracker.advanceBeforeNext();
471*0b57cec5SDimitry Andric   }
472*0b57cec5SDimitry Andric 
473*0b57cec5SDimitry Andric   if (OnlySucc) {
474*0b57cec5SDimitry Andric     if (I != MBB->end()) {
475*0b57cec5SDimitry Andric       RPTracker.advanceToNext();
476*0b57cec5SDimitry Andric       RPTracker.advance(MBB->end());
477*0b57cec5SDimitry Andric     }
478*0b57cec5SDimitry Andric     RPTracker.reset(*OnlySucc->begin(), &RPTracker.getLiveRegs());
479*0b57cec5SDimitry Andric     RPTracker.advanceBeforeNext();
480*0b57cec5SDimitry Andric     MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs();
481*0b57cec5SDimitry Andric   }
482*0b57cec5SDimitry Andric }
483*0b57cec5SDimitry Andric 
484*0b57cec5SDimitry Andric DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
485*0b57cec5SDimitry Andric GCNScheduleDAGMILive::getBBLiveInMap() const {
486*0b57cec5SDimitry Andric   assert(!Regions.empty());
487*0b57cec5SDimitry Andric   std::vector<MachineInstr *> BBStarters;
488*0b57cec5SDimitry Andric   BBStarters.reserve(Regions.size());
489*0b57cec5SDimitry Andric   auto I = Regions.rbegin(), E = Regions.rend();
490*0b57cec5SDimitry Andric   auto *BB = I->first->getParent();
491*0b57cec5SDimitry Andric   do {
492*0b57cec5SDimitry Andric     auto *MI = &*skipDebugInstructionsForward(I->first, I->second);
493*0b57cec5SDimitry Andric     BBStarters.push_back(MI);
494*0b57cec5SDimitry Andric     do {
495*0b57cec5SDimitry Andric       ++I;
496*0b57cec5SDimitry Andric     } while (I != E && I->first->getParent() == BB);
497*0b57cec5SDimitry Andric   } while (I != E);
498*0b57cec5SDimitry Andric   return getLiveRegMap(BBStarters, false /*After*/, *LIS);
499*0b57cec5SDimitry Andric }
500*0b57cec5SDimitry Andric 
501*0b57cec5SDimitry Andric void GCNScheduleDAGMILive::finalizeSchedule() {
502*0b57cec5SDimitry Andric   GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl;
503*0b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");
504*0b57cec5SDimitry Andric 
505*0b57cec5SDimitry Andric   LiveIns.resize(Regions.size());
506*0b57cec5SDimitry Andric   Pressure.resize(Regions.size());
507*0b57cec5SDimitry Andric 
508*0b57cec5SDimitry Andric   if (!Regions.empty())
509*0b57cec5SDimitry Andric     BBLiveInMap = getBBLiveInMap();
510*0b57cec5SDimitry Andric 
511*0b57cec5SDimitry Andric   do {
512*0b57cec5SDimitry Andric     Stage++;
513*0b57cec5SDimitry Andric     RegionIdx = 0;
514*0b57cec5SDimitry Andric     MachineBasicBlock *MBB = nullptr;
515*0b57cec5SDimitry Andric 
516*0b57cec5SDimitry Andric     if (Stage > 1) {
517*0b57cec5SDimitry Andric       // Retry function scheduling if we found resulting occupancy and it is
518*0b57cec5SDimitry Andric       // lower than used for first pass scheduling. This will give more freedom
519*0b57cec5SDimitry Andric       // to schedule low register pressure blocks.
520*0b57cec5SDimitry Andric       // Code is partially copied from MachineSchedulerBase::scheduleRegions().
521*0b57cec5SDimitry Andric 
522*0b57cec5SDimitry Andric       if (!LIS || StartingOccupancy <= MinOccupancy)
523*0b57cec5SDimitry Andric         break;
524*0b57cec5SDimitry Andric 
525*0b57cec5SDimitry Andric       LLVM_DEBUG(
526*0b57cec5SDimitry Andric           dbgs()
527*0b57cec5SDimitry Andric           << "Retrying function scheduling with lowest recorded occupancy "
528*0b57cec5SDimitry Andric           << MinOccupancy << ".\n");
529*0b57cec5SDimitry Andric 
530*0b57cec5SDimitry Andric       S.setTargetOccupancy(MinOccupancy);
531*0b57cec5SDimitry Andric     }
532*0b57cec5SDimitry Andric 
533*0b57cec5SDimitry Andric     for (auto Region : Regions) {
534*0b57cec5SDimitry Andric       RegionBegin = Region.first;
535*0b57cec5SDimitry Andric       RegionEnd = Region.second;
536*0b57cec5SDimitry Andric 
537*0b57cec5SDimitry Andric       if (RegionBegin->getParent() != MBB) {
538*0b57cec5SDimitry Andric         if (MBB) finishBlock();
539*0b57cec5SDimitry Andric         MBB = RegionBegin->getParent();
540*0b57cec5SDimitry Andric         startBlock(MBB);
541*0b57cec5SDimitry Andric         if (Stage == 1)
542*0b57cec5SDimitry Andric           computeBlockPressure(MBB);
543*0b57cec5SDimitry Andric       }
544*0b57cec5SDimitry Andric 
545*0b57cec5SDimitry Andric       unsigned NumRegionInstrs = std::distance(begin(), end());
546*0b57cec5SDimitry Andric       enterRegion(MBB, begin(), end(), NumRegionInstrs);
547*0b57cec5SDimitry Andric 
548*0b57cec5SDimitry Andric       // Skip empty scheduling regions (0 or 1 schedulable instructions).
549*0b57cec5SDimitry Andric       if (begin() == end() || begin() == std::prev(end())) {
550*0b57cec5SDimitry Andric         exitRegion();
551*0b57cec5SDimitry Andric         continue;
552*0b57cec5SDimitry Andric       }
553*0b57cec5SDimitry Andric 
554*0b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
555*0b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << MF.getName() << ":" << printMBBReference(*MBB) << " "
556*0b57cec5SDimitry Andric                         << MBB->getName() << "\n  From: " << *begin()
557*0b57cec5SDimitry Andric                         << "    To: ";
558*0b57cec5SDimitry Andric                  if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
559*0b57cec5SDimitry Andric                  else dbgs() << "End";
560*0b57cec5SDimitry Andric                  dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
561*0b57cec5SDimitry Andric 
562*0b57cec5SDimitry Andric       schedule();
563*0b57cec5SDimitry Andric 
564*0b57cec5SDimitry Andric       exitRegion();
565*0b57cec5SDimitry Andric       ++RegionIdx;
566*0b57cec5SDimitry Andric     }
567*0b57cec5SDimitry Andric     finishBlock();
568*0b57cec5SDimitry Andric 
569*0b57cec5SDimitry Andric   } while (Stage < 2);
570*0b57cec5SDimitry Andric }
571