//===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This contains a MachineSchedStrategy implementation for maximizing wave
/// occupancy on GCN hardware.
///
/// This pass will apply multiple scheduling stages to the same function.
/// Regions are first recorded in GCNScheduleDAGMILive::schedule. The actual
/// entry point for the scheduling of those regions is
/// GCNScheduleDAGMILive::runSchedStages.
///
/// Generally, the reason for having multiple scheduling stages is to account
/// for the kernel-wide effect of register usage on occupancy. Usually, only a
/// few scheduling regions will have register pressure high enough to limit
/// occupancy for the kernel, so constraints can be relaxed to improve ILP in
/// other regions.
///
//===----------------------------------------------------------------------===//

#include "GCNSchedStrategy.h"
#include "AMDGPUIGroupLP.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"

#define DEBUG_TYPE "machine-scheduler"

using namespace llvm;

static cl::opt<bool> DisableUnclusterHighRP(
    "amdgpu-disable-unclustered-high-rp-reschedule", cl::Hidden,
    cl::desc("Disable unclustered high register pressure "
             "reduction scheduling stage."),
    cl::init(false));

static cl::opt<bool> DisableClusteredLowOccupancy(
    "amdgpu-disable-clustered-low-occupancy-reschedule", cl::Hidden,
    cl::desc("Disable clustered low occupancy "
             "rescheduling for ILP scheduling stage."),
    cl::init(false));

static cl::opt<unsigned> ScheduleMetricBias(
    "amdgpu-schedule-metric-bias", cl::Hidden,
    cl::desc(
        "Sets the bias which adds weight to occupancy vs latency. Set it to "
        "100 to chase the occupancy only."),
    cl::init(10));

static cl::opt<bool>
    RelaxedOcc("amdgpu-schedule-relaxed-occupancy", cl::Hidden,
               cl::desc("Relax occupancy targets for kernels which are memory "
                        "bound (amdgpu-membound-threshold), or "
                        "Wave Limited (amdgpu-limit-wave-threshold)."),
               cl::init(false));

const unsigned ScheduleMetrics::ScaleFactor = 100;

GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C)
    : GenericScheduler(C), TargetOccupancy(0), MF(nullptr),
      HasHighPressure(false) {}

void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
  GenericScheduler::initialize(DAG);

  MF = &DAG->MF;

  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();

  SGPRExcessLimit =
      Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
  VGPRExcessLimit =
      Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);

  SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
  // Set the initial TargetOccupancy to the maximum occupancy that we can
  // achieve for this function. This effectively sets a lower bound on the
  // 'Critical' register limits in the scheduler.
  // Allow for lower occupancy targets if the kernel is wave limited or memory
  // bound, and using the relaxed occupancy feature.
  TargetOccupancy =
      RelaxedOcc ? MFI.getMinAllowedOccupancy() : MFI.getOccupancy();
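  // Note: the boolean argument to getMaxNumSGPRs selects the addressable SGPR
  // limit for this wave count (see GCNSubtarget for the exact semantics).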
  SGPRCriticalLimit =
      std::min(ST.getMaxNumSGPRs(TargetOccupancy, true), SGPRExcessLimit);

  if (!KnownExcessRP) {
    VGPRCriticalLimit =
        std::min(ST.getMaxNumVGPRs(TargetOccupancy), VGPRExcessLimit);
  } else {
    // This is similar to ST.getMaxNumVGPRs(TargetOccupancy) result except
    // returns a reasonably small number for targets with lots of VGPRs, such
    // as GFX10 and GFX11.
    LLVM_DEBUG(dbgs() << "Region is known to spill, use alternative "
                         "VGPRCriticalLimit calculation method.\n");

    unsigned Granule = AMDGPU::IsaInfo::getVGPRAllocGranule(&ST);
    unsigned Addressable = AMDGPU::IsaInfo::getAddressableNumVGPRs(&ST);
    unsigned VGPRBudget = alignDown(Addressable / TargetOccupancy, Granule);
    VGPRBudget = std::max(VGPRBudget, Granule);
    VGPRCriticalLimit = std::min(VGPRBudget, VGPRExcessLimit);
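    // Illustrative example: with 256 addressable VGPRs, a granule of 4, and a
    // target occupancy of 8, the budget is alignDown(256 / 8, 4) = 32. The
    // real values are target dependent.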
  }

  // Subtract error margin and bias from register limits and avoid unsigned
  // underflow.
  SGPRCriticalLimit -= std::min(SGPRLimitBias + ErrorMargin, SGPRCriticalLimit);
  VGPRCriticalLimit -= std::min(VGPRLimitBias + ErrorMargin, VGPRCriticalLimit);
  SGPRExcessLimit -= std::min(SGPRLimitBias + ErrorMargin, SGPRExcessLimit);
  VGPRExcessLimit -= std::min(VGPRLimitBias + ErrorMargin, VGPRExcessLimit);

  LLVM_DEBUG(dbgs() << "VGPRCriticalLimit = " << VGPRCriticalLimit
                    << ", VGPRExcessLimit = " << VGPRExcessLimit
                    << ", SGPRCriticalLimit = " << SGPRCriticalLimit
                    << ", SGPRExcessLimit = " << SGPRExcessLimit << "\n\n");
}

/// Checks whether \p SU can use the cached DAG pressure diffs to compute the
/// current register pressure.
///
/// This works for the common case, but it has a few exceptions that have been
/// observed through trial and error:
///   - Explicit physical register operands
///   - Subregister definitions
///
/// In both of those cases, PressureDiff doesn't represent the actual pressure,
/// and querying LiveIntervals through the RegPressureTracker is needed to get
/// an accurate value.
///
/// We should eventually only use PressureDiff for maximum performance, but this
/// already allows 80% of SUs to take the fast path without changing scheduling
/// at all. Further changes would either change scheduling, or require a lot
/// more logic to recover an accurate pressure estimate from the PressureDiffs.
static bool canUsePressureDiffs(const SUnit &SU) {
  if (!SU.isInstr())
    return false;

  // Cannot use pressure diffs for subregister defs or with physregs, it's
  // imprecise in both cases.
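  // For example, a subregister def such as "%0.sub0 = ..." or an explicit
  // physical register operand makes this return false, forcing the caller onto
  // the precise RegPressureTracker path.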
  for (const auto &Op : SU.getInstr()->operands()) {
    if (!Op.isReg() || Op.isImplicit())
      continue;
    if (Op.getReg().isPhysical() ||
        (Op.isDef() && Op.getSubReg() != AMDGPU::NoSubRegister))
      return false;
  }
  return true;
}

static void getRegisterPressures(bool AtTop,
                                 const RegPressureTracker &RPTracker, SUnit *SU,
                                 std::vector<unsigned> &Pressure,
                                 std::vector<unsigned> &MaxPressure) {
  // getDownwardPressure() and getUpwardPressure() make temporary changes to
  // the tracker, so we need to pass those functions a non-const copy.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);
  if (AtTop)
    TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
  else
    TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
}

void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
                                     bool AtTop,
                                     const RegPressureTracker &RPTracker,
                                     const SIRegisterInfo *SRI,
                                     unsigned SGPRPressure,
                                     unsigned VGPRPressure, bool IsBottomUp) {
  Cand.SU = SU;
  Cand.AtTop = AtTop;

  if (!DAG->isTrackingPressure())
    return;

  Pressure.clear();
  MaxPressure.clear();

  // We try to use the cached PressureDiffs in the ScheduleDAG whenever
  // possible over querying the RegPressureTracker.
  //
  // RegPressureTracker will make a lot of LIS queries which are very
  // expensive; it is considered a slow path in this context.
  //
  // PressureDiffs are precomputed and cached, and getPressureDiff is just a
  // trivial lookup into an array. It is pretty much free.
  //
  // In EXPENSIVE_CHECKS, we always query RPTracker to verify the results of
  // PressureDiffs.
  if (AtTop || !canUsePressureDiffs(*SU)) {
    getRegisterPressures(AtTop, RPTracker, SU, Pressure, MaxPressure);
  } else {
    // Reserve enough slots so the SReg_32 and VGPR_32 pressure set indices
    // (both below 4) are valid.
    Pressure.resize(4, 0);
    Pressure[AMDGPU::RegisterPressureSets::SReg_32] = SGPRPressure;
    Pressure[AMDGPU::RegisterPressureSets::VGPR_32] = VGPRPressure;

    for (const auto &Diff : DAG->getPressureDiff(SU)) {
      if (!Diff.isValid())
        continue;
      // PressureDiffs is always bottom-up so if we're working top-down we need
      // to invert its sign.
      Pressure[Diff.getPSet()] +=
          (IsBottomUp ? Diff.getUnitInc() : -Diff.getUnitInc());
    }

#ifdef EXPENSIVE_CHECKS
    std::vector<unsigned> CheckPressure, CheckMaxPressure;
    getRegisterPressures(AtTop, RPTracker, SU, CheckPressure, CheckMaxPressure);
    if (Pressure[AMDGPU::RegisterPressureSets::SReg_32] !=
            CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] ||
        Pressure[AMDGPU::RegisterPressureSets::VGPR_32] !=
            CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32]) {
      errs() << "Register Pressure is inaccurate when calculated through "
                "PressureDiff\n"
             << "SGPR got " << Pressure[AMDGPU::RegisterPressureSets::SReg_32]
             << ", expected "
             << CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] << "\n"
             << "VGPR got " << Pressure[AMDGPU::RegisterPressureSets::VGPR_32]
             << ", expected "
             << CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32] << "\n";
      report_fatal_error("inaccurate register pressure calculation");
    }
#endif
  }

  unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
  unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];

  // If two instructions increase the pressure of different register sets
  // by the same amount, the generic scheduler will prefer to schedule the
  // instruction that increases the set with the least amount of registers,
  // which in our case would be SGPRs. This is rarely what we want, so
  // when we report excess/critical register pressure, we do it either
  // only for VGPRs or only for SGPRs.

  // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
  const unsigned MaxVGPRPressureInc = 16;
  bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
  bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;

  // FIXME: We have to enter REG-EXCESS before we reach the actual threshold
  // to increase the likelihood we don't go over the limits. We should improve
  // the analysis to look through dependencies to find the path with the least
  // register pressure.

  // We only need to update the RPDelta for instructions that increase register
  // pressure. Instructions that decrease or keep reg pressure the same will be
  // marked as RegExcess in tryCandidate() when they are compared with
  // instructions that increase the register pressure.
  if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
    HasHighPressure = true;
    Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
    Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
  }

  if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
    HasHighPressure = true;
    Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
    Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
  }

  // Register pressure is considered 'CRITICAL' if it is approaching a value
  // that would reduce the wave occupancy for the execution unit. When
  // register pressure is 'CRITICAL', increasing SGPR and VGPR pressure both
  // have the same cost, so we don't need to prefer one over the other.

  int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
  int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;

  if (SGPRDelta >= 0 || VGPRDelta >= 0) {
    HasHighPressure = true;
    if (SGPRDelta > VGPRDelta) {
      Cand.RPDelta.CriticalMax =
          PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
      Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
    } else {
      Cand.RPDelta.CriticalMax =
          PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
      Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
    }
  }
}

// This function is mostly cut and pasted from
// GenericScheduler::pickNodeFromQueue()
void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
                                         const CandPolicy &ZonePolicy,
                                         const RegPressureTracker &RPTracker,
                                         SchedCandidate &Cand,
                                         bool IsBottomUp) {
  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
  ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
  unsigned SGPRPressure = 0;
  unsigned VGPRPressure = 0;
  if (DAG->isTrackingPressure()) {
    SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
    VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
  }
  ReadyQueue &Q = Zone.Available;
  for (SUnit *SU : Q) {

    SchedCandidate TryCand(ZonePolicy);
    initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, SGPRPressure,
                  VGPRPressure, IsBottomUp);
    // Pass SchedBoundary only when comparing nodes from the same boundary.
    SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
    tryCandidate(Cand, TryCand, ZoneArg);
    if (TryCand.Reason != NoCand) {
      // Initialize resource delta if needed in case future heuristics query it.
      if (TryCand.ResDelta == SchedResourceDelta())
        TryCand.initResourceDelta(Zone.DAG, SchedModel);
      Cand.setBest(TryCand);
      LLVM_DEBUG(traceCandidate(Cand));
    }
  }
}

// This function is mostly cut and pasted from
// GenericScheduler::pickNodeBidirectional()
SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
  if (SUnit *SU = Bot.pickOnlyChoice()) {
    IsTopNode = false;
    return SU;
  }
  if (SUnit *SU = Top.pickOnlyChoice()) {
    IsTopNode = true;
    return SU;
  }
  // Set the bottom-up policy based on the state of the current bottom zone and
  // the instructions outside the zone, including the top zone.
  CandPolicy BotPolicy;
  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
  // Set the top-down policy based on the state of the current top zone and
  // the instructions outside the zone, including the bottom zone.
  CandPolicy TopPolicy;
  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);

  // See if BotCand is still valid (because we previously scheduled from Top).
  LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
      BotCand.Policy != BotPolicy) {
    BotCand.reset(CandPolicy());
    pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand,
                      /*IsBottomUp=*/true);
    assert(BotCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    LLVM_DEBUG(traceCandidate(BotCand));
#ifndef NDEBUG
    if (VerifyScheduling) {
      SchedCandidate TCand;
      TCand.reset(CandPolicy());
      pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand,
                        /*IsBottomUp=*/true);
      assert(TCand.SU == BotCand.SU &&
             "Last pick result should correspond to re-picking right now");
    }
#endif
  }

  // Check if the top Q has a better candidate.
  LLVM_DEBUG(dbgs() << "Picking from Top:\n");
  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
      TopCand.Policy != TopPolicy) {
    TopCand.reset(CandPolicy());
    pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand,
                      /*IsBottomUp=*/false);
    assert(TopCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    LLVM_DEBUG(traceCandidate(TopCand));
#ifndef NDEBUG
    if (VerifyScheduling) {
      SchedCandidate TCand;
      TCand.reset(CandPolicy());
      pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand,
                        /*IsBottomUp=*/false);
      assert(TCand.SU == TopCand.SU &&
             "Last pick result should correspond to re-picking right now");
    }
#endif
  }

  // Pick best from BotCand and TopCand.
  LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand);
             dbgs() << "Bot Cand: "; traceCandidate(BotCand););
  SchedCandidate Cand = BotCand;
  TopCand.Reason = NoCand;
  tryCandidate(Cand, TopCand, nullptr);
  if (TopCand.Reason != NoCand) {
    Cand.setBest(TopCand);
  }
  LLVM_DEBUG(dbgs() << "Picking: "; traceCandidate(Cand););

  IsTopNode = Cand.AtTop;
  return Cand.SU;
}

// This function is mostly cut and pasted from
// GenericScheduler::pickNode()
SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return nullptr;
  }
  SUnit *SU;
  do {
    if (RegionPolicy.OnlyTopDown) {
      SU = Top.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        TopCand.reset(NoPolicy);
        pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand,
                          /*IsBottomUp=*/false);
        assert(TopCand.Reason != NoCand && "failed to find a candidate");
        SU = TopCand.SU;
      }
      IsTopNode = true;
    } else if (RegionPolicy.OnlyBottomUp) {
      SU = Bot.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        BotCand.reset(NoPolicy);
        pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand,
                          /*IsBottomUp=*/true);
        assert(BotCand.Reason != NoCand && "failed to find a candidate");
        SU = BotCand.SU;
      }
      IsTopNode = false;
    } else {
      SU = pickNodeBidirectional(IsTopNode);
    }
  } while (SU->isScheduled);

  if (SU->isTopReady())
    Top.removeReady(SU);
  if (SU->isBottomReady())
    Bot.removeReady(SU);

  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
                    << *SU->getInstr());
  return SU;
}

GCNSchedStageID GCNSchedStrategy::getCurrentStage() {
  assert(CurrentStage && CurrentStage != SchedStages.end());
  return *CurrentStage;
}

bool GCNSchedStrategy::advanceStage() {
  assert(CurrentStage != SchedStages.end());
  if (!CurrentStage)
    CurrentStage = SchedStages.begin();
  else
    CurrentStage++;

  return CurrentStage != SchedStages.end();
}

bool GCNSchedStrategy::hasNextStage() const {
  assert(CurrentStage);
  return std::next(CurrentStage) != SchedStages.end();
}

GCNSchedStageID GCNSchedStrategy::getNextStage() const {
  assert(CurrentStage && std::next(CurrentStage) != SchedStages.end());
  return *std::next(CurrentStage);
}

GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
    const MachineSchedContext *C)
    : GCNSchedStrategy(C) {
  SchedStages.push_back(GCNSchedStageID::OccInitialSchedule);
  SchedStages.push_back(GCNSchedStageID::UnclusteredHighRPReschedule);
  SchedStages.push_back(GCNSchedStageID::ClusteredLowOccupancyReschedule);
  SchedStages.push_back(GCNSchedStageID::PreRARematerialize);
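  // The stages execute in push order; each stage may still opt out at runtime
  // through its initGCNSchedStage() override.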
}

GCNMaxILPSchedStrategy::GCNMaxILPSchedStrategy(const MachineSchedContext *C)
    : GCNSchedStrategy(C) {
  SchedStages.push_back(GCNSchedStageID::ILPInitialSchedule);
}

bool GCNMaxILPSchedStrategy::tryCandidate(SchedCandidate &Cand,
                                          SchedCandidate &TryCand,
                                          SchedBoundary *Zone) const {
  // Initialize the candidate if needed.
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return true;
  }

  // Avoid spilling by exceeding the register limit.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
                  RegExcess, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  // Bias PhysReg defs and copies towards their uses and definitions,
  // respectively.
  if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
                 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
    return TryCand.Reason != NoCand;

  bool SameBoundary = Zone != nullptr;
  if (SameBoundary) {
    // Prioritize instructions that read unbuffered resources by stall cycles.
    if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
                Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
      return TryCand.Reason != NoCand;

    // Avoid critical resource consumption and balance the schedule.
    TryCand.initResourceDelta(DAG, SchedModel);
    if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
                TryCand, Cand, ResourceReduce))
      return TryCand.Reason != NoCand;
    if (tryGreater(TryCand.ResDelta.DemandedResources,
                   Cand.ResDelta.DemandedResources, TryCand, Cand,
                   ResourceDemand))
      return TryCand.Reason != NoCand;

    // Unconditionally try to reduce latency.
    if (tryLatency(TryCand, Cand, *Zone))
      return TryCand.Reason != NoCand;

    // Weak edges are for clustering and other constraints.
    if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
                getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak))
      return TryCand.Reason != NoCand;
  }

  // Keep clustered nodes together to encourage downstream peephole
  // optimizations which may reduce resource requirements.
  //
  // This is a best effort to set things up for a post-RA pass. Optimizations
  // like generating loads of multiple registers should ideally be done within
  // the scheduler pass by combining the loads during DAG postprocessing.
  const SUnit *CandNextClusterSU =
      Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
  const SUnit *TryCandNextClusterSU =
      TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
  if (tryGreater(TryCand.SU == TryCandNextClusterSU,
                 Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster))
    return TryCand.Reason != NoCand;

  // Avoid increasing the max critical pressure in the scheduled region.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,
                  TryCand, Cand, RegCritical, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  // Avoid increasing the max pressure of the entire region.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand,
                  Cand, RegMax, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  if (SameBoundary) {
    // Fall through to original instruction order.
    if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) ||
        (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
      TryCand.Reason = NodeOrder;
      return true;
    }
  }
  return false;
}

GCNScheduleDAGMILive::GCNScheduleDAGMILive(
    MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S)
    : ScheduleDAGMILive(C, std::move(S)), ST(MF.getSubtarget<GCNSubtarget>()),
      MFI(*MF.getInfo<SIMachineFunctionInfo>()),
      StartingOccupancy(MFI.getOccupancy()), MinOccupancy(StartingOccupancy) {

  LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
  if (RelaxedOcc) {
    MinOccupancy = std::min(MFI.getMinAllowedOccupancy(), StartingOccupancy);
    if (MinOccupancy != StartingOccupancy)
      LLVM_DEBUG(dbgs() << "Allowing Occupancy drops to " << MinOccupancy
                        << ".\n");
  }
}

std::unique_ptr<GCNSchedStage>
GCNScheduleDAGMILive::createSchedStage(GCNSchedStageID SchedStageID) {
  switch (SchedStageID) {
  case GCNSchedStageID::OccInitialSchedule:
    return std::make_unique<OccInitialScheduleStage>(SchedStageID, *this);
  case GCNSchedStageID::UnclusteredHighRPReschedule:
    return std::make_unique<UnclusteredHighRPStage>(SchedStageID, *this);
  case GCNSchedStageID::ClusteredLowOccupancyReschedule:
    return std::make_unique<ClusteredLowOccStage>(SchedStageID, *this);
  case GCNSchedStageID::PreRARematerialize:
    return std::make_unique<PreRARematStage>(SchedStageID, *this);
  case GCNSchedStageID::ILPInitialSchedule:
    return std::make_unique<ILPInitialScheduleStage>(SchedStageID, *this);
  }

  llvm_unreachable("Unknown SchedStageID.");
}

void GCNScheduleDAGMILive::schedule() {
  // Collect all scheduling regions. The actual scheduling is performed in
  // GCNScheduleDAGMILive::finalizeSchedule.
  Regions.push_back(std::pair(RegionBegin, RegionEnd));
}

GCNRegPressure
GCNScheduleDAGMILive::getRealRegPressure(unsigned RegionIdx) const {
  GCNDownwardRPTracker RPTracker(*LIS);
  RPTracker.advance(begin(), end(), &LiveIns[RegionIdx]);
  return RPTracker.moveMaxPressure();
}

void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
                                                const MachineBasicBlock *MBB) {
  GCNDownwardRPTracker RPTracker(*LIS);

  // If the block has only one successor, then the live-ins of that successor
  // are the live-outs of the current block. We can reuse the calculated live
  // set if the successor will be sent to scheduling past the current block.

  // However, due to a bug in LiveInterval analysis it may happen that two
  // predecessors of the same successor block have different lane bitmasks for
  // a live-out register. Work around that by sticking to a one-to-one
  // relationship, i.e. one predecessor with one successor block.
  const MachineBasicBlock *OnlySucc = nullptr;
  if (MBB->succ_size() == 1) {
    auto *Candidate = *MBB->succ_begin();
    if (!Candidate->empty() && Candidate->pred_size() == 1) {
      SlotIndexes *Ind = LIS->getSlotIndexes();
      if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(Candidate))
        OnlySucc = Candidate;
    }
  }

  // Scheduler sends regions from the end of the block upwards.
  size_t CurRegion = RegionIdx;
  for (size_t E = Regions.size(); CurRegion != E; ++CurRegion)
    if (Regions[CurRegion].first->getParent() != MBB)
      break;
  --CurRegion;

  auto I = MBB->begin();
  auto LiveInIt = MBBLiveIns.find(MBB);
  auto &Rgn = Regions[CurRegion];
  auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
  if (LiveInIt != MBBLiveIns.end()) {
    auto LiveIn = std::move(LiveInIt->second);
    RPTracker.reset(*MBB->begin(), &LiveIn);
    MBBLiveIns.erase(LiveInIt);
  } else {
    I = Rgn.first;
    auto LRS = BBLiveInMap.lookup(NonDbgMI);
#ifdef EXPENSIVE_CHECKS
    assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS));
#endif
    RPTracker.reset(*I, &LRS);
  }

  for (;;) {
    I = RPTracker.getNext();

    if (Regions[CurRegion].first == I || NonDbgMI == I) {
      LiveIns[CurRegion] = RPTracker.getLiveRegs();
      RPTracker.clearMaxPressure();
    }

    if (Regions[CurRegion].second == I) {
      Pressure[CurRegion] = RPTracker.moveMaxPressure();
      if (CurRegion-- == RegionIdx)
        break;
    }
    RPTracker.advanceToNext();
    RPTracker.advanceBeforeNext();
  }

  if (OnlySucc) {
    if (I != MBB->end()) {
      RPTracker.advanceToNext();
      RPTracker.advance(MBB->end());
    }
    RPTracker.advanceBeforeNext();
    MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs();
  }
}

DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
GCNScheduleDAGMILive::getBBLiveInMap() const {
  assert(!Regions.empty());
  std::vector<MachineInstr *> BBStarters;
  BBStarters.reserve(Regions.size());
  auto I = Regions.rbegin(), E = Regions.rend();
  auto *BB = I->first->getParent();
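  // Walk the recorded regions and collect starter instructions (the first
  // non-debug instruction of a region); getLiveRegMap then computes the live
  // register sets before each starter in one batch.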
  do {
    auto *MI = &*skipDebugInstructionsForward(I->first, I->second);
    BBStarters.push_back(MI);
    do {
      ++I;
    } while (I != E && I->first->getParent() == BB);
  } while (I != E);
  return getLiveRegMap(BBStarters, false /*After*/, *LIS);
}

void GCNScheduleDAGMILive::finalizeSchedule() {
  // Start actual scheduling here. This function is called by the base
  // MachineScheduler after all regions have been recorded by
  // GCNScheduleDAGMILive::schedule().
  LiveIns.resize(Regions.size());
  Pressure.resize(Regions.size());
  RescheduleRegions.resize(Regions.size());
  RegionsWithHighRP.resize(Regions.size());
  RegionsWithExcessRP.resize(Regions.size());
  RegionsWithMinOcc.resize(Regions.size());
  RegionsWithIGLPInstrs.resize(Regions.size());
  RescheduleRegions.set();
  RegionsWithHighRP.reset();
  RegionsWithExcessRP.reset();
  RegionsWithMinOcc.reset();
  RegionsWithIGLPInstrs.reset();
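  // At this point every region is conservatively marked for rescheduling and
  // all other per-region flags are cleared; the stages refine these sets as
  // they run.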

  runSchedStages();
}

void GCNScheduleDAGMILive::runSchedStages() {
  LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");

  if (!Regions.empty())
    BBLiveInMap = getBBLiveInMap();

  GCNSchedStrategy &S = static_cast<GCNSchedStrategy &>(*SchedImpl);
  while (S.advanceStage()) {
    auto Stage = createSchedStage(S.getCurrentStage());
    if (!Stage->initGCNSchedStage())
      continue;

    for (auto Region : Regions) {
      RegionBegin = Region.first;
      RegionEnd = Region.second;
      // Setup for scheduling the region and check whether it should be skipped.
      if (!Stage->initGCNRegion()) {
        Stage->advanceRegion();
        exitRegion();
        continue;
      }

      ScheduleDAGMILive::schedule();
      Stage->finalizeGCNRegion();
    }

    Stage->finalizeGCNSchedStage();
  }
}

#ifndef NDEBUG
raw_ostream &llvm::operator<<(raw_ostream &OS, const GCNSchedStageID &StageID) {
  switch (StageID) {
  case GCNSchedStageID::OccInitialSchedule:
    OS << "Max Occupancy Initial Schedule";
    break;
  case GCNSchedStageID::UnclusteredHighRPReschedule:
    OS << "Unclustered High Register Pressure Reschedule";
    break;
  case GCNSchedStageID::ClusteredLowOccupancyReschedule:
    OS << "Clustered Low Occupancy Reschedule";
    break;
  case GCNSchedStageID::PreRARematerialize:
    OS << "Pre-RA Rematerialize";
    break;
  case GCNSchedStageID::ILPInitialSchedule:
    OS << "Max ILP Initial Schedule";
    break;
  }

  return OS;
}
#endif

GCNSchedStage::GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
    : DAG(DAG), S(static_cast<GCNSchedStrategy &>(*DAG.SchedImpl)), MF(DAG.MF),
      MFI(DAG.MFI), ST(DAG.ST), StageID(StageID) {}

bool GCNSchedStage::initGCNSchedStage() {
  if (!DAG.LIS)
    return false;

  LLVM_DEBUG(dbgs() << "Starting scheduling stage: " << StageID << "\n");
  return true;
}

bool UnclusteredHighRPStage::initGCNSchedStage() {
  if (DisableUnclusterHighRP)
    return false;

  if (!GCNSchedStage::initGCNSchedStage())
    return false;

  if (DAG.RegionsWithHighRP.none() && DAG.RegionsWithExcessRP.none())
    return false;

  SavedMutations.swap(DAG.Mutations);
  DAG.addMutation(
      createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::PreRAReentry));

  InitialOccupancy = DAG.MinOccupancy;
  // Aggressively try to reduce register pressure in the unclustered high RP
  // stage. Temporarily increase the occupancy target in the region.
  S.SGPRLimitBias = S.HighRPSGPRBias;
  S.VGPRLimitBias = S.HighRPVGPRBias;
  if (MFI.getMaxWavesPerEU() > DAG.MinOccupancy)
    MFI.increaseOccupancy(MF, ++DAG.MinOccupancy);

  LLVM_DEBUG(
      dbgs()
      << "Retrying function scheduling without clustering. "
         "Aggressively try to reduce register pressure to achieve occupancy "
      << DAG.MinOccupancy << ".\n");

  return true;
}

bool ClusteredLowOccStage::initGCNSchedStage() {
  if (DisableClusteredLowOccupancy)
    return false;

  if (!GCNSchedStage::initGCNSchedStage())
    return false;

  // Don't bother trying to improve ILP in lower RP regions if occupancy has
  // not been dropped. All regions will have already been scheduled with the
  // ideal occupancy targets.
  if (DAG.StartingOccupancy <= DAG.MinOccupancy)
    return false;

  LLVM_DEBUG(
      dbgs() << "Retrying function scheduling with lowest recorded occupancy "
             << DAG.MinOccupancy << ".\n");
  return true;
}

bool PreRARematStage::initGCNSchedStage() {
  if (!GCNSchedStage::initGCNSchedStage())
    return false;

  if (DAG.RegionsWithMinOcc.none() || DAG.Regions.size() == 1)
    return false;

  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  // Check maximum occupancy: if we are already at the occupancy cap computed
  // for this function, rematerialization cannot help.
  if (ST.computeOccupancy(MF.getFunction(), MFI.getLDSSize()) ==
      DAG.MinOccupancy)
    return false;
  // FIXME: This pass will invalidate cached MBBLiveIns for regions in between
  // the defs and the region we sink the def to. Cached pressure for regions
  // a def is sunk from will also be invalidated. This will need to be fixed
  // if there is another pass after this one.
859bdd1243dSDimitry Andric assert(!S.hasNextStage());
86081ad6265SDimitry Andric
86181ad6265SDimitry Andric collectRematerializableInstructions();
86281ad6265SDimitry Andric if (RematerializableInsts.empty() || !sinkTriviallyRematInsts(ST, TII))
863972a253aSDimitry Andric return false;
86481ad6265SDimitry Andric
86581ad6265SDimitry Andric LLVM_DEBUG(
86681ad6265SDimitry Andric dbgs() << "Retrying function scheduling with improved occupancy of "
867972a253aSDimitry Andric << DAG.MinOccupancy << " from rematerializing\n");
868972a253aSDimitry Andric return true;
8695ffd83dbSDimitry Andric }
8705ffd83dbSDimitry Andric
finalizeGCNSchedStage()871972a253aSDimitry Andric void GCNSchedStage::finalizeGCNSchedStage() {
872972a253aSDimitry Andric DAG.finishBlock();
873972a253aSDimitry Andric LLVM_DEBUG(dbgs() << "Ending scheduling stage: " << StageID << "\n");
874e8d8bef9SDimitry Andric }
8755ffd83dbSDimitry Andric
finalizeGCNSchedStage()876bdd1243dSDimitry Andric void UnclusteredHighRPStage::finalizeGCNSchedStage() {
877972a253aSDimitry Andric SavedMutations.swap(DAG.Mutations);
878bdd1243dSDimitry Andric S.SGPRLimitBias = S.VGPRLimitBias = 0;
879bdd1243dSDimitry Andric if (DAG.MinOccupancy > InitialOccupancy) {
880bdd1243dSDimitry Andric for (unsigned IDX = 0; IDX < DAG.Pressure.size(); ++IDX)
881bdd1243dSDimitry Andric DAG.RegionsWithMinOcc[IDX] =
882bdd1243dSDimitry Andric DAG.Pressure[IDX].getOccupancy(DAG.ST) == DAG.MinOccupancy;
883bdd1243dSDimitry Andric
884bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << StageID
885bdd1243dSDimitry Andric << " stage successfully increased occupancy to "
886bdd1243dSDimitry Andric << DAG.MinOccupancy << '\n');
887bdd1243dSDimitry Andric }
8880b57cec5SDimitry Andric
889972a253aSDimitry Andric GCNSchedStage::finalizeGCNSchedStage();
8900b57cec5SDimitry Andric }

bool GCNSchedStage::initGCNRegion() {
  // Check whether this new region is also a new block.
  if (DAG.RegionBegin->getParent() != CurrentMBB)
    setupNewBlock();

  unsigned NumRegionInstrs = std::distance(DAG.begin(), DAG.end());
  DAG.enterRegion(CurrentMBB, DAG.begin(), DAG.end(), NumRegionInstrs);

  // Skip empty scheduling regions (0 or 1 schedulable instructions).
  if (DAG.begin() == DAG.end() || DAG.begin() == std::prev(DAG.end()))
    return false;

  LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
  LLVM_DEBUG(dbgs() << MF.getName() << ":" << printMBBReference(*CurrentMBB)
                    << " " << CurrentMBB->getName()
                    << "\n  From: " << *DAG.begin() << "    To: ";
             if (DAG.RegionEnd != CurrentMBB->end()) dbgs() << *DAG.RegionEnd;
             else dbgs() << "End";
             dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');

  // Save original instruction order before scheduling for possible revert.
  Unsched.clear();
  Unsched.reserve(DAG.NumRegionInstrs);
  if (StageID == GCNSchedStageID::OccInitialSchedule ||
      StageID == GCNSchedStageID::ILPInitialSchedule) {
    for (auto &I : DAG) {
      Unsched.push_back(&I);
      if (I.getOpcode() == AMDGPU::SCHED_GROUP_BARRIER ||
          I.getOpcode() == AMDGPU::IGLP_OPT)
        DAG.RegionsWithIGLPInstrs[RegionIdx] = true;
    }
  } else {
    for (auto &I : DAG)
      Unsched.push_back(&I);
  }

  PressureBefore = DAG.Pressure[RegionIdx];

  LLVM_DEBUG(
      dbgs() << "Pressure before scheduling:\nRegion live-ins:"
             << print(DAG.LiveIns[RegionIdx], DAG.MRI)
             << "Region live-in pressure: "
             << print(llvm::getRegPressure(DAG.MRI, DAG.LiveIns[RegionIdx]))
             << "Region register pressure: " << print(PressureBefore));

  S.HasHighPressure = false;
  S.KnownExcessRP = isRegionWithExcessRP();

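  // Regions containing IGLP instructions get a fresh IGroupLP mutation for
  // every stage except the unclustered high-RP reschedule; the previous
  // mutation set is stashed in SavedMutations and swapped back in
  // finalizeGCNRegion().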
  if (DAG.RegionsWithIGLPInstrs[RegionIdx] &&
      StageID != GCNSchedStageID::UnclusteredHighRPReschedule) {
    SavedMutations.clear();
    SavedMutations.swap(DAG.Mutations);
    bool IsInitialStage = StageID == GCNSchedStageID::OccInitialSchedule ||
                          StageID == GCNSchedStageID::ILPInitialSchedule;
    DAG.addMutation(createIGroupLPDAGMutation(
        IsInitialStage ? AMDGPU::SchedulingPhase::Initial
                       : AMDGPU::SchedulingPhase::PreRAReentry));
  }

  return true;
}

bool UnclusteredHighRPStage::initGCNRegion() {
  // Only reschedule regions with the minimum occupancy or regions that may
  // have spilling (excess register pressure).
  if ((!DAG.RegionsWithMinOcc[RegionIdx] ||
       DAG.MinOccupancy <= InitialOccupancy) &&
      !DAG.RegionsWithExcessRP[RegionIdx])
    return false;

  return GCNSchedStage::initGCNRegion();
}

bool ClusteredLowOccStage::initGCNRegion() {
  // We may need to reschedule this region if it wasn't rescheduled in the
  // last stage, or if we found it was testing critical register pressure
  // limits in the unclustered reschedule stage. The latter is because we may
  // not have been able to raise the min occupancy in the previous stage, so
  // the region may be overly constrained even if it was already rescheduled.
  if (!DAG.RegionsWithHighRP[RegionIdx])
    return false;

  return GCNSchedStage::initGCNRegion();
}

bool PreRARematStage::initGCNRegion() {
  if (!DAG.RescheduleRegions[RegionIdx])
    return false;

  return GCNSchedStage::initGCNRegion();
}

void GCNSchedStage::setupNewBlock() {
  if (CurrentMBB)
    DAG.finishBlock();

  CurrentMBB = DAG.RegionBegin->getParent();
  DAG.startBlock(CurrentMBB);
  // Get real RP for the region if it hasn't been calculated before. After the
  // initial schedule stage, real RP will be collected after scheduling.
  if (StageID == GCNSchedStageID::OccInitialSchedule ||
      StageID == GCNSchedStageID::ILPInitialSchedule)
    DAG.computeBlockPressure(RegionIdx, CurrentMBB);
}

void GCNSchedStage::finalizeGCNRegion() {
  DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);
  DAG.RescheduleRegions[RegionIdx] = false;
  if (S.HasHighPressure)
    DAG.RegionsWithHighRP[RegionIdx] = true;

  // Revert scheduling if we have dropped occupancy or there is some other
  // reason that the original schedule is better.
  checkScheduling();

  if (DAG.RegionsWithIGLPInstrs[RegionIdx] &&
      StageID != GCNSchedStageID::UnclusteredHighRPReschedule)
    SavedMutations.swap(DAG.Mutations);

  DAG.exitRegion();
  RegionIdx++;
}

void GCNSchedStage::checkScheduling() {
  // Check the results of scheduling.
  PressureAfter = DAG.getRealRegPressure(RegionIdx);
  LLVM_DEBUG(dbgs() << "Pressure after scheduling: " << print(PressureAfter));
  LLVM_DEBUG(dbgs() << "Region: " << RegionIdx << ".\n");

  if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit &&
      PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) <= S.VGPRCriticalLimit) {
    DAG.Pressure[RegionIdx] = PressureAfter;
    DAG.RegionsWithMinOcc[RegionIdx] =
        PressureAfter.getOccupancy(ST) == DAG.MinOccupancy;

    // Early out if we have achieved the occupancy target.
    LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n");
    return;
  }
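
  // Achievable occupancy is bounded both by the strategy's target and by the
  // kernel's local memory (LDS) usage, so clamp the before/after wave counts
  // to that bound before comparing them.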
  unsigned TargetOccupancy =
      std::min(S.getTargetOccupancy(), ST.getOccupancyWithLocalMemSize(MF));
  unsigned WavesAfter =
      std::min(TargetOccupancy, PressureAfter.getOccupancy(ST));
  unsigned WavesBefore =
      std::min(TargetOccupancy, PressureBefore.getOccupancy(ST));
  LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore
                    << ", after " << WavesAfter << ".\n");

  // We may not be able to keep the current target occupancy because of the
  // region just scheduled. We might still be able to revert scheduling if the
  // occupancy before was higher, or if the current schedule has register
  // pressure higher than the excess limits, which could lead to more spilling.
  unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);

  // Allow memory bound functions to drop to 4 waves if not limited by an
  // attribute.
  if (WavesAfter < WavesBefore && WavesAfter < DAG.MinOccupancy &&
      WavesAfter >= MFI.getMinAllowedOccupancy()) {
    LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "
                      << MFI.getMinAllowedOccupancy() << " waves\n");
    NewOccupancy = WavesAfter;
  }

  if (NewOccupancy < DAG.MinOccupancy) {
    DAG.MinOccupancy = NewOccupancy;
    MFI.limitOccupancy(DAG.MinOccupancy);
    DAG.RegionsWithMinOcc.reset();
    LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
                      << DAG.MinOccupancy << ".\n");
  }
  // The maximum number of arch VGPRs on a non-unified register file, or the
  // maximum number of VGPRs + AGPRs in the unified register file case.
  unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
  // The maximum number of arch VGPRs for both unified and non-unified register
  // files.
  unsigned MaxArchVGPRs = std::min(MaxVGPRs, ST.getAddressableNumArchVGPRs());
  unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);

  if (PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) > MaxVGPRs ||
      PressureAfter.getVGPRNum(false) > MaxArchVGPRs ||
      PressureAfter.getAGPRNum() > MaxArchVGPRs ||
      PressureAfter.getSGPRNum() > MaxSGPRs) {
    DAG.RescheduleRegions[RegionIdx] = true;
    DAG.RegionsWithHighRP[RegionIdx] = true;
    DAG.RegionsWithExcessRP[RegionIdx] = true;
  }

  // Revert if this region's schedule would cause a drop in occupancy or
  // spilling.
  if (shouldRevertScheduling(WavesAfter)) {
    revertScheduling();
  } else {
    DAG.Pressure[RegionIdx] = PressureAfter;
    DAG.RegionsWithMinOcc[RegionIdx] =
        PressureAfter.getOccupancy(ST) == DAG.MinOccupancy;
  }
}

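// Compute the earliest cycle at which SU can issue: no earlier than CurrCycle,
// and no earlier than any register-dependence predecessor's ready cycle plus
// that predecessor's latency. The result is memoized in ReadyCycles, keyed by
// SUnit number, so dependent SUnits can look it up later.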
unsigned
GCNSchedStage::computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle,
                                      DenseMap<unsigned, unsigned> &ReadyCycles,
                                      const TargetSchedModel &SM) {
  unsigned ReadyCycle = CurrCycle;
  for (auto &D : SU.Preds) {
    if (D.isAssignedRegDep()) {
      MachineInstr *DefMI = D.getSUnit()->getInstr();
      unsigned Latency = SM.computeInstrLatency(DefMI);
      unsigned DefReady = ReadyCycles[DAG.getSUnit(DefMI)->NodeNum];
      ReadyCycle = std::max(ReadyCycle, DefReady + Latency);
    }
  }
  ReadyCycles[SU.NodeNum] = ReadyCycle;
  return ReadyCycle;
}

#ifndef NDEBUG
struct EarlierIssuingCycle {
  bool operator()(std::pair<MachineInstr *, unsigned> A,
                  std::pair<MachineInstr *, unsigned> B) const {
    return A.second < B.second;
  }
};

static void printScheduleModel(std::set<std::pair<MachineInstr *, unsigned>,
                                        EarlierIssuingCycle> &ReadyCycles) {
  if (ReadyCycles.empty())
    return;
  unsigned BBNum = ReadyCycles.begin()->first->getParent()->getNumber();
  dbgs() << "\n################## Schedule time ReadyCycles for MBB : " << BBNum
         << " ##################\n# Cycle #\t\t\tInstruction\n";
  unsigned IPrev = 1;
  for (auto &I : ReadyCycles) {
    if (I.second > IPrev + 1)
      dbgs() << "****************************** BUBBLE OF " << I.second - IPrev
             << " CYCLES DETECTED ******************************\n\n";
    dbgs() << "[ " << I.second << " ] : " << *I.first << "\n";
    IPrev = I.second;
  }
}
#endif

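// Walk the scheduled instruction sequence in order, accumulating "bubbles":
// idle cycles where the next instruction is not yet ready because a defining
// instruction's latency has not elapsed. The returned ScheduleMetrics pairs
// the total schedule length in cycles with the number of bubble cycles.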
ScheduleMetrics
GCNSchedStage::getScheduleMetrics(const std::vector<SUnit> &InputSchedule) {
#ifndef NDEBUG
  std::set<std::pair<MachineInstr *, unsigned>, EarlierIssuingCycle>
      ReadyCyclesSorted;
#endif
  const TargetSchedModel &SM = ST.getInstrInfo()->getSchedModel();
  unsigned SumBubbles = 0;
  DenseMap<unsigned, unsigned> ReadyCycles;
  unsigned CurrCycle = 0;
  for (auto &SU : InputSchedule) {
    unsigned ReadyCycle =
        computeSUnitReadyCycle(SU, CurrCycle, ReadyCycles, SM);
    SumBubbles += ReadyCycle - CurrCycle;
#ifndef NDEBUG
    ReadyCyclesSorted.insert(std::make_pair(SU.getInstr(), ReadyCycle));
#endif
    CurrCycle = ++ReadyCycle;
  }
#ifndef NDEBUG
  LLVM_DEBUG(
      printScheduleModel(ReadyCyclesSorted);
      dbgs() << "\n\t"
             << "Metric: "
             << (SumBubbles
                     ? (SumBubbles * ScheduleMetrics::ScaleFactor) / CurrCycle
                     : 1)
             << "\n\n");
#endif

  return ScheduleMetrics(CurrCycle, SumBubbles);
}

ScheduleMetrics
GCNSchedStage::getScheduleMetrics(const GCNScheduleDAGMILive &DAG) {
#ifndef NDEBUG
  std::set<std::pair<MachineInstr *, unsigned>, EarlierIssuingCycle>
      ReadyCyclesSorted;
#endif
  const TargetSchedModel &SM = ST.getInstrInfo()->getSchedModel();
  unsigned SumBubbles = 0;
  DenseMap<unsigned, unsigned> ReadyCycles;
  unsigned CurrCycle = 0;
  for (auto &MI : DAG) {
    SUnit *SU = DAG.getSUnit(&MI);
    if (!SU)
      continue;
    unsigned ReadyCycle =
        computeSUnitReadyCycle(*SU, CurrCycle, ReadyCycles, SM);
    SumBubbles += ReadyCycle - CurrCycle;
#ifndef NDEBUG
    ReadyCyclesSorted.insert(std::make_pair(SU->getInstr(), ReadyCycle));
#endif
    CurrCycle = ++ReadyCycle;
  }
#ifndef NDEBUG
  LLVM_DEBUG(
      printScheduleModel(ReadyCyclesSorted);
      dbgs() << "\n\t"
             << "Metric: "
             << (SumBubbles
                     ? (SumBubbles * ScheduleMetrics::ScaleFactor) / CurrCycle
                     : 1)
             << "\n\n");
#endif

  return ScheduleMetrics(CurrCycle, SumBubbles);
}

bool GCNSchedStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (WavesAfter < DAG.MinOccupancy)
    return true;

  return false;
}

bool OccInitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (PressureAfter == PressureBefore)
    return false;

  if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
    return true;

  if (mayCauseSpilling(WavesAfter))
    return true;

  return false;
}

bool UnclusteredHighRPStage::shouldRevertScheduling(unsigned WavesAfter) {
  // If RP is not reduced in the unclustered reschedule stage, revert to the
  // old schedule.
  if ((WavesAfter <= PressureBefore.getOccupancy(ST) &&
       mayCauseSpilling(WavesAfter)) ||
      GCNSchedStage::shouldRevertScheduling(WavesAfter)) {
    LLVM_DEBUG(dbgs() << "Unclustered reschedule did not help.\n");
    return true;
  }

  // Do not attempt to relax the schedule even more if we are already spilling.
  if (isRegionWithExcessRP())
    return false;

  LLVM_DEBUG(
      dbgs()
      << "\n\t      *** In shouldRevertScheduling ***\n"
      << "      *********** BEFORE UnclusteredHighRPStage ***********\n");
  ScheduleMetrics MBefore = getScheduleMetrics(DAG.SUnits);
  LLVM_DEBUG(
      dbgs()
      << "\n      *********** AFTER UnclusteredHighRPStage ***********\n");
  ScheduleMetrics MAfter = getScheduleMetrics(DAG);
  unsigned OldMetric = MBefore.getMetric();
  unsigned NewMetric = MAfter.getMetric();
  unsigned WavesBefore =
      std::min(S.getTargetOccupancy(), PressureBefore.getOccupancy(ST));
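
  // Fixed-point profitability check, scaled by ScheduleMetrics::ScaleFactor to
  // avoid floating point:
  //   Profit ~ (WavesAfter / WavesBefore) *
  //            ((OldMetric + ScheduleMetricBias) / NewMetric)
  // A Profit below ScaleFactor (i.e. below 1.0) means the new schedule loses
  // more in latency than it gains in occupancy, so it should be reverted.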
  unsigned Profit =
      ((WavesAfter * ScheduleMetrics::ScaleFactor) / WavesBefore *
       ((OldMetric + ScheduleMetricBias) * ScheduleMetrics::ScaleFactor) /
       NewMetric) /
      ScheduleMetrics::ScaleFactor;
  LLVM_DEBUG(dbgs() << "\tMetric before " << MBefore << "\tMetric after "
                    << MAfter << "Profit: " << Profit << "\n");
  return Profit < ScheduleMetrics::ScaleFactor;
}

bool ClusteredLowOccStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (PressureAfter == PressureBefore)
    return false;

  if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
    return true;

  if (mayCauseSpilling(WavesAfter))
    return true;

  return false;
}

bool PreRARematStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
    return true;

  if (mayCauseSpilling(WavesAfter))
    return true;

  return false;
}

bool ILPInitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (mayCauseSpilling(WavesAfter))
    return true;

  return false;
}

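// Spilling is likely when the schedule leaves us at the minimum waves-per-EU
// bound, the region is known to exceed the register limits, and the new
// pressure is not strictly lower than the pressure before scheduling.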
bool GCNSchedStage::mayCauseSpilling(unsigned WavesAfter) {
  if (WavesAfter <= MFI.getMinWavesPerEU() && isRegionWithExcessRP() &&
      !PressureAfter.less(MF, PressureBefore)) {
    LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
    return true;
  }

  return false;
}

void GCNSchedStage::revertScheduling() {
  DAG.RegionsWithMinOcc[RegionIdx] =
      PressureBefore.getOccupancy(ST) == DAG.MinOccupancy;
  LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
  DAG.RescheduleRegions[RegionIdx] =
      S.hasNextStage() &&
      S.getNextStage() != GCNSchedStageID::UnclusteredHighRPReschedule;
  DAG.RegionEnd = DAG.RegionBegin;
  int SkippedDebugInstr = 0;
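  // Walk the saved pre-scheduling order, moving each instruction back to its
  // original position and updating LiveIntervals as we go. Debug instructions
  // are skipped here and put back by placeDebugValues() below.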
  for (MachineInstr *MI : Unsched) {
    if (MI->isDebugInstr()) {
      ++SkippedDebugInstr;
      continue;
    }

    if (MI->getIterator() != DAG.RegionEnd) {
      DAG.BB->remove(MI);
      DAG.BB->insert(DAG.RegionEnd, MI);
      if (!MI->isDebugInstr())
        DAG.LIS->handleMove(*MI, true);
    }

    // Reset read-undef flags and update them later.
    for (auto &Op : MI->all_defs())
      Op.setIsUndef(false);
    RegisterOperands RegOpers;
    RegOpers.collect(*MI, *DAG.TRI, DAG.MRI, DAG.ShouldTrackLaneMasks, false);
    if (!MI->isDebugInstr()) {
      if (DAG.ShouldTrackLaneMasks) {
        // Adjust liveness and add missing dead+read-undef flags.
        SlotIndex SlotIdx = DAG.LIS->getInstructionIndex(*MI).getRegSlot();
        RegOpers.adjustLaneLiveness(*DAG.LIS, DAG.MRI, SlotIdx, MI);
      } else {
        // Adjust for missing dead-def flags.
        RegOpers.detectDeadDefs(*MI, *DAG.LIS);
      }
    }
    DAG.RegionEnd = MI->getIterator();
    ++DAG.RegionEnd;
    LLVM_DEBUG(dbgs() << "Scheduling " << *MI);
  }

  // After reverting the schedule, debug instrs will now be at the end of the
  // block and RegionEnd will point to the first debug instr. Increment
  // RegionEnd past debug instrs to the actual end of the scheduling region.
  while (SkippedDebugInstr-- > 0)
    ++DAG.RegionEnd;

  // If the Unsched.front() instruction is a debug instruction, this will
  // actually shrink the region since we moved all debug instructions to the
  // end of the block. Find the first instruction that is not a debug
  // instruction.
  DAG.RegionBegin = Unsched.front()->getIterator();
  if (DAG.RegionBegin->isDebugInstr()) {
    for (MachineInstr *MI : Unsched) {
      if (MI->isDebugInstr())
        continue;
      DAG.RegionBegin = MI->getIterator();
      break;
    }
  }

  // Then move the debug instructions back into their correct place and set
  // RegionBegin and RegionEnd if needed.
  DAG.placeDebugValues();

  DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);
}

void PreRARematStage::collectRematerializableInstructions() {
  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(DAG.TRI);
  for (unsigned I = 0, E = DAG.MRI.getNumVirtRegs(); I != E; ++I) {
    Register Reg = Register::index2VirtReg(I);
    if (!DAG.LIS->hasInterval(Reg))
      continue;

    // TODO: Handle AGPR and SGPR rematerialization
    if (!SRI->isVGPRClass(DAG.MRI.getRegClass(Reg)) ||
        !DAG.MRI.hasOneDef(Reg) || !DAG.MRI.hasOneNonDBGUse(Reg))
      continue;

    MachineOperand *Op = DAG.MRI.getOneDef(Reg);
    MachineInstr *Def = Op->getParent();
    if (Op->getSubReg() != 0 || !isTriviallyReMaterializable(*Def))
      continue;

    MachineInstr *UseI = &*DAG.MRI.use_instr_nodbg_begin(Reg);
    if (Def->getParent() == UseI->getParent())
      continue;

    // We are only collecting defs that are defined in another block and are
    // live-through or used inside regions at MinOccupancy. This means that the
    // register must be in the live-in set for the region.
    bool AddedToRematList = false;
    for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
      auto It = DAG.LiveIns[I].find(Reg);
      if (It != DAG.LiveIns[I].end() && !It->second.none()) {
        if (DAG.RegionsWithMinOcc[I]) {
          RematerializableInsts[I][Def] = UseI;
          AddedToRematList = true;
        }

        // Collect regions with rematerializable reg as a live-in to avoid
        // searching later when updating RP.
        RematDefToLiveInRegions[Def].push_back(I);
      }
    }
    if (!AddedToRematList)
      RematDefToLiveInRegions.erase(Def);
  }
}

bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
                                              const TargetInstrInfo *TII) {
  // Temporary copies of cached variables we will be modifying and replacing if
  // sinking succeeds.
  SmallVector<
      std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>, 32>
      NewRegions;
  DenseMap<unsigned, GCNRPTracker::LiveRegSet> NewLiveIns;
  DenseMap<unsigned, GCNRegPressure> NewPressure;
  BitVector NewRescheduleRegions;
  LiveIntervals *LIS = DAG.LIS;

  NewRegions.resize(DAG.Regions.size());
  NewRescheduleRegions.resize(DAG.Regions.size());

  // Collect only regions that have a rematerializable def as a live-in.
  SmallSet<unsigned, 16> ImpactedRegions;
  for (const auto &It : RematDefToLiveInRegions)
    ImpactedRegions.insert(It.second.begin(), It.second.end());

  // Make copies of the register pressure and live-ins caches that will be
  // updated as we rematerialize.
  for (auto Idx : ImpactedRegions) {
    NewPressure[Idx] = DAG.Pressure[Idx];
    NewLiveIns[Idx] = DAG.LiveIns[Idx];
  }
  NewRegions = DAG.Regions;
  NewRescheduleRegions.reset();

  DenseMap<MachineInstr *, MachineInstr *> InsertedMIToOldDef;
  bool Improved = false;
  for (auto I : ImpactedRegions) {
    if (!DAG.RegionsWithMinOcc[I])
      continue;

    Improved = false;
    int VGPRUsage = NewPressure[I].getVGPRNum(ST.hasGFX90AInsts());
    int SGPRUsage = NewPressure[I].getSGPRNum();

    // TODO: Handle occupancy drop due to AGPR and SGPR.
    // Check if the cause of the occupancy drop is VGPR usage and not SGPR.
    if (ST.getOccupancyWithNumSGPRs(SGPRUsage) == DAG.MinOccupancy)
      break;

    // The occupancy of this region could have been improved by a previous
    // iteration's sinking of defs.
    if (NewPressure[I].getOccupancy(ST) > DAG.MinOccupancy) {
      NewRescheduleRegions[I] = true;
      Improved = true;
      continue;
    }

    // First check if we have enough trivially rematerializable instructions to
    // improve occupancy. Optimistically assume all instructions we are able to
    // sink decreased RP.
    int TotalSinkableRegs = 0;
    for (const auto &It : RematerializableInsts[I]) {
      MachineInstr *Def = It.first;
      Register DefReg = Def->getOperand(0).getReg();
      TotalSinkableRegs +=
          SIRegisterInfo::getNumCoveredRegs(NewLiveIns[I][DefReg]);
    }
    int VGPRsAfterSink = VGPRUsage - TotalSinkableRegs;
    unsigned OptimisticOccupancy = ST.getOccupancyWithNumVGPRs(VGPRsAfterSink);
    // If, in the most optimistic scenario, we cannot improve occupancy, then
    // do not attempt to sink any instructions.
    if (OptimisticOccupancy <= DAG.MinOccupancy)
      break;

    unsigned ImproveOccupancy = 0;
    SmallVector<MachineInstr *, 4> SinkedDefs;
    for (auto &It : RematerializableInsts[I]) {
      MachineInstr *Def = It.first;
      MachineBasicBlock::iterator InsertPos =
          MachineBasicBlock::iterator(It.second);
      Register Reg = Def->getOperand(0).getReg();
      // Rematerialize MI to its use block. Since we are only rematerializing
      // instructions that do not have any virtual reg uses, we do not need to
      // call LiveRangeEdit::allUsesAvailableAt() and
      // LiveRangeEdit::canRematerializeAt().
      TII->reMaterialize(*InsertPos->getParent(), InsertPos, Reg,
                         Def->getOperand(0).getSubReg(), *Def, *DAG.TRI);
      MachineInstr *NewMI = &*std::prev(InsertPos);
      LIS->InsertMachineInstrInMaps(*NewMI);
      LIS->removeInterval(Reg);
      LIS->createAndComputeVirtRegInterval(Reg);
      InsertedMIToOldDef[NewMI] = Def;

      // Update region boundaries in the scheduling region we sunk from, since
      // we may sink an instruction that was at the beginning or end of its
      // region.
      DAG.updateRegionBoundaries(NewRegions, Def, /*NewMI =*/nullptr,
                                 /*Removing =*/true);

      // Update region boundaries in the region we sunk to.
      DAG.updateRegionBoundaries(NewRegions, InsertPos, NewMI);

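      // Two cases below: if the def was merely live-through a region, its
      // pressure contribution can be dropped directly from the cached value;
      // if it is rematerialized into the region, the region's max pressure
      // must be recomputed with a downward RP tracker.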
      LaneBitmask PrevMask = NewLiveIns[I][Reg];
      // FIXME: Also update cached pressure for where the def was sunk from.
      // Update RP for all regions that have this reg as a live-in and remove
      // the reg from all regions as a live-in.
      for (auto Idx : RematDefToLiveInRegions[Def]) {
        NewLiveIns[Idx].erase(Reg);
        if (InsertPos->getParent() != DAG.Regions[Idx].first->getParent()) {
          // Def is live-through and not used in this block.
          NewPressure[Idx].inc(Reg, PrevMask, LaneBitmask::getNone(), DAG.MRI);
        } else {
          // Def is used and rematerialized into this block.
          GCNDownwardRPTracker RPT(*LIS);
          auto *NonDbgMI = &*skipDebugInstructionsForward(
              NewRegions[Idx].first, NewRegions[Idx].second);
          RPT.reset(*NonDbgMI, &NewLiveIns[Idx]);
          RPT.advance(NewRegions[Idx].second);
          NewPressure[Idx] = RPT.moveMaxPressure();
        }
      }

      SinkedDefs.push_back(Def);
      ImproveOccupancy = NewPressure[I].getOccupancy(ST);
      if (ImproveOccupancy > DAG.MinOccupancy)
        break;
    }

    // Remove defs we just sunk from all regions' lists of sinkable defs.
    for (auto &Def : SinkedDefs)
      for (auto TrackedIdx : RematDefToLiveInRegions[Def])
        RematerializableInsts[TrackedIdx].erase(Def);

    if (ImproveOccupancy <= DAG.MinOccupancy)
      break;

    NewRescheduleRegions[I] = true;
    Improved = true;
  }

  if (!Improved) {
    // Occupancy was not improved for all regions that were at MinOccupancy.
    // Undo sinking and remove newly rematerialized instructions.
    for (auto &Entry : InsertedMIToOldDef) {
      MachineInstr *MI = Entry.first;
      MachineInstr *OldMI = Entry.second;
      Register Reg = MI->getOperand(0).getReg();
      LIS->RemoveMachineInstrFromMaps(*MI);
      MI->eraseFromParent();
      OldMI->clearRegisterDeads(Reg);
      LIS->removeInterval(Reg);
      LIS->createAndComputeVirtRegInterval(Reg);
    }
    return false;
  }

  // Occupancy was improved for all regions.
  for (auto &Entry : InsertedMIToOldDef) {
    MachineInstr *MI = Entry.first;
    MachineInstr *OldMI = Entry.second;

    // Remove OldMI from BBLiveInMap since we are sinking it from its MBB.
    DAG.BBLiveInMap.erase(OldMI);

    // Remove OldMI and update LIS.
    Register Reg = MI->getOperand(0).getReg();
    LIS->RemoveMachineInstrFromMaps(*OldMI);
    OldMI->eraseFromParent();
    LIS->removeInterval(Reg);
    LIS->createAndComputeVirtRegInterval(Reg);
  }

  // Update live-ins, register pressure, and regions caches.
  for (auto Idx : ImpactedRegions) {
    DAG.LiveIns[Idx] = NewLiveIns[Idx];
    DAG.Pressure[Idx] = NewPressure[Idx];
    DAG.MBBLiveIns.erase(DAG.Regions[Idx].first->getParent());
  }
  DAG.Regions = NewRegions;
  DAG.RescheduleRegions = NewRescheduleRegions;

  SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
  MFI.increaseOccupancy(MF, ++DAG.MinOccupancy);

  return true;
}

// Copied from MachineLICM.
bool PreRARematStage::isTriviallyReMaterializable(const MachineInstr &MI) {
  if (!DAG.TII->isTriviallyReMaterializable(MI))
    return false;

  for (const MachineOperand &MO : MI.all_uses())
    if (MO.getReg().isVirtual())
      return false;

  return true;
}

// When removing, we will have to check both the beginning and the end of the
// region. When inserting, we only have to check whether we are inserting NewMI
// in front of a scheduling region and do not need to check the ending, since
// we will only ever be inserting before an already existing MI.
void GCNScheduleDAGMILive::updateRegionBoundaries(
    SmallVectorImpl<std::pair<MachineBasicBlock::iterator,
                              MachineBasicBlock::iterator>> &RegionBoundaries,
    MachineBasicBlock::iterator MI, MachineInstr *NewMI, bool Removing) {
  unsigned I = 0, E = RegionBoundaries.size();
  // Search for the first region of the block where MI is located.
  while (I != E && MI->getParent() != RegionBoundaries[I].first->getParent())
    ++I;

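  // Three cases are handled for each region in MI's block: removing the only
  // instruction of a region empties it; removing or inserting at RegionBegin
  // moves the region's start; removing at RegionEnd moves the region's end.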
  for (; I != E; ++I) {
    if (MI->getParent() != RegionBoundaries[I].first->getParent())
      return;

    if (Removing && MI == RegionBoundaries[I].first &&
        MI == RegionBoundaries[I].second) {
      // MI is in a region of size 1. After removing, the region will have
      // size 0; set RegionBegin and RegionEnd to the past-the-end iterator of
      // the block.
      RegionBoundaries[I] =
          std::pair(MI->getParent()->end(), MI->getParent()->end());
      return;
    }
    if (MI == RegionBoundaries[I].first) {
      if (Removing)
        RegionBoundaries[I] =
            std::pair(std::next(MI), RegionBoundaries[I].second);
      else
        // Inserted NewMI in front of region, set new RegionBegin to NewMI.
        RegionBoundaries[I] = std::pair(MachineBasicBlock::iterator(NewMI),
                                        RegionBoundaries[I].second);
      return;
    }
    if (Removing && MI == RegionBoundaries[I].second) {
      RegionBoundaries[I] = std::pair(RegionBoundaries[I].first, std::prev(MI));
      return;
    }
  }
}

static bool hasIGLPInstrs(ScheduleDAGInstrs *DAG) {
  return std::any_of(
      DAG->begin(), DAG->end(), [](MachineBasicBlock::iterator MI) {
        unsigned Opc = MI->getOpcode();
        return Opc == AMDGPU::SCHED_GROUP_BARRIER || Opc == AMDGPU::IGLP_OPT;
      });
}

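// The post-RA scheduler swaps in an IGroupLP mutation (PostRA phase) when the
// DAG contains SCHED_GROUP_BARRIER or IGLP_OPT instructions; the original
// mutation set is saved in schedule() and restored in finalizeSchedule().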
GCNPostScheduleDAGMILive::GCNPostScheduleDAGMILive(
    MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S,
    bool RemoveKillFlags)
    : ScheduleDAGMI(C, std::move(S), RemoveKillFlags) {}

void GCNPostScheduleDAGMILive::schedule() {
  HasIGLPInstrs = hasIGLPInstrs(this);
  if (HasIGLPInstrs) {
    SavedMutations.clear();
    SavedMutations.swap(Mutations);
    addMutation(createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::PostRA));
  }

  ScheduleDAGMI::schedule();
}

void GCNPostScheduleDAGMILive::finalizeSchedule() {
  if (HasIGLPInstrs)
    SavedMutations.swap(Mutations);

  ScheduleDAGMI::finalizeSchedule();
}