1*0b57cec5SDimitry Andric //===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===// 2*0b57cec5SDimitry Andric // 3*0b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*0b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*0b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*0b57cec5SDimitry Andric // 7*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 8*0b57cec5SDimitry Andric // 9*0b57cec5SDimitry Andric /// \file 10*0b57cec5SDimitry Andric /// This contains a MachineSchedStrategy implementation for maximizing wave 11*0b57cec5SDimitry Andric /// occupancy on GCN hardware. 12*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 13*0b57cec5SDimitry Andric 14*0b57cec5SDimitry Andric #include "GCNSchedStrategy.h" 15*0b57cec5SDimitry Andric #include "AMDGPUSubtarget.h" 16*0b57cec5SDimitry Andric #include "SIInstrInfo.h" 17*0b57cec5SDimitry Andric #include "SIMachineFunctionInfo.h" 18*0b57cec5SDimitry Andric #include "SIRegisterInfo.h" 19*0b57cec5SDimitry Andric #include "llvm/CodeGen/RegisterClassInfo.h" 20*0b57cec5SDimitry Andric #include "llvm/Support/MathExtras.h" 21*0b57cec5SDimitry Andric 22*0b57cec5SDimitry Andric #define DEBUG_TYPE "machine-scheduler" 23*0b57cec5SDimitry Andric 24*0b57cec5SDimitry Andric using namespace llvm; 25*0b57cec5SDimitry Andric 26*0b57cec5SDimitry Andric GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy( 27*0b57cec5SDimitry Andric const MachineSchedContext *C) : 28*0b57cec5SDimitry Andric GenericScheduler(C), TargetOccupancy(0), MF(nullptr) { } 29*0b57cec5SDimitry Andric 30*0b57cec5SDimitry Andric void GCNMaxOccupancySchedStrategy::initialize(ScheduleDAGMI *DAG) { 31*0b57cec5SDimitry Andric GenericScheduler::initialize(DAG); 32*0b57cec5SDimitry Andric 33*0b57cec5SDimitry Andric const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI); 34*0b57cec5SDimitry Andric 35*0b57cec5SDimitry Andric MF = &DAG->MF; 36*0b57cec5SDimitry Andric 37*0b57cec5SDimitry Andric const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>(); 38*0b57cec5SDimitry Andric 39*0b57cec5SDimitry Andric // FIXME: This is also necessary, because some passes that run after 40*0b57cec5SDimitry Andric // scheduling and before regalloc increase register pressure. 41*0b57cec5SDimitry Andric const int ErrorMargin = 3; 42*0b57cec5SDimitry Andric 43*0b57cec5SDimitry Andric SGPRExcessLimit = Context->RegClassInfo 44*0b57cec5SDimitry Andric ->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass) - ErrorMargin; 45*0b57cec5SDimitry Andric VGPRExcessLimit = Context->RegClassInfo 46*0b57cec5SDimitry Andric ->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass) - ErrorMargin; 47*0b57cec5SDimitry Andric if (TargetOccupancy) { 48*0b57cec5SDimitry Andric SGPRCriticalLimit = ST.getMaxNumSGPRs(TargetOccupancy, true); 49*0b57cec5SDimitry Andric VGPRCriticalLimit = ST.getMaxNumVGPRs(TargetOccupancy); 50*0b57cec5SDimitry Andric } else { 51*0b57cec5SDimitry Andric SGPRCriticalLimit = SRI->getRegPressureSetLimit(DAG->MF, 52*0b57cec5SDimitry Andric SRI->getSGPRPressureSet()); 53*0b57cec5SDimitry Andric VGPRCriticalLimit = SRI->getRegPressureSetLimit(DAG->MF, 54*0b57cec5SDimitry Andric SRI->getVGPRPressureSet()); 55*0b57cec5SDimitry Andric } 56*0b57cec5SDimitry Andric 57*0b57cec5SDimitry Andric SGPRCriticalLimit -= ErrorMargin; 58*0b57cec5SDimitry Andric VGPRCriticalLimit -= ErrorMargin; 59*0b57cec5SDimitry Andric } 60*0b57cec5SDimitry Andric 61*0b57cec5SDimitry Andric void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU, 62*0b57cec5SDimitry Andric bool AtTop, const RegPressureTracker &RPTracker, 63*0b57cec5SDimitry Andric const SIRegisterInfo *SRI, 64*0b57cec5SDimitry Andric unsigned SGPRPressure, 65*0b57cec5SDimitry Andric unsigned VGPRPressure) { 66*0b57cec5SDimitry Andric 67*0b57cec5SDimitry Andric Cand.SU = SU; 68*0b57cec5SDimitry Andric Cand.AtTop = AtTop; 69*0b57cec5SDimitry Andric 70*0b57cec5SDimitry Andric // getDownwardPressure() and getUpwardPressure() make temporary changes to 71*0b57cec5SDimitry Andric // the tracker, so we need to pass those function a non-const copy. 72*0b57cec5SDimitry Andric RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker); 73*0b57cec5SDimitry Andric 74*0b57cec5SDimitry Andric std::vector<unsigned> Pressure; 75*0b57cec5SDimitry Andric std::vector<unsigned> MaxPressure; 76*0b57cec5SDimitry Andric 77*0b57cec5SDimitry Andric if (AtTop) 78*0b57cec5SDimitry Andric TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure); 79*0b57cec5SDimitry Andric else { 80*0b57cec5SDimitry Andric // FIXME: I think for bottom up scheduling, the register pressure is cached 81*0b57cec5SDimitry Andric // and can be retrieved by DAG->getPressureDif(SU). 82*0b57cec5SDimitry Andric TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure); 83*0b57cec5SDimitry Andric } 84*0b57cec5SDimitry Andric 85*0b57cec5SDimitry Andric unsigned NewSGPRPressure = Pressure[SRI->getSGPRPressureSet()]; 86*0b57cec5SDimitry Andric unsigned NewVGPRPressure = Pressure[SRI->getVGPRPressureSet()]; 87*0b57cec5SDimitry Andric 88*0b57cec5SDimitry Andric // If two instructions increase the pressure of different register sets 89*0b57cec5SDimitry Andric // by the same amount, the generic scheduler will prefer to schedule the 90*0b57cec5SDimitry Andric // instruction that increases the set with the least amount of registers, 91*0b57cec5SDimitry Andric // which in our case would be SGPRs. This is rarely what we want, so 92*0b57cec5SDimitry Andric // when we report excess/critical register pressure, we do it either 93*0b57cec5SDimitry Andric // only for VGPRs or only for SGPRs. 94*0b57cec5SDimitry Andric 95*0b57cec5SDimitry Andric // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs. 96*0b57cec5SDimitry Andric const unsigned MaxVGPRPressureInc = 16; 97*0b57cec5SDimitry Andric bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit; 98*0b57cec5SDimitry Andric bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit; 99*0b57cec5SDimitry Andric 100*0b57cec5SDimitry Andric 101*0b57cec5SDimitry Andric // FIXME: We have to enter REG-EXCESS before we reach the actual threshold 102*0b57cec5SDimitry Andric // to increase the likelihood we don't go over the limits. We should improve 103*0b57cec5SDimitry Andric // the analysis to look through dependencies to find the path with the least 104*0b57cec5SDimitry Andric // register pressure. 105*0b57cec5SDimitry Andric 106*0b57cec5SDimitry Andric // We only need to update the RPDelata for instructions that increase 107*0b57cec5SDimitry Andric // register pressure. Instructions that decrease or keep reg pressure 108*0b57cec5SDimitry Andric // the same will be marked as RegExcess in tryCandidate() when they 109*0b57cec5SDimitry Andric // are compared with instructions that increase the register pressure. 110*0b57cec5SDimitry Andric if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) { 111*0b57cec5SDimitry Andric Cand.RPDelta.Excess = PressureChange(SRI->getVGPRPressureSet()); 112*0b57cec5SDimitry Andric Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit); 113*0b57cec5SDimitry Andric } 114*0b57cec5SDimitry Andric 115*0b57cec5SDimitry Andric if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) { 116*0b57cec5SDimitry Andric Cand.RPDelta.Excess = PressureChange(SRI->getSGPRPressureSet()); 117*0b57cec5SDimitry Andric Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit); 118*0b57cec5SDimitry Andric } 119*0b57cec5SDimitry Andric 120*0b57cec5SDimitry Andric // Register pressure is considered 'CRITICAL' if it is approaching a value 121*0b57cec5SDimitry Andric // that would reduce the wave occupancy for the execution unit. When 122*0b57cec5SDimitry Andric // register pressure is 'CRITICAL', increading SGPR and VGPR pressure both 123*0b57cec5SDimitry Andric // has the same cost, so we don't need to prefer one over the other. 124*0b57cec5SDimitry Andric 125*0b57cec5SDimitry Andric int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit; 126*0b57cec5SDimitry Andric int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit; 127*0b57cec5SDimitry Andric 128*0b57cec5SDimitry Andric if (SGPRDelta >= 0 || VGPRDelta >= 0) { 129*0b57cec5SDimitry Andric if (SGPRDelta > VGPRDelta) { 130*0b57cec5SDimitry Andric Cand.RPDelta.CriticalMax = PressureChange(SRI->getSGPRPressureSet()); 131*0b57cec5SDimitry Andric Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta); 132*0b57cec5SDimitry Andric } else { 133*0b57cec5SDimitry Andric Cand.RPDelta.CriticalMax = PressureChange(SRI->getVGPRPressureSet()); 134*0b57cec5SDimitry Andric Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta); 135*0b57cec5SDimitry Andric } 136*0b57cec5SDimitry Andric } 137*0b57cec5SDimitry Andric } 138*0b57cec5SDimitry Andric 139*0b57cec5SDimitry Andric // This function is mostly cut and pasted from 140*0b57cec5SDimitry Andric // GenericScheduler::pickNodeFromQueue() 141*0b57cec5SDimitry Andric void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(SchedBoundary &Zone, 142*0b57cec5SDimitry Andric const CandPolicy &ZonePolicy, 143*0b57cec5SDimitry Andric const RegPressureTracker &RPTracker, 144*0b57cec5SDimitry Andric SchedCandidate &Cand) { 145*0b57cec5SDimitry Andric const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI); 146*0b57cec5SDimitry Andric ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos(); 147*0b57cec5SDimitry Andric unsigned SGPRPressure = Pressure[SRI->getSGPRPressureSet()]; 148*0b57cec5SDimitry Andric unsigned VGPRPressure = Pressure[SRI->getVGPRPressureSet()]; 149*0b57cec5SDimitry Andric ReadyQueue &Q = Zone.Available; 150*0b57cec5SDimitry Andric for (SUnit *SU : Q) { 151*0b57cec5SDimitry Andric 152*0b57cec5SDimitry Andric SchedCandidate TryCand(ZonePolicy); 153*0b57cec5SDimitry Andric initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, 154*0b57cec5SDimitry Andric SGPRPressure, VGPRPressure); 155*0b57cec5SDimitry Andric // Pass SchedBoundary only when comparing nodes from the same boundary. 156*0b57cec5SDimitry Andric SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr; 157*0b57cec5SDimitry Andric GenericScheduler::tryCandidate(Cand, TryCand, ZoneArg); 158*0b57cec5SDimitry Andric if (TryCand.Reason != NoCand) { 159*0b57cec5SDimitry Andric // Initialize resource delta if needed in case future heuristics query it. 160*0b57cec5SDimitry Andric if (TryCand.ResDelta == SchedResourceDelta()) 161*0b57cec5SDimitry Andric TryCand.initResourceDelta(Zone.DAG, SchedModel); 162*0b57cec5SDimitry Andric Cand.setBest(TryCand); 163*0b57cec5SDimitry Andric } 164*0b57cec5SDimitry Andric } 165*0b57cec5SDimitry Andric } 166*0b57cec5SDimitry Andric 167*0b57cec5SDimitry Andric // This function is mostly cut and pasted from 168*0b57cec5SDimitry Andric // GenericScheduler::pickNodeBidirectional() 169*0b57cec5SDimitry Andric SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) { 170*0b57cec5SDimitry Andric // Schedule as far as possible in the direction of no choice. This is most 171*0b57cec5SDimitry Andric // efficient, but also provides the best heuristics for CriticalPSets. 172*0b57cec5SDimitry Andric if (SUnit *SU = Bot.pickOnlyChoice()) { 173*0b57cec5SDimitry Andric IsTopNode = false; 174*0b57cec5SDimitry Andric return SU; 175*0b57cec5SDimitry Andric } 176*0b57cec5SDimitry Andric if (SUnit *SU = Top.pickOnlyChoice()) { 177*0b57cec5SDimitry Andric IsTopNode = true; 178*0b57cec5SDimitry Andric return SU; 179*0b57cec5SDimitry Andric } 180*0b57cec5SDimitry Andric // Set the bottom-up policy based on the state of the current bottom zone and 181*0b57cec5SDimitry Andric // the instructions outside the zone, including the top zone. 182*0b57cec5SDimitry Andric CandPolicy BotPolicy; 183*0b57cec5SDimitry Andric setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top); 184*0b57cec5SDimitry Andric // Set the top-down policy based on the state of the current top zone and 185*0b57cec5SDimitry Andric // the instructions outside the zone, including the bottom zone. 186*0b57cec5SDimitry Andric CandPolicy TopPolicy; 187*0b57cec5SDimitry Andric setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot); 188*0b57cec5SDimitry Andric 189*0b57cec5SDimitry Andric // See if BotCand is still valid (because we previously scheduled from Top). 190*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Picking from Bot:\n"); 191*0b57cec5SDimitry Andric if (!BotCand.isValid() || BotCand.SU->isScheduled || 192*0b57cec5SDimitry Andric BotCand.Policy != BotPolicy) { 193*0b57cec5SDimitry Andric BotCand.reset(CandPolicy()); 194*0b57cec5SDimitry Andric pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand); 195*0b57cec5SDimitry Andric assert(BotCand.Reason != NoCand && "failed to find the first candidate"); 196*0b57cec5SDimitry Andric } else { 197*0b57cec5SDimitry Andric LLVM_DEBUG(traceCandidate(BotCand)); 198*0b57cec5SDimitry Andric } 199*0b57cec5SDimitry Andric 200*0b57cec5SDimitry Andric // Check if the top Q has a better candidate. 201*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Picking from Top:\n"); 202*0b57cec5SDimitry Andric if (!TopCand.isValid() || TopCand.SU->isScheduled || 203*0b57cec5SDimitry Andric TopCand.Policy != TopPolicy) { 204*0b57cec5SDimitry Andric TopCand.reset(CandPolicy()); 205*0b57cec5SDimitry Andric pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand); 206*0b57cec5SDimitry Andric assert(TopCand.Reason != NoCand && "failed to find the first candidate"); 207*0b57cec5SDimitry Andric } else { 208*0b57cec5SDimitry Andric LLVM_DEBUG(traceCandidate(TopCand)); 209*0b57cec5SDimitry Andric } 210*0b57cec5SDimitry Andric 211*0b57cec5SDimitry Andric // Pick best from BotCand and TopCand. 212*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand); 213*0b57cec5SDimitry Andric dbgs() << "Bot Cand: "; traceCandidate(BotCand);); 214*0b57cec5SDimitry Andric SchedCandidate Cand; 215*0b57cec5SDimitry Andric if (TopCand.Reason == BotCand.Reason) { 216*0b57cec5SDimitry Andric Cand = BotCand; 217*0b57cec5SDimitry Andric GenericSchedulerBase::CandReason TopReason = TopCand.Reason; 218*0b57cec5SDimitry Andric TopCand.Reason = NoCand; 219*0b57cec5SDimitry Andric GenericScheduler::tryCandidate(Cand, TopCand, nullptr); 220*0b57cec5SDimitry Andric if (TopCand.Reason != NoCand) { 221*0b57cec5SDimitry Andric Cand.setBest(TopCand); 222*0b57cec5SDimitry Andric } else { 223*0b57cec5SDimitry Andric TopCand.Reason = TopReason; 224*0b57cec5SDimitry Andric } 225*0b57cec5SDimitry Andric } else { 226*0b57cec5SDimitry Andric if (TopCand.Reason == RegExcess && TopCand.RPDelta.Excess.getUnitInc() <= 0) { 227*0b57cec5SDimitry Andric Cand = TopCand; 228*0b57cec5SDimitry Andric } else if (BotCand.Reason == RegExcess && BotCand.RPDelta.Excess.getUnitInc() <= 0) { 229*0b57cec5SDimitry Andric Cand = BotCand; 230*0b57cec5SDimitry Andric } else if (TopCand.Reason == RegCritical && TopCand.RPDelta.CriticalMax.getUnitInc() <= 0) { 231*0b57cec5SDimitry Andric Cand = TopCand; 232*0b57cec5SDimitry Andric } else if (BotCand.Reason == RegCritical && BotCand.RPDelta.CriticalMax.getUnitInc() <= 0) { 233*0b57cec5SDimitry Andric Cand = BotCand; 234*0b57cec5SDimitry Andric } else { 235*0b57cec5SDimitry Andric if (BotCand.Reason > TopCand.Reason) { 236*0b57cec5SDimitry Andric Cand = TopCand; 237*0b57cec5SDimitry Andric } else { 238*0b57cec5SDimitry Andric Cand = BotCand; 239*0b57cec5SDimitry Andric } 240*0b57cec5SDimitry Andric } 241*0b57cec5SDimitry Andric } 242*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Picking: "; traceCandidate(Cand);); 243*0b57cec5SDimitry Andric 244*0b57cec5SDimitry Andric IsTopNode = Cand.AtTop; 245*0b57cec5SDimitry Andric return Cand.SU; 246*0b57cec5SDimitry Andric } 247*0b57cec5SDimitry Andric 248*0b57cec5SDimitry Andric // This function is mostly cut and pasted from 249*0b57cec5SDimitry Andric // GenericScheduler::pickNode() 250*0b57cec5SDimitry Andric SUnit *GCNMaxOccupancySchedStrategy::pickNode(bool &IsTopNode) { 251*0b57cec5SDimitry Andric if (DAG->top() == DAG->bottom()) { 252*0b57cec5SDimitry Andric assert(Top.Available.empty() && Top.Pending.empty() && 253*0b57cec5SDimitry Andric Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage"); 254*0b57cec5SDimitry Andric return nullptr; 255*0b57cec5SDimitry Andric } 256*0b57cec5SDimitry Andric SUnit *SU; 257*0b57cec5SDimitry Andric do { 258*0b57cec5SDimitry Andric if (RegionPolicy.OnlyTopDown) { 259*0b57cec5SDimitry Andric SU = Top.pickOnlyChoice(); 260*0b57cec5SDimitry Andric if (!SU) { 261*0b57cec5SDimitry Andric CandPolicy NoPolicy; 262*0b57cec5SDimitry Andric TopCand.reset(NoPolicy); 263*0b57cec5SDimitry Andric pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand); 264*0b57cec5SDimitry Andric assert(TopCand.Reason != NoCand && "failed to find a candidate"); 265*0b57cec5SDimitry Andric SU = TopCand.SU; 266*0b57cec5SDimitry Andric } 267*0b57cec5SDimitry Andric IsTopNode = true; 268*0b57cec5SDimitry Andric } else if (RegionPolicy.OnlyBottomUp) { 269*0b57cec5SDimitry Andric SU = Bot.pickOnlyChoice(); 270*0b57cec5SDimitry Andric if (!SU) { 271*0b57cec5SDimitry Andric CandPolicy NoPolicy; 272*0b57cec5SDimitry Andric BotCand.reset(NoPolicy); 273*0b57cec5SDimitry Andric pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand); 274*0b57cec5SDimitry Andric assert(BotCand.Reason != NoCand && "failed to find a candidate"); 275*0b57cec5SDimitry Andric SU = BotCand.SU; 276*0b57cec5SDimitry Andric } 277*0b57cec5SDimitry Andric IsTopNode = false; 278*0b57cec5SDimitry Andric } else { 279*0b57cec5SDimitry Andric SU = pickNodeBidirectional(IsTopNode); 280*0b57cec5SDimitry Andric } 281*0b57cec5SDimitry Andric } while (SU->isScheduled); 282*0b57cec5SDimitry Andric 283*0b57cec5SDimitry Andric if (SU->isTopReady()) 284*0b57cec5SDimitry Andric Top.removeReady(SU); 285*0b57cec5SDimitry Andric if (SU->isBottomReady()) 286*0b57cec5SDimitry Andric Bot.removeReady(SU); 287*0b57cec5SDimitry Andric 288*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " 289*0b57cec5SDimitry Andric << *SU->getInstr()); 290*0b57cec5SDimitry Andric return SU; 291*0b57cec5SDimitry Andric } 292*0b57cec5SDimitry Andric 293*0b57cec5SDimitry Andric GCNScheduleDAGMILive::GCNScheduleDAGMILive(MachineSchedContext *C, 294*0b57cec5SDimitry Andric std::unique_ptr<MachineSchedStrategy> S) : 295*0b57cec5SDimitry Andric ScheduleDAGMILive(C, std::move(S)), 296*0b57cec5SDimitry Andric ST(MF.getSubtarget<GCNSubtarget>()), 297*0b57cec5SDimitry Andric MFI(*MF.getInfo<SIMachineFunctionInfo>()), 298*0b57cec5SDimitry Andric StartingOccupancy(MFI.getOccupancy()), 299*0b57cec5SDimitry Andric MinOccupancy(StartingOccupancy), Stage(0), RegionIdx(0) { 300*0b57cec5SDimitry Andric 301*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n"); 302*0b57cec5SDimitry Andric } 303*0b57cec5SDimitry Andric 304*0b57cec5SDimitry Andric void GCNScheduleDAGMILive::schedule() { 305*0b57cec5SDimitry Andric if (Stage == 0) { 306*0b57cec5SDimitry Andric // Just record regions at the first pass. 307*0b57cec5SDimitry Andric Regions.push_back(std::make_pair(RegionBegin, RegionEnd)); 308*0b57cec5SDimitry Andric return; 309*0b57cec5SDimitry Andric } 310*0b57cec5SDimitry Andric 311*0b57cec5SDimitry Andric std::vector<MachineInstr*> Unsched; 312*0b57cec5SDimitry Andric Unsched.reserve(NumRegionInstrs); 313*0b57cec5SDimitry Andric for (auto &I : *this) { 314*0b57cec5SDimitry Andric Unsched.push_back(&I); 315*0b57cec5SDimitry Andric } 316*0b57cec5SDimitry Andric 317*0b57cec5SDimitry Andric GCNRegPressure PressureBefore; 318*0b57cec5SDimitry Andric if (LIS) { 319*0b57cec5SDimitry Andric PressureBefore = Pressure[RegionIdx]; 320*0b57cec5SDimitry Andric 321*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Pressure before scheduling:\nRegion live-ins:"; 322*0b57cec5SDimitry Andric GCNRPTracker::printLiveRegs(dbgs(), LiveIns[RegionIdx], MRI); 323*0b57cec5SDimitry Andric dbgs() << "Region live-in pressure: "; 324*0b57cec5SDimitry Andric llvm::getRegPressure(MRI, LiveIns[RegionIdx]).print(dbgs()); 325*0b57cec5SDimitry Andric dbgs() << "Region register pressure: "; 326*0b57cec5SDimitry Andric PressureBefore.print(dbgs())); 327*0b57cec5SDimitry Andric } 328*0b57cec5SDimitry Andric 329*0b57cec5SDimitry Andric ScheduleDAGMILive::schedule(); 330*0b57cec5SDimitry Andric Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd); 331*0b57cec5SDimitry Andric 332*0b57cec5SDimitry Andric if (!LIS) 333*0b57cec5SDimitry Andric return; 334*0b57cec5SDimitry Andric 335*0b57cec5SDimitry Andric // Check the results of scheduling. 336*0b57cec5SDimitry Andric GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl; 337*0b57cec5SDimitry Andric auto PressureAfter = getRealRegPressure(); 338*0b57cec5SDimitry Andric 339*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Pressure after scheduling: "; 340*0b57cec5SDimitry Andric PressureAfter.print(dbgs())); 341*0b57cec5SDimitry Andric 342*0b57cec5SDimitry Andric if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit && 343*0b57cec5SDimitry Andric PressureAfter.getVGPRNum() <= S.VGPRCriticalLimit) { 344*0b57cec5SDimitry Andric Pressure[RegionIdx] = PressureAfter; 345*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n"); 346*0b57cec5SDimitry Andric return; 347*0b57cec5SDimitry Andric } 348*0b57cec5SDimitry Andric unsigned Occ = MFI.getOccupancy(); 349*0b57cec5SDimitry Andric unsigned WavesAfter = std::min(Occ, PressureAfter.getOccupancy(ST)); 350*0b57cec5SDimitry Andric unsigned WavesBefore = std::min(Occ, PressureBefore.getOccupancy(ST)); 351*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore 352*0b57cec5SDimitry Andric << ", after " << WavesAfter << ".\n"); 353*0b57cec5SDimitry Andric 354*0b57cec5SDimitry Andric // We could not keep current target occupancy because of the just scheduled 355*0b57cec5SDimitry Andric // region. Record new occupancy for next scheduling cycle. 356*0b57cec5SDimitry Andric unsigned NewOccupancy = std::max(WavesAfter, WavesBefore); 357*0b57cec5SDimitry Andric // Allow memory bound functions to drop to 4 waves if not limited by an 358*0b57cec5SDimitry Andric // attribute. 359*0b57cec5SDimitry Andric if (WavesAfter < WavesBefore && WavesAfter < MinOccupancy && 360*0b57cec5SDimitry Andric WavesAfter >= MFI.getMinAllowedOccupancy()) { 361*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to " 362*0b57cec5SDimitry Andric << MFI.getMinAllowedOccupancy() << " waves\n"); 363*0b57cec5SDimitry Andric NewOccupancy = WavesAfter; 364*0b57cec5SDimitry Andric } 365*0b57cec5SDimitry Andric if (NewOccupancy < MinOccupancy) { 366*0b57cec5SDimitry Andric MinOccupancy = NewOccupancy; 367*0b57cec5SDimitry Andric MFI.limitOccupancy(MinOccupancy); 368*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to " 369*0b57cec5SDimitry Andric << MinOccupancy << ".\n"); 370*0b57cec5SDimitry Andric } 371*0b57cec5SDimitry Andric 372*0b57cec5SDimitry Andric if (WavesAfter >= MinOccupancy) { 373*0b57cec5SDimitry Andric Pressure[RegionIdx] = PressureAfter; 374*0b57cec5SDimitry Andric return; 375*0b57cec5SDimitry Andric } 376*0b57cec5SDimitry Andric 377*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n"); 378*0b57cec5SDimitry Andric RegionEnd = RegionBegin; 379*0b57cec5SDimitry Andric for (MachineInstr *MI : Unsched) { 380*0b57cec5SDimitry Andric if (MI->isDebugInstr()) 381*0b57cec5SDimitry Andric continue; 382*0b57cec5SDimitry Andric 383*0b57cec5SDimitry Andric if (MI->getIterator() != RegionEnd) { 384*0b57cec5SDimitry Andric BB->remove(MI); 385*0b57cec5SDimitry Andric BB->insert(RegionEnd, MI); 386*0b57cec5SDimitry Andric if (!MI->isDebugInstr()) 387*0b57cec5SDimitry Andric LIS->handleMove(*MI, true); 388*0b57cec5SDimitry Andric } 389*0b57cec5SDimitry Andric // Reset read-undef flags and update them later. 390*0b57cec5SDimitry Andric for (auto &Op : MI->operands()) 391*0b57cec5SDimitry Andric if (Op.isReg() && Op.isDef()) 392*0b57cec5SDimitry Andric Op.setIsUndef(false); 393*0b57cec5SDimitry Andric RegisterOperands RegOpers; 394*0b57cec5SDimitry Andric RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false); 395*0b57cec5SDimitry Andric if (!MI->isDebugInstr()) { 396*0b57cec5SDimitry Andric if (ShouldTrackLaneMasks) { 397*0b57cec5SDimitry Andric // Adjust liveness and add missing dead+read-undef flags. 398*0b57cec5SDimitry Andric SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot(); 399*0b57cec5SDimitry Andric RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI); 400*0b57cec5SDimitry Andric } else { 401*0b57cec5SDimitry Andric // Adjust for missing dead-def flags. 402*0b57cec5SDimitry Andric RegOpers.detectDeadDefs(*MI, *LIS); 403*0b57cec5SDimitry Andric } 404*0b57cec5SDimitry Andric } 405*0b57cec5SDimitry Andric RegionEnd = MI->getIterator(); 406*0b57cec5SDimitry Andric ++RegionEnd; 407*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Scheduling " << *MI); 408*0b57cec5SDimitry Andric } 409*0b57cec5SDimitry Andric RegionBegin = Unsched.front()->getIterator(); 410*0b57cec5SDimitry Andric Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd); 411*0b57cec5SDimitry Andric 412*0b57cec5SDimitry Andric placeDebugValues(); 413*0b57cec5SDimitry Andric } 414*0b57cec5SDimitry Andric 415*0b57cec5SDimitry Andric GCNRegPressure GCNScheduleDAGMILive::getRealRegPressure() const { 416*0b57cec5SDimitry Andric GCNDownwardRPTracker RPTracker(*LIS); 417*0b57cec5SDimitry Andric RPTracker.advance(begin(), end(), &LiveIns[RegionIdx]); 418*0b57cec5SDimitry Andric return RPTracker.moveMaxPressure(); 419*0b57cec5SDimitry Andric } 420*0b57cec5SDimitry Andric 421*0b57cec5SDimitry Andric void GCNScheduleDAGMILive::computeBlockPressure(const MachineBasicBlock *MBB) { 422*0b57cec5SDimitry Andric GCNDownwardRPTracker RPTracker(*LIS); 423*0b57cec5SDimitry Andric 424*0b57cec5SDimitry Andric // If the block has the only successor then live-ins of that successor are 425*0b57cec5SDimitry Andric // live-outs of the current block. We can reuse calculated live set if the 426*0b57cec5SDimitry Andric // successor will be sent to scheduling past current block. 427*0b57cec5SDimitry Andric const MachineBasicBlock *OnlySucc = nullptr; 428*0b57cec5SDimitry Andric if (MBB->succ_size() == 1 && !(*MBB->succ_begin())->empty()) { 429*0b57cec5SDimitry Andric SlotIndexes *Ind = LIS->getSlotIndexes(); 430*0b57cec5SDimitry Andric if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(*MBB->succ_begin())) 431*0b57cec5SDimitry Andric OnlySucc = *MBB->succ_begin(); 432*0b57cec5SDimitry Andric } 433*0b57cec5SDimitry Andric 434*0b57cec5SDimitry Andric // Scheduler sends regions from the end of the block upwards. 435*0b57cec5SDimitry Andric size_t CurRegion = RegionIdx; 436*0b57cec5SDimitry Andric for (size_t E = Regions.size(); CurRegion != E; ++CurRegion) 437*0b57cec5SDimitry Andric if (Regions[CurRegion].first->getParent() != MBB) 438*0b57cec5SDimitry Andric break; 439*0b57cec5SDimitry Andric --CurRegion; 440*0b57cec5SDimitry Andric 441*0b57cec5SDimitry Andric auto I = MBB->begin(); 442*0b57cec5SDimitry Andric auto LiveInIt = MBBLiveIns.find(MBB); 443*0b57cec5SDimitry Andric if (LiveInIt != MBBLiveIns.end()) { 444*0b57cec5SDimitry Andric auto LiveIn = std::move(LiveInIt->second); 445*0b57cec5SDimitry Andric RPTracker.reset(*MBB->begin(), &LiveIn); 446*0b57cec5SDimitry Andric MBBLiveIns.erase(LiveInIt); 447*0b57cec5SDimitry Andric } else { 448*0b57cec5SDimitry Andric auto &Rgn = Regions[CurRegion]; 449*0b57cec5SDimitry Andric I = Rgn.first; 450*0b57cec5SDimitry Andric auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second); 451*0b57cec5SDimitry Andric auto LRS = BBLiveInMap.lookup(NonDbgMI); 452*0b57cec5SDimitry Andric assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS)); 453*0b57cec5SDimitry Andric RPTracker.reset(*I, &LRS); 454*0b57cec5SDimitry Andric } 455*0b57cec5SDimitry Andric 456*0b57cec5SDimitry Andric for ( ; ; ) { 457*0b57cec5SDimitry Andric I = RPTracker.getNext(); 458*0b57cec5SDimitry Andric 459*0b57cec5SDimitry Andric if (Regions[CurRegion].first == I) { 460*0b57cec5SDimitry Andric LiveIns[CurRegion] = RPTracker.getLiveRegs(); 461*0b57cec5SDimitry Andric RPTracker.clearMaxPressure(); 462*0b57cec5SDimitry Andric } 463*0b57cec5SDimitry Andric 464*0b57cec5SDimitry Andric if (Regions[CurRegion].second == I) { 465*0b57cec5SDimitry Andric Pressure[CurRegion] = RPTracker.moveMaxPressure(); 466*0b57cec5SDimitry Andric if (CurRegion-- == RegionIdx) 467*0b57cec5SDimitry Andric break; 468*0b57cec5SDimitry Andric } 469*0b57cec5SDimitry Andric RPTracker.advanceToNext(); 470*0b57cec5SDimitry Andric RPTracker.advanceBeforeNext(); 471*0b57cec5SDimitry Andric } 472*0b57cec5SDimitry Andric 473*0b57cec5SDimitry Andric if (OnlySucc) { 474*0b57cec5SDimitry Andric if (I != MBB->end()) { 475*0b57cec5SDimitry Andric RPTracker.advanceToNext(); 476*0b57cec5SDimitry Andric RPTracker.advance(MBB->end()); 477*0b57cec5SDimitry Andric } 478*0b57cec5SDimitry Andric RPTracker.reset(*OnlySucc->begin(), &RPTracker.getLiveRegs()); 479*0b57cec5SDimitry Andric RPTracker.advanceBeforeNext(); 480*0b57cec5SDimitry Andric MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs(); 481*0b57cec5SDimitry Andric } 482*0b57cec5SDimitry Andric } 483*0b57cec5SDimitry Andric 484*0b57cec5SDimitry Andric DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> 485*0b57cec5SDimitry Andric GCNScheduleDAGMILive::getBBLiveInMap() const { 486*0b57cec5SDimitry Andric assert(!Regions.empty()); 487*0b57cec5SDimitry Andric std::vector<MachineInstr *> BBStarters; 488*0b57cec5SDimitry Andric BBStarters.reserve(Regions.size()); 489*0b57cec5SDimitry Andric auto I = Regions.rbegin(), E = Regions.rend(); 490*0b57cec5SDimitry Andric auto *BB = I->first->getParent(); 491*0b57cec5SDimitry Andric do { 492*0b57cec5SDimitry Andric auto *MI = &*skipDebugInstructionsForward(I->first, I->second); 493*0b57cec5SDimitry Andric BBStarters.push_back(MI); 494*0b57cec5SDimitry Andric do { 495*0b57cec5SDimitry Andric ++I; 496*0b57cec5SDimitry Andric } while (I != E && I->first->getParent() == BB); 497*0b57cec5SDimitry Andric } while (I != E); 498*0b57cec5SDimitry Andric return getLiveRegMap(BBStarters, false /*After*/, *LIS); 499*0b57cec5SDimitry Andric } 500*0b57cec5SDimitry Andric 501*0b57cec5SDimitry Andric void GCNScheduleDAGMILive::finalizeSchedule() { 502*0b57cec5SDimitry Andric GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl; 503*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n"); 504*0b57cec5SDimitry Andric 505*0b57cec5SDimitry Andric LiveIns.resize(Regions.size()); 506*0b57cec5SDimitry Andric Pressure.resize(Regions.size()); 507*0b57cec5SDimitry Andric 508*0b57cec5SDimitry Andric if (!Regions.empty()) 509*0b57cec5SDimitry Andric BBLiveInMap = getBBLiveInMap(); 510*0b57cec5SDimitry Andric 511*0b57cec5SDimitry Andric do { 512*0b57cec5SDimitry Andric Stage++; 513*0b57cec5SDimitry Andric RegionIdx = 0; 514*0b57cec5SDimitry Andric MachineBasicBlock *MBB = nullptr; 515*0b57cec5SDimitry Andric 516*0b57cec5SDimitry Andric if (Stage > 1) { 517*0b57cec5SDimitry Andric // Retry function scheduling if we found resulting occupancy and it is 518*0b57cec5SDimitry Andric // lower than used for first pass scheduling. This will give more freedom 519*0b57cec5SDimitry Andric // to schedule low register pressure blocks. 520*0b57cec5SDimitry Andric // Code is partially copied from MachineSchedulerBase::scheduleRegions(). 521*0b57cec5SDimitry Andric 522*0b57cec5SDimitry Andric if (!LIS || StartingOccupancy <= MinOccupancy) 523*0b57cec5SDimitry Andric break; 524*0b57cec5SDimitry Andric 525*0b57cec5SDimitry Andric LLVM_DEBUG( 526*0b57cec5SDimitry Andric dbgs() 527*0b57cec5SDimitry Andric << "Retrying function scheduling with lowest recorded occupancy " 528*0b57cec5SDimitry Andric << MinOccupancy << ".\n"); 529*0b57cec5SDimitry Andric 530*0b57cec5SDimitry Andric S.setTargetOccupancy(MinOccupancy); 531*0b57cec5SDimitry Andric } 532*0b57cec5SDimitry Andric 533*0b57cec5SDimitry Andric for (auto Region : Regions) { 534*0b57cec5SDimitry Andric RegionBegin = Region.first; 535*0b57cec5SDimitry Andric RegionEnd = Region.second; 536*0b57cec5SDimitry Andric 537*0b57cec5SDimitry Andric if (RegionBegin->getParent() != MBB) { 538*0b57cec5SDimitry Andric if (MBB) finishBlock(); 539*0b57cec5SDimitry Andric MBB = RegionBegin->getParent(); 540*0b57cec5SDimitry Andric startBlock(MBB); 541*0b57cec5SDimitry Andric if (Stage == 1) 542*0b57cec5SDimitry Andric computeBlockPressure(MBB); 543*0b57cec5SDimitry Andric } 544*0b57cec5SDimitry Andric 545*0b57cec5SDimitry Andric unsigned NumRegionInstrs = std::distance(begin(), end()); 546*0b57cec5SDimitry Andric enterRegion(MBB, begin(), end(), NumRegionInstrs); 547*0b57cec5SDimitry Andric 548*0b57cec5SDimitry Andric // Skip empty scheduling regions (0 or 1 schedulable instructions). 549*0b57cec5SDimitry Andric if (begin() == end() || begin() == std::prev(end())) { 550*0b57cec5SDimitry Andric exitRegion(); 551*0b57cec5SDimitry Andric continue; 552*0b57cec5SDimitry Andric } 553*0b57cec5SDimitry Andric 554*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n"); 555*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << MF.getName() << ":" << printMBBReference(*MBB) << " " 556*0b57cec5SDimitry Andric << MBB->getName() << "\n From: " << *begin() 557*0b57cec5SDimitry Andric << " To: "; 558*0b57cec5SDimitry Andric if (RegionEnd != MBB->end()) dbgs() << *RegionEnd; 559*0b57cec5SDimitry Andric else dbgs() << "End"; 560*0b57cec5SDimitry Andric dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n'); 561*0b57cec5SDimitry Andric 562*0b57cec5SDimitry Andric schedule(); 563*0b57cec5SDimitry Andric 564*0b57cec5SDimitry Andric exitRegion(); 565*0b57cec5SDimitry Andric ++RegionIdx; 566*0b57cec5SDimitry Andric } 567*0b57cec5SDimitry Andric finishBlock(); 568*0b57cec5SDimitry Andric 569*0b57cec5SDimitry Andric } while (Stage < 2); 570*0b57cec5SDimitry Andric } 571