//===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This contains a MachineSchedStrategy implementation for maximizing wave
/// occupancy on GCN hardware.
///
/// This pass will apply multiple scheduling stages to the same function.
/// Regions are first recorded in GCNScheduleDAGMILive::schedule. The actual
/// entry point for the scheduling of those regions is
/// GCNScheduleDAGMILive::runSchedStages.
///
/// Generally, the reason for having multiple scheduling stages is to account
/// for the kernel-wide effect of register usage on occupancy. Usually, only a
/// few scheduling regions will have register pressure high enough to limit
/// occupancy for the kernel, so constraints can be relaxed to improve ILP in
/// other regions.
///
//===----------------------------------------------------------------------===//

#include "GCNSchedStrategy.h"
#include "AMDGPUIGroupLP.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"

#define DEBUG_TYPE "machine-scheduler"

using namespace llvm;

static cl::opt<bool> DisableUnclusterHighRP(
    "amdgpu-disable-unclustred-high-rp-reschedule", cl::Hidden,
    cl::desc("Disable the unclustered high register pressure "
             "reduction scheduling stage."),
    cl::init(false));

static cl::opt<unsigned> ScheduleMetricBias(
    "amdgpu-schedule-metric-bias", cl::Hidden,
    cl::desc(
        "Sets the bias which adds weight to occupancy vs latency. Set it to "
        "100 to chase occupancy only."),
    cl::init(10));
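// Usage sketch (illustrative, not exercised by this file): these cl::opts can
// be passed to llc when experimenting with the scheduler, e.g.
//   llc -mtriple=amdgcn -amdgpu-schedule-metric-bias=100 \
//       -amdgpu-disable-unclustred-high-rp-reschedule kernel.ll
// The flag spellings come from the definitions above; the invocation itself
// is only an example.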
static cl::opt<bool>
    RelaxedOcc("amdgpu-schedule-relaxed-occupancy", cl::Hidden,
               cl::desc("Relax occupancy targets for kernels which are memory "
                        "bound (amdgpu-membound-threshold), or "
                        "Wave Limited (amdgpu-limit-wave-threshold)."),
               cl::init(false));

const unsigned ScheduleMetrics::ScaleFactor = 100;

GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C)
    : GenericScheduler(C), TargetOccupancy(0), MF(nullptr),
      HasHighPressure(false) {}

void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
  GenericScheduler::initialize(DAG);

  MF = &DAG->MF;

  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();

  SGPRExcessLimit =
      Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
  VGPRExcessLimit =
      Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);

  SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
  // Set the initial TargetOccupancy to the maximum occupancy that we can
  // achieve for this function. This effectively sets a lower bound on the
  // 'Critical' register limits in the scheduler.
  // Allow for lower occupancy targets if the kernel is wave limited or memory
  // bound, and using the relaxed occupancy feature.
  TargetOccupancy =
      RelaxedOcc ? MFI.getMinAllowedOccupancy() : MFI.getOccupancy();
  SGPRCriticalLimit =
      std::min(ST.getMaxNumSGPRs(TargetOccupancy, true), SGPRExcessLimit);

  if (!KnownExcessRP) {
    VGPRCriticalLimit =
        std::min(ST.getMaxNumVGPRs(TargetOccupancy), VGPRExcessLimit);
  } else {
    // This is similar to the ST.getMaxNumVGPRs(TargetOccupancy) result except
    // that it returns a reasonably small number for targets with lots of
    // VGPRs, such as GFX10 and GFX11.
    LLVM_DEBUG(dbgs() << "Region is known to spill, use alternative "
                         "VGPRCriticalLimit calculation method.\n");

    unsigned Granule = AMDGPU::IsaInfo::getVGPRAllocGranule(&ST);
    unsigned Addressable = AMDGPU::IsaInfo::getAddressableNumVGPRs(&ST);
    unsigned VGPRBudget = alignDown(Addressable / TargetOccupancy, Granule);
    VGPRBudget = std::max(VGPRBudget, Granule);
    VGPRCriticalLimit = std::min(VGPRBudget, VGPRExcessLimit);
  }
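  // Worked example with hypothetical values: Addressable = 256, Granule = 8
  // and TargetOccupancy = 7 give VGPRBudget = alignDown(256 / 7, 8) =
  // alignDown(36, 8) = 32, which is then clamped to at least one granule and
  // capped by VGPRExcessLimit. The actual numbers are target-dependent.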
  // Subtract error margin and bias from register limits and avoid overflow.
  SGPRCriticalLimit -= std::min(SGPRLimitBias + ErrorMargin, SGPRCriticalLimit);
  VGPRCriticalLimit -= std::min(VGPRLimitBias + ErrorMargin, VGPRCriticalLimit);
  SGPRExcessLimit -= std::min(SGPRLimitBias + ErrorMargin, SGPRExcessLimit);
  VGPRExcessLimit -= std::min(VGPRLimitBias + ErrorMargin, VGPRExcessLimit);

  LLVM_DEBUG(dbgs() << "VGPRCriticalLimit = " << VGPRCriticalLimit
                    << ", VGPRExcessLimit = " << VGPRExcessLimit
                    << ", SGPRCriticalLimit = " << SGPRCriticalLimit
                    << ", SGPRExcessLimit = " << SGPRExcessLimit << "\n\n");
}

void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
                                     bool AtTop,
                                     const RegPressureTracker &RPTracker,
                                     const SIRegisterInfo *SRI,
                                     unsigned SGPRPressure,
                                     unsigned VGPRPressure) {
  Cand.SU = SU;
  Cand.AtTop = AtTop;

  if (!DAG->isTrackingPressure())
    return;

  // getDownwardPressure() and getUpwardPressure() make temporary changes to
  // the tracker, so we need to pass those functions a non-const copy.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);

  Pressure.clear();
  MaxPressure.clear();

  if (AtTop)
    TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
  else {
    // FIXME: I think for bottom up scheduling, the register pressure is cached
    // and can be retrieved by DAG->getPressureDiff(SU).
    TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
  }

  unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
  unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];

  // If two instructions increase the pressure of different register sets
  // by the same amount, the generic scheduler will prefer to schedule the
  // instruction that increases the set with the fewest registers, which in
  // our case would be SGPRs. This is rarely what we want, so when we report
  // excess/critical register pressure, we do it either only for VGPRs or
  // only for SGPRs.

  // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
  const unsigned MaxVGPRPressureInc = 16;
  bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
  bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;

  // FIXME: We have to enter REG-EXCESS before we reach the actual threshold
  // to increase the likelihood we don't go over the limits. We should improve
  // the analysis to look through dependencies to find the path with the least
  // register pressure.

  // We only need to update the RPDelta for instructions that increase register
  // pressure. Instructions that decrease or keep reg pressure the same will be
  // marked as RegExcess in tryCandidate() when they are compared with
  // instructions that increase the register pressure.
  if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
    HasHighPressure = true;
    Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
    Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
  }

  if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
    HasHighPressure = true;
    Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
    Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
  }
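  // Illustrative numbers (hypothetical): with VGPRExcessLimit = 256 and an
  // incoming VGPRPressure of 250, ShouldTrackVGPRs is true because
  // 250 + 16 >= 256; if this candidate raises NewVGPRPressure to 258, Excess
  // is recorded on the VGPR_32 set with a unit increment of 258 - 256 = 2.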
  // Register pressure is considered 'CRITICAL' if it is approaching a value
  // that would reduce the wave occupancy for the execution unit. When
  // register pressure is 'CRITICAL', increasing SGPR and VGPR pressure both
  // have the same cost, so we don't need to prefer one over the other.

  int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
  int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;

  if (SGPRDelta >= 0 || VGPRDelta >= 0) {
    HasHighPressure = true;
    if (SGPRDelta > VGPRDelta) {
      Cand.RPDelta.CriticalMax =
          PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
      Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
    } else {
      Cand.RPDelta.CriticalMax =
          PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
      Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
    }
  }
}
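// For the 'CRITICAL' delta above, a hypothetical example: with
// SGPRCriticalLimit = 100 and VGPRCriticalLimit = 220, a candidate producing
// NewSGPRPressure = 102 and NewVGPRPressure = 200 yields SGPRDelta = 2 and
// VGPRDelta = -20, so CriticalMax is recorded on SReg_32 with a unit
// increment of 2.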
// This function is mostly cut and pasted from
// GenericScheduler::pickNodeFromQueue()
void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
                                         const CandPolicy &ZonePolicy,
                                         const RegPressureTracker &RPTracker,
                                         SchedCandidate &Cand) {
  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
  ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
  unsigned SGPRPressure = 0;
  unsigned VGPRPressure = 0;
  if (DAG->isTrackingPressure()) {
    SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
    VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
  }
  ReadyQueue &Q = Zone.Available;
  for (SUnit *SU : Q) {

    SchedCandidate TryCand(ZonePolicy);
    initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI,
                  SGPRPressure, VGPRPressure);
    // Pass SchedBoundary only when comparing nodes from the same boundary.
    SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
    tryCandidate(Cand, TryCand, ZoneArg);
    if (TryCand.Reason != NoCand) {
      // Initialize resource delta if needed in case future heuristics query it.
      if (TryCand.ResDelta == SchedResourceDelta())
        TryCand.initResourceDelta(Zone.DAG, SchedModel);
      Cand.setBest(TryCand);
      LLVM_DEBUG(traceCandidate(Cand));
    }
  }
}

// This function is mostly cut and pasted from
// GenericScheduler::pickNodeBidirectional()
SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
  if (SUnit *SU = Bot.pickOnlyChoice()) {
    IsTopNode = false;
    return SU;
  }
  if (SUnit *SU = Top.pickOnlyChoice()) {
    IsTopNode = true;
    return SU;
  }
  // Set the bottom-up policy based on the state of the current bottom zone and
  // the instructions outside the zone, including the top zone.
  CandPolicy BotPolicy;
  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
  // Set the top-down policy based on the state of the current top zone and
  // the instructions outside the zone, including the bottom zone.
  CandPolicy TopPolicy;
  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);

  // See if BotCand is still valid (because we previously scheduled from Top).
  LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
      BotCand.Policy != BotPolicy) {
    BotCand.reset(CandPolicy());
    pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
    assert(BotCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    LLVM_DEBUG(traceCandidate(BotCand));
#ifndef NDEBUG
    if (VerifyScheduling) {
      SchedCandidate TCand;
      TCand.reset(CandPolicy());
      pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
      assert(TCand.SU == BotCand.SU &&
             "Last pick result should correspond to re-picking right now");
    }
#endif
  }

  // Check if the top Q has a better candidate.
  LLVM_DEBUG(dbgs() << "Picking from Top:\n");
  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
      TopCand.Policy != TopPolicy) {
    TopCand.reset(CandPolicy());
    pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
    assert(TopCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    LLVM_DEBUG(traceCandidate(TopCand));
#ifndef NDEBUG
    if (VerifyScheduling) {
      SchedCandidate TCand;
      TCand.reset(CandPolicy());
      pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
      assert(TCand.SU == TopCand.SU &&
             "Last pick result should correspond to re-picking right now");
    }
#endif
  }

  // Pick best from BotCand and TopCand.
  LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand);
             dbgs() << "Bot Cand: "; traceCandidate(BotCand););
  SchedCandidate Cand = BotCand;
  TopCand.Reason = NoCand;
  tryCandidate(Cand, TopCand, nullptr);
  if (TopCand.Reason != NoCand) {
    Cand.setBest(TopCand);
  }
  LLVM_DEBUG(dbgs() << "Picking: "; traceCandidate(Cand););

  IsTopNode = Cand.AtTop;
  return Cand.SU;
}

// This function is mostly cut and pasted from
// GenericScheduler::pickNode()
SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return nullptr;
  }
  SUnit *SU;
  do {
    if (RegionPolicy.OnlyTopDown) {
      SU = Top.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        TopCand.reset(NoPolicy);
        pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
        assert(TopCand.Reason != NoCand && "failed to find a candidate");
        SU = TopCand.SU;
      }
      IsTopNode = true;
    } else if (RegionPolicy.OnlyBottomUp) {
      SU = Bot.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        BotCand.reset(NoPolicy);
        pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
        assert(BotCand.Reason != NoCand && "failed to find a candidate");
        SU = BotCand.SU;
      }
      IsTopNode = false;
    } else {
      SU = pickNodeBidirectional(IsTopNode);
    }
  } while (SU->isScheduled);

  if (SU->isTopReady())
    Top.removeReady(SU);
  if (SU->isBottomReady())
    Bot.removeReady(SU);

  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
                    << *SU->getInstr());
  return SU;
}

GCNSchedStageID GCNSchedStrategy::getCurrentStage() {
  assert(CurrentStage && CurrentStage != SchedStages.end());
  return *CurrentStage;
}

bool GCNSchedStrategy::advanceStage() {
  assert(CurrentStage != SchedStages.end());

  if (!CurrentStage)
    CurrentStage = SchedStages.begin();
  else
    CurrentStage++;

  return CurrentStage != SchedStages.end();
}

bool GCNSchedStrategy::hasNextStage() const {
  assert(CurrentStage);
  return std::next(CurrentStage) != SchedStages.end();
}

GCNSchedStageID GCNSchedStrategy::getNextStage() const {
  assert(CurrentStage && std::next(CurrentStage) != SchedStages.end());
  return *std::next(CurrentStage);
}

GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
    const MachineSchedContext *C)
    : GCNSchedStrategy(C) {
  SchedStages.push_back(GCNSchedStageID::OccInitialSchedule);
  SchedStages.push_back(GCNSchedStageID::UnclusteredHighRPReschedule);
  SchedStages.push_back(GCNSchedStageID::ClusteredLowOccupancyReschedule);
  SchedStages.push_back(GCNSchedStageID::PreRARematerialize);
}

GCNMaxILPSchedStrategy::GCNMaxILPSchedStrategy(const MachineSchedContext *C)
    : GCNSchedStrategy(C) {
  SchedStages.push_back(GCNSchedStageID::ILPInitialSchedule);
}

bool GCNMaxILPSchedStrategy::tryCandidate(SchedCandidate &Cand,
                                          SchedCandidate &TryCand,
                                          SchedBoundary *Zone) const {
  // Initialize the candidate if needed.
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return true;
  }

  // Avoid spilling by exceeding the register limit.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
                  RegExcess, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  // Bias PhysReg defs and copies toward their uses and definitions,
  // respectively.
  if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
                 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
    return TryCand.Reason != NoCand;

  bool SameBoundary = Zone != nullptr;
  if (SameBoundary) {
    // Prioritize instructions that read unbuffered resources by stall cycles.
    if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
                Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
      return TryCand.Reason != NoCand;

    // Avoid critical resource consumption and balance the schedule.
    TryCand.initResourceDelta(DAG, SchedModel);
    if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
                TryCand, Cand, ResourceReduce))
      return TryCand.Reason != NoCand;
    if (tryGreater(TryCand.ResDelta.DemandedResources,
                   Cand.ResDelta.DemandedResources, TryCand, Cand,
                   ResourceDemand))
      return TryCand.Reason != NoCand;

    // Unconditionally try to reduce latency.
    if (tryLatency(TryCand, Cand, *Zone))
      return TryCand.Reason != NoCand;

    // Weak edges are for clustering and other constraints.
    if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
                getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak))
      return TryCand.Reason != NoCand;
  }

  // Keep clustered nodes together to encourage downstream peephole
  // optimizations which may reduce resource requirements.
  //
  // This is a best effort to set things up for a post-RA pass. Optimizations
  // like generating loads of multiple registers should ideally be done within
  // the scheduler pass by combining the loads during DAG postprocessing.
  const SUnit *CandNextClusterSU =
      Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
  const SUnit *TryCandNextClusterSU =
      TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
  if (tryGreater(TryCand.SU == TryCandNextClusterSU,
                 Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster))
    return TryCand.Reason != NoCand;

  // Avoid increasing the max critical pressure in the scheduled region.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,
                  TryCand, Cand, RegCritical, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  // Avoid increasing the max pressure of the entire region.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand,
                  Cand, RegMax, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  if (SameBoundary) {
    // Fall through to original instruction order.
    if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) ||
        (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
      TryCand.Reason = NodeOrder;
      return true;
    }
  }
  return false;
}
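// Note on the cascade above: heuristics are tried in priority order, and the
// first one that can tell the two candidates apart decides. Avoiding spills
// (RegExcess) still dominates everything, but latency is tried before the
// cluster and region-pressure-maximum heuristics, which is what biases this
// strategy toward ILP; in GenericScheduler's cascade, by contrast, latency is
// one of the last tie-breakers.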
GCNScheduleDAGMILive::GCNScheduleDAGMILive(
    MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S)
    : ScheduleDAGMILive(C, std::move(S)), ST(MF.getSubtarget<GCNSubtarget>()),
      MFI(*MF.getInfo<SIMachineFunctionInfo>()),
      StartingOccupancy(MFI.getOccupancy()), MinOccupancy(StartingOccupancy) {

  LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
  if (RelaxedOcc) {
    MinOccupancy = std::min(MFI.getMinAllowedOccupancy(), StartingOccupancy);
    if (MinOccupancy != StartingOccupancy)
      LLVM_DEBUG(dbgs() << "Allowing Occupancy drops to " << MinOccupancy
                        << ".\n");
  }
}

std::unique_ptr<GCNSchedStage>
GCNScheduleDAGMILive::createSchedStage(GCNSchedStageID SchedStageID) {
  switch (SchedStageID) {
  case GCNSchedStageID::OccInitialSchedule:
    return std::make_unique<OccInitialScheduleStage>(SchedStageID, *this);
  case GCNSchedStageID::UnclusteredHighRPReschedule:
    return std::make_unique<UnclusteredHighRPStage>(SchedStageID, *this);
  case GCNSchedStageID::ClusteredLowOccupancyReschedule:
    return std::make_unique<ClusteredLowOccStage>(SchedStageID, *this);
  case GCNSchedStageID::PreRARematerialize:
    return std::make_unique<PreRARematStage>(SchedStageID, *this);
  case GCNSchedStageID::ILPInitialSchedule:
    return std::make_unique<ILPInitialScheduleStage>(SchedStageID, *this);
  }

  llvm_unreachable("Unknown SchedStageID.");
}

void GCNScheduleDAGMILive::schedule() {
  // Collect all scheduling regions. The actual scheduling is performed in
  // GCNScheduleDAGMILive::finalizeSchedule.
  Regions.push_back(std::pair(RegionBegin, RegionEnd));
}

GCNRegPressure
GCNScheduleDAGMILive::getRealRegPressure(unsigned RegionIdx) const {
  GCNDownwardRPTracker RPTracker(*LIS);
  RPTracker.advance(begin(), end(), &LiveIns[RegionIdx]);
  return RPTracker.moveMaxPressure();
}

void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
                                                const MachineBasicBlock *MBB) {
  GCNDownwardRPTracker RPTracker(*LIS);

  // If the block has only one successor then the live-ins of that successor
  // are the live-outs of the current block. We can reuse the calculated live
  // set if the successor will be sent to scheduling past the current block.

  // However, due to a bug in LiveInterval analysis it may happen that two
  // predecessors of the same successor block have different lane bitmasks for
  // a live-out register. Work around that by sticking to a one-to-one
  // relationship, i.e. one predecessor with one successor block.
  const MachineBasicBlock *OnlySucc = nullptr;
  if (MBB->succ_size() == 1) {
    auto *Candidate = *MBB->succ_begin();
    if (!Candidate->empty() && Candidate->pred_size() == 1) {
      SlotIndexes *Ind = LIS->getSlotIndexes();
      if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(Candidate))
        OnlySucc = Candidate;
    }
  }

  // Scheduler sends regions from the end of the block upwards.
  size_t CurRegion = RegionIdx;
  for (size_t E = Regions.size(); CurRegion != E; ++CurRegion)
    if (Regions[CurRegion].first->getParent() != MBB)
      break;
  --CurRegion;

  auto I = MBB->begin();
  auto LiveInIt = MBBLiveIns.find(MBB);
  auto &Rgn = Regions[CurRegion];
  auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
  if (LiveInIt != MBBLiveIns.end()) {
    auto LiveIn = std::move(LiveInIt->second);
    RPTracker.reset(*MBB->begin(), &LiveIn);
    MBBLiveIns.erase(LiveInIt);
  } else {
    I = Rgn.first;
    auto LRS = BBLiveInMap.lookup(NonDbgMI);
#ifdef EXPENSIVE_CHECKS
    assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS));
#endif
    RPTracker.reset(*I, &LRS);
  }

  for (;;) {
    I = RPTracker.getNext();

    if (Regions[CurRegion].first == I || NonDbgMI == I) {
      LiveIns[CurRegion] = RPTracker.getLiveRegs();
      RPTracker.clearMaxPressure();
    }

    if (Regions[CurRegion].second == I) {
      Pressure[CurRegion] = RPTracker.moveMaxPressure();
      if (CurRegion-- == RegionIdx)
        break;
    }
    RPTracker.advanceToNext();
    RPTracker.advanceBeforeNext();
  }

  if (OnlySucc) {
    if (I != MBB->end()) {
      RPTracker.advanceToNext();
      RPTracker.advance(MBB->end());
    }
    RPTracker.advanceBeforeNext();
    MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs();
  }
}

DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
GCNScheduleDAGMILive::getBBLiveInMap() const {
  assert(!Regions.empty());
  std::vector<MachineInstr *> BBStarters;
  BBStarters.reserve(Regions.size());
  auto I = Regions.rbegin(), E = Regions.rend();
  auto *BB = I->first->getParent();
  do {
    auto *MI = &*skipDebugInstructionsForward(I->first, I->second);
    BBStarters.push_back(MI);
    do {
      ++I;
    } while (I != E && I->first->getParent() == BB);
  } while (I != E);
  return getLiveRegMap(BBStarters, false /*After*/, *LIS);
}

void GCNScheduleDAGMILive::finalizeSchedule() {
  // Start actual scheduling here. This function is called by the base
  // MachineScheduler after all regions have been recorded by
  // GCNScheduleDAGMILive::schedule().
  LiveIns.resize(Regions.size());
  Pressure.resize(Regions.size());
  RescheduleRegions.resize(Regions.size());
  RegionsWithHighRP.resize(Regions.size());
  RegionsWithExcessRP.resize(Regions.size());
  RegionsWithMinOcc.resize(Regions.size());
  RegionsWithIGLPInstrs.resize(Regions.size());
  RescheduleRegions.set();
  RegionsWithHighRP.reset();
  RegionsWithExcessRP.reset();
  RegionsWithMinOcc.reset();
  RegionsWithIGLPInstrs.reset();

  runSchedStages();
}
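// A sketch of the overall flow: for GCNMaxOccupancySchedStrategy the loop
// below visits up to four stages (OccInitialSchedule,
// UnclusteredHighRPReschedule, ClusteredLowOccupancyReschedule,
// PreRARematerialize), re-running every recorded region per stage. A stage
// may opt out entirely via initGCNSchedStage() (e.g. UnclusteredHighRPStage
// when no region has high or excess RP), and individual regions may be
// skipped via initGCNRegion().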
void GCNScheduleDAGMILive::runSchedStages() {
  LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");

  if (!Regions.empty())
    BBLiveInMap = getBBLiveInMap();

  GCNSchedStrategy &S = static_cast<GCNSchedStrategy &>(*SchedImpl);
  while (S.advanceStage()) {
    auto Stage = createSchedStage(S.getCurrentStage());
    if (!Stage->initGCNSchedStage())
      continue;

    for (auto Region : Regions) {
      RegionBegin = Region.first;
      RegionEnd = Region.second;
      // Set up for scheduling the region and check whether it should be
      // skipped.
      if (!Stage->initGCNRegion()) {
        Stage->advanceRegion();
        exitRegion();
        continue;
      }

      ScheduleDAGMILive::schedule();
      Stage->finalizeGCNRegion();
    }

    Stage->finalizeGCNSchedStage();
  }
}

#ifndef NDEBUG
raw_ostream &llvm::operator<<(raw_ostream &OS, const GCNSchedStageID &StageID) {
  switch (StageID) {
  case GCNSchedStageID::OccInitialSchedule:
    OS << "Max Occupancy Initial Schedule";
    break;
  case GCNSchedStageID::UnclusteredHighRPReschedule:
    OS << "Unclustered High Register Pressure Reschedule";
    break;
  case GCNSchedStageID::ClusteredLowOccupancyReschedule:
    OS << "Clustered Low Occupancy Reschedule";
    break;
  case GCNSchedStageID::PreRARematerialize:
    OS << "Pre-RA Rematerialize";
    break;
  case GCNSchedStageID::ILPInitialSchedule:
    OS << "Max ILP Initial Schedule";
    break;
  }

  return OS;
}
#endif

GCNSchedStage::GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
    : DAG(DAG), S(static_cast<GCNSchedStrategy &>(*DAG.SchedImpl)), MF(DAG.MF),
      MFI(DAG.MFI), ST(DAG.ST), StageID(StageID) {}

bool GCNSchedStage::initGCNSchedStage() {
  if (!DAG.LIS)
    return false;

  LLVM_DEBUG(dbgs() << "Starting scheduling stage: " << StageID << "\n");
  return true;
}

bool UnclusteredHighRPStage::initGCNSchedStage() {
  if (DisableUnclusterHighRP)
    return false;

  if (!GCNSchedStage::initGCNSchedStage())
    return false;

  if (DAG.RegionsWithHighRP.none() && DAG.RegionsWithExcessRP.none())
    return false;

  SavedMutations.swap(DAG.Mutations);
  DAG.addMutation(createIGroupLPDAGMutation());

  InitialOccupancy = DAG.MinOccupancy;
  // Aggressively try to reduce register pressure in the unclustered high RP
  // stage. Temporarily increase the occupancy target in the region.
  S.SGPRLimitBias = S.HighRPSGPRBias;
  S.VGPRLimitBias = S.HighRPVGPRBias;
  if (MFI.getMaxWavesPerEU() > DAG.MinOccupancy)
    MFI.increaseOccupancy(MF, ++DAG.MinOccupancy);

  LLVM_DEBUG(
      dbgs()
      << "Retrying function scheduling without clustering. "
         "Aggressively try to reduce register pressure to achieve occupancy "
      << DAG.MinOccupancy << ".\n");

  return true;
}
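// Hypothetical illustration of the bump above: if MinOccupancy is 4 and the
// kernel allows 8 waves per EU, this stage reschedules while targeting 5
// waves, with the RP limits tightened by HighRPSGPRBias/HighRPVGPRBias.
// UnclusteredHighRPStage::finalizeGCNSchedStage() below resets the biases to
// 0 and, if the minimum occupancy did go up, refreshes which regions sit at
// the new minimum.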
bool ClusteredLowOccStage::initGCNSchedStage() {
  if (!GCNSchedStage::initGCNSchedStage())
    return false;

  // Don't bother trying to improve ILP in lower RP regions if occupancy has
  // not been dropped. All regions will have already been scheduled with the
  // ideal occupancy targets.
  if (DAG.StartingOccupancy <= DAG.MinOccupancy)
    return false;

  LLVM_DEBUG(
      dbgs() << "Retrying function scheduling with lowest recorded occupancy "
             << DAG.MinOccupancy << ".\n");
  return true;
}

bool PreRARematStage::initGCNSchedStage() {
  if (!GCNSchedStage::initGCNSchedStage())
    return false;

  if (DAG.RegionsWithMinOcc.none() || DAG.Regions.size() == 1)
    return false;

  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  // Check maximum occupancy.
  if (ST.computeOccupancy(MF.getFunction(), MFI.getLDSSize()) ==
      DAG.MinOccupancy)
    return false;

  // FIXME: This pass will invalidate cached MBBLiveIns for regions in between
  // the defs and the region we sank the def to. Cached pressure for regions
  // where a def is sunk from will also be invalidated. Will need to be fixed
  // if there is another pass after this pass.
  assert(!S.hasNextStage());

  collectRematerializableInstructions();
  if (RematerializableInsts.empty() || !sinkTriviallyRematInsts(ST, TII))
    return false;

  LLVM_DEBUG(
      dbgs() << "Retrying function scheduling with improved occupancy of "
             << DAG.MinOccupancy << " from rematerializing\n");
  return true;
}

void GCNSchedStage::finalizeGCNSchedStage() {
  DAG.finishBlock();
  LLVM_DEBUG(dbgs() << "Ending scheduling stage: " << StageID << "\n");
}

void UnclusteredHighRPStage::finalizeGCNSchedStage() {
  SavedMutations.swap(DAG.Mutations);
  S.SGPRLimitBias = S.VGPRLimitBias = 0;
  if (DAG.MinOccupancy > InitialOccupancy) {
    for (unsigned IDX = 0; IDX < DAG.Pressure.size(); ++IDX)
      DAG.RegionsWithMinOcc[IDX] =
          DAG.Pressure[IDX].getOccupancy(DAG.ST) == DAG.MinOccupancy;

    LLVM_DEBUG(dbgs() << StageID
                      << " stage successfully increased occupancy to "
                      << DAG.MinOccupancy << '\n');
  }

  GCNSchedStage::finalizeGCNSchedStage();
}

bool GCNSchedStage::initGCNRegion() {
  // Check whether this new region is also a new block.
  if (DAG.RegionBegin->getParent() != CurrentMBB)
    setupNewBlock();

  unsigned NumRegionInstrs = std::distance(DAG.begin(), DAG.end());
  DAG.enterRegion(CurrentMBB, DAG.begin(), DAG.end(), NumRegionInstrs);

  // Skip empty scheduling regions (0 or 1 schedulable instructions).
  if (DAG.begin() == DAG.end() || DAG.begin() == std::prev(DAG.end()))
    return false;

  LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
  LLVM_DEBUG(dbgs() << MF.getName() << ":" << printMBBReference(*CurrentMBB)
                    << " " << CurrentMBB->getName()
                    << "\n  From: " << *DAG.begin() << "    To: ";
             if (DAG.RegionEnd != CurrentMBB->end()) dbgs() << *DAG.RegionEnd;
             else dbgs() << "End";
             dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');

  // Save the original instruction order before scheduling for possible revert.
  Unsched.clear();
  Unsched.reserve(DAG.NumRegionInstrs);
  if (StageID == GCNSchedStageID::OccInitialSchedule ||
      StageID == GCNSchedStageID::ILPInitialSchedule) {
    for (auto &I : DAG) {
      Unsched.push_back(&I);
      if (I.getOpcode() == AMDGPU::SCHED_GROUP_BARRIER ||
          I.getOpcode() == AMDGPU::IGLP_OPT)
        DAG.RegionsWithIGLPInstrs[RegionIdx] = true;
    }
  } else {
    for (auto &I : DAG)
      Unsched.push_back(&I);
  }

  PressureBefore = DAG.Pressure[RegionIdx];

  LLVM_DEBUG(
      dbgs() << "Pressure before scheduling:\nRegion live-ins:"
             << print(DAG.LiveIns[RegionIdx], DAG.MRI)
             << "Region live-in pressure: "
             << print(llvm::getRegPressure(DAG.MRI, DAG.LiveIns[RegionIdx]))
             << "Region register pressure: " << print(PressureBefore));

  S.HasHighPressure = false;
  S.KnownExcessRP = isRegionWithExcessRP();

  if (DAG.RegionsWithIGLPInstrs[RegionIdx] &&
      StageID != GCNSchedStageID::UnclusteredHighRPReschedule) {
    SavedMutations.clear();
    SavedMutations.swap(DAG.Mutations);
    DAG.addMutation(createIGroupLPDAGMutation());
  }

  return true;
}

bool UnclusteredHighRPStage::initGCNRegion() {
  // Only reschedule regions with the minimum occupancy or regions that may
  // have spilling (excess register pressure).
  if ((!DAG.RegionsWithMinOcc[RegionIdx] ||
       DAG.MinOccupancy <= InitialOccupancy) &&
      !DAG.RegionsWithExcessRP[RegionIdx])
    return false;

  return GCNSchedStage::initGCNRegion();
}

bool ClusteredLowOccStage::initGCNRegion() {
  // We may need to reschedule this region if it wasn't rescheduled in the
  // last stage, or if we found it was testing critical register pressure
  // limits in the unclustered reschedule stage. The latter is because we may
  // not have been able to raise the min occupancy in the previous stage, so
  // the region may be overly constrained even if it was already rescheduled.
  if (!DAG.RegionsWithHighRP[RegionIdx])
    return false;

  return GCNSchedStage::initGCNRegion();
}

bool PreRARematStage::initGCNRegion() {
  if (!DAG.RescheduleRegions[RegionIdx])
    return false;

  return GCNSchedStage::initGCNRegion();
}

void GCNSchedStage::setupNewBlock() {
  if (CurrentMBB)
    DAG.finishBlock();

  CurrentMBB = DAG.RegionBegin->getParent();
  DAG.startBlock(CurrentMBB);
  // Get the real RP for the region if it hasn't been calculated before. After
  // the initial schedule stage real RP will be collected after scheduling.
  if (StageID == GCNSchedStageID::OccInitialSchedule ||
      StageID == GCNSchedStageID::ILPInitialSchedule)
    DAG.computeBlockPressure(RegionIdx, CurrentMBB);
}

void GCNSchedStage::finalizeGCNRegion() {
  DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);
  DAG.RescheduleRegions[RegionIdx] = false;
  if (S.HasHighPressure)
    DAG.RegionsWithHighRP[RegionIdx] = true;

  // Revert scheduling if we have dropped occupancy or there is some other
  // reason that the original schedule is better.
  checkScheduling();

  if (DAG.RegionsWithIGLPInstrs[RegionIdx] &&
      StageID != GCNSchedStageID::UnclusteredHighRPReschedule)
    SavedMutations.swap(DAG.Mutations);

  DAG.exitRegion();
  RegionIdx++;
}

void GCNSchedStage::checkScheduling() {
  // Check the results of scheduling.
  PressureAfter = DAG.getRealRegPressure(RegionIdx);
  LLVM_DEBUG(dbgs() << "Pressure after scheduling: " << print(PressureAfter));
  LLVM_DEBUG(dbgs() << "Region: " << RegionIdx << ".\n");

  if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit &&
      PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) <= S.VGPRCriticalLimit) {
    DAG.Pressure[RegionIdx] = PressureAfter;
    DAG.RegionsWithMinOcc[RegionIdx] =
        PressureAfter.getOccupancy(ST) == DAG.MinOccupancy;

    // Early out if we have achieved the occupancy target.
    LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n");
    return;
  }

  unsigned TargetOccupancy =
      std::min(S.getTargetOccupancy(), ST.getOccupancyWithLocalMemSize(MF));
  unsigned WavesAfter =
      std::min(TargetOccupancy, PressureAfter.getOccupancy(ST));
  unsigned WavesBefore =
      std::min(TargetOccupancy, PressureBefore.getOccupancy(ST));
  LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore
                    << ", after " << WavesAfter << ".\n");
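  // Hypothetical illustration: with TargetOccupancy = 8, an after-schedule
  // occupancy of 6 and a before-schedule occupancy of 8 give WavesAfter = 6
  // and WavesBefore = 8. NewOccupancy below then stays at 8, and the region
  // becomes a candidate for reverting via shouldRevertScheduling().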
927972a253aSDimitry Andric LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n"); 928972a253aSDimitry Andric return; 929972a253aSDimitry Andric } 930972a253aSDimitry Andric 931bdd1243dSDimitry Andric unsigned TargetOccupancy = 932bdd1243dSDimitry Andric std::min(S.getTargetOccupancy(), ST.getOccupancyWithLocalMemSize(MF)); 933972a253aSDimitry Andric unsigned WavesAfter = 934bdd1243dSDimitry Andric std::min(TargetOccupancy, PressureAfter.getOccupancy(ST)); 935972a253aSDimitry Andric unsigned WavesBefore = 936bdd1243dSDimitry Andric std::min(TargetOccupancy, PressureBefore.getOccupancy(ST)); 937972a253aSDimitry Andric LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore 938972a253aSDimitry Andric << ", after " << WavesAfter << ".\n"); 939972a253aSDimitry Andric 940972a253aSDimitry Andric // We may not be able to keep the current target occupancy because of the just 941972a253aSDimitry Andric // scheduled region. We might still be able to revert scheduling if the 942972a253aSDimitry Andric // occupancy before was higher, or if the current schedule has register 943972a253aSDimitry Andric // pressure higher than the excess limits which could lead to more spilling. 944972a253aSDimitry Andric unsigned NewOccupancy = std::max(WavesAfter, WavesBefore); 945972a253aSDimitry Andric 946972a253aSDimitry Andric // Allow memory bound functions to drop to 4 waves if not limited by an 947972a253aSDimitry Andric // attribute. 948972a253aSDimitry Andric if (WavesAfter < WavesBefore && WavesAfter < DAG.MinOccupancy && 949972a253aSDimitry Andric WavesAfter >= MFI.getMinAllowedOccupancy()) { 950972a253aSDimitry Andric LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to " 951972a253aSDimitry Andric << MFI.getMinAllowedOccupancy() << " waves\n"); 952972a253aSDimitry Andric NewOccupancy = WavesAfter; 953972a253aSDimitry Andric } 954972a253aSDimitry Andric 955972a253aSDimitry Andric if (NewOccupancy < DAG.MinOccupancy) { 956972a253aSDimitry Andric DAG.MinOccupancy = NewOccupancy; 957972a253aSDimitry Andric MFI.limitOccupancy(DAG.MinOccupancy); 958972a253aSDimitry Andric DAG.RegionsWithMinOcc.reset(); 959972a253aSDimitry Andric LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to " 960972a253aSDimitry Andric << DAG.MinOccupancy << ".\n"); 961972a253aSDimitry Andric } 962972a253aSDimitry Andric 963972a253aSDimitry Andric unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF); 964972a253aSDimitry Andric unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF); 965972a253aSDimitry Andric if (PressureAfter.getVGPRNum(false) > MaxVGPRs || 966972a253aSDimitry Andric PressureAfter.getAGPRNum() > MaxVGPRs || 967972a253aSDimitry Andric PressureAfter.getSGPRNum() > MaxSGPRs) { 968972a253aSDimitry Andric DAG.RescheduleRegions[RegionIdx] = true; 969972a253aSDimitry Andric DAG.RegionsWithHighRP[RegionIdx] = true; 970bdd1243dSDimitry Andric DAG.RegionsWithExcessRP[RegionIdx] = true; 971972a253aSDimitry Andric } 972972a253aSDimitry Andric 973972a253aSDimitry Andric // Revert if this region's schedule would cause a drop in occupancy or 974972a253aSDimitry Andric // spilling. 
975972a253aSDimitry Andric   if (shouldRevertScheduling(WavesAfter)) {
976972a253aSDimitry Andric     revertScheduling();
977972a253aSDimitry Andric   } else {
978972a253aSDimitry Andric     DAG.Pressure[RegionIdx] = PressureAfter;
979972a253aSDimitry Andric     DAG.RegionsWithMinOcc[RegionIdx] =
980972a253aSDimitry Andric         PressureAfter.getOccupancy(ST) == DAG.MinOccupancy;
981972a253aSDimitry Andric   }
982972a253aSDimitry Andric }
983972a253aSDimitry Andric 
984bdd1243dSDimitry Andric unsigned
985bdd1243dSDimitry Andric GCNSchedStage::computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle,
986bdd1243dSDimitry Andric                                       DenseMap<unsigned, unsigned> &ReadyCycles,
987bdd1243dSDimitry Andric                                       const TargetSchedModel &SM) {
988bdd1243dSDimitry Andric   unsigned ReadyCycle = CurrCycle;
989bdd1243dSDimitry Andric   for (auto &D : SU.Preds) {
990bdd1243dSDimitry Andric     if (D.isAssignedRegDep()) {
991bdd1243dSDimitry Andric       MachineInstr *DefMI = D.getSUnit()->getInstr();
992bdd1243dSDimitry Andric       unsigned Latency = SM.computeInstrLatency(DefMI);
993bdd1243dSDimitry Andric       unsigned DefReady = ReadyCycles[DAG.getSUnit(DefMI)->NodeNum];
994bdd1243dSDimitry Andric       ReadyCycle = std::max(ReadyCycle, DefReady + Latency);
995bdd1243dSDimitry Andric     }
996bdd1243dSDimitry Andric   }
997bdd1243dSDimitry Andric   ReadyCycles[SU.NodeNum] = ReadyCycle;
998bdd1243dSDimitry Andric   return ReadyCycle;
999bdd1243dSDimitry Andric }
1000bdd1243dSDimitry Andric 
1001bdd1243dSDimitry Andric #ifndef NDEBUG
1002bdd1243dSDimitry Andric struct EarlierIssuingCycle {
1003bdd1243dSDimitry Andric   bool operator()(std::pair<MachineInstr *, unsigned> A,
1004bdd1243dSDimitry Andric                   std::pair<MachineInstr *, unsigned> B) const {
1005bdd1243dSDimitry Andric     return A.second < B.second;
1006bdd1243dSDimitry Andric   }
1007bdd1243dSDimitry Andric };
1008bdd1243dSDimitry Andric 
1009bdd1243dSDimitry Andric static void printScheduleModel(std::set<std::pair<MachineInstr *, unsigned>,
1010bdd1243dSDimitry Andric                                         EarlierIssuingCycle> &ReadyCycles) {
1011bdd1243dSDimitry Andric   if (ReadyCycles.empty())
1012bdd1243dSDimitry Andric     return;
1013bdd1243dSDimitry Andric   unsigned BBNum = ReadyCycles.begin()->first->getParent()->getNumber();
1014bdd1243dSDimitry Andric   dbgs() << "\n################## Schedule time ReadyCycles for MBB : " << BBNum
1015bdd1243dSDimitry Andric          << " ##################\n# Cycle #\t\t\tInstruction          "
1016bdd1243dSDimitry Andric             "             "
1017bdd1243dSDimitry Andric             "                            \n";
1018bdd1243dSDimitry Andric   unsigned IPrev = 1;
1019bdd1243dSDimitry Andric   for (auto &I : ReadyCycles) {
1020bdd1243dSDimitry Andric     if (I.second > IPrev + 1)
1021bdd1243dSDimitry Andric       dbgs() << "****************************** BUBBLE OF " << I.second - IPrev
1022bdd1243dSDimitry Andric              << " CYCLES DETECTED ******************************\n\n";
1023bdd1243dSDimitry Andric     dbgs() << "[ " << I.second << " ] : " << *I.first << "\n";
1024bdd1243dSDimitry Andric     IPrev = I.second;
1025bdd1243dSDimitry Andric   }
1026bdd1243dSDimitry Andric }
1027bdd1243dSDimitry Andric #endif
1028bdd1243dSDimitry Andric 
1029bdd1243dSDimitry Andric ScheduleMetrics
1030bdd1243dSDimitry Andric GCNSchedStage::getScheduleMetrics(const std::vector<SUnit> &InputSchedule) {
1031bdd1243dSDimitry Andric #ifndef NDEBUG
1032bdd1243dSDimitry Andric   std::set<std::pair<MachineInstr *, unsigned>, EarlierIssuingCycle>
1033bdd1243dSDimitry Andric       ReadyCyclesSorted;
1034bdd1243dSDimitry Andric #endif
1035bdd1243dSDimitry Andric   const TargetSchedModel &SM = ST.getInstrInfo()->getSchedModel();
1036bdd1243dSDimitry Andric   unsigned SumBubbles = 0;
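  // Note: SumBubbles accumulates, per scheduled instruction, the idle gap
  // between the next issue cycle (CurrCycle) and the cycle at which the
  // instruction's register inputs become ready; a perfectly packed schedule
  // accumulates zero bubbles.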
1037bdd1243dSDimitry Andric   DenseMap<unsigned, unsigned> ReadyCycles;
1038bdd1243dSDimitry Andric   unsigned CurrCycle = 0;
1039bdd1243dSDimitry Andric   for (auto &SU : InputSchedule) {
1040bdd1243dSDimitry Andric     unsigned ReadyCycle =
1041bdd1243dSDimitry Andric         computeSUnitReadyCycle(SU, CurrCycle, ReadyCycles, SM);
1042bdd1243dSDimitry Andric     SumBubbles += ReadyCycle - CurrCycle;
1043bdd1243dSDimitry Andric #ifndef NDEBUG
1044bdd1243dSDimitry Andric     ReadyCyclesSorted.insert(std::make_pair(SU.getInstr(), ReadyCycle));
1045bdd1243dSDimitry Andric #endif
1046bdd1243dSDimitry Andric     CurrCycle = ++ReadyCycle;
1047bdd1243dSDimitry Andric   }
1048bdd1243dSDimitry Andric #ifndef NDEBUG
1049bdd1243dSDimitry Andric   LLVM_DEBUG(
1050bdd1243dSDimitry Andric       printScheduleModel(ReadyCyclesSorted);
1051bdd1243dSDimitry Andric       dbgs() << "\n\t"
1052bdd1243dSDimitry Andric              << "Metric: "
1053bdd1243dSDimitry Andric              << (SumBubbles
1054bdd1243dSDimitry Andric                      ? (SumBubbles * ScheduleMetrics::ScaleFactor) / CurrCycle
1055bdd1243dSDimitry Andric                      : 1)
1056bdd1243dSDimitry Andric              << "\n\n");
1057bdd1243dSDimitry Andric #endif
1058bdd1243dSDimitry Andric 
1059bdd1243dSDimitry Andric   return ScheduleMetrics(CurrCycle, SumBubbles);
1060bdd1243dSDimitry Andric }
1061bdd1243dSDimitry Andric 
1062bdd1243dSDimitry Andric ScheduleMetrics
1063bdd1243dSDimitry Andric GCNSchedStage::getScheduleMetrics(const GCNScheduleDAGMILive &DAG) {
1064bdd1243dSDimitry Andric #ifndef NDEBUG
1065bdd1243dSDimitry Andric   std::set<std::pair<MachineInstr *, unsigned>, EarlierIssuingCycle>
1066bdd1243dSDimitry Andric       ReadyCyclesSorted;
1067bdd1243dSDimitry Andric #endif
1068bdd1243dSDimitry Andric   const TargetSchedModel &SM = ST.getInstrInfo()->getSchedModel();
1069bdd1243dSDimitry Andric   unsigned SumBubbles = 0;
1070bdd1243dSDimitry Andric   DenseMap<unsigned, unsigned> ReadyCycles;
1071bdd1243dSDimitry Andric   unsigned CurrCycle = 0;
1072bdd1243dSDimitry Andric   for (auto &MI : DAG) {
1073bdd1243dSDimitry Andric     SUnit *SU = DAG.getSUnit(&MI);
1074bdd1243dSDimitry Andric     if (!SU)
1075bdd1243dSDimitry Andric       continue;
1076bdd1243dSDimitry Andric     unsigned ReadyCycle =
1077bdd1243dSDimitry Andric         computeSUnitReadyCycle(*SU, CurrCycle, ReadyCycles, SM);
1078bdd1243dSDimitry Andric     SumBubbles += ReadyCycle - CurrCycle;
1079bdd1243dSDimitry Andric #ifndef NDEBUG
1080bdd1243dSDimitry Andric     ReadyCyclesSorted.insert(std::make_pair(SU->getInstr(), ReadyCycle));
1081bdd1243dSDimitry Andric #endif
1082bdd1243dSDimitry Andric     CurrCycle = ++ReadyCycle;
1083bdd1243dSDimitry Andric   }
1084bdd1243dSDimitry Andric #ifndef NDEBUG
1085bdd1243dSDimitry Andric   LLVM_DEBUG(
1086bdd1243dSDimitry Andric       printScheduleModel(ReadyCyclesSorted);
1087bdd1243dSDimitry Andric       dbgs() << "\n\t"
1088bdd1243dSDimitry Andric              << "Metric: "
1089bdd1243dSDimitry Andric              << (SumBubbles
1090bdd1243dSDimitry Andric                      ? (SumBubbles * ScheduleMetrics::ScaleFactor) / CurrCycle
1091bdd1243dSDimitry Andric                      : 1)
1092bdd1243dSDimitry Andric              << "\n\n");
1093bdd1243dSDimitry Andric #endif
1094bdd1243dSDimitry Andric 
1095bdd1243dSDimitry Andric   return ScheduleMetrics(CurrCycle, SumBubbles);
1096bdd1243dSDimitry Andric }
1097bdd1243dSDimitry Andric 
1098972a253aSDimitry Andric bool GCNSchedStage::shouldRevertScheduling(unsigned WavesAfter) {
1099972a253aSDimitry Andric   if (WavesAfter < DAG.MinOccupancy)
1100972a253aSDimitry Andric     return true;
1101972a253aSDimitry Andric 
1102972a253aSDimitry Andric   return false;
1103972a253aSDimitry Andric }
1104972a253aSDimitry Andric 
1105bdd1243dSDimitry Andric bool OccInitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
1106bdd1243dSDimitry Andric   if (PressureAfter == PressureBefore)
1107bdd1243dSDimitry Andric     return false;
1108bdd1243dSDimitry Andric 
1109972a253aSDimitry Andric   if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
1110972a253aSDimitry Andric     return true;
1111972a253aSDimitry Andric 
1112972a253aSDimitry Andric   if (mayCauseSpilling(WavesAfter))
1113972a253aSDimitry Andric     return true;
1114972a253aSDimitry Andric 
1115972a253aSDimitry Andric   return false;
1116972a253aSDimitry Andric }
1117972a253aSDimitry Andric 
1118bdd1243dSDimitry Andric bool UnclusteredHighRPStage::shouldRevertScheduling(unsigned WavesAfter) {
1119bdd1243dSDimitry Andric   // If RP is not reduced in the unclustered reschedule stage, revert to the
1120bdd1243dSDimitry Andric   // old schedule.
1121bdd1243dSDimitry Andric   if ((WavesAfter <= PressureBefore.getOccupancy(ST) &&
1122bdd1243dSDimitry Andric        mayCauseSpilling(WavesAfter)) ||
1123bdd1243dSDimitry Andric       GCNSchedStage::shouldRevertScheduling(WavesAfter)) {
1124972a253aSDimitry Andric     LLVM_DEBUG(dbgs() << "Unclustered reschedule did not help.\n");
1125972a253aSDimitry Andric     return true;
1126972a253aSDimitry Andric   }
1127972a253aSDimitry Andric 
1128*06c3fb27SDimitry Andric   // Do not attempt to relax the schedule even more if we are already spilling.
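  // (When the region still exceeds the RP limits, the relaxed schedule is
  // kept without consulting the latency metric below.) Otherwise the decision
  // is metric-based. Worked example with hypothetical values and the default
  // bias of 10: WavesBefore = WavesAfter = 4, OldMetric = 50, NewMetric = 25
  // gives Profit = (400 / 4) * ((50 + 10) * 100) / 25 / 100 = 240 >= 100, so
  // the new schedule is kept; with OldMetric = 25 and NewMetric = 50, Profit
  // comes to 70 < 100 and the stage reverts.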
1129*06c3fb27SDimitry Andric   if (isRegionWithExcessRP())
1130*06c3fb27SDimitry Andric     return false;
1131*06c3fb27SDimitry Andric 
1132bdd1243dSDimitry Andric   LLVM_DEBUG(
1133bdd1243dSDimitry Andric       dbgs()
1134bdd1243dSDimitry Andric       << "\n\t *** In shouldRevertScheduling ***\n"
1135bdd1243dSDimitry Andric       << " *********** BEFORE UnclusteredHighRPStage ***********\n");
1136bdd1243dSDimitry Andric   ScheduleMetrics MBefore =
1137bdd1243dSDimitry Andric       getScheduleMetrics(DAG.SUnits);
1138bdd1243dSDimitry Andric   LLVM_DEBUG(
1139bdd1243dSDimitry Andric       dbgs()
1140bdd1243dSDimitry Andric       << "\n *********** AFTER UnclusteredHighRPStage ***********\n");
1141bdd1243dSDimitry Andric   ScheduleMetrics MAfter = getScheduleMetrics(DAG);
1142bdd1243dSDimitry Andric   unsigned OldMetric = MBefore.getMetric();
1143bdd1243dSDimitry Andric   unsigned NewMetric = MAfter.getMetric();
1144bdd1243dSDimitry Andric   unsigned WavesBefore =
1145bdd1243dSDimitry Andric       std::min(S.getTargetOccupancy(), PressureBefore.getOccupancy(ST));
1146bdd1243dSDimitry Andric   unsigned Profit =
1147bdd1243dSDimitry Andric       ((WavesAfter * ScheduleMetrics::ScaleFactor) / WavesBefore *
1148bdd1243dSDimitry Andric        ((OldMetric + ScheduleMetricBias) * ScheduleMetrics::ScaleFactor) /
1149bdd1243dSDimitry Andric        NewMetric) /
1150bdd1243dSDimitry Andric       ScheduleMetrics::ScaleFactor;
1151bdd1243dSDimitry Andric   LLVM_DEBUG(dbgs() << "\tMetric before " << MBefore << "\tMetric after "
1152bdd1243dSDimitry Andric                     << MAfter << "Profit: " << Profit << "\n");
1153bdd1243dSDimitry Andric   return Profit < ScheduleMetrics::ScaleFactor;
1154972a253aSDimitry Andric }
1155972a253aSDimitry Andric 
1156972a253aSDimitry Andric bool ClusteredLowOccStage::shouldRevertScheduling(unsigned WavesAfter) {
1157bdd1243dSDimitry Andric   if (PressureAfter == PressureBefore)
1158bdd1243dSDimitry Andric     return false;
1159bdd1243dSDimitry Andric 
1160972a253aSDimitry Andric   if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
1161972a253aSDimitry Andric     return true;
1162972a253aSDimitry Andric 
1163972a253aSDimitry Andric   if (mayCauseSpilling(WavesAfter))
1164972a253aSDimitry Andric     return true;
1165972a253aSDimitry Andric 
1166972a253aSDimitry Andric   return false;
1167972a253aSDimitry Andric }
1168972a253aSDimitry Andric 
1169972a253aSDimitry Andric bool PreRARematStage::shouldRevertScheduling(unsigned WavesAfter) {
1170972a253aSDimitry Andric   if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
1171972a253aSDimitry Andric     return true;
1172972a253aSDimitry Andric 
1173972a253aSDimitry Andric   if (mayCauseSpilling(WavesAfter))
1174972a253aSDimitry Andric     return true;
1175972a253aSDimitry Andric 
1176972a253aSDimitry Andric   return false;
1177972a253aSDimitry Andric }
1178972a253aSDimitry Andric 
1179bdd1243dSDimitry Andric bool ILPInitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
1180bdd1243dSDimitry Andric   if (mayCauseSpilling(WavesAfter))
1181bdd1243dSDimitry Andric     return true;
1182bdd1243dSDimitry Andric 
1183bdd1243dSDimitry Andric   return false;
1184bdd1243dSDimitry Andric }
1185bdd1243dSDimitry Andric 
1186972a253aSDimitry Andric bool GCNSchedStage::mayCauseSpilling(unsigned WavesAfter) {
1187972a253aSDimitry Andric   if (WavesAfter <= MFI.getMinWavesPerEU() &&
1188972a253aSDimitry Andric       !PressureAfter.less(ST, PressureBefore) &&
1189bdd1243dSDimitry Andric       isRegionWithExcessRP()) {
1190972a253aSDimitry Andric     LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
1191972a253aSDimitry Andric     return true;
1192972a253aSDimitry Andric   }
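  // Reaching here means at least one spilling precondition failed: occupancy
  // still sits above the minimum waves per EU, the new pressure compares
  // lower than the old one, or the region never exceeded its excess RP
  // limits.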
1193972a253aSDimitry Andric 
1194972a253aSDimitry Andric   return false;
1195972a253aSDimitry Andric }
1196972a253aSDimitry Andric 
1197972a253aSDimitry Andric void GCNSchedStage::revertScheduling() {
1198972a253aSDimitry Andric   DAG.RegionsWithMinOcc[RegionIdx] =
1199972a253aSDimitry Andric       PressureBefore.getOccupancy(ST) == DAG.MinOccupancy;
1200972a253aSDimitry Andric   LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
1201972a253aSDimitry Andric   DAG.RescheduleRegions[RegionIdx] =
1202bdd1243dSDimitry Andric       S.hasNextStage() &&
1203bdd1243dSDimitry Andric       S.getNextStage() != GCNSchedStageID::UnclusteredHighRPReschedule;
1204972a253aSDimitry Andric   DAG.RegionEnd = DAG.RegionBegin;
1205972a253aSDimitry Andric   int SkippedDebugInstr = 0;
1206972a253aSDimitry Andric   for (MachineInstr *MI : Unsched) {
1207972a253aSDimitry Andric     if (MI->isDebugInstr()) {
1208972a253aSDimitry Andric       ++SkippedDebugInstr;
1209972a253aSDimitry Andric       continue;
1210972a253aSDimitry Andric     }
1211972a253aSDimitry Andric 
1212972a253aSDimitry Andric     if (MI->getIterator() != DAG.RegionEnd) {
1213972a253aSDimitry Andric       DAG.BB->remove(MI);
1214972a253aSDimitry Andric       DAG.BB->insert(DAG.RegionEnd, MI);
1215972a253aSDimitry Andric       if (!MI->isDebugInstr())
1216972a253aSDimitry Andric         DAG.LIS->handleMove(*MI, true);
1217972a253aSDimitry Andric     }
1218972a253aSDimitry Andric 
1219972a253aSDimitry Andric     // Reset read-undef flags and update them later.
1220*06c3fb27SDimitry Andric     for (auto &Op : MI->all_defs())
1221972a253aSDimitry Andric       Op.setIsUndef(false);
1222972a253aSDimitry Andric     RegisterOperands RegOpers;
1223972a253aSDimitry Andric     RegOpers.collect(*MI, *DAG.TRI, DAG.MRI, DAG.ShouldTrackLaneMasks, false);
1224972a253aSDimitry Andric     if (!MI->isDebugInstr()) {
1225972a253aSDimitry Andric       if (DAG.ShouldTrackLaneMasks) {
1226972a253aSDimitry Andric         // Adjust liveness and add missing dead+read-undef flags.
1227972a253aSDimitry Andric         SlotIndex SlotIdx = DAG.LIS->getInstructionIndex(*MI).getRegSlot();
1228972a253aSDimitry Andric         RegOpers.adjustLaneLiveness(*DAG.LIS, DAG.MRI, SlotIdx, MI);
1229972a253aSDimitry Andric       } else {
1230972a253aSDimitry Andric         // Adjust for missing dead-def flags.
1231972a253aSDimitry Andric         RegOpers.detectDeadDefs(*MI, *DAG.LIS);
1232972a253aSDimitry Andric       }
1233972a253aSDimitry Andric     }
1234972a253aSDimitry Andric     DAG.RegionEnd = MI->getIterator();
1235972a253aSDimitry Andric     ++DAG.RegionEnd;
1236972a253aSDimitry Andric     LLVM_DEBUG(dbgs() << "Scheduling " << *MI);
1237972a253aSDimitry Andric   }
1238972a253aSDimitry Andric 
1239972a253aSDimitry Andric   // After reverting the schedule, debug instrs will now be at the end of the
1240972a253aSDimitry Andric   // block and RegionEnd will point to the first debug instr. Increment
1241972a253aSDimitry Andric   // RegionEnd past debug instrs to the actual end of the scheduling region.
1242972a253aSDimitry Andric   while (SkippedDebugInstr-- > 0)
1243972a253aSDimitry Andric     ++DAG.RegionEnd;
1244972a253aSDimitry Andric 
1245972a253aSDimitry Andric   // If Unsched.front() instruction is a debug instruction, this will actually
1246972a253aSDimitry Andric   // shrink the region since we moved all debug instructions to the end of the
1247972a253aSDimitry Andric   // block. Find the first instruction that is not a debug instruction.
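  // Hypothetical illustration: if the original order was [DBG_VALUE, %a, %b],
  // the DBG_VALUE now sits at the end of the block, so RegionBegin must be
  // moved forward to %a instead of reusing the stale front iterator.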
1248972a253aSDimitry Andric   DAG.RegionBegin = Unsched.front()->getIterator();
1249972a253aSDimitry Andric   if (DAG.RegionBegin->isDebugInstr()) {
1250972a253aSDimitry Andric     for (MachineInstr *MI : Unsched) {
1251972a253aSDimitry Andric       if (MI->isDebugInstr())
1252972a253aSDimitry Andric         continue;
1253972a253aSDimitry Andric       DAG.RegionBegin = MI->getIterator();
1254972a253aSDimitry Andric       break;
1255972a253aSDimitry Andric     }
1256972a253aSDimitry Andric   }
1257972a253aSDimitry Andric 
1258972a253aSDimitry Andric   // Then move the debug instructions back into their correct place and set
1259972a253aSDimitry Andric   // RegionBegin and RegionEnd if needed.
1260972a253aSDimitry Andric   DAG.placeDebugValues();
1261972a253aSDimitry Andric 
1262bdd1243dSDimitry Andric   DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);
1263972a253aSDimitry Andric }
1264972a253aSDimitry Andric 
1265972a253aSDimitry Andric void PreRARematStage::collectRematerializableInstructions() {
1266972a253aSDimitry Andric   const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(DAG.TRI);
1267972a253aSDimitry Andric   for (unsigned I = 0, E = DAG.MRI.getNumVirtRegs(); I != E; ++I) {
126881ad6265SDimitry Andric     Register Reg = Register::index2VirtReg(I);
1269972a253aSDimitry Andric     if (!DAG.LIS->hasInterval(Reg))
127081ad6265SDimitry Andric       continue;
127181ad6265SDimitry Andric 
127281ad6265SDimitry Andric     // TODO: Handle AGPR and SGPR rematerialization
1273972a253aSDimitry Andric     if (!SRI->isVGPRClass(DAG.MRI.getRegClass(Reg)) ||
1274972a253aSDimitry Andric         !DAG.MRI.hasOneDef(Reg) || !DAG.MRI.hasOneNonDBGUse(Reg))
127581ad6265SDimitry Andric       continue;
127681ad6265SDimitry Andric 
1277972a253aSDimitry Andric     MachineOperand *Op = DAG.MRI.getOneDef(Reg);
127881ad6265SDimitry Andric     MachineInstr *Def = Op->getParent();
1279fcaf7f86SDimitry Andric     if (Op->getSubReg() != 0 || !isTriviallyReMaterializable(*Def))
128081ad6265SDimitry Andric       continue;
128181ad6265SDimitry Andric 
1282972a253aSDimitry Andric     MachineInstr *UseI = &*DAG.MRI.use_instr_nodbg_begin(Reg);
128381ad6265SDimitry Andric     if (Def->getParent() == UseI->getParent())
128481ad6265SDimitry Andric       continue;
128581ad6265SDimitry Andric 
128681ad6265SDimitry Andric     // We are only collecting defs that are defined in another block and are
128781ad6265SDimitry Andric     // live-through or used inside regions at MinOccupancy. This means that the
128881ad6265SDimitry Andric     // register must be in the live-in set for the region.
128981ad6265SDimitry Andric     bool AddedToRematList = false;
1290972a253aSDimitry Andric     for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
1291972a253aSDimitry Andric       auto It = DAG.LiveIns[I].find(Reg);
1292972a253aSDimitry Andric       if (It != DAG.LiveIns[I].end() && !It->second.none()) {
1293972a253aSDimitry Andric         if (DAG.RegionsWithMinOcc[I]) {
129481ad6265SDimitry Andric           RematerializableInsts[I][Def] = UseI;
129581ad6265SDimitry Andric           AddedToRematList = true;
129681ad6265SDimitry Andric         }
129781ad6265SDimitry Andric 
129881ad6265SDimitry Andric         // Collect regions with rematerializable reg as live-in to avoid
129981ad6265SDimitry Andric         // searching later when updating RP.
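        // The map is keyed by the def and holds every region index where the
        // register is live-in, e.g. a single def feeding regions {2, 5, 7}
        // (indices purely illustrative).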
130081ad6265SDimitry Andric         RematDefToLiveInRegions[Def].push_back(I);
130181ad6265SDimitry Andric       }
130281ad6265SDimitry Andric     }
130381ad6265SDimitry Andric     if (!AddedToRematList)
130481ad6265SDimitry Andric       RematDefToLiveInRegions.erase(Def);
130581ad6265SDimitry Andric   }
130681ad6265SDimitry Andric }
130781ad6265SDimitry Andric 
1308972a253aSDimitry Andric bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
130981ad6265SDimitry Andric                                               const TargetInstrInfo *TII) {
131081ad6265SDimitry Andric   // Temporary copies of cached variables we will be modifying and replacing if
131181ad6265SDimitry Andric   // sinking succeeds.
131281ad6265SDimitry Andric   SmallVector<
131381ad6265SDimitry Andric       std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>, 32>
131481ad6265SDimitry Andric       NewRegions;
131581ad6265SDimitry Andric   DenseMap<unsigned, GCNRPTracker::LiveRegSet> NewLiveIns;
131681ad6265SDimitry Andric   DenseMap<unsigned, GCNRegPressure> NewPressure;
131781ad6265SDimitry Andric   BitVector NewRescheduleRegions;
1318972a253aSDimitry Andric   LiveIntervals *LIS = DAG.LIS;
131981ad6265SDimitry Andric 
1320972a253aSDimitry Andric   NewRegions.resize(DAG.Regions.size());
1321972a253aSDimitry Andric   NewRescheduleRegions.resize(DAG.Regions.size());
132281ad6265SDimitry Andric 
132281ad6265SDimitry Andric   // Collect only regions that have a rematerializable def as a live-in.
132481ad6265SDimitry Andric   SmallSet<unsigned, 16> ImpactedRegions;
132581ad6265SDimitry Andric   for (const auto &It : RematDefToLiveInRegions)
132681ad6265SDimitry Andric     ImpactedRegions.insert(It.second.begin(), It.second.end());
132781ad6265SDimitry Andric 
132881ad6265SDimitry Andric   // Make copies of the register pressure and live-ins caches that will be
132981ad6265SDimitry Andric   // updated as we rematerialize.
133081ad6265SDimitry Andric   for (auto Idx : ImpactedRegions) {
1331972a253aSDimitry Andric     NewPressure[Idx] = DAG.Pressure[Idx];
1332972a253aSDimitry Andric     NewLiveIns[Idx] = DAG.LiveIns[Idx];
133381ad6265SDimitry Andric   }
1334972a253aSDimitry Andric   NewRegions = DAG.Regions;
133581ad6265SDimitry Andric   NewRescheduleRegions.reset();
133681ad6265SDimitry Andric 
133781ad6265SDimitry Andric   DenseMap<MachineInstr *, MachineInstr *> InsertedMIToOldDef;
133881ad6265SDimitry Andric   bool Improved = false;
133981ad6265SDimitry Andric   for (auto I : ImpactedRegions) {
1340972a253aSDimitry Andric     if (!DAG.RegionsWithMinOcc[I])
134181ad6265SDimitry Andric       continue;
134281ad6265SDimitry Andric 
134381ad6265SDimitry Andric     Improved = false;
134481ad6265SDimitry Andric     int VGPRUsage = NewPressure[I].getVGPRNum(ST.hasGFX90AInsts());
134581ad6265SDimitry Andric     int SGPRUsage = NewPressure[I].getSGPRNum();
134681ad6265SDimitry Andric 
134781ad6265SDimitry Andric     // TODO: Handle occupancy drop due to AGPR and SGPR.
134881ad6265SDimitry Andric     // Check if the occupancy drop is caused by VGPR usage rather than SGPRs.
1349972a253aSDimitry Andric     if (ST.getOccupancyWithNumSGPRs(SGPRUsage) == DAG.MinOccupancy)
135081ad6265SDimitry Andric       break;
135181ad6265SDimitry Andric 
135281ad6265SDimitry Andric     // The occupancy of this region could have been improved by a previous
135381ad6265SDimitry Andric     // iteration's sinking of defs.
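    // (Scale for the checks below, with hypothetical numbers: if 4 waves
    // require at most 64 VGPRs each and this region sits at 68 VGPRs, at
    // least 4 VGPRs' worth of live-in defs must be sinkable before occupancy
    // can rise from 3 to 4.)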
1354972a253aSDimitry Andric     if (NewPressure[I].getOccupancy(ST) > DAG.MinOccupancy) {
135581ad6265SDimitry Andric       NewRescheduleRegions[I] = true;
135681ad6265SDimitry Andric       Improved = true;
135781ad6265SDimitry Andric       continue;
135881ad6265SDimitry Andric     }
135981ad6265SDimitry Andric 
136081ad6265SDimitry Andric     // First check if we have enough trivially rematerializable instructions to
136181ad6265SDimitry Andric     // improve occupancy. Optimistically assume all instructions we are able to
136281ad6265SDimitry Andric     // sink decrease RP.
136381ad6265SDimitry Andric     int TotalSinkableRegs = 0;
136481ad6265SDimitry Andric     for (const auto &It : RematerializableInsts[I]) {
136581ad6265SDimitry Andric       MachineInstr *Def = It.first;
136681ad6265SDimitry Andric       Register DefReg = Def->getOperand(0).getReg();
136781ad6265SDimitry Andric       TotalSinkableRegs +=
136881ad6265SDimitry Andric           SIRegisterInfo::getNumCoveredRegs(NewLiveIns[I][DefReg]);
136981ad6265SDimitry Andric     }
137081ad6265SDimitry Andric     int VGPRsAfterSink = VGPRUsage - TotalSinkableRegs;
137181ad6265SDimitry Andric     unsigned OptimisticOccupancy = ST.getOccupancyWithNumVGPRs(VGPRsAfterSink);
137281ad6265SDimitry Andric     // If even in the most optimistic scenario we cannot improve occupancy, do
137381ad6265SDimitry Andric     // not attempt to sink any instructions.
1374972a253aSDimitry Andric     if (OptimisticOccupancy <= DAG.MinOccupancy)
137581ad6265SDimitry Andric       break;
137681ad6265SDimitry Andric 
137781ad6265SDimitry Andric     unsigned ImproveOccupancy = 0;
137881ad6265SDimitry Andric     SmallVector<MachineInstr *, 4> SinkedDefs;
137981ad6265SDimitry Andric     for (auto &It : RematerializableInsts[I]) {
138081ad6265SDimitry Andric       MachineInstr *Def = It.first;
138181ad6265SDimitry Andric       MachineBasicBlock::iterator InsertPos =
138281ad6265SDimitry Andric           MachineBasicBlock::iterator(It.second);
138381ad6265SDimitry Andric       Register Reg = Def->getOperand(0).getReg();
138481ad6265SDimitry Andric       // Rematerialize MI to its use block. Since we are only rematerializing
138581ad6265SDimitry Andric       // instructions that do not have any virtual reg uses, we do not need to
138681ad6265SDimitry Andric       // call LiveRangeEdit::allUsesAvailableAt() and
138781ad6265SDimitry Andric       // LiveRangeEdit::canRematerializeAt().
138881ad6265SDimitry Andric       TII->reMaterialize(*InsertPos->getParent(), InsertPos, Reg,
1389972a253aSDimitry Andric                          Def->getOperand(0).getSubReg(), *Def, *DAG.TRI);
1390bdd1243dSDimitry Andric       MachineInstr *NewMI = &*std::prev(InsertPos);
139181ad6265SDimitry Andric       LIS->InsertMachineInstrInMaps(*NewMI);
139281ad6265SDimitry Andric       LIS->removeInterval(Reg);
139381ad6265SDimitry Andric       LIS->createAndComputeVirtRegInterval(Reg);
139481ad6265SDimitry Andric       InsertedMIToOldDef[NewMI] = Def;
139581ad6265SDimitry Andric 
139681ad6265SDimitry Andric       // Update region boundaries in the scheduling region we sank from, since
139781ad6265SDimitry Andric       // we may sink an instruction that was at the beginning or end of its
139881ad6265SDimitry Andric       // region.
1398972a253aSDimitry Andric       DAG.updateRegionBoundaries(NewRegions, Def, /*NewMI =*/nullptr,
139981ad6265SDimitry Andric                                  /*Removing =*/true);
140081ad6265SDimitry Andric 
140181ad6265SDimitry Andric       // Update region boundaries in the region we sank to.
1402972a253aSDimitry Andric       DAG.updateRegionBoundaries(NewRegions, InsertPos, NewMI);
140381ad6265SDimitry Andric 
140481ad6265SDimitry Andric       LaneBitmask PrevMask = NewLiveIns[I][Reg];
140581ad6265SDimitry Andric       // FIXME: Also update cached pressure for where the def was sunk from.
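      // (PrevMask records which lanes of Reg were live into this region; for
      // a register tuple spanning two 32-bit VGPRs, both lanes are typically
      // set, so getNumCoveredRegs(PrevMask) would count 2.)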
140681ad6265SDimitry Andric       // Update RP for all regions that have this reg as a live-in and remove
140781ad6265SDimitry Andric       // the reg from all regions as a live-in.
140881ad6265SDimitry Andric       for (auto Idx : RematDefToLiveInRegions[Def]) {
140981ad6265SDimitry Andric         NewLiveIns[Idx].erase(Reg);
1410972a253aSDimitry Andric         if (InsertPos->getParent() != DAG.Regions[Idx].first->getParent()) {
141181ad6265SDimitry Andric           // Def is live-through and not used in this block.
1412972a253aSDimitry Andric           NewPressure[Idx].inc(Reg, PrevMask, LaneBitmask::getNone(), DAG.MRI);
141381ad6265SDimitry Andric         } else {
141481ad6265SDimitry Andric           // Def is used and rematerialized into this block.
141581ad6265SDimitry Andric           GCNDownwardRPTracker RPT(*LIS);
141681ad6265SDimitry Andric           auto *NonDbgMI = &*skipDebugInstructionsForward(
141781ad6265SDimitry Andric               NewRegions[Idx].first, NewRegions[Idx].second);
141881ad6265SDimitry Andric           RPT.reset(*NonDbgMI, &NewLiveIns[Idx]);
141981ad6265SDimitry Andric           RPT.advance(NewRegions[Idx].second);
142081ad6265SDimitry Andric           NewPressure[Idx] = RPT.moveMaxPressure();
142181ad6265SDimitry Andric         }
142281ad6265SDimitry Andric       }
142381ad6265SDimitry Andric 
142481ad6265SDimitry Andric       SinkedDefs.push_back(Def);
142581ad6265SDimitry Andric       ImproveOccupancy = NewPressure[I].getOccupancy(ST);
1426972a253aSDimitry Andric       if (ImproveOccupancy > DAG.MinOccupancy)
142781ad6265SDimitry Andric         break;
142881ad6265SDimitry Andric     }
142981ad6265SDimitry Andric 
143081ad6265SDimitry Andric     // Remove defs we just sank from all regions' lists of sinkable defs.
143181ad6265SDimitry Andric     for (auto &Def : SinkedDefs)
143281ad6265SDimitry Andric       for (auto TrackedIdx : RematDefToLiveInRegions[Def])
143381ad6265SDimitry Andric         RematerializableInsts[TrackedIdx].erase(Def);
143481ad6265SDimitry Andric 
1435972a253aSDimitry Andric     if (ImproveOccupancy <= DAG.MinOccupancy)
143681ad6265SDimitry Andric       break;
143781ad6265SDimitry Andric 
143881ad6265SDimitry Andric     NewRescheduleRegions[I] = true;
143981ad6265SDimitry Andric     Improved = true;
144081ad6265SDimitry Andric   }
144181ad6265SDimitry Andric 
144281ad6265SDimitry Andric   if (!Improved) {
144381ad6265SDimitry Andric     // Occupancy was not improved for all regions that were at MinOccupancy.
144481ad6265SDimitry Andric     // Undo sinking and remove newly rematerialized instructions.
144581ad6265SDimitry Andric     for (auto &Entry : InsertedMIToOldDef) {
144681ad6265SDimitry Andric       MachineInstr *MI = Entry.first;
144781ad6265SDimitry Andric       MachineInstr *OldMI = Entry.second;
144881ad6265SDimitry Andric       Register Reg = MI->getOperand(0).getReg();
144981ad6265SDimitry Andric       LIS->RemoveMachineInstrFromMaps(*MI);
145081ad6265SDimitry Andric       MI->eraseFromParent();
145181ad6265SDimitry Andric       OldMI->clearRegisterDeads(Reg);
145281ad6265SDimitry Andric       LIS->removeInterval(Reg);
145381ad6265SDimitry Andric       LIS->createAndComputeVirtRegInterval(Reg);
145481ad6265SDimitry Andric     }
145581ad6265SDimitry Andric     return false;
145681ad6265SDimitry Andric   }
145781ad6265SDimitry Andric 
145881ad6265SDimitry Andric   // Occupancy was improved for all regions.
145981ad6265SDimitry Andric   for (auto &Entry : InsertedMIToOldDef) {
146081ad6265SDimitry Andric     MachineInstr *MI = Entry.first;
146181ad6265SDimitry Andric     MachineInstr *OldMI = Entry.second;
146281ad6265SDimitry Andric 
146381ad6265SDimitry Andric     // Remove OldMI from BBLiveInMap since we are sinking it from its MBB.
1464972a253aSDimitry Andric     DAG.BBLiveInMap.erase(OldMI);
146581ad6265SDimitry Andric 
146681ad6265SDimitry Andric     // Remove OldMI and update LIS.
146781ad6265SDimitry Andric     Register Reg = MI->getOperand(0).getReg();
146881ad6265SDimitry Andric     LIS->RemoveMachineInstrFromMaps(*OldMI);
146981ad6265SDimitry Andric     OldMI->eraseFromParent();
147081ad6265SDimitry Andric     LIS->removeInterval(Reg);
147181ad6265SDimitry Andric     LIS->createAndComputeVirtRegInterval(Reg);
147281ad6265SDimitry Andric   }
147381ad6265SDimitry Andric 
147481ad6265SDimitry Andric   // Update live-ins, register pressure, and regions caches.
147581ad6265SDimitry Andric   for (auto Idx : ImpactedRegions) {
1476972a253aSDimitry Andric     DAG.LiveIns[Idx] = NewLiveIns[Idx];
1477972a253aSDimitry Andric     DAG.Pressure[Idx] = NewPressure[Idx];
1478972a253aSDimitry Andric     DAG.MBBLiveIns.erase(DAG.Regions[Idx].first->getParent());
147981ad6265SDimitry Andric   }
1480972a253aSDimitry Andric   DAG.Regions = NewRegions;
1481972a253aSDimitry Andric   DAG.RescheduleRegions = NewRescheduleRegions;
148281ad6265SDimitry Andric 
148381ad6265SDimitry Andric   SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
1484972a253aSDimitry Andric   MFI.increaseOccupancy(MF, ++DAG.MinOccupancy);
148581ad6265SDimitry Andric 
148681ad6265SDimitry Andric   return true;
148781ad6265SDimitry Andric }
148881ad6265SDimitry Andric 
148981ad6265SDimitry Andric // Copied from MachineLICM.
1490972a253aSDimitry Andric bool PreRARematStage::isTriviallyReMaterializable(const MachineInstr &MI) {
1491972a253aSDimitry Andric   if (!DAG.TII->isTriviallyReMaterializable(MI))
149281ad6265SDimitry Andric     return false;
149381ad6265SDimitry Andric 
1494*06c3fb27SDimitry Andric   for (const MachineOperand &MO : MI.all_uses())
1495*06c3fb27SDimitry Andric     if (MO.getReg().isVirtual())
149681ad6265SDimitry Andric       return false;
149781ad6265SDimitry Andric 
149881ad6265SDimitry Andric   return true;
149981ad6265SDimitry Andric }
150081ad6265SDimitry Andric 
150181ad6265SDimitry Andric // When removing, we will have to check both the beginning and the end of the
150281ad6265SDimitry Andric // region. When inserting, we will only have to check if we are inserting NewMI
150381ad6265SDimitry Andric // in front of a scheduling region and do not need to check the end, since we
150481ad6265SDimitry Andric // will only ever be inserting before an already existing MI.
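// Hypothetical example: for a region [%a, %c), removing %a shifts the region
// to [std::next(%a), %c), while inserting NewMI immediately before %a makes
// the region [NewMI, %c); removing an instruction strictly inside the region
// leaves the boundaries untouched.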
150581ad6265SDimitry Andric void GCNScheduleDAGMILive::updateRegionBoundaries(
150681ad6265SDimitry Andric     SmallVectorImpl<std::pair<MachineBasicBlock::iterator,
150781ad6265SDimitry Andric                               MachineBasicBlock::iterator>> &RegionBoundaries,
150881ad6265SDimitry Andric     MachineBasicBlock::iterator MI, MachineInstr *NewMI, bool Removing) {
150981ad6265SDimitry Andric   unsigned I = 0, E = RegionBoundaries.size();
151081ad6265SDimitry Andric   // Search for the first region of the block where MI is located.
151181ad6265SDimitry Andric   while (I != E && MI->getParent() != RegionBoundaries[I].first->getParent())
151281ad6265SDimitry Andric     ++I;
151381ad6265SDimitry Andric 
151481ad6265SDimitry Andric   for (; I != E; ++I) {
151581ad6265SDimitry Andric     if (MI->getParent() != RegionBoundaries[I].first->getParent())
151681ad6265SDimitry Andric       return;
151781ad6265SDimitry Andric 
151881ad6265SDimitry Andric     if (Removing && MI == RegionBoundaries[I].first &&
151981ad6265SDimitry Andric         MI == RegionBoundaries[I].second) {
152081ad6265SDimitry Andric       // MI is in a region with size 1; after removing, the region will have
152181ad6265SDimitry Andric       // size 0. Set RegionBegin and RegionEnd to the past-the-end block iterator.
152281ad6265SDimitry Andric       RegionBoundaries[I] =
1523bdd1243dSDimitry Andric           std::pair(MI->getParent()->end(), MI->getParent()->end());
152481ad6265SDimitry Andric       return;
152581ad6265SDimitry Andric     }
152681ad6265SDimitry Andric     if (MI == RegionBoundaries[I].first) {
152781ad6265SDimitry Andric       if (Removing)
152881ad6265SDimitry Andric         RegionBoundaries[I] =
1529bdd1243dSDimitry Andric             std::pair(std::next(MI), RegionBoundaries[I].second);
153081ad6265SDimitry Andric       else
153181ad6265SDimitry Andric         // Inserted NewMI in front of region; set new RegionBegin to NewMI.
1532bdd1243dSDimitry Andric         RegionBoundaries[I] = std::pair(MachineBasicBlock::iterator(NewMI),
153381ad6265SDimitry Andric                                         RegionBoundaries[I].second);
153481ad6265SDimitry Andric       return;
153581ad6265SDimitry Andric     }
153681ad6265SDimitry Andric     if (Removing && MI == RegionBoundaries[I].second) {
1537bdd1243dSDimitry Andric       RegionBoundaries[I] = std::pair(RegionBoundaries[I].first, std::prev(MI));
153881ad6265SDimitry Andric       return;
153981ad6265SDimitry Andric     }
154081ad6265SDimitry Andric   }
154181ad6265SDimitry Andric }
1542bdd1243dSDimitry Andric 
1543bdd1243dSDimitry Andric static bool hasIGLPInstrs(ScheduleDAGInstrs *DAG) {
1544bdd1243dSDimitry Andric   return std::any_of(
1545bdd1243dSDimitry Andric       DAG->begin(), DAG->end(), [](MachineBasicBlock::iterator MI) {
1546bdd1243dSDimitry Andric         unsigned Opc = MI->getOpcode();
1547bdd1243dSDimitry Andric         return Opc == AMDGPU::SCHED_GROUP_BARRIER || Opc == AMDGPU::IGLP_OPT;
1548bdd1243dSDimitry Andric       });
1549bdd1243dSDimitry Andric }
1550bdd1243dSDimitry Andric 
1551bdd1243dSDimitry Andric GCNPostScheduleDAGMILive::GCNPostScheduleDAGMILive(
1552bdd1243dSDimitry Andric     MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S,
1553bdd1243dSDimitry Andric     bool RemoveKillFlags)
1554bdd1243dSDimitry Andric     : ScheduleDAGMI(C, std::move(S), RemoveKillFlags) {}
1555bdd1243dSDimitry Andric 
1556bdd1243dSDimitry Andric void GCNPostScheduleDAGMILive::schedule() {
1557bdd1243dSDimitry Andric   HasIGLPInstrs = hasIGLPInstrs(this);
1558bdd1243dSDimitry Andric   if (HasIGLPInstrs) {
1559bdd1243dSDimitry Andric     SavedMutations.clear();
1560bdd1243dSDimitry Andric     SavedMutations.swap(Mutations);
1561bdd1243dSDimitry Andric     addMutation(createIGroupLPDAGMutation());
1562bdd1243dSDimitry Andric   }
1563bdd1243dSDimitry Andric 
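  // With IGLP instructions present, the generic mutations saved above have
  // been replaced by the IGroupLP mutation, so the base scheduler below runs
  // with IGLP-driven grouping.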
1564bdd1243dSDimitry Andric   ScheduleDAGMI::schedule();
1565bdd1243dSDimitry Andric }
1566bdd1243dSDimitry Andric 
1567bdd1243dSDimitry Andric void GCNPostScheduleDAGMILive::finalizeSchedule() {
1568bdd1243dSDimitry Andric   if (HasIGLPInstrs)
1569bdd1243dSDimitry Andric     SavedMutations.swap(Mutations);
1570bdd1243dSDimitry Andric 
1571bdd1243dSDimitry Andric   ScheduleDAGMI::finalizeSchedule();
1572bdd1243dSDimitry Andric }
1573
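// Illustrative summary (derived from the code above, not an upstream
// comment): the latency metric produced by getScheduleMetrics() is roughly
// SumBubbles * ScheduleMetrics::ScaleFactor / CurrCycle, i.e. the percentage
// of the schedule spent stalled. A 100-cycle schedule with 30 bubble cycles
// scores 30; UnclusteredHighRPStage keeps its relaxed schedule only while the
// occupancy-weighted Profit derived from this metric stays at or above
// ScaleFactor (100).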