//===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This contains a MachineSchedStrategy implementation for maximizing wave
/// occupancy on GCN hardware.
///
/// This pass will apply multiple scheduling stages to the same function.
/// Regions are first recorded in GCNScheduleDAGMILive::schedule. The actual
/// entry point for the scheduling of those regions is
/// GCNScheduleDAGMILive::runSchedStages.
///
/// Generally, the reason for having multiple scheduling stages is to account
/// for the kernel-wide effect of register usage on occupancy. Usually, only a
/// few scheduling regions will have register pressure high enough to limit
/// occupancy for the kernel, so constraints can be relaxed to improve ILP in
/// other regions.
///
//===----------------------------------------------------------------------===//

#include "GCNSchedStrategy.h"
#include "AMDGPUIGroupLP.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"

#define DEBUG_TYPE "machine-scheduler"

using namespace llvm;
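
// Overview of the stage pipeline, summarizing what is wired up further down
// in this file (no additional machinery): GCNMaxOccupancySchedStrategy runs
// OccInitialSchedule, then UnclusteredHighRPReschedule, then
// ClusteredLowOccupancyReschedule, then PreRARematerialize, while
// GCNMaxILPSchedStrategy runs a single ILPInitialSchedule stage. Later stages
// only revisit regions flagged by earlier ones (high or excess register
// pressure, minimum occupancy), so most regions are scheduled once.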

static cl::opt<bool> DisableUnclusterHighRP(
    "amdgpu-disable-unclustered-high-rp-reschedule", cl::Hidden,
    cl::desc("Disable unclustered high register pressure "
             "reduction scheduling stage."),
    cl::init(false));

static cl::opt<bool> DisableClusteredLowOccupancy(
    "amdgpu-disable-clustered-low-occupancy-reschedule", cl::Hidden,
    cl::desc("Disable clustered low occupancy "
             "rescheduling for ILP scheduling stage."),
    cl::init(false));

static cl::opt<unsigned> ScheduleMetricBias(
    "amdgpu-schedule-metric-bias", cl::Hidden,
    cl::desc(
        "Sets the bias which adds weight to occupancy vs latency. Set it to "
        "100 to chase the occupancy only."),
    cl::init(10));
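
// Illustrative reading of the bias (the actual comparison lives in the
// shouldRevertScheduling implementations, which are outside this excerpt):
// the schedule metric computed below is SumBubbles * 100 / ScheduleLength,
// so a latency-heavy schedule with 30 bubble cycles over 100 cycles scores
// 30. Roughly speaking, the old schedule's metric is inflated by this bias
// before comparison, so larger bias values make an occupancy-improving
// schedule win more often; at 100, occupancy effectively always wins.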

static cl::opt<bool>
    RelaxedOcc("amdgpu-schedule-relaxed-occupancy", cl::Hidden,
               cl::desc("Relax occupancy targets for kernels which are memory "
                        "bound (amdgpu-membound-threshold), or "
                        "Wave Limited (amdgpu-limit-wave-threshold)."),
               cl::init(false));

const unsigned ScheduleMetrics::ScaleFactor = 100;

GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C)
    : GenericScheduler(C), TargetOccupancy(0), MF(nullptr),
      HasHighPressure(false) {}

void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
  GenericScheduler::initialize(DAG);

  MF = &DAG->MF;

  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();

  SGPRExcessLimit =
      Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
  VGPRExcessLimit =
      Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);

  SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
  // Set the initial TargetOccupancy to the maximum occupancy that we can
  // achieve for this function. This effectively sets a lower bound on the
  // 'Critical' register limits in the scheduler.
  // Allow for lower occupancy targets if the kernel is wave limited or memory
  // bound and the relaxed occupancy feature is enabled.
  TargetOccupancy =
      RelaxedOcc ? MFI.getMinAllowedOccupancy() : MFI.getOccupancy();
  SGPRCriticalLimit =
      std::min(ST.getMaxNumSGPRs(TargetOccupancy, true), SGPRExcessLimit);

  if (!KnownExcessRP) {
    VGPRCriticalLimit =
        std::min(ST.getMaxNumVGPRs(TargetOccupancy), VGPRExcessLimit);
  } else {
    // This is similar to the ST.getMaxNumVGPRs(TargetOccupancy) result, except
    // it returns a reasonably small number for targets with lots of VGPRs,
    // such as GFX10 and GFX11.
    LLVM_DEBUG(dbgs() << "Region is known to spill, use alternative "
                         "VGPRCriticalLimit calculation method.\n");

    unsigned Granule = AMDGPU::IsaInfo::getVGPRAllocGranule(&ST);
    unsigned Addressable = AMDGPU::IsaInfo::getAddressableNumVGPRs(&ST);
    unsigned VGPRBudget = alignDown(Addressable / TargetOccupancy, Granule);
    VGPRBudget = std::max(VGPRBudget, Granule);
    VGPRCriticalLimit = std::min(VGPRBudget, VGPRExcessLimit);
  }

  // Subtract error margin and bias from register limits and avoid overflow.
  SGPRCriticalLimit -= std::min(SGPRLimitBias + ErrorMargin, SGPRCriticalLimit);
  VGPRCriticalLimit -= std::min(VGPRLimitBias + ErrorMargin, VGPRCriticalLimit);
  SGPRExcessLimit -= std::min(SGPRLimitBias + ErrorMargin, SGPRExcessLimit);
  VGPRExcessLimit -= std::min(VGPRLimitBias + ErrorMargin, VGPRExcessLimit);

  LLVM_DEBUG(dbgs() << "VGPRCriticalLimit = " << VGPRCriticalLimit
                    << ", VGPRExcessLimit = " << VGPRExcessLimit
                    << ", SGPRCriticalLimit = " << SGPRCriticalLimit
                    << ", SGPRExcessLimit = " << SGPRExcessLimit << "\n\n");
}
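
// Worked example with illustrative numbers (not tied to any specific target):
// if getAddressableNumVGPRs returns 256, the allocation granule is 4, and
// TargetOccupancy is 20, the spill-aware path above computes
// VGPRBudget = alignDown(256 / 20, 4) = 12, so VGPRCriticalLimit becomes
// min(12, VGPRExcessLimit) before the error margin and bias are subtracted.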

void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
                                     bool AtTop,
                                     const RegPressureTracker &RPTracker,
                                     const SIRegisterInfo *SRI,
                                     unsigned SGPRPressure,
                                     unsigned VGPRPressure) {
  Cand.SU = SU;
  Cand.AtTop = AtTop;

  if (!DAG->isTrackingPressure())
    return;

  // getDownwardPressure() and getUpwardPressure() make temporary changes to
  // the tracker, so we need to pass those functions a non-const copy.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);

  Pressure.clear();
  MaxPressure.clear();

  if (AtTop)
    TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
  else {
    // FIXME: I think for bottom-up scheduling, the register pressure is cached
    // and can be retrieved by DAG->getPressureDiff(SU).
    TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
  }

  unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
  unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];

  // If two instructions increase the pressure of different register sets
  // by the same amount, the generic scheduler will prefer to schedule the
  // instruction that increases the set with the least amount of registers,
  // which in our case would be SGPRs. This is rarely what we want, so
  // when we report excess/critical register pressure, we do it either
  // only for VGPRs or only for SGPRs.

  // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
  const unsigned MaxVGPRPressureInc = 16;
  bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
  bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;

  // FIXME: We have to enter REG-EXCESS before we reach the actual threshold
  // to increase the likelihood we don't go over the limits. We should improve
  // the analysis to look through dependencies to find the path with the least
  // register pressure.

  // We only need to update the RPDelta for instructions that increase register
  // pressure. Instructions that decrease or keep reg pressure the same will be
  // marked as RegExcess in tryCandidate() when they are compared with
  // instructions that increase the register pressure.
  if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
    HasHighPressure = true;
    Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
    Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
  }

  if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
    HasHighPressure = true;
    Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
    Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
  }

  // Register pressure is considered 'CRITICAL' if it is approaching a value
  // that would reduce the wave occupancy for the execution unit. When
  // register pressure is 'CRITICAL', increasing SGPR and VGPR pressure
  // have the same cost, so we don't need to prefer one over the other.

  int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
  int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;

  if (SGPRDelta >= 0 || VGPRDelta >= 0) {
    HasHighPressure = true;
    if (SGPRDelta > VGPRDelta) {
      Cand.RPDelta.CriticalMax =
          PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
      Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
    } else {
      Cand.RPDelta.CriticalMax =
          PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
      Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
    }
  }
}
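
// Illustrative example of the deltas above: with VGPRExcessLimit = 253, a
// candidate whose new VGPR pressure would be 257 gets RPDelta.Excess set on
// the VGPR_32 pressure set with a unit increment of 4; tryCandidate() then
// penalizes it (via the RegExcess reason) against candidates that stay under
// the limit.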

// This function is mostly cut and pasted from
// GenericScheduler::pickNodeFromQueue()
void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
                                         const CandPolicy &ZonePolicy,
                                         const RegPressureTracker &RPTracker,
                                         SchedCandidate &Cand) {
  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
  ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
  unsigned SGPRPressure = 0;
  unsigned VGPRPressure = 0;
  if (DAG->isTrackingPressure()) {
    SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
    VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
  }
  ReadyQueue &Q = Zone.Available;
  for (SUnit *SU : Q) {

    SchedCandidate TryCand(ZonePolicy);
    initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI,
                  SGPRPressure, VGPRPressure);
    // Pass SchedBoundary only when comparing nodes from the same boundary.
    SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
    tryCandidate(Cand, TryCand, ZoneArg);
    if (TryCand.Reason != NoCand) {
      // Initialize resource delta if needed in case future heuristics query it.
      if (TryCand.ResDelta == SchedResourceDelta())
        TryCand.initResourceDelta(Zone.DAG, SchedModel);
      Cand.setBest(TryCand);
      LLVM_DEBUG(traceCandidate(Cand));
    }
  }
}
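
// The main differences from the GenericScheduler version are the precomputed
// SGPR/VGPR pressure-set reads and the calls to the GCN initCandidate() above
// and the (possibly overridden) tryCandidate().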

// This function is mostly cut and pasted from
// GenericScheduler::pickNodeBidirectional()
SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
  if (SUnit *SU = Bot.pickOnlyChoice()) {
    IsTopNode = false;
    return SU;
  }
  if (SUnit *SU = Top.pickOnlyChoice()) {
    IsTopNode = true;
    return SU;
  }
  // Set the bottom-up policy based on the state of the current bottom zone and
  // the instructions outside the zone, including the top zone.
  CandPolicy BotPolicy;
  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
  // Set the top-down policy based on the state of the current top zone and
  // the instructions outside the zone, including the bottom zone.
  CandPolicy TopPolicy;
  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);

  // See if BotCand is still valid (because we previously scheduled from Top).
  LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
      BotCand.Policy != BotPolicy) {
    BotCand.reset(CandPolicy());
    pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
    assert(BotCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    LLVM_DEBUG(traceCandidate(BotCand));
#ifndef NDEBUG
    if (VerifyScheduling) {
      SchedCandidate TCand;
      TCand.reset(CandPolicy());
      pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
      assert(TCand.SU == BotCand.SU &&
             "Last pick result should correspond to re-picking right now");
    }
#endif
  }

  // Check if the top Q has a better candidate.
  LLVM_DEBUG(dbgs() << "Picking from Top:\n");
  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
      TopCand.Policy != TopPolicy) {
    TopCand.reset(CandPolicy());
    pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
    assert(TopCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    LLVM_DEBUG(traceCandidate(TopCand));
#ifndef NDEBUG
    if (VerifyScheduling) {
      SchedCandidate TCand;
      TCand.reset(CandPolicy());
      pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
      assert(TCand.SU == TopCand.SU &&
             "Last pick result should correspond to re-picking right now");
    }
#endif
  }

  // Pick best from BotCand and TopCand.
  LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand);
             dbgs() << "Bot Cand: "; traceCandidate(BotCand););
  SchedCandidate Cand = BotCand;
  TopCand.Reason = NoCand;
  tryCandidate(Cand, TopCand, nullptr);
  if (TopCand.Reason != NoCand) {
    Cand.setBest(TopCand);
  }
  LLVM_DEBUG(dbgs() << "Picking: "; traceCandidate(Cand););

  IsTopNode = Cand.AtTop;
  return Cand.SU;
}

// This function is mostly cut and pasted from
// GenericScheduler::pickNode()
SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return nullptr;
  }
  SUnit *SU;
  do {
    if (RegionPolicy.OnlyTopDown) {
      SU = Top.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        TopCand.reset(NoPolicy);
        pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
        assert(TopCand.Reason != NoCand && "failed to find a candidate");
        SU = TopCand.SU;
      }
      IsTopNode = true;
    } else if (RegionPolicy.OnlyBottomUp) {
      SU = Bot.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        BotCand.reset(NoPolicy);
        pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
        assert(BotCand.Reason != NoCand && "failed to find a candidate");
        SU = BotCand.SU;
      }
      IsTopNode = false;
    } else {
      SU = pickNodeBidirectional(IsTopNode);
    }
  } while (SU->isScheduled);

  if (SU->isTopReady())
    Top.removeReady(SU);
  if (SU->isBottomReady())
    Bot.removeReady(SU);

  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
                    << *SU->getInstr());
  return SU;
}

GCNSchedStageID GCNSchedStrategy::getCurrentStage() {
  assert(CurrentStage && CurrentStage != SchedStages.end());
  return *CurrentStage;
}

bool GCNSchedStrategy::advanceStage() {
  assert(CurrentStage != SchedStages.end());
  if (!CurrentStage)
    CurrentStage = SchedStages.begin();
  else
    CurrentStage++;

  return CurrentStage != SchedStages.end();
}

bool GCNSchedStrategy::hasNextStage() const {
  assert(CurrentStage);
  return std::next(CurrentStage) != SchedStages.end();
}

GCNSchedStageID GCNSchedStrategy::getNextStage() const {
  assert(CurrentStage && std::next(CurrentStage) != SchedStages.end());
  return *std::next(CurrentStage);
}

GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
    const MachineSchedContext *C)
    : GCNSchedStrategy(C) {
  SchedStages.push_back(GCNSchedStageID::OccInitialSchedule);
  SchedStages.push_back(GCNSchedStageID::UnclusteredHighRPReschedule);
  SchedStages.push_back(GCNSchedStageID::ClusteredLowOccupancyReschedule);
  SchedStages.push_back(GCNSchedStageID::PreRARematerialize);
}

GCNMaxILPSchedStrategy::GCNMaxILPSchedStrategy(const MachineSchedContext *C)
    : GCNSchedStrategy(C) {
  SchedStages.push_back(GCNSchedStageID::ILPInitialSchedule);
}
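
// A minimal sketch of how these stage lists are consumed (this mirrors
// GCNScheduleDAGMILive::runSchedStages further down in this file):
//
//   GCNSchedStrategy &S = static_cast<GCNSchedStrategy &>(*SchedImpl);
//   while (S.advanceStage()) {              // walk SchedStages in order
//     auto Stage = createSchedStage(S.getCurrentStage());
//     ...                                   // initialize and run the stage
//   }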

bool GCNMaxILPSchedStrategy::tryCandidate(SchedCandidate &Cand,
                                          SchedCandidate &TryCand,
                                          SchedBoundary *Zone) const {
  // Initialize the candidate if needed.
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return true;
  }

  // Avoid spilling by exceeding the register limit.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
                  RegExcess, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  // Bias PhysReg Defs and copies to their uses and definitions, respectively.
  if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
                 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
    return TryCand.Reason != NoCand;

  bool SameBoundary = Zone != nullptr;
  if (SameBoundary) {
    // Prioritize instructions that read unbuffered resources by stall cycles.
    if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
                Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
      return TryCand.Reason != NoCand;

    // Avoid critical resource consumption and balance the schedule.
    TryCand.initResourceDelta(DAG, SchedModel);
    if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
                TryCand, Cand, ResourceReduce))
      return TryCand.Reason != NoCand;
    if (tryGreater(TryCand.ResDelta.DemandedResources,
                   Cand.ResDelta.DemandedResources, TryCand, Cand,
                   ResourceDemand))
      return TryCand.Reason != NoCand;

    // Unconditionally try to reduce latency.
    if (tryLatency(TryCand, Cand, *Zone))
      return TryCand.Reason != NoCand;

    // Weak edges are for clustering and other constraints.
    if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
                getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak))
      return TryCand.Reason != NoCand;
  }

  // Keep clustered nodes together to encourage downstream peephole
  // optimizations which may reduce resource requirements.
  //
  // This is a best effort to set things up for a post-RA pass. Optimizations
  // like generating loads of multiple registers should ideally be done within
  // the scheduler pass by combining the loads during DAG postprocessing.
  const SUnit *CandNextClusterSU =
      Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
  const SUnit *TryCandNextClusterSU =
      TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
  if (tryGreater(TryCand.SU == TryCandNextClusterSU,
                 Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster))
    return TryCand.Reason != NoCand;

  // Avoid increasing the max critical pressure in the scheduled region.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,
                  TryCand, Cand, RegCritical, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  // Avoid increasing the max pressure of the entire region.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand,
                  Cand, RegMax, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  if (SameBoundary) {
    // Fall through to original instruction order.
    if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) ||
        (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
      TryCand.Reason = NodeOrder;
      return true;
    }
  }
  return false;
}
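
// Note the ordering above: latency is tried unconditionally and ahead of the
// clustering and critical-pressure heuristics, earlier than
// GenericScheduler::tryCandidate() considers it. That reordering is what
// biases this strategy toward ILP at the cost of some register pressure.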

GCNScheduleDAGMILive::GCNScheduleDAGMILive(
    MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S)
    : ScheduleDAGMILive(C, std::move(S)), ST(MF.getSubtarget<GCNSubtarget>()),
      MFI(*MF.getInfo<SIMachineFunctionInfo>()),
      StartingOccupancy(MFI.getOccupancy()), MinOccupancy(StartingOccupancy) {

  LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
  if (RelaxedOcc) {
    MinOccupancy = std::min(MFI.getMinAllowedOccupancy(), StartingOccupancy);
    if (MinOccupancy != StartingOccupancy)
      LLVM_DEBUG(dbgs() << "Allowing Occupancy drops to " << MinOccupancy
                        << ".\n");
  }
}

std::unique_ptr<GCNSchedStage>
GCNScheduleDAGMILive::createSchedStage(GCNSchedStageID SchedStageID) {
  switch (SchedStageID) {
  case GCNSchedStageID::OccInitialSchedule:
    return std::make_unique<OccInitialScheduleStage>(SchedStageID, *this);
  case GCNSchedStageID::UnclusteredHighRPReschedule:
    return std::make_unique<UnclusteredHighRPStage>(SchedStageID, *this);
  case GCNSchedStageID::ClusteredLowOccupancyReschedule:
    return std::make_unique<ClusteredLowOccStage>(SchedStageID, *this);
  case GCNSchedStageID::PreRARematerialize:
    return std::make_unique<PreRARematStage>(SchedStageID, *this);
  case GCNSchedStageID::ILPInitialSchedule:
    return std::make_unique<ILPInitialScheduleStage>(SchedStageID, *this);
  }

  llvm_unreachable("Unknown SchedStageID.");
}
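
// Adding a new stage would mean extending GCNSchedStageID, handling it in the
// switch above, and pushing it onto SchedStages in one of the strategy
// constructors; the driver loop in runSchedStages() below needs no changes.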

void GCNScheduleDAGMILive::schedule() {
  // Collect all scheduling regions. The actual scheduling is performed in
  // GCNScheduleDAGMILive::finalizeSchedule.
  Regions.push_back(std::pair(RegionBegin, RegionEnd));
}

GCNRegPressure
GCNScheduleDAGMILive::getRealRegPressure(unsigned RegionIdx) const {
  GCNDownwardRPTracker RPTracker(*LIS);
  RPTracker.advance(begin(), end(), &LiveIns[RegionIdx]);
  return RPTracker.moveMaxPressure();
}

void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
                                                const MachineBasicBlock *MBB) {
  GCNDownwardRPTracker RPTracker(*LIS);

  // If the block has only one successor then the live-ins of that successor
  // are live-outs of the current block. We can reuse the calculated live set
  // if the successor will be sent to scheduling past the current block.
  //
  // However, due to a bug in LiveInterval analysis it may happen that two
  // predecessors of the same successor block have different lane bitmasks for
  // a live-out register. Work around that by sticking to a one-to-one
  // relationship, i.e. one predecessor with one successor block.
  const MachineBasicBlock *OnlySucc = nullptr;
  if (MBB->succ_size() == 1) {
    auto *Candidate = *MBB->succ_begin();
    if (!Candidate->empty() && Candidate->pred_size() == 1) {
      SlotIndexes *Ind = LIS->getSlotIndexes();
      if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(Candidate))
        OnlySucc = Candidate;
    }
  }

  // Scheduler sends regions from the end of the block upwards.
  size_t CurRegion = RegionIdx;
  for (size_t E = Regions.size(); CurRegion != E; ++CurRegion)
    if (Regions[CurRegion].first->getParent() != MBB)
      break;
  --CurRegion;

  auto I = MBB->begin();
  auto LiveInIt = MBBLiveIns.find(MBB);
  auto &Rgn = Regions[CurRegion];
  auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
  if (LiveInIt != MBBLiveIns.end()) {
    auto LiveIn = std::move(LiveInIt->second);
    RPTracker.reset(*MBB->begin(), &LiveIn);
    MBBLiveIns.erase(LiveInIt);
  } else {
    I = Rgn.first;
    auto LRS = BBLiveInMap.lookup(NonDbgMI);
#ifdef EXPENSIVE_CHECKS
    assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS));
#endif
    RPTracker.reset(*I, &LRS);
  }

  for (;;) {
    I = RPTracker.getNext();

    if (Regions[CurRegion].first == I || NonDbgMI == I) {
      LiveIns[CurRegion] = RPTracker.getLiveRegs();
      RPTracker.clearMaxPressure();
    }

    if (Regions[CurRegion].second == I) {
      Pressure[CurRegion] = RPTracker.moveMaxPressure();
      if (CurRegion-- == RegionIdx)
        break;
    }
    RPTracker.advanceToNext();
    RPTracker.advanceBeforeNext();
  }

  if (OnlySucc) {
    if (I != MBB->end()) {
      RPTracker.advanceToNext();
      RPTracker.advance(MBB->end());
    }
    RPTracker.advanceBeforeNext();
    MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs();
  }
}

DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
GCNScheduleDAGMILive::getBBLiveInMap() const {
  assert(!Regions.empty());
  std::vector<MachineInstr *> BBStarters;
  BBStarters.reserve(Regions.size());
  auto I = Regions.rbegin(), E = Regions.rend();
  auto *BB = I->first->getParent();
  do {
    auto *MI = &*skipDebugInstructionsForward(I->first, I->second);
    BBStarters.push_back(MI);
    do {
      ++I;
    } while (I != E && I->first->getParent() == BB);
  } while (I != E);
  return getLiveRegMap(BBStarters, false /*After*/, *LIS);
}

void GCNScheduleDAGMILive::finalizeSchedule() {
  // Start actual scheduling here. This function is called by the base
  // MachineScheduler after all regions have been recorded by
  // GCNScheduleDAGMILive::schedule().
  LiveIns.resize(Regions.size());
  Pressure.resize(Regions.size());
  RescheduleRegions.resize(Regions.size());
  RegionsWithHighRP.resize(Regions.size());
  RegionsWithExcessRP.resize(Regions.size());
  RegionsWithMinOcc.resize(Regions.size());
  RegionsWithIGLPInstrs.resize(Regions.size());
  RescheduleRegions.set();
  RegionsWithHighRP.reset();
  RegionsWithExcessRP.reset();
  RegionsWithMinOcc.reset();
  RegionsWithIGLPInstrs.reset();

  runSchedStages();
}

void GCNScheduleDAGMILive::runSchedStages() {
  LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");

  if (!Regions.empty())
    BBLiveInMap = getBBLiveInMap();

  GCNSchedStrategy &S = static_cast<GCNSchedStrategy &>(*SchedImpl);
  while (S.advanceStage()) {
    auto Stage = createSchedStage(S.getCurrentStage());
    if (!Stage->initGCNSchedStage())
      continue;

    for (auto Region : Regions) {
      RegionBegin = Region.first;
      RegionEnd = Region.second;
      // Setup for scheduling the region and check whether it should be
      // skipped.
      if (!Stage->initGCNRegion()) {
        Stage->advanceRegion();
        exitRegion();
        continue;
      }

      ScheduleDAGMILive::schedule();
      Stage->finalizeGCNRegion();
    }

    Stage->finalizeGCNSchedStage();
  }
}

#ifndef NDEBUG
raw_ostream &llvm::operator<<(raw_ostream &OS, const GCNSchedStageID &StageID) {
  switch (StageID) {
  case GCNSchedStageID::OccInitialSchedule:
    OS << "Max Occupancy Initial Schedule";
    break;
  case GCNSchedStageID::UnclusteredHighRPReschedule:
    OS << "Unclustered High Register Pressure Reschedule";
    break;
  case GCNSchedStageID::ClusteredLowOccupancyReschedule:
    OS << "Clustered Low Occupancy Reschedule";
    break;
  case GCNSchedStageID::PreRARematerialize:
    OS << "Pre-RA Rematerialize";
    break;
  case GCNSchedStageID::ILPInitialSchedule:
    OS << "Max ILP Initial Schedule";
    break;
  }

  return OS;
}
#endif

GCNSchedStage::GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
    : DAG(DAG), S(static_cast<GCNSchedStrategy &>(*DAG.SchedImpl)), MF(DAG.MF),
      MFI(DAG.MFI), ST(DAG.ST), StageID(StageID) {}

bool GCNSchedStage::initGCNSchedStage() {
  if (!DAG.LIS)
    return false;

  LLVM_DEBUG(dbgs() << "Starting scheduling stage: " << StageID << "\n");
  return true;
}

bool UnclusteredHighRPStage::initGCNSchedStage() {
  if (DisableUnclusterHighRP)
    return false;

  if (!GCNSchedStage::initGCNSchedStage())
    return false;

  if (DAG.RegionsWithHighRP.none() && DAG.RegionsWithExcessRP.none())
    return false;

  SavedMutations.swap(DAG.Mutations);
  DAG.addMutation(createIGroupLPDAGMutation(/*IsPostRA=*/false));

  InitialOccupancy = DAG.MinOccupancy;
  // Aggressively try to reduce register pressure in the unclustered high RP
  // stage. Temporarily increase the occupancy target in the region.
  S.SGPRLimitBias = S.HighRPSGPRBias;
  S.VGPRLimitBias = S.HighRPVGPRBias;
  if (MFI.getMaxWavesPerEU() > DAG.MinOccupancy)
    MFI.increaseOccupancy(MF, ++DAG.MinOccupancy);

  LLVM_DEBUG(
      dbgs()
      << "Retrying function scheduling without clustering. "
         "Aggressively try to reduce register pressure to achieve occupancy "
      << DAG.MinOccupancy << ".\n");

  return true;
}

bool ClusteredLowOccStage::initGCNSchedStage() {
  if (DisableClusteredLowOccupancy)
    return false;

  if (!GCNSchedStage::initGCNSchedStage())
    return false;

  // Don't bother trying to improve ILP in lower RP regions if occupancy has
  // not been dropped. All regions will have already been scheduled with the
  // ideal occupancy targets.
  if (DAG.StartingOccupancy <= DAG.MinOccupancy)
    return false;

  LLVM_DEBUG(
      dbgs() << "Retrying function scheduling with lowest recorded occupancy "
             << DAG.MinOccupancy << ".\n");
  return true;
}

bool PreRARematStage::initGCNSchedStage() {
  if (!GCNSchedStage::initGCNSchedStage())
    return false;

  if (DAG.RegionsWithMinOcc.none() || DAG.Regions.size() == 1)
    return false;

  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  // Check maximum occupancy.
  if (ST.computeOccupancy(MF.getFunction(), MFI.getLDSSize()) ==
      DAG.MinOccupancy)
    return false;

  // FIXME: This pass will invalidate cached MBBLiveIns for regions
  // in between the defs and the region we sank the def to. Cached pressure
  // for regions where a def is sunk from will also be invalidated. This will
  // need to be fixed if there is another pass after this pass.
  assert(!S.hasNextStage());

  collectRematerializableInstructions();
  if (RematerializableInsts.empty() || !sinkTriviallyRematInsts(ST, TII))
    return false;

  LLVM_DEBUG(
      dbgs() << "Retrying function scheduling with improved occupancy of "
             << DAG.MinOccupancy << " from rematerializing\n");
  return true;
}

void GCNSchedStage::finalizeGCNSchedStage() {
  DAG.finishBlock();
  LLVM_DEBUG(dbgs() << "Ending scheduling stage: " << StageID << "\n");
}

void UnclusteredHighRPStage::finalizeGCNSchedStage() {
  SavedMutations.swap(DAG.Mutations);
  S.SGPRLimitBias = S.VGPRLimitBias = 0;
  if (DAG.MinOccupancy > InitialOccupancy) {
    for (unsigned IDX = 0; IDX < DAG.Pressure.size(); ++IDX)
      DAG.RegionsWithMinOcc[IDX] =
          DAG.Pressure[IDX].getOccupancy(DAG.ST) == DAG.MinOccupancy;

    LLVM_DEBUG(dbgs() << StageID
                      << " stage successfully increased occupancy to "
                      << DAG.MinOccupancy << '\n');
  }

  GCNSchedStage::finalizeGCNSchedStage();
}

bool GCNSchedStage::initGCNRegion() {
  // Check whether this new region is also a new block.
  if (DAG.RegionBegin->getParent() != CurrentMBB)
    setupNewBlock();

  unsigned NumRegionInstrs = std::distance(DAG.begin(), DAG.end());
  DAG.enterRegion(CurrentMBB, DAG.begin(), DAG.end(), NumRegionInstrs);

  // Skip empty scheduling regions (0 or 1 schedulable instructions).
  if (DAG.begin() == DAG.end() || DAG.begin() == std::prev(DAG.end()))
    return false;

  LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
  LLVM_DEBUG(dbgs() << MF.getName() << ":" << printMBBReference(*CurrentMBB)
                    << " " << CurrentMBB->getName()
                    << "\n  From: " << *DAG.begin() << "    To: ";
             if (DAG.RegionEnd != CurrentMBB->end()) dbgs() << *DAG.RegionEnd;
             else dbgs() << "End";
             dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');

  // Save original instruction order before scheduling for possible revert.
  Unsched.clear();
  Unsched.reserve(DAG.NumRegionInstrs);
  if (StageID == GCNSchedStageID::OccInitialSchedule ||
      StageID == GCNSchedStageID::ILPInitialSchedule) {
    for (auto &I : DAG) {
      Unsched.push_back(&I);
      if (I.getOpcode() == AMDGPU::SCHED_GROUP_BARRIER ||
          I.getOpcode() == AMDGPU::IGLP_OPT)
        DAG.RegionsWithIGLPInstrs[RegionIdx] = true;
    }
  } else {
    for (auto &I : DAG)
      Unsched.push_back(&I);
  }

  PressureBefore = DAG.Pressure[RegionIdx];

  LLVM_DEBUG(
      dbgs() << "Pressure before scheduling:\nRegion live-ins:"
             << print(DAG.LiveIns[RegionIdx], DAG.MRI)
             << "Region live-in pressure: "
             << print(llvm::getRegPressure(DAG.MRI, DAG.LiveIns[RegionIdx]))
             << "Region register pressure: " << print(PressureBefore));

  S.HasHighPressure = false;
  S.KnownExcessRP = isRegionWithExcessRP();

  if (DAG.RegionsWithIGLPInstrs[RegionIdx] &&
      StageID != GCNSchedStageID::UnclusteredHighRPReschedule) {
    SavedMutations.clear();
    SavedMutations.swap(DAG.Mutations);
    bool IsInitialStage = StageID == GCNSchedStageID::OccInitialSchedule ||
                          StageID == GCNSchedStageID::ILPInitialSchedule;
    DAG.addMutation(createIGroupLPDAGMutation(/*IsReentry=*/!IsInitialStage));
  }

  return true;
}

bool UnclusteredHighRPStage::initGCNRegion() {
  // Only reschedule regions with the minimum occupancy or regions that may
  // have spilling (excess register pressure).
  if ((!DAG.RegionsWithMinOcc[RegionIdx] ||
       DAG.MinOccupancy <= InitialOccupancy) &&
      !DAG.RegionsWithExcessRP[RegionIdx])
    return false;

  return GCNSchedStage::initGCNRegion();
}

bool ClusteredLowOccStage::initGCNRegion() {
  // We may need to reschedule this region if it wasn't rescheduled in the last
  // stage, or if we found it was testing critical register pressure limits in
  // the unclustered reschedule stage. The latter is because we may not have
  // been able to raise the min occupancy in the previous stage, so the region
  // may be overly constrained even if it was already rescheduled.
  if (!DAG.RegionsWithHighRP[RegionIdx])
    return false;

  return GCNSchedStage::initGCNRegion();
}

bool PreRARematStage::initGCNRegion() {
  if (!DAG.RescheduleRegions[RegionIdx])
    return false;

  return GCNSchedStage::initGCNRegion();
}

void GCNSchedStage::setupNewBlock() {
  if (CurrentMBB)
    DAG.finishBlock();

  CurrentMBB = DAG.RegionBegin->getParent();
  DAG.startBlock(CurrentMBB);
  // Get real RP for the region if it hasn't been calculated before. After the
  // initial schedule stage real RP will be collected after scheduling.
  if (StageID == GCNSchedStageID::OccInitialSchedule ||
      StageID == GCNSchedStageID::ILPInitialSchedule)
    DAG.computeBlockPressure(RegionIdx, CurrentMBB);
}

void GCNSchedStage::finalizeGCNRegion() {
  DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);
  DAG.RescheduleRegions[RegionIdx] = false;
  if (S.HasHighPressure)
    DAG.RegionsWithHighRP[RegionIdx] = true;

  // Revert scheduling if we have dropped occupancy or there is some other
  // reason that the original schedule is better.
  checkScheduling();

  if (DAG.RegionsWithIGLPInstrs[RegionIdx] &&
      StageID != GCNSchedStageID::UnclusteredHighRPReschedule)
    SavedMutations.swap(DAG.Mutations);

  DAG.exitRegion();
  RegionIdx++;
}

void GCNSchedStage::checkScheduling() {
  // Check the results of scheduling.
  PressureAfter = DAG.getRealRegPressure(RegionIdx);
  LLVM_DEBUG(dbgs() << "Pressure after scheduling: " << print(PressureAfter));
  LLVM_DEBUG(dbgs() << "Region: " << RegionIdx << ".\n");

  if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit &&
      PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) <= S.VGPRCriticalLimit) {
    DAG.Pressure[RegionIdx] = PressureAfter;
    DAG.RegionsWithMinOcc[RegionIdx] =
        PressureAfter.getOccupancy(ST) == DAG.MinOccupancy;

    // Early out if we have achieved the occupancy target.
    LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n");
    return;
  }

  unsigned TargetOccupancy =
      std::min(S.getTargetOccupancy(), ST.getOccupancyWithLocalMemSize(MF));
  unsigned WavesAfter =
      std::min(TargetOccupancy, PressureAfter.getOccupancy(ST));
  unsigned WavesBefore =
      std::min(TargetOccupancy, PressureBefore.getOccupancy(ST));
  LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore
                    << ", after " << WavesAfter << ".\n");

  // We may not be able to keep the current target occupancy because of the
  // just-scheduled region. We might still be able to revert scheduling if the
  // occupancy before was higher, or if the current schedule has register
  // pressure higher than the excess limits, which could lead to more spilling.
  unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);

  // Allow memory bound functions to drop to 4 waves if not limited by an
  // attribute.
  if (WavesAfter < WavesBefore && WavesAfter < DAG.MinOccupancy &&
      WavesAfter >= MFI.getMinAllowedOccupancy()) {
    LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "
                      << MFI.getMinAllowedOccupancy() << " waves\n");
    NewOccupancy = WavesAfter;
  }

  if (NewOccupancy < DAG.MinOccupancy) {
    DAG.MinOccupancy = NewOccupancy;
    MFI.limitOccupancy(DAG.MinOccupancy);
    DAG.RegionsWithMinOcc.reset();
    LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
                      << DAG.MinOccupancy << ".\n");
  }

  unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
  unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
  if (PressureAfter.getVGPRNum(false) > MaxVGPRs ||
      PressureAfter.getAGPRNum() > MaxVGPRs ||
      PressureAfter.getSGPRNum() > MaxSGPRs) {
    DAG.RescheduleRegions[RegionIdx] = true;
    DAG.RegionsWithHighRP[RegionIdx] = true;
    DAG.RegionsWithExcessRP[RegionIdx] = true;
  }

  // Revert if this region's schedule would cause a drop in occupancy or
  // spilling.
  if (shouldRevertScheduling(WavesAfter)) {
    revertScheduling();
  } else {
    DAG.Pressure[RegionIdx] = PressureAfter;
    DAG.RegionsWithMinOcc[RegionIdx] =
        PressureAfter.getOccupancy(ST) == DAG.MinOccupancy;
  }
}
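
// Illustrative walk-through of the memory-bound carve-out above: with
// WavesBefore = 8, WavesAfter = 6, DAG.MinOccupancy = 8, and
// MFI.getMinAllowedOccupancy() = 4, NewOccupancy becomes 6 instead of 8, so
// the function-wide minimum occupancy is lowered to 6 (whether the region's
// schedule is still reverted is decided by shouldRevertScheduling, which is
// not part of this excerpt).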
  if (NewOccupancy < DAG.MinOccupancy) {
    DAG.MinOccupancy = NewOccupancy;
    MFI.limitOccupancy(DAG.MinOccupancy);
    DAG.RegionsWithMinOcc.reset();
    LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
                      << DAG.MinOccupancy << ".\n");
  }

  unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
  unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
  if (PressureAfter.getVGPRNum(false) > MaxVGPRs ||
      PressureAfter.getAGPRNum() > MaxVGPRs ||
      PressureAfter.getSGPRNum() > MaxSGPRs) {
    DAG.RescheduleRegions[RegionIdx] = true;
    DAG.RegionsWithHighRP[RegionIdx] = true;
    DAG.RegionsWithExcessRP[RegionIdx] = true;
  }

  // Revert if this region's schedule would cause a drop in occupancy or
  // spilling.
  if (shouldRevertScheduling(WavesAfter)) {
    revertScheduling();
  } else {
    DAG.Pressure[RegionIdx] = PressureAfter;
    DAG.RegionsWithMinOcc[RegionIdx] =
        PressureAfter.getOccupancy(ST) == DAG.MinOccupancy;
  }
}

unsigned
GCNSchedStage::computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle,
                                      DenseMap<unsigned, unsigned> &ReadyCycles,
                                      const TargetSchedModel &SM) {
  unsigned ReadyCycle = CurrCycle;
  for (auto &D : SU.Preds) {
    if (D.isAssignedRegDep()) {
      MachineInstr *DefMI = D.getSUnit()->getInstr();
      unsigned Latency = SM.computeInstrLatency(DefMI);
      unsigned DefReady = ReadyCycles[DAG.getSUnit(DefMI)->NodeNum];
      ReadyCycle = std::max(ReadyCycle, DefReady + Latency);
    }
  }
  ReadyCycles[SU.NodeNum] = ReadyCycle;
  return ReadyCycle;
}

#ifndef NDEBUG
struct EarlierIssuingCycle {
  bool operator()(std::pair<MachineInstr *, unsigned> A,
                  std::pair<MachineInstr *, unsigned> B) const {
    return A.second < B.second;
  }
};

static void printScheduleModel(std::set<std::pair<MachineInstr *, unsigned>,
                                        EarlierIssuingCycle> &ReadyCycles) {
  if (ReadyCycles.empty())
    return;
  unsigned BBNum = ReadyCycles.begin()->first->getParent()->getNumber();
  dbgs() << "\n################## Schedule time ReadyCycles for MBB : " << BBNum
         << " ##################\n# Cycle #\t\t\tInstruction             "
            "                                        \n";
  unsigned IPrev = 1;
  for (auto &I : ReadyCycles) {
    if (I.second > IPrev + 1)
      dbgs() << "****************************** BUBBLE OF " << I.second - IPrev
             << " CYCLES DETECTED ******************************\n\n";
    dbgs() << "[ " << I.second << " ] : " << *I.first << "\n";
    IPrev = I.second;
  }
}
#endif

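// The metric below is the fraction of issue slots lost to latency stalls,
// scaled by ScheduleMetrics::ScaleFactor (lower is better). A worked example
// with made-up latencies: SU0 has no predecessors and issues at cycle 0; SU1
// depends on SU0 with latency 4, so it is not ready until cycle 4,
// accumulating 3 bubble cycles; SU2 depends on SU1 with latency 2 and becomes
// ready at cycle 6, adding 1 more bubble. That gives SumBubbles = 4 and
// CurrCycle = 7, so the debug-printed metric is (4 * 100) / 7 = 57.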
ScheduleMetrics
GCNSchedStage::getScheduleMetrics(const std::vector<SUnit> &InputSchedule) {
#ifndef NDEBUG
  std::set<std::pair<MachineInstr *, unsigned>, EarlierIssuingCycle>
      ReadyCyclesSorted;
#endif
  const TargetSchedModel &SM = ST.getInstrInfo()->getSchedModel();
  unsigned SumBubbles = 0;
  DenseMap<unsigned, unsigned> ReadyCycles;
  unsigned CurrCycle = 0;
  for (auto &SU : InputSchedule) {
    unsigned ReadyCycle =
        computeSUnitReadyCycle(SU, CurrCycle, ReadyCycles, SM);
    SumBubbles += ReadyCycle - CurrCycle;
#ifndef NDEBUG
    ReadyCyclesSorted.insert(std::make_pair(SU.getInstr(), ReadyCycle));
#endif
    CurrCycle = ++ReadyCycle;
  }
#ifndef NDEBUG
  LLVM_DEBUG(
      printScheduleModel(ReadyCyclesSorted);
      dbgs() << "\n\t"
             << "Metric: "
             << (SumBubbles
                     ? (SumBubbles * ScheduleMetrics::ScaleFactor) / CurrCycle
                     : 1)
             << "\n\n");
#endif

  return ScheduleMetrics(CurrCycle, SumBubbles);
}

ScheduleMetrics
GCNSchedStage::getScheduleMetrics(const GCNScheduleDAGMILive &DAG) {
#ifndef NDEBUG
  std::set<std::pair<MachineInstr *, unsigned>, EarlierIssuingCycle>
      ReadyCyclesSorted;
#endif
  const TargetSchedModel &SM = ST.getInstrInfo()->getSchedModel();
  unsigned SumBubbles = 0;
  DenseMap<unsigned, unsigned> ReadyCycles;
  unsigned CurrCycle = 0;
  for (auto &MI : DAG) {
    SUnit *SU = DAG.getSUnit(&MI);
    if (!SU)
      continue;
    unsigned ReadyCycle =
        computeSUnitReadyCycle(*SU, CurrCycle, ReadyCycles, SM);
    SumBubbles += ReadyCycle - CurrCycle;
#ifndef NDEBUG
    ReadyCyclesSorted.insert(std::make_pair(SU->getInstr(), ReadyCycle));
#endif
    CurrCycle = ++ReadyCycle;
  }
#ifndef NDEBUG
  LLVM_DEBUG(
      printScheduleModel(ReadyCyclesSorted);
      dbgs() << "\n\t"
             << "Metric: "
             << (SumBubbles
                     ? (SumBubbles * ScheduleMetrics::ScaleFactor) / CurrCycle
                     : 1)
             << "\n\n");
#endif

  return ScheduleMetrics(CurrCycle, SumBubbles);
}

bool GCNSchedStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (WavesAfter < DAG.MinOccupancy)
    return true;

  return false;
}

bool OccInitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (PressureAfter == PressureBefore)
    return false;

  if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
    return true;

  if (mayCauseSpilling(WavesAfter))
    return true;

  return false;
}

bool UnclusteredHighRPStage::shouldRevertScheduling(unsigned WavesAfter) {
  // If RP is not reduced in the unclustered reschedule stage, revert to the
  // old schedule.
  if ((WavesAfter <= PressureBefore.getOccupancy(ST) &&
       mayCauseSpilling(WavesAfter)) ||
      GCNSchedStage::shouldRevertScheduling(WavesAfter)) {
    LLVM_DEBUG(dbgs() << "Unclustered reschedule did not help.\n");
    return true;
  }

  // Do not attempt to relax the schedule even further if we are already
  // spilling.
  if (isRegionWithExcessRP())
    return false;

  LLVM_DEBUG(
      dbgs()
      << "\n\t      *** In shouldRevertScheduling ***\n"
      << "      *********** BEFORE UnclusteredHighRPStage ***********\n");
  ScheduleMetrics MBefore = getScheduleMetrics(DAG.SUnits);
  LLVM_DEBUG(
      dbgs()
      << "\n      *********** AFTER UnclusteredHighRPStage ***********\n");
  ScheduleMetrics MAfter = getScheduleMetrics(DAG);
  unsigned OldMetric = MBefore.getMetric();
  unsigned NewMetric = MAfter.getMetric();
  unsigned WavesBefore =
      std::min(S.getTargetOccupancy(), PressureBefore.getOccupancy(ST));
  unsigned Profit =
      ((WavesAfter * ScheduleMetrics::ScaleFactor) / WavesBefore *
       ((OldMetric + ScheduleMetricBias) * ScheduleMetrics::ScaleFactor) /
       NewMetric) /
      ScheduleMetrics::ScaleFactor;
  LLVM_DEBUG(dbgs() << "\tMetric before " << MBefore << "\tMetric after "
                    << MAfter << "Profit: " << Profit << "\n");
  return Profit < ScheduleMetrics::ScaleFactor;
}
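
// A worked example of the Profit heuristic above, using the default
// amdgpu-schedule-metric-bias of 10 and illustrative (made-up) metrics: with
// occupancy unchanged (WavesBefore == WavesAfter == 4) and the bubble metric
// improving from OldMetric = 50 to NewMetric = 40,
//   Profit = ((4 * 100) / 4 * ((50 + 10) * 100) / 40) / 100 = 150,
// which is >= ScaleFactor (100), so the unclustered schedule is kept. If the
// metric instead regressed to NewMetric = 80, Profit would be 75 < 100 and
// the stage would revert to the old schedule.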

bool ClusteredLowOccStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (PressureAfter == PressureBefore)
    return false;

  if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
    return true;

  if (mayCauseSpilling(WavesAfter))
    return true;

  return false;
}

bool PreRARematStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
    return true;

  if (mayCauseSpilling(WavesAfter))
    return true;

  return false;
}

bool ILPInitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (mayCauseSpilling(WavesAfter))
    return true;

  return false;
}

bool GCNSchedStage::mayCauseSpilling(unsigned WavesAfter) {
  if (WavesAfter <= MFI.getMinWavesPerEU() &&
      !PressureAfter.less(ST, PressureBefore) &&
      isRegionWithExcessRP()) {
    LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
    return true;
  }

  return false;
}

void GCNSchedStage::revertScheduling() {
  DAG.RegionsWithMinOcc[RegionIdx] =
      PressureBefore.getOccupancy(ST) == DAG.MinOccupancy;
  LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
  DAG.RescheduleRegions[RegionIdx] =
      S.hasNextStage() &&
      S.getNextStage() != GCNSchedStageID::UnclusteredHighRPReschedule;
  DAG.RegionEnd = DAG.RegionBegin;
  int SkippedDebugInstr = 0;
  for (MachineInstr *MI : Unsched) {
    if (MI->isDebugInstr()) {
      ++SkippedDebugInstr;
      continue;
    }

    if (MI->getIterator() != DAG.RegionEnd) {
      DAG.BB->remove(MI);
      DAG.BB->insert(DAG.RegionEnd, MI);
      if (!MI->isDebugInstr())
        DAG.LIS->handleMove(*MI, true);
    }

    // Reset read-undef flags and update them later.
    for (auto &Op : MI->all_defs())
      Op.setIsUndef(false);
    RegisterOperands RegOpers;
    RegOpers.collect(*MI, *DAG.TRI, DAG.MRI, DAG.ShouldTrackLaneMasks, false);
    if (!MI->isDebugInstr()) {
      if (DAG.ShouldTrackLaneMasks) {
        // Adjust liveness and add missing dead+read-undef flags.
        SlotIndex SlotIdx = DAG.LIS->getInstructionIndex(*MI).getRegSlot();
        RegOpers.adjustLaneLiveness(*DAG.LIS, DAG.MRI, SlotIdx, MI);
      } else {
        // Adjust for missing dead-def flags.
        RegOpers.detectDeadDefs(*MI, *DAG.LIS);
      }
    }
    DAG.RegionEnd = MI->getIterator();
    ++DAG.RegionEnd;
    LLVM_DEBUG(dbgs() << "Scheduling " << *MI);
  }

  // After reverting the schedule, debug instrs will now be at the end of the
  // block and RegionEnd will point to the first debug instr. Increment
  // RegionEnd past debug instrs to the actual end of the scheduling region.
  while (SkippedDebugInstr-- > 0)
    ++DAG.RegionEnd;

  // If the Unsched.front() instruction is a debug instruction, this will
  // actually shrink the region since we moved all debug instructions to the
  // end of the block. Find the first instruction that is not a debug
  // instruction.
  DAG.RegionBegin = Unsched.front()->getIterator();
  if (DAG.RegionBegin->isDebugInstr()) {
    for (MachineInstr *MI : Unsched) {
      if (MI->isDebugInstr())
        continue;
      DAG.RegionBegin = MI->getIterator();
      break;
    }
  }

  // Then move the debug instructions back into their correct place and set
  // RegionBegin and RegionEnd if needed.
  DAG.placeDebugValues();

  DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);
}

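// A hypothetical candidate for this stage: a VGPR constant such as
//   %v = V_MOV_B32_e32 42, implicit $exec
// defined in the entry block with its single non-debug use in a later block.
// It has one def, one use, no subregister def, and no virtual register uses,
// so it can be rematerialized next to its use, removing the register from the
// live-in sets of every region it previously crossed.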
void PreRARematStage::collectRematerializableInstructions() {
  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(DAG.TRI);
  for (unsigned I = 0, E = DAG.MRI.getNumVirtRegs(); I != E; ++I) {
    Register Reg = Register::index2VirtReg(I);
    if (!DAG.LIS->hasInterval(Reg))
      continue;

    // TODO: Handle AGPR and SGPR rematerialization
    if (!SRI->isVGPRClass(DAG.MRI.getRegClass(Reg)) ||
        !DAG.MRI.hasOneDef(Reg) || !DAG.MRI.hasOneNonDBGUse(Reg))
      continue;

    MachineOperand *Op = DAG.MRI.getOneDef(Reg);
    MachineInstr *Def = Op->getParent();
    if (Op->getSubReg() != 0 || !isTriviallyReMaterializable(*Def))
      continue;

    MachineInstr *UseI = &*DAG.MRI.use_instr_nodbg_begin(Reg);
    if (Def->getParent() == UseI->getParent())
      continue;

    // We are only collecting defs that are defined in another block and are
    // live-through or used inside regions at MinOccupancy. This means that the
    // register must be in the live-in set for the region.
    bool AddedToRematList = false;
    for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
      auto It = DAG.LiveIns[I].find(Reg);
      if (It != DAG.LiveIns[I].end() && !It->second.none()) {
        if (DAG.RegionsWithMinOcc[I]) {
          RematerializableInsts[I][Def] = UseI;
          AddedToRematList = true;
        }

        // Collect regions with rematerializable reg as live-in to avoid
        // searching later when updating RP.
        RematDefToLiveInRegions[Def].push_back(I);
      }
    }
    if (!AddedToRematList)
      RematDefToLiveInRegions.erase(Def);
  }
}

bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
                                              const TargetInstrInfo *TII) {
  // Temporary copies of cached variables we will be modifying and replacing if
  // sinking succeeds.
  SmallVector<
      std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>, 32>
      NewRegions;
  DenseMap<unsigned, GCNRPTracker::LiveRegSet> NewLiveIns;
  DenseMap<unsigned, GCNRegPressure> NewPressure;
  BitVector NewRescheduleRegions;
  LiveIntervals *LIS = DAG.LIS;

  NewRegions.resize(DAG.Regions.size());
  NewRescheduleRegions.resize(DAG.Regions.size());

  // Collect only regions that have a rematerializable def as a live-in.
  SmallSet<unsigned, 16> ImpactedRegions;
  for (const auto &It : RematDefToLiveInRegions)
    ImpactedRegions.insert(It.second.begin(), It.second.end());

  // Make copies of register pressure and live-ins cache that will be updated
  // as we rematerialize.
  for (auto Idx : ImpactedRegions) {
    NewPressure[Idx] = DAG.Pressure[Idx];
    NewLiveIns[Idx] = DAG.LiveIns[Idx];
  }
  NewRegions = DAG.Regions;
  NewRescheduleRegions.reset();

  DenseMap<MachineInstr *, MachineInstr *> InsertedMIToOldDef;
  bool Improved = false;
  for (auto I : ImpactedRegions) {
    if (!DAG.RegionsWithMinOcc[I])
      continue;

    Improved = false;
    int VGPRUsage = NewPressure[I].getVGPRNum(ST.hasGFX90AInsts());
    int SGPRUsage = NewPressure[I].getSGPRNum();

    // TODO: Handle occupancy drop due to AGPR and SGPR.
    // Check if the occupancy drop is caused by VGPR usage rather than SGPRs.
    if (ST.getOccupancyWithNumSGPRs(SGPRUsage) == DAG.MinOccupancy)
      break;

    // The occupancy of this region could have been improved by a previous
    // iteration's sinking of defs.
    if (NewPressure[I].getOccupancy(ST) > DAG.MinOccupancy) {
      NewRescheduleRegions[I] = true;
      Improved = true;
      continue;
    }

    // First check if we have enough trivially rematerializable instructions to
    // improve occupancy. Optimistically assume all instructions we are able to
    // sink decreased RP.
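    // Illustrative numbers (the VGPR-count-to-occupancy mapping is target
    // specific): if this region's pressure is 132 VGPRs and the covered lanes
    // of all sinkable defs add up to 8 registers, the optimistic estimate
    // asks whether 124 VGPRs would yield more waves than DAG.MinOccupancy.
    // If even this best case cannot raise occupancy, the stage breaks out
    // below without attempting any sinking for this region.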
    int TotalSinkableRegs = 0;
    for (const auto &It : RematerializableInsts[I]) {
      MachineInstr *Def = It.first;
      Register DefReg = Def->getOperand(0).getReg();
      TotalSinkableRegs +=
          SIRegisterInfo::getNumCoveredRegs(NewLiveIns[I][DefReg]);
    }
    int VGPRsAfterSink = VGPRUsage - TotalSinkableRegs;
    unsigned OptimisticOccupancy = ST.getOccupancyWithNumVGPRs(VGPRsAfterSink);
    // If in the most optimistic scenario we cannot improve occupancy, then do
    // not attempt to sink any instructions.
    if (OptimisticOccupancy <= DAG.MinOccupancy)
      break;

    unsigned ImproveOccupancy = 0;
    SmallVector<MachineInstr *, 4> SinkedDefs;
    for (auto &It : RematerializableInsts[I]) {
      MachineInstr *Def = It.first;
      MachineBasicBlock::iterator InsertPos =
          MachineBasicBlock::iterator(It.second);
      Register Reg = Def->getOperand(0).getReg();
      // Rematerialize MI to its use block. Since we are only rematerializing
      // instructions that do not have any virtual reg uses, we do not need to
      // call LiveRangeEdit::allUsesAvailableAt() and
      // LiveRangeEdit::canRematerializeAt().
      TII->reMaterialize(*InsertPos->getParent(), InsertPos, Reg,
                         Def->getOperand(0).getSubReg(), *Def, *DAG.TRI);
      MachineInstr *NewMI = &*std::prev(InsertPos);
      LIS->InsertMachineInstrInMaps(*NewMI);
      LIS->removeInterval(Reg);
      LIS->createAndComputeVirtRegInterval(Reg);
      InsertedMIToOldDef[NewMI] = Def;

      // Update region boundaries in the scheduling region we sank from, since
      // we may have sunk an instruction that was at the beginning or end of
      // its region.
      DAG.updateRegionBoundaries(NewRegions, Def, /*NewMI =*/nullptr,
                                 /*Removing =*/true);

      // Update region boundaries in the region we sank to.
      DAG.updateRegionBoundaries(NewRegions, InsertPos, NewMI);

      LaneBitmask PrevMask = NewLiveIns[I][Reg];
      // FIXME: Also update cached pressure for where the def was sunk from.
      // Update RP for all regions that have this reg as a live-in and remove
      // the reg from all regions as a live-in.
      for (auto Idx : RematDefToLiveInRegions[Def]) {
        NewLiveIns[Idx].erase(Reg);
        if (InsertPos->getParent() != DAG.Regions[Idx].first->getParent()) {
          // Def is live-through and not used in this block.
          NewPressure[Idx].inc(Reg, PrevMask, LaneBitmask::getNone(), DAG.MRI);
        } else {
          // Def is used and rematerialized into this block.
          GCNDownwardRPTracker RPT(*LIS);
          auto *NonDbgMI = &*skipDebugInstructionsForward(
              NewRegions[Idx].first, NewRegions[Idx].second);
          RPT.reset(*NonDbgMI, &NewLiveIns[Idx]);
          RPT.advance(NewRegions[Idx].second);
          NewPressure[Idx] = RPT.moveMaxPressure();
        }
      }

      SinkedDefs.push_back(Def);
      ImproveOccupancy = NewPressure[I].getOccupancy(ST);
      if (ImproveOccupancy > DAG.MinOccupancy)
        break;
    }

    // Remove defs we just sank from all regions' lists of sinkable defs.
    for (auto &Def : SinkedDefs)
      for (auto TrackedIdx : RematDefToLiveInRegions[Def])
        RematerializableInsts[TrackedIdx].erase(Def);

    if (ImproveOccupancy <= DAG.MinOccupancy)
      break;

    NewRescheduleRegions[I] = true;
    Improved = true;
  }

  if (!Improved) {
    // Occupancy was not improved for all regions that were at MinOccupancy.
    // Undo sinking and remove newly rematerialized instructions.
    for (auto &Entry : InsertedMIToOldDef) {
      MachineInstr *MI = Entry.first;
      MachineInstr *OldMI = Entry.second;
      Register Reg = MI->getOperand(0).getReg();
      LIS->RemoveMachineInstrFromMaps(*MI);
      MI->eraseFromParent();
      OldMI->clearRegisterDeads(Reg);
      LIS->removeInterval(Reg);
      LIS->createAndComputeVirtRegInterval(Reg);
    }
    return false;
  }

  // Occupancy was improved for all regions.
  for (auto &Entry : InsertedMIToOldDef) {
    MachineInstr *MI = Entry.first;
    MachineInstr *OldMI = Entry.second;

    // Remove OldMI from BBLiveInMap since we are sinking it from its MBB.
    DAG.BBLiveInMap.erase(OldMI);

    // Remove OldMI and update LIS.
    Register Reg = MI->getOperand(0).getReg();
    LIS->RemoveMachineInstrFromMaps(*OldMI);
    OldMI->eraseFromParent();
    LIS->removeInterval(Reg);
    LIS->createAndComputeVirtRegInterval(Reg);
  }

  // Update live-ins, register pressure, and regions caches.
  for (auto Idx : ImpactedRegions) {
    DAG.LiveIns[Idx] = NewLiveIns[Idx];
    DAG.Pressure[Idx] = NewPressure[Idx];
    DAG.MBBLiveIns.erase(DAG.Regions[Idx].first->getParent());
  }
  DAG.Regions = NewRegions;
  DAG.RescheduleRegions = NewRescheduleRegions;

  SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
  MFI.increaseOccupancy(MF, ++DAG.MinOccupancy);

  return true;
}

// Copied from MachineLICM
bool PreRARematStage::isTriviallyReMaterializable(const MachineInstr &MI) {
  if (!DAG.TII->isTriviallyReMaterializable(MI))
    return false;

  for (const MachineOperand &MO : MI.all_uses())
    if (MO.getReg().isVirtual())
      return false;

  return true;
}

// When removing, we will have to check both the beginning and the ending of
// the region. When inserting, we will only have to check if we are inserting
// NewMI in front of a scheduling region, and do not need to check the ending
// since we will only ever be inserting before an already existing MI.
void GCNScheduleDAGMILive::updateRegionBoundaries(
    SmallVectorImpl<std::pair<MachineBasicBlock::iterator,
                              MachineBasicBlock::iterator>> &RegionBoundaries,
    MachineBasicBlock::iterator MI, MachineInstr *NewMI, bool Removing) {
  unsigned I = 0, E = RegionBoundaries.size();
  // Search for the first region of the block where MI is located.
  while (I != E && MI->getParent() != RegionBoundaries[I].first->getParent())
    ++I;

  for (; I != E; ++I) {
    if (MI->getParent() != RegionBoundaries[I].first->getParent())
      return;

    if (Removing && MI == RegionBoundaries[I].first &&
        MI == RegionBoundaries[I].second) {
      // MI is in a region of size 1. After removing, the region will have
      // size 0, so set RegionBegin and RegionEnd to the past-the-end block
      // iterator.
      RegionBoundaries[I] =
          std::pair(MI->getParent()->end(), MI->getParent()->end());
      return;
    }
    if (MI == RegionBoundaries[I].first) {
      if (Removing)
        RegionBoundaries[I] =
            std::pair(std::next(MI), RegionBoundaries[I].second);
      else
        // Inserted NewMI in front of region, set new RegionBegin to NewMI.
        RegionBoundaries[I] = std::pair(MachineBasicBlock::iterator(NewMI),
                                        RegionBoundaries[I].second);
      return;
    }
    if (Removing && MI == RegionBoundaries[I].second) {
      RegionBoundaries[I] = std::pair(RegionBoundaries[I].first, std::prev(MI));
      return;
    }
  }
}

static bool hasIGLPInstrs(ScheduleDAGInstrs *DAG) {
  return std::any_of(
      DAG->begin(), DAG->end(), [](MachineBasicBlock::iterator MI) {
        unsigned Opc = MI->getOpcode();
        return Opc == AMDGPU::SCHED_GROUP_BARRIER || Opc == AMDGPU::IGLP_OPT;
      });
}
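
// The post-RA scheduler below mirrors the pre-RA IGLP handling: when the
// region contains SCHED_GROUP_BARRIER or IGLP_OPT instructions, the generic
// mutations are stashed in SavedMutations for the duration of schedule() and
// restored in finalizeSchedule(). The IsReentry=true argument appears to tell
// the IGLP mutation that an earlier scheduling stage has already applied it
// for this function.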
GCNPostScheduleDAGMILive::GCNPostScheduleDAGMILive(
    MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S,
    bool RemoveKillFlags)
    : ScheduleDAGMI(C, std::move(S), RemoveKillFlags) {}

void GCNPostScheduleDAGMILive::schedule() {
  HasIGLPInstrs = hasIGLPInstrs(this);
  if (HasIGLPInstrs) {
    SavedMutations.clear();
    SavedMutations.swap(Mutations);
    addMutation(createIGroupLPDAGMutation(/*IsReentry=*/true));
  }

  ScheduleDAGMI::schedule();
}

void GCNPostScheduleDAGMILive::finalizeSchedule() {
  if (HasIGLPInstrs)
    SavedMutations.swap(Mutations);

  ScheduleDAGMI::finalizeSchedule();
}