//===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This contains a MachineSchedStrategy implementation for maximizing wave
/// occupancy on GCN hardware.
///
/// This pass will apply multiple scheduling stages to the same function.
/// Regions are first recorded in GCNScheduleDAGMILive::schedule. The actual
/// entry point for the scheduling of those regions is
/// GCNScheduleDAGMILive::runSchedStages.
///
/// Generally, the reason for having multiple scheduling stages is to account
/// for the kernel-wide effect of register usage on occupancy. Usually, only a
/// few scheduling regions will have register pressure high enough to limit
/// occupancy for the kernel, so constraints can be relaxed to improve ILP in
/// other regions.
///
//===----------------------------------------------------------------------===//

#include "GCNSchedStrategy.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"

#define DEBUG_TYPE "machine-scheduler"

using namespace llvm;

GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
    const MachineSchedContext *C)
    : GenericScheduler(C), TargetOccupancy(0), MF(nullptr),
      HasClusteredNodes(false), HasExcessPressure(false) {}

void GCNMaxOccupancySchedStrategy::initialize(ScheduleDAGMI *DAG) {
  GenericScheduler::initialize(DAG);

  MF = &DAG->MF;

  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();

  // FIXME: This is also necessary, because some passes that run after
  // scheduling and before regalloc increase register pressure.
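  // Illustrative note (hypothetical numbers, not measured): with, say, 104
  // allocatable SGPRs and ErrorMargin = 3, the effective excess limit below
  // becomes 101. The std::min(Limit - ErrorMargin, Limit) pattern used for
  // the subtraction guards against unsigned wrap-around in the degenerate
  // case where a limit is smaller than the margin.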
  const unsigned ErrorMargin = 3;

  SGPRExcessLimit =
      Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
  VGPRExcessLimit =
      Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);

  SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
  // Set the initial TargetOccupancy to the maximum occupancy that we can
  // achieve for this function. This effectively sets a lower bound on the
  // 'Critical' register limits in the scheduler.
  TargetOccupancy = MFI.getOccupancy();
  SGPRCriticalLimit =
      std::min(ST.getMaxNumSGPRs(TargetOccupancy, true), SGPRExcessLimit);
  VGPRCriticalLimit =
      std::min(ST.getMaxNumVGPRs(TargetOccupancy), VGPRExcessLimit);

  // Subtract error margin from register limits and avoid overflow.
  SGPRCriticalLimit =
      std::min(SGPRCriticalLimit - ErrorMargin, SGPRCriticalLimit);
  VGPRCriticalLimit =
      std::min(VGPRCriticalLimit - ErrorMargin, VGPRCriticalLimit);
  SGPRExcessLimit = std::min(SGPRExcessLimit - ErrorMargin, SGPRExcessLimit);
  VGPRExcessLimit = std::min(VGPRExcessLimit - ErrorMargin, VGPRExcessLimit);
}

void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
                                                 bool AtTop,
                                                 const RegPressureTracker &RPTracker,
                                                 const SIRegisterInfo *SRI,
                                                 unsigned SGPRPressure,
                                                 unsigned VGPRPressure) {

  Cand.SU = SU;
  Cand.AtTop = AtTop;

  // getDownwardPressure() and getUpwardPressure() make temporary changes to
  // the tracker, so we need to pass those functions a non-const copy.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);

  Pressure.clear();
  MaxPressure.clear();

  if (AtTop)
    TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
  else {
    // FIXME: I think for bottom-up scheduling, the register pressure is cached
    // and can be retrieved by DAG->getPressureDiff(SU).
    TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
  }

  unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
  unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];

  // If two instructions increase the pressure of different register sets
  // by the same amount, the generic scheduler will prefer to schedule the
  // instruction that increases the set with the fewest registers, which in
  // our case would be SGPRs. This is rarely what we want, so when we report
  // excess/critical register pressure, we do it either only for VGPRs or
  // only for SGPRs.

  // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
  const unsigned MaxVGPRPressureInc = 16;
  bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
  bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;

  // FIXME: We have to enter REG-EXCESS before we reach the actual threshold
  // to increase the likelihood we don't go over the limits. We should improve
  // the analysis to look through dependencies to find the path with the least
  // register pressure.

  // We only need to update the RPDelta for instructions that increase register
  // pressure. Instructions that decrease or keep reg pressure the same will be
  // marked as RegExcess in tryCandidate() when they are compared with
  // instructions that increase the register pressure.
  if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
    HasExcessPressure = true;
    Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
    Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
  }

  if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
    HasExcessPressure = true;
    Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
    Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
  }

  // Register pressure is considered 'CRITICAL' if it is approaching a value
  // that would reduce the wave occupancy for the execution unit. When
  // register pressure is 'CRITICAL', increasing SGPR and VGPR pressure has
  // the same cost, so we don't need to prefer one over the other.
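  // Worked example (hypothetical numbers): with SGPRCriticalLimit = 99 and
  // VGPRCriticalLimit = 61, a candidate that raises pressure to 101 SGPRs and
  // 60 VGPRs yields SGPRDelta = 2 and VGPRDelta = -1, so only the SGPR set is
  // recorded as CriticalMax below (SGPRDelta > VGPRDelta).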

  int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
  int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;

  if (SGPRDelta >= 0 || VGPRDelta >= 0) {
    HasExcessPressure = true;
    if (SGPRDelta > VGPRDelta) {
      Cand.RPDelta.CriticalMax =
          PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
      Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
    } else {
      Cand.RPDelta.CriticalMax =
          PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
      Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
    }
  }
}

// This function is mostly cut and pasted from
// GenericScheduler::pickNodeFromQueue().
void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
                                                     const CandPolicy &ZonePolicy,
                                                     const RegPressureTracker &RPTracker,
                                                     SchedCandidate &Cand) {
  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
  ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
  unsigned SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
  unsigned VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
  ReadyQueue &Q = Zone.Available;
  for (SUnit *SU : Q) {

    SchedCandidate TryCand(ZonePolicy);
    initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI,
                  SGPRPressure, VGPRPressure);
    // Pass SchedBoundary only when comparing nodes from the same boundary.
    SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
    GenericScheduler::tryCandidate(Cand, TryCand, ZoneArg);
    if (TryCand.Reason != NoCand) {
      // Initialize resource delta if needed in case future heuristics query it.
      if (TryCand.ResDelta == SchedResourceDelta())
        TryCand.initResourceDelta(Zone.DAG, SchedModel);
      Cand.setBest(TryCand);
      LLVM_DEBUG(traceCandidate(Cand));
    }
  }
}

// This function is mostly cut and pasted from
// GenericScheduler::pickNodeBidirectional().
SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
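  // For instance, if the bottom zone has exactly one available SUnit and no
  // pending instruction can become ready, pickOnlyChoice() returns it
  // directly and the full candidate comparison below is skipped entirely.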
  if (SUnit *SU = Bot.pickOnlyChoice()) {
    IsTopNode = false;
    return SU;
  }
  if (SUnit *SU = Top.pickOnlyChoice()) {
    IsTopNode = true;
    return SU;
  }
  // Set the bottom-up policy based on the state of the current bottom zone and
  // the instructions outside the zone, including the top zone.
  CandPolicy BotPolicy;
  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
  // Set the top-down policy based on the state of the current top zone and
  // the instructions outside the zone, including the bottom zone.
  CandPolicy TopPolicy;
  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);

  // See if BotCand is still valid (because we previously scheduled from Top).
  LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
      BotCand.Policy != BotPolicy) {
    BotCand.reset(CandPolicy());
    pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
    assert(BotCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    LLVM_DEBUG(traceCandidate(BotCand));
#ifndef NDEBUG
    if (VerifyScheduling) {
      SchedCandidate TCand;
      TCand.reset(CandPolicy());
      pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
      assert(TCand.SU == BotCand.SU &&
             "Last pick result should correspond to re-picking right now");
    }
#endif
  }

  // Check if the top Q has a better candidate.
  LLVM_DEBUG(dbgs() << "Picking from Top:\n");
  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
      TopCand.Policy != TopPolicy) {
    TopCand.reset(CandPolicy());
    pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
    assert(TopCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    LLVM_DEBUG(traceCandidate(TopCand));
#ifndef NDEBUG
    if (VerifyScheduling) {
      SchedCandidate TCand;
      TCand.reset(CandPolicy());
      pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
      assert(TCand.SU == TopCand.SU &&
             "Last pick result should correspond to re-picking right now");
    }
#endif
  }

  // Pick best from BotCand and TopCand.
  LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand);
             dbgs() << "Bot Cand: "; traceCandidate(BotCand););
  SchedCandidate Cand = BotCand;
  TopCand.Reason = NoCand;
  GenericScheduler::tryCandidate(Cand, TopCand, nullptr);
  if (TopCand.Reason != NoCand) {
    Cand.setBest(TopCand);
  }
  LLVM_DEBUG(dbgs() << "Picking: "; traceCandidate(Cand););

  IsTopNode = Cand.AtTop;
  return Cand.SU;
}

// This function is mostly cut and pasted from
// GenericScheduler::pickNode().
SUnit *GCNMaxOccupancySchedStrategy::pickNode(bool &IsTopNode) {
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return nullptr;
  }
  SUnit *SU;
  do {
    if (RegionPolicy.OnlyTopDown) {
      SU = Top.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        TopCand.reset(NoPolicy);
        pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
        assert(TopCand.Reason != NoCand && "failed to find a candidate");
        SU = TopCand.SU;
      }
      IsTopNode = true;
    } else if (RegionPolicy.OnlyBottomUp) {
      SU = Bot.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        BotCand.reset(NoPolicy);
        pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
        assert(BotCand.Reason != NoCand && "failed to find a candidate");
        SU = BotCand.SU;
      }
      IsTopNode = false;
    } else {
      SU = pickNodeBidirectional(IsTopNode);
    }
  } while (SU->isScheduled);

  if (SU->isTopReady())
    Top.removeReady(SU);
  if (SU->isBottomReady())
    Bot.removeReady(SU);

  if (!HasClusteredNodes && SU->getInstr()->mayLoadOrStore()) {
    for (SDep &Dep : SU->Preds) {
      if (Dep.isCluster()) {
        HasClusteredNodes = true;
        break;
      }
    }
  }

  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
                    << *SU->getInstr());
  return SU;
}

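// Usage note: this strategy is not self-registering. A minimal sketch of how
// a target wires it into the machine scheduler, modeled on AMDGPU's scheduler
// factory (the exact mutation list here is an assumption and may differ from
// the target's real setup):
//
//   static ScheduleDAGInstrs *
//   createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
//     ScheduleDAGMILive *DAG = new GCNScheduleDAGMILive(
//         C, std::make_unique<GCNMaxOccupancySchedStrategy>(C));
//     DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
//     DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
//     return DAG;
//   }
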
GCNScheduleDAGMILive::GCNScheduleDAGMILive(
    MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S)
    : ScheduleDAGMILive(C, std::move(S)), ST(MF.getSubtarget<GCNSubtarget>()),
      MFI(*MF.getInfo<SIMachineFunctionInfo>()),
      StartingOccupancy(MFI.getOccupancy()), MinOccupancy(StartingOccupancy) {

  LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
}

void GCNScheduleDAGMILive::schedule() {
  // Collect all scheduling regions. The actual scheduling is performed in
  // GCNScheduleDAGMILive::finalizeSchedule.
  Regions.push_back(std::make_pair(RegionBegin, RegionEnd));
}

GCNRegPressure
GCNScheduleDAGMILive::getRealRegPressure(unsigned RegionIdx) const {
  GCNDownwardRPTracker RPTracker(*LIS);
  RPTracker.advance(begin(), end(), &LiveIns[RegionIdx]);
  return RPTracker.moveMaxPressure();
}

void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
                                                const MachineBasicBlock *MBB) {
  GCNDownwardRPTracker RPTracker(*LIS);

  // If the block has only one successor, then the live-ins of that successor
  // are the live-outs of the current block. We can reuse the calculated live
  // set if the successor will be scheduled after the current block.
  const MachineBasicBlock *OnlySucc = nullptr;
  if (MBB->succ_size() == 1 && !(*MBB->succ_begin())->empty()) {
    SlotIndexes *Ind = LIS->getSlotIndexes();
    if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(*MBB->succ_begin()))
      OnlySucc = *MBB->succ_begin();
  }

  // Scheduler sends regions from the end of the block upwards.
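  // Because regions were recorded in that order, the indices at RegionIdx and
  // above that share this MBB run from the block's bottom region up to its
  // top region. For example (hypothetical layout), if Regions[4..6] all
  // belong to MBB and RegionIdx == 4, the loop below stops CurRegion at 6,
  // the topmost region, which is where the downward tracker must start.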
  size_t CurRegion = RegionIdx;
  for (size_t E = Regions.size(); CurRegion != E; ++CurRegion)
    if (Regions[CurRegion].first->getParent() != MBB)
      break;
  --CurRegion;

  auto I = MBB->begin();
  auto LiveInIt = MBBLiveIns.find(MBB);
  auto &Rgn = Regions[CurRegion];
  auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
  if (LiveInIt != MBBLiveIns.end()) {
    auto LiveIn = std::move(LiveInIt->second);
    RPTracker.reset(*MBB->begin(), &LiveIn);
    MBBLiveIns.erase(LiveInIt);
  } else {
    I = Rgn.first;
    auto LRS = BBLiveInMap.lookup(NonDbgMI);
#ifdef EXPENSIVE_CHECKS
    assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS));
#endif
    RPTracker.reset(*I, &LRS);
  }

  for (;;) {
    I = RPTracker.getNext();

    if (Regions[CurRegion].first == I || NonDbgMI == I) {
      LiveIns[CurRegion] = RPTracker.getLiveRegs();
      RPTracker.clearMaxPressure();
    }

    if (Regions[CurRegion].second == I) {
      Pressure[CurRegion] = RPTracker.moveMaxPressure();
      if (CurRegion-- == RegionIdx)
        break;
    }
    RPTracker.advanceToNext();
    RPTracker.advanceBeforeNext();
  }

  if (OnlySucc) {
    if (I != MBB->end()) {
      RPTracker.advanceToNext();
      RPTracker.advance(MBB->end());
    }
    RPTracker.reset(*OnlySucc->begin(), &RPTracker.getLiveRegs());
    RPTracker.advanceBeforeNext();
    MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs();
  }
}

DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
GCNScheduleDAGMILive::getBBLiveInMap() const {
  assert(!Regions.empty());
  std::vector<MachineInstr *> BBStarters;
  BBStarters.reserve(Regions.size());
  auto I = Regions.rbegin(), E = Regions.rend();
  auto *BB = I->first->getParent();
  do {
    auto *MI = &*skipDebugInstructionsForward(I->first, I->second);
    BBStarters.push_back(MI);
    do {
      ++I;
    } while (I != E && I->first->getParent() == BB);
  } while (I != E);
  return getLiveRegMap(BBStarters, false /*After*/, *LIS);
}

void GCNScheduleDAGMILive::finalizeSchedule() {
  // Start actual scheduling here. This function is called by the base
  // MachineScheduler after all regions have been recorded by
  // GCNScheduleDAGMILive::schedule().
  LiveIns.resize(Regions.size());
  Pressure.resize(Regions.size());
  RescheduleRegions.resize(Regions.size());
  RegionsWithClusters.resize(Regions.size());
  RegionsWithHighRP.resize(Regions.size());
  RegionsWithMinOcc.resize(Regions.size());
  RescheduleRegions.set();
  RegionsWithClusters.reset();
  RegionsWithHighRP.reset();
  RegionsWithMinOcc.reset();

  runSchedStages();
}

void GCNScheduleDAGMILive::runSchedStages() {
  LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");
  InitialScheduleStage S0(GCNSchedStageID::InitialSchedule, *this);
  UnclusteredRescheduleStage S1(GCNSchedStageID::UnclusteredReschedule, *this);
  ClusteredLowOccStage S2(GCNSchedStageID::ClusteredLowOccupancyReschedule,
                          *this);
  PreRARematStage S3(GCNSchedStageID::PreRARematerialize, *this);
  GCNSchedStage *SchedStages[] = {&S0, &S1, &S2, &S3};

  if (!Regions.empty())
    BBLiveInMap = getBBLiveInMap();

  for (auto *Stage : SchedStages) {
    if (!Stage->initGCNSchedStage())
      continue;

    for (auto Region : Regions) {
      RegionBegin = Region.first;
      RegionEnd = Region.second;
      // Set up for scheduling the region and check whether it should be
      // skipped.
      if (!Stage->initGCNRegion()) {
        Stage->advanceRegion();
        exitRegion();
        continue;
      }

      ScheduleDAGMILive::schedule();
      Stage->finalizeGCNRegion();
    }

    Stage->finalizeGCNSchedStage();
  }
}

#ifndef NDEBUG
raw_ostream &llvm::operator<<(raw_ostream &OS, const GCNSchedStageID &StageID) {
  switch (StageID) {
  case GCNSchedStageID::InitialSchedule:
    OS << "Initial Schedule";
    break;
  case GCNSchedStageID::UnclusteredReschedule:
    OS << "Unclustered Reschedule";
    break;
  case GCNSchedStageID::ClusteredLowOccupancyReschedule:
    OS << "Clustered Low Occupancy Reschedule";
    break;
  case GCNSchedStageID::PreRARematerialize:
    OS << "Pre-RA Rematerialize";
    break;
  }
  return OS;
}
#endif

GCNSchedStage::GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
    : DAG(DAG), S(static_cast<GCNMaxOccupancySchedStrategy &>(*DAG.SchedImpl)),
      MF(DAG.MF), MFI(DAG.MFI), ST(DAG.ST), StageID(StageID) {}

bool GCNSchedStage::initGCNSchedStage() {
  if (!DAG.LIS)
    return false;

  LLVM_DEBUG(dbgs() << "Starting scheduling stage: " << StageID << "\n");
  return true;
}

bool UnclusteredRescheduleStage::initGCNSchedStage() {
  if (!GCNSchedStage::initGCNSchedStage())
    return false;

  if (DAG.RescheduleRegions.none())
    return false;

  SavedMutations.swap(DAG.Mutations);

  LLVM_DEBUG(dbgs() << "Retrying function scheduling without clustering.\n");
  return true;
}

bool ClusteredLowOccStage::initGCNSchedStage() {
  if (!GCNSchedStage::initGCNSchedStage())
    return false;

  // Don't bother trying to improve ILP in lower RP regions if occupancy has
  // not been dropped. All regions will have already been scheduled with the
  // ideal occupancy targets.
  if (DAG.StartingOccupancy <= DAG.MinOccupancy)
    return false;

  LLVM_DEBUG(
      dbgs() << "Retrying function scheduling with lowest recorded occupancy "
             << DAG.MinOccupancy << ".\n");
  return true;
}

bool PreRARematStage::initGCNSchedStage() {
  if (!GCNSchedStage::initGCNSchedStage())
    return false;

  if (DAG.RegionsWithMinOcc.none() || DAG.Regions.size() == 1)
    return false;

  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  // Check maximum occupancy.
  if (ST.computeOccupancy(MF.getFunction(), MFI.getLDSSize()) ==
      DAG.MinOccupancy)
    return false;

  // FIXME: This pass will invalidate cached MBBLiveIns for regions in between
  // the defs and the region we sunk the def to. Cached pressure for regions
  // where a def is sunk from will also be invalidated. This will need to be
  // fixed if there is another pass after this pass.

  collectRematerializableInstructions();
  if (RematerializableInsts.empty() || !sinkTriviallyRematInsts(ST, TII))
    return false;

  LLVM_DEBUG(
      dbgs() << "Retrying function scheduling with improved occupancy of "
             << DAG.MinOccupancy << " from rematerializing\n");
  return true;
}

void GCNSchedStage::finalizeGCNSchedStage() {
  DAG.finishBlock();
  LLVM_DEBUG(dbgs() << "Ending scheduling stage: " << StageID << "\n");
}

void UnclusteredRescheduleStage::finalizeGCNSchedStage() {
  SavedMutations.swap(DAG.Mutations);

  GCNSchedStage::finalizeGCNSchedStage();
}

bool GCNSchedStage::initGCNRegion() {
  // Check whether this new region is also a new block.
  if (DAG.RegionBegin->getParent() != CurrentMBB)
    setupNewBlock();

  unsigned NumRegionInstrs = std::distance(DAG.begin(), DAG.end());
  DAG.enterRegion(CurrentMBB, DAG.begin(), DAG.end(), NumRegionInstrs);

  // Skip empty scheduling regions (0 or 1 schedulable instructions).
  if (DAG.begin() == DAG.end() || DAG.begin() == std::prev(DAG.end()))
    return false;

  LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
  LLVM_DEBUG(dbgs() << MF.getName() << ":" << printMBBReference(*CurrentMBB)
                    << " " << CurrentMBB->getName()
                    << "\n From: " << *DAG.begin() << " To: ";
             if (DAG.RegionEnd != CurrentMBB->end()) dbgs() << *DAG.RegionEnd;
             else dbgs() << "End";
             dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');

  // Save the original instruction order before scheduling for a possible
  // revert.
  Unsched.clear();
  Unsched.reserve(DAG.NumRegionInstrs);
  for (auto &I : DAG)
    Unsched.push_back(&I);

  PressureBefore = DAG.Pressure[RegionIdx];

  LLVM_DEBUG(
      dbgs() << "Pressure before scheduling:\nRegion live-ins:";
      GCNRPTracker::printLiveRegs(dbgs(), DAG.LiveIns[RegionIdx], DAG.MRI);
      dbgs() << "Region live-in pressure: ";
      llvm::getRegPressure(DAG.MRI, DAG.LiveIns[RegionIdx]).print(dbgs());
      dbgs() << "Region register pressure: "; PressureBefore.print(dbgs()));

  // Set HasClusteredNodes to true for late stages where we have already
  // collected it. That way pickNode() will not scan SDeps when it is not
  // needed.
  S.HasClusteredNodes = StageID > GCNSchedStageID::InitialSchedule;
  S.HasExcessPressure = false;

  return true;
}

bool UnclusteredRescheduleStage::initGCNRegion() {
  if (!DAG.RescheduleRegions[RegionIdx])
    return false;

  return GCNSchedStage::initGCNRegion();
}

bool ClusteredLowOccStage::initGCNRegion() {
  // We may need to reschedule this region if it doesn't have clusters, so it
  // wasn't rescheduled in the last stage, or if we found that it was testing
  // critical register pressure limits in the unclustered reschedule stage.
  // The latter is because we may not have been able to raise the min
  // occupancy in the previous stage, so the region may be overly constrained
  // even if it was already rescheduled.
  if (!DAG.RegionsWithClusters[RegionIdx] && !DAG.RegionsWithHighRP[RegionIdx])
    return false;

  return GCNSchedStage::initGCNRegion();
}

bool PreRARematStage::initGCNRegion() {
  if (!DAG.RescheduleRegions[RegionIdx])
    return false;

  return GCNSchedStage::initGCNRegion();
}

void GCNSchedStage::setupNewBlock() {
  if (CurrentMBB)
    DAG.finishBlock();

  CurrentMBB = DAG.RegionBegin->getParent();
  DAG.startBlock(CurrentMBB);
  // Get real RP for the region if it hasn't been calculated before. After the
  // initial schedule stage real RP will be collected after scheduling.
  if (StageID == GCNSchedStageID::InitialSchedule)
    DAG.computeBlockPressure(RegionIdx, CurrentMBB);
}

void GCNSchedStage::finalizeGCNRegion() {
  DAG.Regions[RegionIdx] = std::make_pair(DAG.RegionBegin, DAG.RegionEnd);
  DAG.RescheduleRegions[RegionIdx] = false;
  if (S.HasExcessPressure)
    DAG.RegionsWithHighRP[RegionIdx] = true;

  // Revert scheduling if we have dropped occupancy or there is some other
  // reason that the original schedule is better.
  checkScheduling();

  DAG.exitRegion();
  RegionIdx++;
}

void InitialScheduleStage::finalizeGCNRegion() {
  // Record which regions have clustered nodes for the next unclustered
  // reschedule stage.
  assert(nextStage(StageID) == GCNSchedStageID::UnclusteredReschedule);
  if (S.HasClusteredNodes)
    DAG.RegionsWithClusters[RegionIdx] = true;

  GCNSchedStage::finalizeGCNRegion();
}

void GCNSchedStage::checkScheduling() {
  // Check the results of scheduling.
  PressureAfter = DAG.getRealRegPressure(RegionIdx);
  LLVM_DEBUG(dbgs() << "Pressure after scheduling: ";
             PressureAfter.print(dbgs()));

  if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit &&
      PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) <= S.VGPRCriticalLimit) {
    DAG.Pressure[RegionIdx] = PressureAfter;
    DAG.RegionsWithMinOcc[RegionIdx] =
        PressureAfter.getOccupancy(ST) == DAG.MinOccupancy;

    // Early out if we have achieved the occupancy target.
    LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n");
    return;
  }

  unsigned WavesAfter =
      std::min(S.getTargetOccupancy(), PressureAfter.getOccupancy(ST));
  unsigned WavesBefore =
      std::min(S.getTargetOccupancy(), PressureBefore.getOccupancy(ST));
  LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore
                    << ", after " << WavesAfter << ".\n");

  // We may not be able to keep the current target occupancy because of the
  // just-scheduled region. We might still be able to revert scheduling if the
  // occupancy before was higher, or if the current schedule has register
  // pressure higher than the excess limits, which could lead to more spilling.
  unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);

  // Allow memory bound functions to drop to 4 waves if not limited by an
  // attribute.
  if (WavesAfter < WavesBefore && WavesAfter < DAG.MinOccupancy &&
      WavesAfter >= MFI.getMinAllowedOccupancy()) {
    LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "
                      << MFI.getMinAllowedOccupancy() << " waves\n");
    NewOccupancy = WavesAfter;
  }

  if (NewOccupancy < DAG.MinOccupancy) {
    DAG.MinOccupancy = NewOccupancy;
    MFI.limitOccupancy(DAG.MinOccupancy);
    DAG.RegionsWithMinOcc.reset();
    LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
                      << DAG.MinOccupancy << ".\n");
  }

  unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
  unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
  if (PressureAfter.getVGPRNum(false) > MaxVGPRs ||
      PressureAfter.getAGPRNum() > MaxVGPRs ||
      PressureAfter.getSGPRNum() > MaxSGPRs) {
    DAG.RescheduleRegions[RegionIdx] = true;
    DAG.RegionsWithHighRP[RegionIdx] = true;
  }

  // Revert if this region's schedule would cause a drop in occupancy or
  // spilling.
  if (shouldRevertScheduling(WavesAfter)) {
    revertScheduling();
  } else {
    DAG.Pressure[RegionIdx] = PressureAfter;
    DAG.RegionsWithMinOcc[RegionIdx] =
        PressureAfter.getOccupancy(ST) == DAG.MinOccupancy;
  }
}

bool GCNSchedStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (WavesAfter < DAG.MinOccupancy)
    return true;

  return false;
}

bool InitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
    return true;

  if (mayCauseSpilling(WavesAfter))
    return true;

  assert(nextStage(StageID) == GCNSchedStageID::UnclusteredReschedule);
  // Don't reschedule the region in the next stage if it doesn't have clusters.
  if (!DAG.RegionsWithClusters[RegionIdx])
    DAG.RescheduleRegions[RegionIdx] = false;

  return false;
}

bool UnclusteredRescheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
    return true;

  // If RP is not reduced in the unclustered reschedule stage, revert to the
  // old schedule.
  if (!PressureAfter.less(ST, PressureBefore)) {
    LLVM_DEBUG(dbgs() << "Unclustered reschedule did not help.\n");
    return true;
  }

  return false;
}

bool ClusteredLowOccStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
    return true;

  if (mayCauseSpilling(WavesAfter))
    return true;

  return false;
}

bool PreRARematStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
    return true;

  if (mayCauseSpilling(WavesAfter))
    return true;

  return false;
}

bool GCNSchedStage::mayCauseSpilling(unsigned WavesAfter) {
  if (WavesAfter <= MFI.getMinWavesPerEU() &&
      !PressureAfter.less(ST, PressureBefore) &&
      DAG.RescheduleRegions[RegionIdx]) {
    LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
    return true;
  }

  return false;
}

void GCNSchedStage::revertScheduling() {
  DAG.RegionsWithMinOcc[RegionIdx] =
      PressureBefore.getOccupancy(ST) == DAG.MinOccupancy;
  LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
  DAG.RescheduleRegions[RegionIdx] =
      DAG.RegionsWithClusters[RegionIdx] ||
      (nextStage(StageID)) != GCNSchedStageID::UnclusteredReschedule;
  DAG.RegionEnd = DAG.RegionBegin;
  int SkippedDebugInstr = 0;
  for (MachineInstr *MI : Unsched) {
    if (MI->isDebugInstr()) {
      ++SkippedDebugInstr;
      continue;
    }

    if (MI->getIterator() != DAG.RegionEnd) {
      DAG.BB->remove(MI);
      DAG.BB->insert(DAG.RegionEnd, MI);
      if (!MI->isDebugInstr())
        DAG.LIS->handleMove(*MI, true);
    }

    // Reset read-undef flags and update them later.
    for (auto &Op : MI->operands())
      if (Op.isReg() && Op.isDef())
        Op.setIsUndef(false);
    RegisterOperands RegOpers;
    RegOpers.collect(*MI, *DAG.TRI, DAG.MRI, DAG.ShouldTrackLaneMasks, false);
    if (!MI->isDebugInstr()) {
      if (DAG.ShouldTrackLaneMasks) {
        // Adjust liveness and add missing dead+read-undef flags.
        SlotIndex SlotIdx = DAG.LIS->getInstructionIndex(*MI).getRegSlot();
        RegOpers.adjustLaneLiveness(*DAG.LIS, DAG.MRI, SlotIdx, MI);
      } else {
        // Adjust for missing dead-def flags.
        RegOpers.detectDeadDefs(*MI, *DAG.LIS);
      }
    }
    DAG.RegionEnd = MI->getIterator();
    ++DAG.RegionEnd;
    LLVM_DEBUG(dbgs() << "Scheduling " << *MI);
  }

  // After reverting the schedule, debug instructions will now be at the end
  // of the block and RegionEnd will point to the first debug instruction.
  // Increment RegionEnd past the debug instructions to the actual end of the
  // scheduling region.
  while (SkippedDebugInstr-- > 0)
    ++DAG.RegionEnd;

  // If the Unsched.front() instruction is a debug instruction, this will
  // actually shrink the region since we moved all debug instructions to the
  // end of the block. Find the first instruction that is not a debug
  // instruction.
  DAG.RegionBegin = Unsched.front()->getIterator();
  if (DAG.RegionBegin->isDebugInstr()) {
    for (MachineInstr *MI : Unsched) {
      if (MI->isDebugInstr())
        continue;
      DAG.RegionBegin = MI->getIterator();
      break;
    }
  }

  // Then move the debug instructions back into their correct place and set
  // RegionBegin and RegionEnd if needed.
  DAG.placeDebugValues();

  DAG.Regions[RegionIdx] = std::make_pair(DAG.RegionBegin, DAG.RegionEnd);
}

void PreRARematStage::collectRematerializableInstructions() {
  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(DAG.TRI);
  for (unsigned I = 0, E = DAG.MRI.getNumVirtRegs(); I != E; ++I) {
    Register Reg = Register::index2VirtReg(I);
    if (!DAG.LIS->hasInterval(Reg))
      continue;

    // TODO: Handle AGPR and SGPR rematerialization.
    if (!SRI->isVGPRClass(DAG.MRI.getRegClass(Reg)) ||
        !DAG.MRI.hasOneDef(Reg) || !DAG.MRI.hasOneNonDBGUse(Reg))
      continue;

    MachineOperand *Op = DAG.MRI.getOneDef(Reg);
    MachineInstr *Def = Op->getParent();
    if (Op->getSubReg() != 0 || !isTriviallyReMaterializable(*Def))
      continue;

    MachineInstr *UseI = &*DAG.MRI.use_instr_nodbg_begin(Reg);
    if (Def->getParent() == UseI->getParent())
      continue;

    // We are only collecting defs that are defined in another block and are
    // live-through or used inside regions at MinOccupancy. This means that the
    // register must be in the live-in set for the region.
    bool AddedToRematList = false;
    for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
      auto It = DAG.LiveIns[I].find(Reg);
      if (It != DAG.LiveIns[I].end() && !It->second.none()) {
        if (DAG.RegionsWithMinOcc[I]) {
          RematerializableInsts[I][Def] = UseI;
          AddedToRematList = true;
        }

        // Collect regions with rematerializable reg as live-in to avoid
        // searching later when updating RP.
        RematDefToLiveInRegions[Def].push_back(I);
      }
    }
    if (!AddedToRematList)
      RematDefToLiveInRegions.erase(Def);
  }
}

bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
                                              const TargetInstrInfo *TII) {
  // Temporary copies of cached variables we will be modifying and replacing if
  // sinking succeeds.
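  // This gives the whole sinking attempt transactional behavior: all updates
  // go into the New* copies first, and only if occupancy actually improves
  // are they committed back to the DAG's caches; otherwise the rematerialized
  // instructions are erased and the original state is left untouched.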
  SmallVector<
      std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>, 32>
      NewRegions;
  DenseMap<unsigned, GCNRPTracker::LiveRegSet> NewLiveIns;
  DenseMap<unsigned, GCNRegPressure> NewPressure;
  BitVector NewRescheduleRegions;
  LiveIntervals *LIS = DAG.LIS;

  NewRegions.resize(DAG.Regions.size());
  NewRescheduleRegions.resize(DAG.Regions.size());

  // Collect only regions that have a rematerializable def as a live-in.
  SmallSet<unsigned, 16> ImpactedRegions;
  for (const auto &It : RematDefToLiveInRegions)
    ImpactedRegions.insert(It.second.begin(), It.second.end());

  // Make copies of register pressure and live-ins cache that will be updated
  // as we rematerialize.
  for (auto Idx : ImpactedRegions) {
    NewPressure[Idx] = DAG.Pressure[Idx];
    NewLiveIns[Idx] = DAG.LiveIns[Idx];
  }
  NewRegions = DAG.Regions;
  NewRescheduleRegions.reset();

  DenseMap<MachineInstr *, MachineInstr *> InsertedMIToOldDef;
  bool Improved = false;
  for (auto I : ImpactedRegions) {
    if (!DAG.RegionsWithMinOcc[I])
      continue;

    Improved = false;
    int VGPRUsage = NewPressure[I].getVGPRNum(ST.hasGFX90AInsts());
    int SGPRUsage = NewPressure[I].getSGPRNum();

    // TODO: Handle occupancy drop due to AGPR and SGPR.
    // Check whether the occupancy drop is caused by VGPR usage and not SGPR.
    if (ST.getOccupancyWithNumSGPRs(SGPRUsage) == DAG.MinOccupancy)
      break;

    // The occupancy of this region could have been improved by a previous
    // iteration's sinking of defs.
    if (NewPressure[I].getOccupancy(ST) > DAG.MinOccupancy) {
      NewRescheduleRegions[I] = true;
      Improved = true;
      continue;
    }

    // First check if we have enough trivially rematerializable instructions to
    // improve occupancy. Optimistically assume all instructions we are able to
    // sink decreased RP.
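    // Worked example (hypothetical numbers): if VGPRUsage is 68 and five
    // sinkable 32-bit defs are live-in here, VGPRsAfterSink is 63; sinking is
    // only attempted if getOccupancyWithNumVGPRs(63) beats DAG.MinOccupancy.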
    int TotalSinkableRegs = 0;
    for (const auto &It : RematerializableInsts[I]) {
      MachineInstr *Def = It.first;
      Register DefReg = Def->getOperand(0).getReg();
      TotalSinkableRegs +=
          SIRegisterInfo::getNumCoveredRegs(NewLiveIns[I][DefReg]);
    }
    int VGPRsAfterSink = VGPRUsage - TotalSinkableRegs;
    unsigned OptimisticOccupancy = ST.getOccupancyWithNumVGPRs(VGPRsAfterSink);
    // If we cannot improve occupancy even in the most optimistic scenario,
    // then do not attempt to sink any instructions.
    if (OptimisticOccupancy <= DAG.MinOccupancy)
      break;

    unsigned ImproveOccupancy = 0;
    SmallVector<MachineInstr *, 4> SinkedDefs;
    for (auto &It : RematerializableInsts[I]) {
      MachineInstr *Def = It.first;
      MachineBasicBlock::iterator InsertPos =
          MachineBasicBlock::iterator(It.second);
      Register Reg = Def->getOperand(0).getReg();
      // Rematerialize MI to its use block. Since we are only rematerializing
      // instructions that do not have any virtual reg uses, we do not need to
      // call LiveRangeEdit::allUsesAvailableAt() and
      // LiveRangeEdit::canRematerializeAt().
      TII->reMaterialize(*InsertPos->getParent(), InsertPos, Reg,
                         Def->getOperand(0).getSubReg(), *Def, *DAG.TRI);
      MachineInstr *NewMI = &*(--InsertPos);
      LIS->InsertMachineInstrInMaps(*NewMI);
      LIS->removeInterval(Reg);
      LIS->createAndComputeVirtRegInterval(Reg);
      InsertedMIToOldDef[NewMI] = Def;

      // Update region boundaries in the scheduling region we sank from, since
      // we may have removed an instruction that was at the beginning or end
      // of its region.
      DAG.updateRegionBoundaries(NewRegions, Def, /*NewMI =*/nullptr,
                                 /*Removing =*/true);

      // Update region boundaries in the region we sank to.
      DAG.updateRegionBoundaries(NewRegions, InsertPos, NewMI);

      LaneBitmask PrevMask = NewLiveIns[I][Reg];
      // FIXME: Also update cached pressure for where the def was sunk from.
      // Update RP for all regions that have this reg as a live-in and remove
      // the reg from all regions' live-in sets.
      for (auto Idx : RematDefToLiveInRegions[Def]) {
        NewLiveIns[Idx].erase(Reg);
        if (InsertPos->getParent() != DAG.Regions[Idx].first->getParent()) {
          // Def is live-through and not used in this block.
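          // Transitioning the live mask from PrevMask to an empty mask lets
          // GCNRegPressure::inc deduct the def's registers from the cached
          // pressure directly; no rescan of the region is needed in the
          // live-through case.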
          NewPressure[Idx].inc(Reg, PrevMask, LaneBitmask::getNone(), DAG.MRI);
        } else {
          // Def is used and rematerialized into this block.
          GCNDownwardRPTracker RPT(*LIS);
          auto *NonDbgMI = &*skipDebugInstructionsForward(
              NewRegions[Idx].first, NewRegions[Idx].second);
          RPT.reset(*NonDbgMI, &NewLiveIns[Idx]);
          RPT.advance(NewRegions[Idx].second);
          NewPressure[Idx] = RPT.moveMaxPressure();
        }
      }

      SinkedDefs.push_back(Def);
      ImproveOccupancy = NewPressure[I].getOccupancy(ST);
      if (ImproveOccupancy > DAG.MinOccupancy)
        break;
    }

    // Remove the defs we just sank from all regions' lists of sinkable defs.
    for (auto &Def : SinkedDefs)
      for (auto TrackedIdx : RematDefToLiveInRegions[Def])
        RematerializableInsts[TrackedIdx].erase(Def);

    if (ImproveOccupancy <= DAG.MinOccupancy)
      break;

    NewRescheduleRegions[I] = true;
    Improved = true;
  }

  if (!Improved) {
    // Occupancy was not improved for all regions that were at MinOccupancy.
    // Undo sinking and remove newly rematerialized instructions.
    for (auto &Entry : InsertedMIToOldDef) {
      MachineInstr *MI = Entry.first;
      MachineInstr *OldMI = Entry.second;
      Register Reg = MI->getOperand(0).getReg();
      LIS->RemoveMachineInstrFromMaps(*MI);
      MI->eraseFromParent();
      OldMI->clearRegisterDeads(Reg);
      LIS->removeInterval(Reg);
      LIS->createAndComputeVirtRegInterval(Reg);
    }
    return false;
  }

  // Occupancy was improved for all regions.
  for (auto &Entry : InsertedMIToOldDef) {
    MachineInstr *MI = Entry.first;
    MachineInstr *OldMI = Entry.second;

    // Remove OldMI from BBLiveInMap since we are sinking it from its MBB.
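    // (BBLiveInMap is keyed by MachineInstr pointers, so erasing the entry
    // also keeps the map from holding a dangling key once OldMI is erased
    // from its parent below.)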
    DAG.BBLiveInMap.erase(OldMI);

    // Remove OldMI and update LIS.
    Register Reg = MI->getOperand(0).getReg();
    LIS->RemoveMachineInstrFromMaps(*OldMI);
    OldMI->eraseFromParent();
    LIS->removeInterval(Reg);
    LIS->createAndComputeVirtRegInterval(Reg);
  }

  // Update the live-in, register pressure, and region caches.
  for (auto Idx : ImpactedRegions) {
    DAG.LiveIns[Idx] = NewLiveIns[Idx];
    DAG.Pressure[Idx] = NewPressure[Idx];
    DAG.MBBLiveIns.erase(DAG.Regions[Idx].first->getParent());
  }
  DAG.Regions = NewRegions;
  DAG.RescheduleRegions = NewRescheduleRegions;

  SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
  MFI.increaseOccupancy(MF, ++DAG.MinOccupancy);

  return true;
}

// Copied from MachineLICM.
bool PreRARematStage::isTriviallyReMaterializable(const MachineInstr &MI) {
  if (!DAG.TII->isTriviallyReMaterializable(MI))
    return false;

  // Even when TargetInstrInfo considers the instruction rematerializable,
  // reject it if it reads any virtual register: we only sink defs whose
  // operands are guaranteed to be available at the new insertion point.
  for (const MachineOperand &MO : MI.operands())
    if (MO.isReg() && MO.isUse() && MO.getReg().isVirtual())
      return false;

  return true;
}

// When removing an MI, we have to check both the beginning and the end of its
// region. When inserting NewMI, we only have to check whether it is being
// inserted in front of a scheduling region's begin iterator; the end never
// needs updating because we only ever insert before an already existing MI.
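// For example, using the (first, second) pairs stored in RegionBoundaries: if
// MI is both first and second of an entry (a size-1 region under the
// convention used here), the entry collapses to (end(), end()); if MI is
// first, the boundary advances to next(MI) on removal or retreats to NewMI on
// insertion; if MI is second and we are removing, it retreats to prev(MI).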
void GCNScheduleDAGMILive::updateRegionBoundaries(
    SmallVectorImpl<std::pair<MachineBasicBlock::iterator,
                              MachineBasicBlock::iterator>> &RegionBoundaries,
    MachineBasicBlock::iterator MI, MachineInstr *NewMI, bool Removing) {
  unsigned I = 0, E = RegionBoundaries.size();
  // Search for the first region of the block where MI is located.
  while (I != E && MI->getParent() != RegionBoundaries[I].first->getParent())
    ++I;

  for (; I != E; ++I) {
    if (MI->getParent() != RegionBoundaries[I].first->getParent())
      return;

    if (Removing && MI == RegionBoundaries[I].first &&
        MI == RegionBoundaries[I].second) {
      // MI is in a region of size 1. After removal the region will have
      // size 0; set RegionBegin and RegionEnd to the block's past-the-end
      // iterator.
      RegionBoundaries[I] =
          std::make_pair(MI->getParent()->end(), MI->getParent()->end());
      return;
    }
    if (MI == RegionBoundaries[I].first) {
      if (Removing)
        RegionBoundaries[I] =
            std::make_pair(std::next(MI), RegionBoundaries[I].second);
      else
        // We inserted NewMI in front of the region; set the new RegionBegin
        // to NewMI.
        RegionBoundaries[I] = std::make_pair(
            MachineBasicBlock::iterator(NewMI), RegionBoundaries[I].second);
      return;
    }
    if (Removing && MI == RegionBoundaries[I].second) {
      RegionBoundaries[I] =
          std::make_pair(RegionBoundaries[I].first, std::prev(MI));
      return;
    }
  }
}