//===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This contains a MachineSchedStrategy implementation for maximizing wave
/// occupancy on GCN hardware.
//===----------------------------------------------------------------------===//

#include "GCNSchedStrategy.h"
#include "SIMachineFunctionInfo.h"

#define DEBUG_TYPE "machine-scheduler"

using namespace llvm;

GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
    const MachineSchedContext *C) :
    GenericScheduler(C), TargetOccupancy(0), HasClusteredNodes(false),
    HasExcessPressure(false), MF(nullptr) { }

void GCNMaxOccupancySchedStrategy::initialize(ScheduleDAGMI *DAG) {
  GenericScheduler::initialize(DAG);

  MF = &DAG->MF;

  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();

  // FIXME: This is also necessary because some passes that run after
  // scheduling and before regalloc increase register pressure.
  const unsigned ErrorMargin = 3;

  SGPRExcessLimit =
      Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
  VGPRExcessLimit =
      Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);

  SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
  // Set the initial TargetOccupancy to the maximum occupancy that we can
  // achieve for this function. This effectively sets a lower bound on the
  // 'Critical' register limits in the scheduler.
  TargetOccupancy = MFI.getOccupancy();
  SGPRCriticalLimit =
      std::min(ST.getMaxNumSGPRs(TargetOccupancy, true), SGPRExcessLimit);
  VGPRCriticalLimit =
      std::min(ST.getMaxNumVGPRs(TargetOccupancy), VGPRExcessLimit);

  // Subtract error margin from register limits and avoid overflow.
  SGPRCriticalLimit =
      std::min(SGPRCriticalLimit - ErrorMargin, SGPRCriticalLimit);
  VGPRCriticalLimit =
      std::min(VGPRCriticalLimit - ErrorMargin, VGPRCriticalLimit);
  SGPRExcessLimit = std::min(SGPRExcessLimit - ErrorMargin, SGPRExcessLimit);
  VGPRExcessLimit = std::min(VGPRExcessLimit - ErrorMargin, VGPRExcessLimit);
}

void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
                                     bool AtTop, const RegPressureTracker &RPTracker,
                                     const SIRegisterInfo *SRI,
                                     unsigned SGPRPressure,
                                     unsigned VGPRPressure) {

  Cand.SU = SU;
  Cand.AtTop = AtTop;

  // getDownwardPressure() and getUpwardPressure() make temporary changes to
  // the tracker, so we need to pass those functions a non-const copy.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);

  Pressure.clear();
  MaxPressure.clear();

  if (AtTop)
    TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
  else {
    // FIXME: I think for bottom-up scheduling, the register pressure is cached
    // and can be retrieved by DAG->getPressureDiff(SU).
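    // A minimal sketch of that cached lookup, assuming the generic
    // ScheduleDAGMILive::getPressureDiff() interface applies here (untested):
    //   const PressureDiff &PDiff = DAG->getPressureDiff(SU);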
    TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
  }

  unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
  unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];

  // If two instructions increase the pressure of different register sets
  // by the same amount, the generic scheduler will prefer to schedule the
  // instruction that increases the set with the fewest registers, which in
  // our case would be SGPRs. This is rarely what we want, so when we report
  // excess/critical register pressure, we do it either only for VGPRs or
  // only for SGPRs.

  // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
  const unsigned MaxVGPRPressureInc = 16;
  bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
  bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;

  // FIXME: We have to enter REG-EXCESS before we reach the actual threshold
  // to increase the likelihood we don't go over the limits. We should improve
  // the analysis to look through dependencies to find the path with the least
  // register pressure.

  // We only need to update the RPDelta for instructions that increase register
  // pressure. Instructions that decrease or keep reg pressure the same will be
  // marked as RegExcess in tryCandidate() when they are compared with
  // instructions that increase the register pressure.
  if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
    HasExcessPressure = true;
    Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
    Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
  }

  if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
    HasExcessPressure = true;
    Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
    Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
  }

  // Register pressure is considered 'CRITICAL' if it is approaching a value
  // that would reduce the wave occupancy for the execution unit. When
  // register pressure is 'CRITICAL', increasing SGPR and VGPR pressure have
  // the same cost, so we don't need to prefer one over the other.
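  // For example, if NewSGPRPressure is 2 over SGPRCriticalLimit and
  // NewVGPRPressure is 5 over VGPRCriticalLimit, the VGPR set is reported
  // below because its delta is larger.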

  int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
  int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;

  if (SGPRDelta >= 0 || VGPRDelta >= 0) {
    HasExcessPressure = true;
    if (SGPRDelta > VGPRDelta) {
      Cand.RPDelta.CriticalMax =
          PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
      Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
    } else {
      Cand.RPDelta.CriticalMax =
          PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
      Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
    }
  }
}

// This function is mostly cut and pasted from
// GenericScheduler::pickNodeFromQueue().
void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
                                         const CandPolicy &ZonePolicy,
                                         const RegPressureTracker &RPTracker,
                                         SchedCandidate &Cand) {
  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
  ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
  unsigned SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
  unsigned VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
  ReadyQueue &Q = Zone.Available;
  for (SUnit *SU : Q) {
    SchedCandidate TryCand(ZonePolicy);
    initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI,
                  SGPRPressure, VGPRPressure);
    // Pass SchedBoundary only when comparing nodes from the same boundary.
    SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
    GenericScheduler::tryCandidate(Cand, TryCand, ZoneArg);
    if (TryCand.Reason != NoCand) {
      // Initialize resource delta if needed in case future heuristics query it.
      if (TryCand.ResDelta == SchedResourceDelta())
        TryCand.initResourceDelta(Zone.DAG, SchedModel);
      Cand.setBest(TryCand);
      LLVM_DEBUG(traceCandidate(Cand));
    }
  }
}

// This function is mostly cut and pasted from
// GenericScheduler::pickNodeBidirectional().
SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
  if (SUnit *SU = Bot.pickOnlyChoice()) {
    IsTopNode = false;
    return SU;
  }
  if (SUnit *SU = Top.pickOnlyChoice()) {
    IsTopNode = true;
    return SU;
  }
  // Set the bottom-up policy based on the state of the current bottom zone and
  // the instructions outside the zone, including the top zone.
  CandPolicy BotPolicy;
  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
  // Set the top-down policy based on the state of the current top zone and
  // the instructions outside the zone, including the bottom zone.
  CandPolicy TopPolicy;
  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);

  // See if BotCand is still valid (because we previously scheduled from Top).
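  // The cached candidate is reusable only if its node has not been scheduled
  // in the meantime and the zone policy is unchanged; otherwise it is
  // recomputed from the ready queue.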
  LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
      BotCand.Policy != BotPolicy) {
    BotCand.reset(CandPolicy());
    pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
    assert(BotCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    LLVM_DEBUG(traceCandidate(BotCand));
#ifndef NDEBUG
    if (VerifyScheduling) {
      SchedCandidate TCand;
      TCand.reset(CandPolicy());
      pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
      assert(TCand.SU == BotCand.SU &&
             "Last pick result should correspond to re-picking right now");
    }
#endif
  }

  // Check if the top Q has a better candidate.
  LLVM_DEBUG(dbgs() << "Picking from Top:\n");
  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
      TopCand.Policy != TopPolicy) {
    TopCand.reset(CandPolicy());
    pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
    assert(TopCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    LLVM_DEBUG(traceCandidate(TopCand));
#ifndef NDEBUG
    if (VerifyScheduling) {
      SchedCandidate TCand;
      TCand.reset(CandPolicy());
      pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
      assert(TCand.SU == TopCand.SU &&
             "Last pick result should correspond to re-picking right now");
    }
#endif
  }

  // Pick best from BotCand and TopCand.
  LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand);
             dbgs() << "Bot Cand: "; traceCandidate(BotCand););
  SchedCandidate Cand = BotCand;
  TopCand.Reason = NoCand;
  GenericScheduler::tryCandidate(Cand, TopCand, nullptr);
  if (TopCand.Reason != NoCand) {
    Cand.setBest(TopCand);
  }
  LLVM_DEBUG(dbgs() << "Picking: "; traceCandidate(Cand););

  IsTopNode = Cand.AtTop;
  return Cand.SU;
}

// This function is mostly cut and pasted from
// GenericScheduler::pickNode().
SUnit *GCNMaxOccupancySchedStrategy::pickNode(bool &IsTopNode) {
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return nullptr;
  }
  SUnit *SU;
  do {
    if (RegionPolicy.OnlyTopDown) {
      SU = Top.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        TopCand.reset(NoPolicy);
        pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
        assert(TopCand.Reason != NoCand && "failed to find a candidate");
        SU = TopCand.SU;
      }
      IsTopNode = true;
    } else if (RegionPolicy.OnlyBottomUp) {
      SU = Bot.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        BotCand.reset(NoPolicy);
        pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
        assert(BotCand.Reason != NoCand && "failed to find a candidate");
        SU = BotCand.SU;
      }
      IsTopNode = false;
    } else {
      SU = pickNodeBidirectional(IsTopNode);
    }
  } while (SU->isScheduled);

  if (SU->isTopReady())
    Top.removeReady(SU);
  if (SU->isBottomReady())
    Bot.removeReady(SU);

  if (!HasClusteredNodes && SU->getInstr()->mayLoadOrStore()) {
    for (SDep &Dep : SU->Preds) {
      if (Dep.isCluster()) {
        HasClusteredNodes = true;
        break;
      }
    }
  }

  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
                    << *SU->getInstr());
  return SU;
}

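// GCNScheduleDAGMILive drives the staged scheduling below: a Collect pass
// that only records regions, the InitialSchedule pass, and the optional
// UnclusteredReschedule and ClusteredLowOccupancyReschedule retries (see
// finalizeSchedule()).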
GCNScheduleDAGMILive::GCNScheduleDAGMILive(MachineSchedContext *C,
                        std::unique_ptr<MachineSchedStrategy> S) :
    ScheduleDAGMILive(C, std::move(S)),
    ST(MF.getSubtarget<GCNSubtarget>()),
    MFI(*MF.getInfo<SIMachineFunctionInfo>()),
    StartingOccupancy(MFI.getOccupancy()),
    MinOccupancy(StartingOccupancy), Stage(Collect), RegionIdx(0) {

  LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
}

void GCNScheduleDAGMILive::schedule() {
  if (Stage == Collect) {
    // Just record regions at the first pass.
    Regions.push_back(std::make_pair(RegionBegin, RegionEnd));
    return;
  }

  std::vector<MachineInstr*> Unsched;
  Unsched.reserve(NumRegionInstrs);
  for (auto &I : *this) {
    Unsched.push_back(&I);
  }

  GCNRegPressure PressureBefore;
  if (LIS) {
    PressureBefore = Pressure[RegionIdx];

    LLVM_DEBUG(dbgs() << "Pressure before scheduling:\nRegion live-ins:";
               GCNRPTracker::printLiveRegs(dbgs(), LiveIns[RegionIdx], MRI);
               dbgs() << "Region live-in pressure: ";
               llvm::getRegPressure(MRI, LiveIns[RegionIdx]).print(dbgs());
               dbgs() << "Region register pressure: ";
               PressureBefore.print(dbgs()));
  }

  GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl;
  // Set HasClusteredNodes to true for late stages where we have already
  // collected it. That way pickNode() will not scan SDeps when not needed.
  S.HasClusteredNodes = Stage > InitialSchedule;
  S.HasExcessPressure = false;
  ScheduleDAGMILive::schedule();
  Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd);
  RescheduleRegions[RegionIdx] = false;
  if (Stage == InitialSchedule && S.HasClusteredNodes)
    RegionsWithClusters[RegionIdx] = true;
  if (S.HasExcessPressure)
    RegionsWithHighRP[RegionIdx] = true;

  if (!LIS)
    return;

  // Check the results of scheduling.
  auto PressureAfter = getRealRegPressure();

  LLVM_DEBUG(dbgs() << "Pressure after scheduling: ";
             PressureAfter.print(dbgs()));

  if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit &&
      PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) <= S.VGPRCriticalLimit) {
    Pressure[RegionIdx] = PressureAfter;
    LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n");
    return;
  }

  unsigned WavesAfter =
      std::min(S.TargetOccupancy, PressureAfter.getOccupancy(ST));
  unsigned WavesBefore =
      std::min(S.TargetOccupancy, PressureBefore.getOccupancy(ST));
  LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore
                    << ", after " << WavesAfter << ".\n");

  // We may not be able to keep the current target occupancy because of the
  // just-scheduled region. We might still be able to revert scheduling if the
  // occupancy before was higher, or if the current schedule has register
  // pressure higher than the excess limits, which could lead to more spilling.
  unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);
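  // For example, if the region dropped from 8 waves to 7, NewOccupancy stays
  // at 8 and, unless the memory-bound exception below applies, the schedule
  // becomes a candidate for reverting further down.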
  // Allow memory bound functions to drop to 4 waves if not limited by an
  // attribute.
  if (WavesAfter < WavesBefore && WavesAfter < MinOccupancy &&
      WavesAfter >= MFI.getMinAllowedOccupancy()) {
    LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "
                      << MFI.getMinAllowedOccupancy() << " waves\n");
    NewOccupancy = WavesAfter;
  }

  if (NewOccupancy < MinOccupancy) {
    MinOccupancy = NewOccupancy;
    MFI.limitOccupancy(MinOccupancy);
    LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
                      << MinOccupancy << ".\n");
  }

  unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
  unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
  if (PressureAfter.getVGPRNum(false) > MaxVGPRs ||
      PressureAfter.getAGPRNum() > MaxVGPRs ||
      PressureAfter.getSGPRNum() > MaxSGPRs) {
    RescheduleRegions[RegionIdx] = true;
    RegionsWithHighRP[RegionIdx] = true;
  }

  // If this condition is true, then either the occupancy before and after
  // scheduling is the same, or we are allowing the occupancy to drop because
  // the function is memory bound. Even if we are OK with the current occupancy,
  // we still need to verify that we will not introduce any extra chance of
  // spilling.
  if (WavesAfter >= MinOccupancy) {
    if (Stage == UnclusteredReschedule &&
        !PressureAfter.less(ST, PressureBefore)) {
      LLVM_DEBUG(dbgs() << "Unclustered reschedule did not help.\n");
    } else if (WavesAfter > MFI.getMinWavesPerEU() ||
               PressureAfter.less(ST, PressureBefore) ||
               !RescheduleRegions[RegionIdx]) {
      Pressure[RegionIdx] = PressureAfter;
      if (!RegionsWithClusters[RegionIdx] &&
          (Stage + 1) == UnclusteredReschedule)
        RescheduleRegions[RegionIdx] = false;
      return;
    } else {
      LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
    }
  }

  LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
  RescheduleRegions[RegionIdx] = RegionsWithClusters[RegionIdx] ||
                                 (Stage + 1) != UnclusteredReschedule;
  RegionEnd = RegionBegin;
  for (MachineInstr *MI : Unsched) {
    if (MI->isDebugInstr())
      continue;

    if (MI->getIterator() != RegionEnd) {
      BB->remove(MI);
      BB->insert(RegionEnd, MI);
      if (!MI->isDebugInstr())
        LIS->handleMove(*MI, true);
    }
    // Reset read-undef flags and update them later.
    for (auto &Op : MI->operands())
      if (Op.isReg() && Op.isDef())
        Op.setIsUndef(false);
    RegisterOperands RegOpers;
    RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
    if (!MI->isDebugInstr()) {
      if (ShouldTrackLaneMasks) {
        // Adjust liveness and add missing dead+read-undef flags.
        SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
        RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
      } else {
        // Adjust for missing dead-def flags.
        RegOpers.detectDeadDefs(*MI, *LIS);
      }
    }
    RegionEnd = MI->getIterator();
    ++RegionEnd;
    LLVM_DEBUG(dbgs() << "Scheduling " << *MI);
  }
  RegionBegin = Unsched.front()->getIterator();
  Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd);

  placeDebugValues();
}

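// Recompute the region's maximum pressure from the live intervals with a
// fresh downward tracker seeded by the cached region live-ins, rather than
// relying on the scheduler's incremental bookkeeping.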
GCNRegPressure GCNScheduleDAGMILive::getRealRegPressure() const {
  GCNDownwardRPTracker RPTracker(*LIS);
  RPTracker.advance(begin(), end(), &LiveIns[RegionIdx]);
  return RPTracker.moveMaxPressure();
}

void GCNScheduleDAGMILive::computeBlockPressure(const MachineBasicBlock *MBB) {
  GCNDownwardRPTracker RPTracker(*LIS);

  // If the block has only one successor then live-ins of that successor are
  // live-outs of the current block. We can reuse the calculated live set if
  // the successor will be sent to scheduling past the current block.
  const MachineBasicBlock *OnlySucc = nullptr;
  if (MBB->succ_size() == 1 && !(*MBB->succ_begin())->empty()) {
    SlotIndexes *Ind = LIS->getSlotIndexes();
    if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(*MBB->succ_begin()))
      OnlySucc = *MBB->succ_begin();
  }

  // The scheduler sends regions from the end of the block upwards.
  size_t CurRegion = RegionIdx;
  for (size_t E = Regions.size(); CurRegion != E; ++CurRegion)
    if (Regions[CurRegion].first->getParent() != MBB)
      break;
  --CurRegion;

  auto I = MBB->begin();
  auto LiveInIt = MBBLiveIns.find(MBB);
  if (LiveInIt != MBBLiveIns.end()) {
    auto LiveIn = std::move(LiveInIt->second);
    RPTracker.reset(*MBB->begin(), &LiveIn);
    MBBLiveIns.erase(LiveInIt);
  } else {
    auto &Rgn = Regions[CurRegion];
    I = Rgn.first;
    auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
    auto LRS = BBLiveInMap.lookup(NonDbgMI);
#ifdef EXPENSIVE_CHECKS
    assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS));
#endif
    RPTracker.reset(*I, &LRS);
  }

  for (;;) {
    I = RPTracker.getNext();

    if (Regions[CurRegion].first == I) {
      LiveIns[CurRegion] = RPTracker.getLiveRegs();
      RPTracker.clearMaxPressure();
    }

    if (Regions[CurRegion].second == I) {
      Pressure[CurRegion] = RPTracker.moveMaxPressure();
      if (CurRegion-- == RegionIdx)
        break;
    }
    RPTracker.advanceToNext();
    RPTracker.advanceBeforeNext();
  }

  if (OnlySucc) {
    if (I != MBB->end()) {
      RPTracker.advanceToNext();
      RPTracker.advance(MBB->end());
    }
    RPTracker.reset(*OnlySucc->begin(), &RPTracker.getLiveRegs());
    RPTracker.advanceBeforeNext();
    MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs();
  }
}

DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
GCNScheduleDAGMILive::getBBLiveInMap() const {
  assert(!Regions.empty());
  std::vector<MachineInstr *> BBStarters;
  BBStarters.reserve(Regions.size());
  auto I = Regions.rbegin(), E = Regions.rend();
  auto *BB = I->first->getParent();
  do {
    auto *MI = &*skipDebugInstructionsForward(I->first, I->second);
    BBStarters.push_back(MI);
    do {
      ++I;
    } while (I != E && I->first->getParent() == BB);
  } while (I != E);
  return getLiveRegMap(BBStarters, false /*After*/, *LIS);
}

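// The staged driver below reruns scheduling per stage: on the unclustered
// pass a region is skipped when its RescheduleRegions bit is clear, and on
// the low-occupancy pass it is skipped unless it contains clusters or was
// marked as having high register pressure.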
void GCNScheduleDAGMILive::finalizeSchedule() {
  LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");

  LiveIns.resize(Regions.size());
  Pressure.resize(Regions.size());
  RescheduleRegions.resize(Regions.size());
  RegionsWithClusters.resize(Regions.size());
  RegionsWithHighRP.resize(Regions.size());
  RescheduleRegions.set();
  RegionsWithClusters.reset();
  RegionsWithHighRP.reset();

  if (!Regions.empty())
    BBLiveInMap = getBBLiveInMap();

  std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;

  do {
    Stage++;
    RegionIdx = 0;
    MachineBasicBlock *MBB = nullptr;

    if (Stage > InitialSchedule) {
      if (!LIS)
        break;

      // Retry function scheduling if the resulting occupancy is lower than
      // the one used for the first-pass scheduling. This gives more freedom
      // to schedule low register pressure blocks.
      // Code is partially copied from MachineSchedulerBase::scheduleRegions().

      if (Stage == UnclusteredReschedule) {
        if (RescheduleRegions.none())
          continue;
        LLVM_DEBUG(dbgs() <<
                   "Retrying function scheduling without clustering.\n");
      }

      if (Stage == ClusteredLowOccupancyReschedule) {
        if (StartingOccupancy <= MinOccupancy)
          break;

        LLVM_DEBUG(
            dbgs()
            << "Retrying function scheduling with lowest recorded occupancy "
            << MinOccupancy << ".\n");
      }
    }

    if (Stage == UnclusteredReschedule)
      SavedMutations.swap(Mutations);

    for (auto Region : Regions) {
      if ((Stage == UnclusteredReschedule && !RescheduleRegions[RegionIdx]) ||
          (Stage == ClusteredLowOccupancyReschedule &&
           !RegionsWithClusters[RegionIdx] && !RegionsWithHighRP[RegionIdx])) {
        ++RegionIdx;
        continue;
      }

      RegionBegin = Region.first;
      RegionEnd = Region.second;

      if (RegionBegin->getParent() != MBB) {
        if (MBB) finishBlock();
        MBB = RegionBegin->getParent();
        startBlock(MBB);
        if (Stage == InitialSchedule)
          computeBlockPressure(MBB);
      }

      unsigned NumRegionInstrs = std::distance(begin(), end());
      enterRegion(MBB, begin(), end(), NumRegionInstrs);

      // Skip empty scheduling regions (0 or 1 schedulable instructions).
      if (begin() == end() || begin() == std::prev(end())) {
        exitRegion();
        continue;
      }

      LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
      LLVM_DEBUG(dbgs() << MF.getName() << ":" << printMBBReference(*MBB) << " "
                        << MBB->getName() << "\n From: " << *begin()
                        << " To: ";
                 if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
                 else dbgs() << "End";
                 dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');

      schedule();

      exitRegion();
      ++RegionIdx;
    }
    finishBlock();

    if (Stage == UnclusteredReschedule)
      SavedMutations.swap(Mutations);
  } while (Stage != LastStage);
}