1 //===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// This contains a MachineSchedStrategy implementation for maximizing wave 11 /// occupancy on GCN hardware. 12 //===----------------------------------------------------------------------===// 13 14 #include "GCNSchedStrategy.h" 15 #include "SIMachineFunctionInfo.h" 16 17 #define DEBUG_TYPE "machine-scheduler" 18 19 using namespace llvm; 20 21 GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy( 22 const MachineSchedContext *C) : 23 GenericScheduler(C), TargetOccupancy(0), HasClusteredNodes(false), 24 HasExcessPressure(false), MF(nullptr) { } 25 26 void GCNMaxOccupancySchedStrategy::initialize(ScheduleDAGMI *DAG) { 27 GenericScheduler::initialize(DAG); 28 29 const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI); 30 31 MF = &DAG->MF; 32 33 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>(); 34 35 // FIXME: This is also necessary, because some passes that run after 36 // scheduling and before regalloc increase register pressure. 37 const int ErrorMargin = 3; 38 39 SGPRExcessLimit = Context->RegClassInfo 40 ->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass) - ErrorMargin; 41 VGPRExcessLimit = Context->RegClassInfo 42 ->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass) - ErrorMargin; 43 if (TargetOccupancy) { 44 SGPRCriticalLimit = ST.getMaxNumSGPRs(TargetOccupancy, true); 45 VGPRCriticalLimit = ST.getMaxNumVGPRs(TargetOccupancy); 46 } else { 47 SGPRCriticalLimit = SRI->getRegPressureSetLimit(DAG->MF, 48 AMDGPU::RegisterPressureSets::SReg_32); 49 VGPRCriticalLimit = SRI->getRegPressureSetLimit(DAG->MF, 50 AMDGPU::RegisterPressureSets::VGPR_32); 51 } 52 53 SGPRCriticalLimit -= ErrorMargin; 54 VGPRCriticalLimit -= ErrorMargin; 55 } 56 57 void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU, 58 bool AtTop, const RegPressureTracker &RPTracker, 59 const SIRegisterInfo *SRI, 60 unsigned SGPRPressure, 61 unsigned VGPRPressure) { 62 63 Cand.SU = SU; 64 Cand.AtTop = AtTop; 65 66 // getDownwardPressure() and getUpwardPressure() make temporary changes to 67 // the tracker, so we need to pass those function a non-const copy. 68 RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker); 69 70 Pressure.clear(); 71 MaxPressure.clear(); 72 73 if (AtTop) 74 TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure); 75 else { 76 // FIXME: I think for bottom up scheduling, the register pressure is cached 77 // and can be retrieved by DAG->getPressureDif(SU). 78 TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure); 79 } 80 81 unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32]; 82 unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32]; 83 84 // If two instructions increase the pressure of different register sets 85 // by the same amount, the generic scheduler will prefer to schedule the 86 // instruction that increases the set with the least amount of registers, 87 // which in our case would be SGPRs. This is rarely what we want, so 88 // when we report excess/critical register pressure, we do it either 89 // only for VGPRs or only for SGPRs. 90 91 // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs. 92 const unsigned MaxVGPRPressureInc = 16; 93 bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit; 94 bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit; 95 96 97 // FIXME: We have to enter REG-EXCESS before we reach the actual threshold 98 // to increase the likelihood we don't go over the limits. We should improve 99 // the analysis to look through dependencies to find the path with the least 100 // register pressure. 101 102 // We only need to update the RPDelta for instructions that increase register 103 // pressure. Instructions that decrease or keep reg pressure the same will be 104 // marked as RegExcess in tryCandidate() when they are compared with 105 // instructions that increase the register pressure. 106 if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) { 107 HasExcessPressure = true; 108 Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::VGPR_32); 109 Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit); 110 } 111 112 if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) { 113 HasExcessPressure = true; 114 Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::SReg_32); 115 Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit); 116 } 117 118 // Register pressure is considered 'CRITICAL' if it is approaching a value 119 // that would reduce the wave occupancy for the execution unit. When 120 // register pressure is 'CRITICAL', increading SGPR and VGPR pressure both 121 // has the same cost, so we don't need to prefer one over the other. 122 123 int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit; 124 int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit; 125 126 if (SGPRDelta >= 0 || VGPRDelta >= 0) { 127 HasExcessPressure = true; 128 if (SGPRDelta > VGPRDelta) { 129 Cand.RPDelta.CriticalMax = 130 PressureChange(AMDGPU::RegisterPressureSets::SReg_32); 131 Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta); 132 } else { 133 Cand.RPDelta.CriticalMax = 134 PressureChange(AMDGPU::RegisterPressureSets::VGPR_32); 135 Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta); 136 } 137 } 138 } 139 140 // This function is mostly cut and pasted from 141 // GenericScheduler::pickNodeFromQueue() 142 void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(SchedBoundary &Zone, 143 const CandPolicy &ZonePolicy, 144 const RegPressureTracker &RPTracker, 145 SchedCandidate &Cand) { 146 const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI); 147 ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos(); 148 unsigned SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32]; 149 unsigned VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32]; 150 ReadyQueue &Q = Zone.Available; 151 for (SUnit *SU : Q) { 152 153 SchedCandidate TryCand(ZonePolicy); 154 initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, 155 SGPRPressure, VGPRPressure); 156 // Pass SchedBoundary only when comparing nodes from the same boundary. 157 SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr; 158 GenericScheduler::tryCandidate(Cand, TryCand, ZoneArg); 159 if (TryCand.Reason != NoCand) { 160 // Initialize resource delta if needed in case future heuristics query it. 161 if (TryCand.ResDelta == SchedResourceDelta()) 162 TryCand.initResourceDelta(Zone.DAG, SchedModel); 163 Cand.setBest(TryCand); 164 LLVM_DEBUG(traceCandidate(Cand)); 165 } 166 } 167 } 168 169 // This function is mostly cut and pasted from 170 // GenericScheduler::pickNodeBidirectional() 171 SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) { 172 // Schedule as far as possible in the direction of no choice. This is most 173 // efficient, but also provides the best heuristics for CriticalPSets. 174 if (SUnit *SU = Bot.pickOnlyChoice()) { 175 IsTopNode = false; 176 return SU; 177 } 178 if (SUnit *SU = Top.pickOnlyChoice()) { 179 IsTopNode = true; 180 return SU; 181 } 182 // Set the bottom-up policy based on the state of the current bottom zone and 183 // the instructions outside the zone, including the top zone. 184 CandPolicy BotPolicy; 185 setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top); 186 // Set the top-down policy based on the state of the current top zone and 187 // the instructions outside the zone, including the bottom zone. 188 CandPolicy TopPolicy; 189 setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot); 190 191 // See if BotCand is still valid (because we previously scheduled from Top). 192 LLVM_DEBUG(dbgs() << "Picking from Bot:\n"); 193 if (!BotCand.isValid() || BotCand.SU->isScheduled || 194 BotCand.Policy != BotPolicy) { 195 BotCand.reset(CandPolicy()); 196 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand); 197 assert(BotCand.Reason != NoCand && "failed to find the first candidate"); 198 } else { 199 LLVM_DEBUG(traceCandidate(BotCand)); 200 #ifndef NDEBUG 201 if (VerifyScheduling) { 202 SchedCandidate TCand; 203 TCand.reset(CandPolicy()); 204 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand); 205 assert(TCand.SU == BotCand.SU && 206 "Last pick result should correspond to re-picking right now"); 207 } 208 #endif 209 } 210 211 // Check if the top Q has a better candidate. 212 LLVM_DEBUG(dbgs() << "Picking from Top:\n"); 213 if (!TopCand.isValid() || TopCand.SU->isScheduled || 214 TopCand.Policy != TopPolicy) { 215 TopCand.reset(CandPolicy()); 216 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand); 217 assert(TopCand.Reason != NoCand && "failed to find the first candidate"); 218 } else { 219 LLVM_DEBUG(traceCandidate(TopCand)); 220 #ifndef NDEBUG 221 if (VerifyScheduling) { 222 SchedCandidate TCand; 223 TCand.reset(CandPolicy()); 224 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand); 225 assert(TCand.SU == TopCand.SU && 226 "Last pick result should correspond to re-picking right now"); 227 } 228 #endif 229 } 230 231 // Pick best from BotCand and TopCand. 232 LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand); 233 dbgs() << "Bot Cand: "; traceCandidate(BotCand);); 234 SchedCandidate Cand = BotCand; 235 TopCand.Reason = NoCand; 236 GenericScheduler::tryCandidate(Cand, TopCand, nullptr); 237 if (TopCand.Reason != NoCand) { 238 Cand.setBest(TopCand); 239 } 240 LLVM_DEBUG(dbgs() << "Picking: "; traceCandidate(Cand);); 241 242 IsTopNode = Cand.AtTop; 243 return Cand.SU; 244 } 245 246 // This function is mostly cut and pasted from 247 // GenericScheduler::pickNode() 248 SUnit *GCNMaxOccupancySchedStrategy::pickNode(bool &IsTopNode) { 249 if (DAG->top() == DAG->bottom()) { 250 assert(Top.Available.empty() && Top.Pending.empty() && 251 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage"); 252 return nullptr; 253 } 254 SUnit *SU; 255 do { 256 if (RegionPolicy.OnlyTopDown) { 257 SU = Top.pickOnlyChoice(); 258 if (!SU) { 259 CandPolicy NoPolicy; 260 TopCand.reset(NoPolicy); 261 pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand); 262 assert(TopCand.Reason != NoCand && "failed to find a candidate"); 263 SU = TopCand.SU; 264 } 265 IsTopNode = true; 266 } else if (RegionPolicy.OnlyBottomUp) { 267 SU = Bot.pickOnlyChoice(); 268 if (!SU) { 269 CandPolicy NoPolicy; 270 BotCand.reset(NoPolicy); 271 pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand); 272 assert(BotCand.Reason != NoCand && "failed to find a candidate"); 273 SU = BotCand.SU; 274 } 275 IsTopNode = false; 276 } else { 277 SU = pickNodeBidirectional(IsTopNode); 278 } 279 } while (SU->isScheduled); 280 281 if (SU->isTopReady()) 282 Top.removeReady(SU); 283 if (SU->isBottomReady()) 284 Bot.removeReady(SU); 285 286 if (!HasClusteredNodes && SU->getInstr()->mayLoadOrStore()) { 287 for (SDep &Dep : SU->Preds) { 288 if (Dep.isCluster()) { 289 HasClusteredNodes = true; 290 break; 291 } 292 } 293 } 294 295 LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " 296 << *SU->getInstr()); 297 return SU; 298 } 299 300 GCNScheduleDAGMILive::GCNScheduleDAGMILive(MachineSchedContext *C, 301 std::unique_ptr<MachineSchedStrategy> S) : 302 ScheduleDAGMILive(C, std::move(S)), 303 ST(MF.getSubtarget<GCNSubtarget>()), 304 MFI(*MF.getInfo<SIMachineFunctionInfo>()), 305 StartingOccupancy(MFI.getOccupancy()), 306 MinOccupancy(StartingOccupancy), Stage(Collect), RegionIdx(0) { 307 308 LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n"); 309 } 310 311 void GCNScheduleDAGMILive::schedule() { 312 if (Stage == Collect) { 313 // Just record regions at the first pass. 314 Regions.push_back(std::make_pair(RegionBegin, RegionEnd)); 315 return; 316 } 317 318 std::vector<MachineInstr*> Unsched; 319 Unsched.reserve(NumRegionInstrs); 320 for (auto &I : *this) { 321 Unsched.push_back(&I); 322 } 323 324 GCNRegPressure PressureBefore; 325 if (LIS) { 326 PressureBefore = Pressure[RegionIdx]; 327 328 LLVM_DEBUG(dbgs() << "Pressure before scheduling:\nRegion live-ins:"; 329 GCNRPTracker::printLiveRegs(dbgs(), LiveIns[RegionIdx], MRI); 330 dbgs() << "Region live-in pressure: "; 331 llvm::getRegPressure(MRI, LiveIns[RegionIdx]).print(dbgs()); 332 dbgs() << "Region register pressure: "; 333 PressureBefore.print(dbgs())); 334 } 335 336 GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl; 337 // Set HasClusteredNodes to true for late stages where we have already 338 // collected it. That way pickNode() will not scan SDep's when not needed. 339 S.HasClusteredNodes = Stage > InitialSchedule; 340 S.HasExcessPressure = false; 341 ScheduleDAGMILive::schedule(); 342 Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd); 343 RescheduleRegions[RegionIdx] = false; 344 if (Stage == InitialSchedule && S.HasClusteredNodes) 345 RegionsWithClusters[RegionIdx] = true; 346 if (S.HasExcessPressure) 347 RegionsWithHighRP[RegionIdx] = true; 348 349 if (!LIS) 350 return; 351 352 // Check the results of scheduling. 353 auto PressureAfter = getRealRegPressure(); 354 355 LLVM_DEBUG(dbgs() << "Pressure after scheduling: "; 356 PressureAfter.print(dbgs())); 357 358 if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit && 359 PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) <= S.VGPRCriticalLimit) { 360 Pressure[RegionIdx] = PressureAfter; 361 LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n"); 362 return; 363 } 364 unsigned Occ = MFI.getOccupancy(); 365 unsigned WavesAfter = std::min(Occ, PressureAfter.getOccupancy(ST)); 366 unsigned WavesBefore = std::min(Occ, PressureBefore.getOccupancy(ST)); 367 LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore 368 << ", after " << WavesAfter << ".\n"); 369 370 // We could not keep current target occupancy because of the just scheduled 371 // region. Record new occupancy for next scheduling cycle. 372 unsigned NewOccupancy = std::max(WavesAfter, WavesBefore); 373 // Allow memory bound functions to drop to 4 waves if not limited by an 374 // attribute. 375 if (WavesAfter < WavesBefore && WavesAfter < MinOccupancy && 376 WavesAfter >= MFI.getMinAllowedOccupancy()) { 377 LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to " 378 << MFI.getMinAllowedOccupancy() << " waves\n"); 379 NewOccupancy = WavesAfter; 380 } 381 if (NewOccupancy < MinOccupancy) { 382 MinOccupancy = NewOccupancy; 383 MFI.limitOccupancy(MinOccupancy); 384 LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to " 385 << MinOccupancy << ".\n"); 386 } 387 388 unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF); 389 unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF); 390 if (PressureAfter.getVGPRNum(false) > MaxVGPRs || 391 PressureAfter.getAGPRNum() > MaxVGPRs || 392 PressureAfter.getSGPRNum() > MaxSGPRs) { 393 RescheduleRegions[RegionIdx] = true; 394 RegionsWithHighRP[RegionIdx] = true; 395 } 396 397 if (WavesAfter >= MinOccupancy) { 398 if (Stage == UnclusteredReschedule && 399 !PressureAfter.less(ST, PressureBefore)) { 400 LLVM_DEBUG(dbgs() << "Unclustered reschedule did not help.\n"); 401 } else if (WavesAfter > MFI.getMinWavesPerEU() || 402 PressureAfter.less(ST, PressureBefore) || 403 !RescheduleRegions[RegionIdx]) { 404 Pressure[RegionIdx] = PressureAfter; 405 if (!RegionsWithClusters[RegionIdx] && 406 (Stage + 1) == UnclusteredReschedule) 407 RescheduleRegions[RegionIdx] = false; 408 return; 409 } else { 410 LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n"); 411 } 412 } 413 414 LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n"); 415 RescheduleRegions[RegionIdx] = RegionsWithClusters[RegionIdx] || 416 (Stage + 1) != UnclusteredReschedule; 417 RegionEnd = RegionBegin; 418 for (MachineInstr *MI : Unsched) { 419 if (MI->isDebugInstr()) 420 continue; 421 422 if (MI->getIterator() != RegionEnd) { 423 BB->remove(MI); 424 BB->insert(RegionEnd, MI); 425 if (!MI->isDebugInstr()) 426 LIS->handleMove(*MI, true); 427 } 428 // Reset read-undef flags and update them later. 429 for (auto &Op : MI->operands()) 430 if (Op.isReg() && Op.isDef()) 431 Op.setIsUndef(false); 432 RegisterOperands RegOpers; 433 RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false); 434 if (!MI->isDebugInstr()) { 435 if (ShouldTrackLaneMasks) { 436 // Adjust liveness and add missing dead+read-undef flags. 437 SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot(); 438 RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI); 439 } else { 440 // Adjust for missing dead-def flags. 441 RegOpers.detectDeadDefs(*MI, *LIS); 442 } 443 } 444 RegionEnd = MI->getIterator(); 445 ++RegionEnd; 446 LLVM_DEBUG(dbgs() << "Scheduling " << *MI); 447 } 448 RegionBegin = Unsched.front()->getIterator(); 449 Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd); 450 451 placeDebugValues(); 452 } 453 454 GCNRegPressure GCNScheduleDAGMILive::getRealRegPressure() const { 455 GCNDownwardRPTracker RPTracker(*LIS); 456 RPTracker.advance(begin(), end(), &LiveIns[RegionIdx]); 457 return RPTracker.moveMaxPressure(); 458 } 459 460 void GCNScheduleDAGMILive::computeBlockPressure(const MachineBasicBlock *MBB) { 461 GCNDownwardRPTracker RPTracker(*LIS); 462 463 // If the block has the only successor then live-ins of that successor are 464 // live-outs of the current block. We can reuse calculated live set if the 465 // successor will be sent to scheduling past current block. 466 const MachineBasicBlock *OnlySucc = nullptr; 467 if (MBB->succ_size() == 1 && !(*MBB->succ_begin())->empty()) { 468 SlotIndexes *Ind = LIS->getSlotIndexes(); 469 if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(*MBB->succ_begin())) 470 OnlySucc = *MBB->succ_begin(); 471 } 472 473 // Scheduler sends regions from the end of the block upwards. 474 size_t CurRegion = RegionIdx; 475 for (size_t E = Regions.size(); CurRegion != E; ++CurRegion) 476 if (Regions[CurRegion].first->getParent() != MBB) 477 break; 478 --CurRegion; 479 480 auto I = MBB->begin(); 481 auto LiveInIt = MBBLiveIns.find(MBB); 482 if (LiveInIt != MBBLiveIns.end()) { 483 auto LiveIn = std::move(LiveInIt->second); 484 RPTracker.reset(*MBB->begin(), &LiveIn); 485 MBBLiveIns.erase(LiveInIt); 486 } else { 487 auto &Rgn = Regions[CurRegion]; 488 I = Rgn.first; 489 auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second); 490 auto LRS = BBLiveInMap.lookup(NonDbgMI); 491 #ifdef EXPENSIVE_CHECKS 492 assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS)); 493 #endif 494 RPTracker.reset(*I, &LRS); 495 } 496 497 for ( ; ; ) { 498 I = RPTracker.getNext(); 499 500 if (Regions[CurRegion].first == I) { 501 LiveIns[CurRegion] = RPTracker.getLiveRegs(); 502 RPTracker.clearMaxPressure(); 503 } 504 505 if (Regions[CurRegion].second == I) { 506 Pressure[CurRegion] = RPTracker.moveMaxPressure(); 507 if (CurRegion-- == RegionIdx) 508 break; 509 } 510 RPTracker.advanceToNext(); 511 RPTracker.advanceBeforeNext(); 512 } 513 514 if (OnlySucc) { 515 if (I != MBB->end()) { 516 RPTracker.advanceToNext(); 517 RPTracker.advance(MBB->end()); 518 } 519 RPTracker.reset(*OnlySucc->begin(), &RPTracker.getLiveRegs()); 520 RPTracker.advanceBeforeNext(); 521 MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs(); 522 } 523 } 524 525 DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> 526 GCNScheduleDAGMILive::getBBLiveInMap() const { 527 assert(!Regions.empty()); 528 std::vector<MachineInstr *> BBStarters; 529 BBStarters.reserve(Regions.size()); 530 auto I = Regions.rbegin(), E = Regions.rend(); 531 auto *BB = I->first->getParent(); 532 do { 533 auto *MI = &*skipDebugInstructionsForward(I->first, I->second); 534 BBStarters.push_back(MI); 535 do { 536 ++I; 537 } while (I != E && I->first->getParent() == BB); 538 } while (I != E); 539 return getLiveRegMap(BBStarters, false /*After*/, *LIS); 540 } 541 542 void GCNScheduleDAGMILive::finalizeSchedule() { 543 GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl; 544 LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n"); 545 546 LiveIns.resize(Regions.size()); 547 Pressure.resize(Regions.size()); 548 RescheduleRegions.resize(Regions.size()); 549 RegionsWithClusters.resize(Regions.size()); 550 RegionsWithHighRP.resize(Regions.size()); 551 RescheduleRegions.set(); 552 RegionsWithClusters.reset(); 553 RegionsWithHighRP.reset(); 554 555 if (!Regions.empty()) 556 BBLiveInMap = getBBLiveInMap(); 557 558 std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations; 559 560 do { 561 Stage++; 562 RegionIdx = 0; 563 MachineBasicBlock *MBB = nullptr; 564 565 if (Stage > InitialSchedule) { 566 if (!LIS) 567 break; 568 569 // Retry function scheduling if we found resulting occupancy and it is 570 // lower than used for first pass scheduling. This will give more freedom 571 // to schedule low register pressure blocks. 572 // Code is partially copied from MachineSchedulerBase::scheduleRegions(). 573 574 if (Stage == UnclusteredReschedule) { 575 if (RescheduleRegions.none()) 576 continue; 577 LLVM_DEBUG(dbgs() << 578 "Retrying function scheduling without clustering.\n"); 579 } 580 581 if (Stage == ClusteredLowOccupancyReschedule) { 582 if (StartingOccupancy <= MinOccupancy) 583 break; 584 585 LLVM_DEBUG( 586 dbgs() 587 << "Retrying function scheduling with lowest recorded occupancy " 588 << MinOccupancy << ".\n"); 589 590 S.setTargetOccupancy(MinOccupancy); 591 } 592 } 593 594 if (Stage == UnclusteredReschedule) 595 SavedMutations.swap(Mutations); 596 597 for (auto Region : Regions) { 598 if ((Stage == UnclusteredReschedule && !RescheduleRegions[RegionIdx]) || 599 (Stage == ClusteredLowOccupancyReschedule && 600 !RegionsWithClusters[RegionIdx] && !RegionsWithHighRP[RegionIdx])) { 601 602 ++RegionIdx; 603 continue; 604 } 605 606 RegionBegin = Region.first; 607 RegionEnd = Region.second; 608 609 if (RegionBegin->getParent() != MBB) { 610 if (MBB) finishBlock(); 611 MBB = RegionBegin->getParent(); 612 startBlock(MBB); 613 if (Stage == InitialSchedule) 614 computeBlockPressure(MBB); 615 } 616 617 unsigned NumRegionInstrs = std::distance(begin(), end()); 618 enterRegion(MBB, begin(), end(), NumRegionInstrs); 619 620 // Skip empty scheduling regions (0 or 1 schedulable instructions). 621 if (begin() == end() || begin() == std::prev(end())) { 622 exitRegion(); 623 continue; 624 } 625 626 LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n"); 627 LLVM_DEBUG(dbgs() << MF.getName() << ":" << printMBBReference(*MBB) << " " 628 << MBB->getName() << "\n From: " << *begin() 629 << " To: "; 630 if (RegionEnd != MBB->end()) dbgs() << *RegionEnd; 631 else dbgs() << "End"; 632 dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n'); 633 634 schedule(); 635 636 exitRegion(); 637 ++RegionIdx; 638 } 639 finishBlock(); 640 641 if (Stage == UnclusteredReschedule) 642 SavedMutations.swap(Mutations); 643 } while (Stage != LastStage); 644 } 645