//===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This contains a MachineSchedStrategy implementation for maximizing wave
/// occupancy on GCN hardware.
//===----------------------------------------------------------------------===//

#include "GCNSchedStrategy.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/Support/MathExtras.h"

#define DEBUG_TYPE "machine-scheduler"

using namespace llvm;

GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
    const MachineSchedContext *C) :
    GenericScheduler(C), TargetOccupancy(0), MF(nullptr) { }

void GCNMaxOccupancySchedStrategy::initialize(ScheduleDAGMI *DAG) {
  GenericScheduler::initialize(DAG);

  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);

  MF = &DAG->MF;

  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();

  // FIXME: This is also necessary, because some passes that run after
  // scheduling and before regalloc increase register pressure.
  const int ErrorMargin = 3;

  SGPRExcessLimit = Context->RegClassInfo
    ->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass) - ErrorMargin;
  VGPRExcessLimit = Context->RegClassInfo
    ->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass) - ErrorMargin;
  if (TargetOccupancy) {
    SGPRCriticalLimit = ST.getMaxNumSGPRs(TargetOccupancy, true);
    VGPRCriticalLimit = ST.getMaxNumVGPRs(TargetOccupancy);
  } else {
    SGPRCriticalLimit = SRI->getRegPressureSetLimit(DAG->MF,
        AMDGPU::RegisterPressureSets::SReg_32);
    VGPRCriticalLimit = SRI->getRegPressureSetLimit(DAG->MF,
        AMDGPU::RegisterPressureSets::VGPR_32);
  }

  SGPRCriticalLimit -= ErrorMargin;
  VGPRCriticalLimit -= ErrorMargin;
}

void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
                                     bool AtTop, const RegPressureTracker &RPTracker,
                                     const SIRegisterInfo *SRI,
                                     unsigned SGPRPressure,
                                     unsigned VGPRPressure) {

  Cand.SU = SU;
  Cand.AtTop = AtTop;

  // getDownwardPressure() and getUpwardPressure() make temporary changes to
  // the tracker, so we need to pass those functions a non-const copy.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);

  Pressure.clear();
  MaxPressure.clear();

  if (AtTop)
    TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
  else {
    // FIXME: I think for bottom-up scheduling, the register pressure is cached
    // and can be retrieved by DAG->getPressureDiff(SU).
    TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
  }

  unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
  unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];

  // If two instructions increase the pressure of different register sets
  // by the same amount, the generic scheduler will prefer to schedule the
  // instruction that increases the set with the least amount of registers,
  // which in our case would be SGPRs.
  // This is rarely what we want, so when we report excess/critical register
  // pressure, we do it either only for VGPRs or only for SGPRs.

  // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
  const unsigned MaxVGPRPressureInc = 16;
  bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
  bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;

  // FIXME: We have to enter REG-EXCESS before we reach the actual threshold
  // to increase the likelihood we don't go over the limits. We should improve
  // the analysis to look through dependencies to find the path with the least
  // register pressure.

  // We only need to update the RPDelta for instructions that increase register
  // pressure. Instructions that decrease or keep reg pressure the same will be
  // marked as RegExcess in tryCandidate() when they are compared with
  // instructions that increase the register pressure.
  if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
    Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
    Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
  }

  if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
    Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
    Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
  }

  // Register pressure is considered 'CRITICAL' if it is approaching a value
  // that would reduce the wave occupancy for the execution unit. When
  // register pressure is 'CRITICAL', increasing SGPR and VGPR pressure both
  // have the same cost, so we don't need to prefer one over the other.

  int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
  int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;

  if (SGPRDelta >= 0 || VGPRDelta >= 0) {
    if (SGPRDelta > VGPRDelta) {
      Cand.RPDelta.CriticalMax =
        PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
      Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
    } else {
      Cand.RPDelta.CriticalMax =
        PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
      Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
    }
  }
}

// This function is mostly cut and pasted from
// GenericScheduler::pickNodeFromQueue()
void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
                                         const CandPolicy &ZonePolicy,
                                         const RegPressureTracker &RPTracker,
                                         SchedCandidate &Cand) {
  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
  ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
  unsigned SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
  unsigned VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
  ReadyQueue &Q = Zone.Available;
  for (SUnit *SU : Q) {

    SchedCandidate TryCand(ZonePolicy);
    initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI,
                  SGPRPressure, VGPRPressure);
    // Pass SchedBoundary only when comparing nodes from the same boundary.
    SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
    GenericScheduler::tryCandidate(Cand, TryCand, ZoneArg);
    if (TryCand.Reason != NoCand) {
      // Initialize resource delta if needed in case future heuristics query it.
      if (TryCand.ResDelta == SchedResourceDelta())
        TryCand.initResourceDelta(Zone.DAG, SchedModel);
      Cand.setBest(TryCand);
      LLVM_DEBUG(traceCandidate(Cand));
    }
  }
}

// This function is mostly cut and pasted from
// GenericScheduler::pickNodeBidirectional()
SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
  if (SUnit *SU = Bot.pickOnlyChoice()) {
    IsTopNode = false;
    return SU;
  }
  if (SUnit *SU = Top.pickOnlyChoice()) {
    IsTopNode = true;
    return SU;
  }
  // Set the bottom-up policy based on the state of the current bottom zone and
  // the instructions outside the zone, including the top zone.
  CandPolicy BotPolicy;
  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
  // Set the top-down policy based on the state of the current top zone and
  // the instructions outside the zone, including the bottom zone.
  CandPolicy TopPolicy;
  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);

  // See if BotCand is still valid (because we previously scheduled from Top).
  LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
      BotCand.Policy != BotPolicy) {
    BotCand.reset(CandPolicy());
    pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
    assert(BotCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    LLVM_DEBUG(traceCandidate(BotCand));
#ifndef NDEBUG
    if (VerifyScheduling) {
      SchedCandidate TCand;
      TCand.reset(CandPolicy());
      pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
      assert(TCand.SU == BotCand.SU &&
             "Last pick result should correspond to re-picking right now");
    }
#endif
  }

  // Check if the top Q has a better candidate.
  LLVM_DEBUG(dbgs() << "Picking from Top:\n");
  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
      TopCand.Policy != TopPolicy) {
    TopCand.reset(CandPolicy());
    pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
    assert(TopCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    LLVM_DEBUG(traceCandidate(TopCand));
#ifndef NDEBUG
    if (VerifyScheduling) {
      SchedCandidate TCand;
      TCand.reset(CandPolicy());
      pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
      assert(TCand.SU == TopCand.SU &&
             "Last pick result should correspond to re-picking right now");
    }
#endif
  }

  // Pick best from BotCand and TopCand.
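  // Passing a null SchedBoundary below keeps the comparison zone-agnostic, so
  // only zone-independent heuristics (e.g. register pressure) are used to
  // decide between the bottom-up and top-down candidates.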
  LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand);
             dbgs() << "Bot Cand: "; traceCandidate(BotCand););
  SchedCandidate Cand = BotCand;
  TopCand.Reason = NoCand;
  GenericScheduler::tryCandidate(Cand, TopCand, nullptr);
  if (TopCand.Reason != NoCand) {
    Cand.setBest(TopCand);
  }
  LLVM_DEBUG(dbgs() << "Picking: "; traceCandidate(Cand););

  IsTopNode = Cand.AtTop;
  return Cand.SU;
}

// This function is mostly cut and pasted from
// GenericScheduler::pickNode()
SUnit *GCNMaxOccupancySchedStrategy::pickNode(bool &IsTopNode) {
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return nullptr;
  }
  SUnit *SU;
  do {
    if (RegionPolicy.OnlyTopDown) {
      SU = Top.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        TopCand.reset(NoPolicy);
        pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
        assert(TopCand.Reason != NoCand && "failed to find a candidate");
        SU = TopCand.SU;
      }
      IsTopNode = true;
    } else if (RegionPolicy.OnlyBottomUp) {
      SU = Bot.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        BotCand.reset(NoPolicy);
        pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
        assert(BotCand.Reason != NoCand && "failed to find a candidate");
        SU = BotCand.SU;
      }
      IsTopNode = false;
    } else {
      SU = pickNodeBidirectional(IsTopNode);
    }
  } while (SU->isScheduled);

  if (SU->isTopReady())
    Top.removeReady(SU);
  if (SU->isBottomReady())
    Bot.removeReady(SU);

  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
                    << *SU->getInstr());
  return SU;
}

GCNScheduleDAGMILive::GCNScheduleDAGMILive(MachineSchedContext *C,
                        std::unique_ptr<MachineSchedStrategy> S) :
  ScheduleDAGMILive(C, std::move(S)),
  ST(MF.getSubtarget<GCNSubtarget>()),
  MFI(*MF.getInfo<SIMachineFunctionInfo>()),
  StartingOccupancy(MFI.getOccupancy()),
  MinOccupancy(StartingOccupancy), Stage(Collect), RegionIdx(0) {

  LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
}

void GCNScheduleDAGMILive::schedule() {
  if (Stage == Collect) {
    // Just record regions at the first pass.
    Regions.push_back(std::make_pair(RegionBegin, RegionEnd));
    return;
  }

  std::vector<MachineInstr*> Unsched;
  Unsched.reserve(NumRegionInstrs);
  for (auto &I : *this) {
    Unsched.push_back(&I);
  }

  GCNRegPressure PressureBefore;
  if (LIS) {
    PressureBefore = Pressure[RegionIdx];

    LLVM_DEBUG(dbgs() << "Pressure before scheduling:\nRegion live-ins:";
               GCNRPTracker::printLiveRegs(dbgs(), LiveIns[RegionIdx], MRI);
               dbgs() << "Region live-in pressure: ";
               llvm::getRegPressure(MRI, LiveIns[RegionIdx]).print(dbgs());
               dbgs() << "Region register pressure: ";
               PressureBefore.print(dbgs()));
  }

  ScheduleDAGMILive::schedule();
  Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd);
  RescheduleRegions[RegionIdx] = false;

  if (!LIS)
    return;

  // Check the results of scheduling.
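  // If the region's pressure is still within the critical SGPR/VGPR limits we
  // keep the new schedule; otherwise we may lower the target occupancy for the
  // whole function or revert to the original instruction order below.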
  GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl;
  auto PressureAfter = getRealRegPressure();

  LLVM_DEBUG(dbgs() << "Pressure after scheduling: ";
             PressureAfter.print(dbgs()));

  if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit &&
      PressureAfter.getVGPRNum() <= S.VGPRCriticalLimit) {
    Pressure[RegionIdx] = PressureAfter;
    LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n");
    return;
  }
  unsigned Occ = MFI.getOccupancy();
  unsigned WavesAfter = std::min(Occ, PressureAfter.getOccupancy(ST));
  unsigned WavesBefore = std::min(Occ, PressureBefore.getOccupancy(ST));
  LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore
                    << ", after " << WavesAfter << ".\n");

  // We could not keep current target occupancy because of the just scheduled
  // region. Record new occupancy for next scheduling cycle.
  unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);
  // Allow memory bound functions to drop to 4 waves if not limited by an
  // attribute.
  if (WavesAfter < WavesBefore && WavesAfter < MinOccupancy &&
      WavesAfter >= MFI.getMinAllowedOccupancy()) {
    LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "
                      << MFI.getMinAllowedOccupancy() << " waves\n");
    NewOccupancy = WavesAfter;
  }
  if (NewOccupancy < MinOccupancy) {
    MinOccupancy = NewOccupancy;
    MFI.limitOccupancy(MinOccupancy);
    LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
                      << MinOccupancy << ".\n");
  }

  unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
  unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
  if (PressureAfter.getVGPRNum() > MaxVGPRs ||
      PressureAfter.getSGPRNum() > MaxSGPRs)
    RescheduleRegions[RegionIdx] = true;

  if (WavesAfter >= MinOccupancy) {
    if (Stage == UnclusteredReschedule &&
        !PressureAfter.less(ST, PressureBefore)) {
      LLVM_DEBUG(dbgs() << "Unclustered reschedule did not help.\n");
    } else if (WavesAfter > MFI.getMinWavesPerEU() ||
        PressureAfter.less(ST, PressureBefore) ||
        !RescheduleRegions[RegionIdx]) {
      Pressure[RegionIdx] = PressureAfter;
      return;
    } else {
      LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
    }
  }

  LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
  RescheduleRegions[RegionIdx] = true;
  RegionEnd = RegionBegin;
  for (MachineInstr *MI : Unsched) {
    if (MI->isDebugInstr())
      continue;

    if (MI->getIterator() != RegionEnd) {
      BB->remove(MI);
      BB->insert(RegionEnd, MI);
      if (!MI->isDebugInstr())
        LIS->handleMove(*MI, true);
    }
    // Reset read-undef flags and update them later.
    for (auto &Op : MI->operands())
      if (Op.isReg() && Op.isDef())
        Op.setIsUndef(false);
    RegisterOperands RegOpers;
    RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
    if (!MI->isDebugInstr()) {
      if (ShouldTrackLaneMasks) {
        // Adjust liveness and add missing dead+read-undef flags.
        SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
        RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
      } else {
        // Adjust for missing dead-def flags.
        RegOpers.detectDeadDefs(*MI, *LIS);
      }
    }
    RegionEnd = MI->getIterator();
    ++RegionEnd;
    LLVM_DEBUG(dbgs() << "Scheduling " << *MI);
  }
  RegionBegin = Unsched.front()->getIterator();
  Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd);

  placeDebugValues();
}

GCNRegPressure GCNScheduleDAGMILive::getRealRegPressure() const {
  GCNDownwardRPTracker RPTracker(*LIS);
  RPTracker.advance(begin(), end(), &LiveIns[RegionIdx]);
  return RPTracker.moveMaxPressure();
}

void GCNScheduleDAGMILive::computeBlockPressure(const MachineBasicBlock *MBB) {
  GCNDownwardRPTracker RPTracker(*LIS);

  // If the block has only one successor, then the live-ins of that successor
  // are the live-outs of the current block. We can reuse the calculated live
  // set if that successor will be sent to scheduling after the current block.
  const MachineBasicBlock *OnlySucc = nullptr;
  if (MBB->succ_size() == 1 && !(*MBB->succ_begin())->empty()) {
    SlotIndexes *Ind = LIS->getSlotIndexes();
    if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(*MBB->succ_begin()))
      OnlySucc = *MBB->succ_begin();
  }

  // Scheduler sends regions from the end of the block upwards.
  size_t CurRegion = RegionIdx;
  for (size_t E = Regions.size(); CurRegion != E; ++CurRegion)
    if (Regions[CurRegion].first->getParent() != MBB)
      break;
  --CurRegion;

  auto I = MBB->begin();
  auto LiveInIt = MBBLiveIns.find(MBB);
  if (LiveInIt != MBBLiveIns.end()) {
    auto LiveIn = std::move(LiveInIt->second);
    RPTracker.reset(*MBB->begin(), &LiveIn);
    MBBLiveIns.erase(LiveInIt);
  } else {
    auto &Rgn = Regions[CurRegion];
    I = Rgn.first;
    auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
    auto LRS = BBLiveInMap.lookup(NonDbgMI);
    assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS));
    RPTracker.reset(*I, &LRS);
  }

  for ( ; ; ) {
    I = RPTracker.getNext();

    if (Regions[CurRegion].first == I) {
      LiveIns[CurRegion] = RPTracker.getLiveRegs();
      RPTracker.clearMaxPressure();
    }

    if (Regions[CurRegion].second == I) {
      Pressure[CurRegion] = RPTracker.moveMaxPressure();
      if (CurRegion-- == RegionIdx)
        break;
    }
    RPTracker.advanceToNext();
    RPTracker.advanceBeforeNext();
  }

  if (OnlySucc) {
    if (I != MBB->end()) {
      RPTracker.advanceToNext();
      RPTracker.advance(MBB->end());
    }
    RPTracker.reset(*OnlySucc->begin(), &RPTracker.getLiveRegs());
    RPTracker.advanceBeforeNext();
    MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs();
  }
}

DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
GCNScheduleDAGMILive::getBBLiveInMap() const {
  assert(!Regions.empty());
  std::vector<MachineInstr *> BBStarters;
  BBStarters.reserve(Regions.size());
  auto I = Regions.rbegin(), E = Regions.rend();
  auto *BB = I->first->getParent();
  do {
    auto *MI = &*skipDebugInstructionsForward(I->first, I->second);
    BBStarters.push_back(MI);
    do {
      ++I;
    } while (I != E && I->first->getParent() == BB);
  } while (I != E);
  return getLiveRegMap(BBStarters, false /*After*/, *LIS);
}

void GCNScheduleDAGMILive::finalizeSchedule() {
  GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl;
  LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");

  LiveIns.resize(Regions.size());
  Pressure.resize(Regions.size());
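  // Conservatively mark every region as a reschedule candidate; schedule()
  // clears a region's bit once its pressure is acceptable and sets it again
  // when the region exceeds the register limits or has to be reverted.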
  RescheduleRegions.resize(Regions.size());
  RescheduleRegions.set();

  if (!Regions.empty())
    BBLiveInMap = getBBLiveInMap();

  std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;

  do {
    Stage++;
    RegionIdx = 0;
    MachineBasicBlock *MBB = nullptr;

    if (Stage > InitialSchedule) {
      if (!LIS)
        break;

      // Retry function scheduling if the resulting occupancy we found is lower
      // than the occupancy used for the first scheduling pass. This will give
      // more freedom to schedule low register pressure blocks.
      // Code is partially copied from MachineSchedulerBase::scheduleRegions().

      if (Stage == UnclusteredReschedule) {
        if (RescheduleRegions.none())
          continue;
        LLVM_DEBUG(dbgs() <<
          "Retrying function scheduling without clustering.\n");
      }

      if (Stage == ClusteredLowOccupancyReschedule) {
        if (StartingOccupancy <= MinOccupancy)
          break;

        LLVM_DEBUG(
            dbgs()
            << "Retrying function scheduling with lowest recorded occupancy "
            << MinOccupancy << ".\n");

        S.setTargetOccupancy(MinOccupancy);
      }
    }

    if (Stage == UnclusteredReschedule)
      SavedMutations.swap(Mutations);

    for (auto Region : Regions) {
      if (Stage == UnclusteredReschedule && !RescheduleRegions[RegionIdx])
        continue;

      RegionBegin = Region.first;
      RegionEnd = Region.second;

      if (RegionBegin->getParent() != MBB) {
        if (MBB) finishBlock();
        MBB = RegionBegin->getParent();
        startBlock(MBB);
        if (Stage == InitialSchedule)
          computeBlockPressure(MBB);
      }

      unsigned NumRegionInstrs = std::distance(begin(), end());
      enterRegion(MBB, begin(), end(), NumRegionInstrs);

      // Skip empty scheduling regions (0 or 1 schedulable instructions).
      if (begin() == end() || begin() == std::prev(end())) {
        exitRegion();
        continue;
      }

      LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
      LLVM_DEBUG(dbgs() << MF.getName() << ":" << printMBBReference(*MBB) << " "
                        << MBB->getName() << "\n From: " << *begin()
                        << " To: ";
                 if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
                 else dbgs() << "End";
                 dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');

      schedule();

      exitRegion();
      ++RegionIdx;
    }
    finishBlock();

    if (Stage == UnclusteredReschedule)
      SavedMutations.swap(Mutations);
  } while (Stage != LastStage);
}