//===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This contains a MachineSchedStrategy implementation for maximizing wave
/// occupancy on GCN hardware.
///
/// This pass will apply multiple scheduling stages to the same function.
/// Regions are first recorded in GCNScheduleDAGMILive::schedule. The actual
/// entry point for the scheduling of those regions is
/// GCNScheduleDAGMILive::runSchedStages.
///
/// Generally, the reason for having multiple scheduling stages is to account
/// for the kernel-wide effect of register usage on occupancy. Usually, only a
/// few scheduling regions will have register pressure high enough to limit
/// occupancy for the kernel, so constraints can be relaxed to improve ILP in
/// other regions.
///
//===----------------------------------------------------------------------===//

#include "GCNSchedStrategy.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"

#define DEBUG_TYPE "machine-scheduler"

using namespace llvm;

GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
    const MachineSchedContext *C)
    : GenericScheduler(C), TargetOccupancy(0), MF(nullptr),
      HasClusteredNodes(false), HasExcessPressure(false) {}

void GCNMaxOccupancySchedStrategy::initialize(ScheduleDAGMI *DAG) {
  GenericScheduler::initialize(DAG);

  MF = &DAG->MF;

  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();

  // FIXME: This is also necessary, because some passes that run after
  // scheduling and before regalloc increase register pressure.
  const unsigned ErrorMargin = 3;

  SGPRExcessLimit =
      Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
  VGPRExcessLimit =
      Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);

  SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
  // Set the initial TargetOccupancy to the maximum occupancy that we can
  // achieve for this function. This effectively sets a lower bound on the
  // 'Critical' register limits in the scheduler.
  TargetOccupancy = MFI.getOccupancy();
  SGPRCriticalLimit =
      std::min(ST.getMaxNumSGPRs(TargetOccupancy, true), SGPRExcessLimit);
  VGPRCriticalLimit =
      std::min(ST.getMaxNumVGPRs(TargetOccupancy), VGPRExcessLimit);

  // Subtract error margin from register limits and avoid overflow.
  SGPRCriticalLimit =
      std::min(SGPRCriticalLimit - ErrorMargin, SGPRCriticalLimit);
  VGPRCriticalLimit =
      std::min(VGPRCriticalLimit - ErrorMargin, VGPRCriticalLimit);
  SGPRExcessLimit = std::min(SGPRExcessLimit - ErrorMargin, SGPRExcessLimit);
  VGPRExcessLimit = std::min(VGPRExcessLimit - ErrorMargin, VGPRExcessLimit);
}

void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
                                                 bool AtTop,
                                                 const RegPressureTracker &RPTracker,
                                                 const SIRegisterInfo *SRI,
                                                 unsigned SGPRPressure,
                                                 unsigned VGPRPressure) {

  Cand.SU = SU;
  Cand.AtTop = AtTop;

  // getDownwardPressure() and getUpwardPressure() make temporary changes to
  // the tracker, so we need to pass those functions a non-const copy.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);

  Pressure.clear();
  MaxPressure.clear();

  if (AtTop)
    TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
  else {
    // FIXME: I think for bottom up scheduling, the register pressure is cached
    // and can be retrieved by DAG->getPressureDiff(SU).
    TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
  }

  unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
  unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];

  // If two instructions increase the pressure of different register sets
  // by the same amount, the generic scheduler will prefer to schedule the
  // instruction that increases the set with the least amount of registers,
  // which in our case would be SGPRs. This is rarely what we want, so
  // when we report excess/critical register pressure, we do it either
  // only for VGPRs or only for SGPRs.

  // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
  const unsigned MaxVGPRPressureInc = 16;
  bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
  bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;

  // FIXME: We have to enter REG-EXCESS before we reach the actual threshold
  // to increase the likelihood we don't go over the limits. We should improve
  // the analysis to look through dependencies to find the path with the least
  // register pressure.

  // We only need to update the RPDelta for instructions that increase register
  // pressure. Instructions that decrease or keep reg pressure the same will be
  // marked as RegExcess in tryCandidate() when they are compared with
  // instructions that increase the register pressure.
  if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
    HasExcessPressure = true;
    Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
    Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
  }

  if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
    HasExcessPressure = true;
    Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
    Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
  }

  // Register pressure is considered 'CRITICAL' if it is approaching a value
  // that would reduce the wave occupancy for the execution unit. When
  // register pressure is 'CRITICAL', increasing SGPR and VGPR pressure both
  // have the same cost, so we don't need to prefer one over the other.
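  //
  // For example, with hypothetical limits SGPRCriticalLimit = 100 and
  // VGPRCriticalLimit = 64, a candidate reaching NewSGPRPressure = 102 and
  // NewVGPRPressure = 60 gives SGPRDelta = 2 and VGPRDelta = -4, so the
  // CriticalMax delta below is reported against the SGPR pressure set with a
  // unit increase of 2.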

  int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
  int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;

  if (SGPRDelta >= 0 || VGPRDelta >= 0) {
    HasExcessPressure = true;
    if (SGPRDelta > VGPRDelta) {
      Cand.RPDelta.CriticalMax =
          PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
      Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
    } else {
      Cand.RPDelta.CriticalMax =
          PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
      Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
    }
  }
}

// This function is mostly cut and pasted from
// GenericScheduler::pickNodeFromQueue()
void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
                                                     const CandPolicy &ZonePolicy,
                                                     const RegPressureTracker &RPTracker,
                                                     SchedCandidate &Cand) {
  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
  ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
  unsigned SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
  unsigned VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
  ReadyQueue &Q = Zone.Available;
  for (SUnit *SU : Q) {

    SchedCandidate TryCand(ZonePolicy);
    initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI,
                  SGPRPressure, VGPRPressure);
    // Pass SchedBoundary only when comparing nodes from the same boundary.
    SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
    GenericScheduler::tryCandidate(Cand, TryCand, ZoneArg);
    if (TryCand.Reason != NoCand) {
      // Initialize resource delta if needed in case future heuristics query it.
      if (TryCand.ResDelta == SchedResourceDelta())
        TryCand.initResourceDelta(Zone.DAG, SchedModel);
      Cand.setBest(TryCand);
      LLVM_DEBUG(traceCandidate(Cand));
    }
  }
}

// This function is mostly cut and pasted from
// GenericScheduler::pickNodeBidirectional()
SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
  if (SUnit *SU = Bot.pickOnlyChoice()) {
    IsTopNode = false;
    return SU;
  }
  if (SUnit *SU = Top.pickOnlyChoice()) {
    IsTopNode = true;
    return SU;
  }
  // Set the bottom-up policy based on the state of the current bottom zone and
  // the instructions outside the zone, including the top zone.
  CandPolicy BotPolicy;
  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
  // Set the top-down policy based on the state of the current top zone and
  // the instructions outside the zone, including the bottom zone.
  CandPolicy TopPolicy;
  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);

  // See if BotCand is still valid (because we previously scheduled from Top).
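  // BotCand and TopCand are cached across pickNode() calls; they are only
  // recomputed when the cached pick has already been scheduled or the zone
  // policy has changed, mirroring GenericScheduler::pickNodeBidirectional().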
  LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
      BotCand.Policy != BotPolicy) {
    BotCand.reset(CandPolicy());
    pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
    assert(BotCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    LLVM_DEBUG(traceCandidate(BotCand));
#ifndef NDEBUG
    if (VerifyScheduling) {
      SchedCandidate TCand;
      TCand.reset(CandPolicy());
      pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
      assert(TCand.SU == BotCand.SU &&
             "Last pick result should correspond to re-picking right now");
    }
#endif
  }

  // Check if the top Q has a better candidate.
  LLVM_DEBUG(dbgs() << "Picking from Top:\n");
  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
      TopCand.Policy != TopPolicy) {
    TopCand.reset(CandPolicy());
    pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
    assert(TopCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    LLVM_DEBUG(traceCandidate(TopCand));
#ifndef NDEBUG
    if (VerifyScheduling) {
      SchedCandidate TCand;
      TCand.reset(CandPolicy());
      pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
      assert(TCand.SU == TopCand.SU &&
             "Last pick result should correspond to re-picking right now");
    }
#endif
  }

  // Pick best from BotCand and TopCand.
  LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand);
             dbgs() << "Bot Cand: "; traceCandidate(BotCand););
  SchedCandidate Cand = BotCand;
  TopCand.Reason = NoCand;
  GenericScheduler::tryCandidate(Cand, TopCand, nullptr);
  if (TopCand.Reason != NoCand) {
    Cand.setBest(TopCand);
  }
  LLVM_DEBUG(dbgs() << "Picking: "; traceCandidate(Cand););

  IsTopNode = Cand.AtTop;
  return Cand.SU;
}

// This function is mostly cut and pasted from
// GenericScheduler::pickNode()
SUnit *GCNMaxOccupancySchedStrategy::pickNode(bool &IsTopNode) {
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return nullptr;
  }
  SUnit *SU;
  do {
    if (RegionPolicy.OnlyTopDown) {
      SU = Top.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        TopCand.reset(NoPolicy);
        pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
        assert(TopCand.Reason != NoCand && "failed to find a candidate");
        SU = TopCand.SU;
      }
      IsTopNode = true;
    } else if (RegionPolicy.OnlyBottomUp) {
      SU = Bot.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        BotCand.reset(NoPolicy);
        pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
        assert(BotCand.Reason != NoCand && "failed to find a candidate");
        SU = BotCand.SU;
      }
      IsTopNode = false;
    } else {
      SU = pickNodeBidirectional(IsTopNode);
    }
  } while (SU->isScheduled);

  if (SU->isTopReady())
    Top.removeReady(SU);
  if (SU->isBottomReady())
    Bot.removeReady(SU);

  if (!HasClusteredNodes && SU->getInstr()->mayLoadOrStore()) {
    for (SDep &Dep : SU->Preds) {
      if (Dep.isCluster()) {
        HasClusteredNodes = true;
        break;
      }
    }
  }

  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
                    << *SU->getInstr());
  return SU;
}

GCNScheduleDAGMILive::GCNScheduleDAGMILive(
    MachineSchedContext *C,
    std::unique_ptr<MachineSchedStrategy> S)
    : ScheduleDAGMILive(C, std::move(S)), ST(MF.getSubtarget<GCNSubtarget>()),
      MFI(*MF.getInfo<SIMachineFunctionInfo>()),
      StartingOccupancy(MFI.getOccupancy()), MinOccupancy(StartingOccupancy) {

  LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
}

void GCNScheduleDAGMILive::schedule() {
  // Collect all scheduling regions. The actual scheduling is performed in
  // GCNScheduleDAGMILive::finalizeSchedule.
  Regions.push_back(std::make_pair(RegionBegin, RegionEnd));
}

GCNRegPressure
GCNScheduleDAGMILive::getRealRegPressure(unsigned RegionIdx) const {
  GCNDownwardRPTracker RPTracker(*LIS);
  RPTracker.advance(begin(), end(), &LiveIns[RegionIdx]);
  return RPTracker.moveMaxPressure();
}

void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
                                                const MachineBasicBlock *MBB) {
  GCNDownwardRPTracker RPTracker(*LIS);

  // If the block has only one successor, then the live-ins of that successor
  // are the live-outs of the current block. We can reuse the calculated live
  // set if the successor will be sent to scheduling after the current block.
  const MachineBasicBlock *OnlySucc = nullptr;
  if (MBB->succ_size() == 1 && !(*MBB->succ_begin())->empty()) {
    SlotIndexes *Ind = LIS->getSlotIndexes();
    if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(*MBB->succ_begin()))
      OnlySucc = *MBB->succ_begin();
  }

  // Scheduler sends regions from the end of the block upwards.
  size_t CurRegion = RegionIdx;
  for (size_t E = Regions.size(); CurRegion != E; ++CurRegion)
    if (Regions[CurRegion].first->getParent() != MBB)
      break;
  --CurRegion;

  auto I = MBB->begin();
  auto LiveInIt = MBBLiveIns.find(MBB);
  auto &Rgn = Regions[CurRegion];
  auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
  if (LiveInIt != MBBLiveIns.end()) {
    auto LiveIn = std::move(LiveInIt->second);
    RPTracker.reset(*MBB->begin(), &LiveIn);
    MBBLiveIns.erase(LiveInIt);
  } else {
    I = Rgn.first;
    auto LRS = BBLiveInMap.lookup(NonDbgMI);
#ifdef EXPENSIVE_CHECKS
    assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS));
#endif
    RPTracker.reset(*I, &LRS);
  }

  for (;;) {
    I = RPTracker.getNext();

    if (Regions[CurRegion].first == I || NonDbgMI == I) {
      LiveIns[CurRegion] = RPTracker.getLiveRegs();
      RPTracker.clearMaxPressure();
    }

    if (Regions[CurRegion].second == I) {
      Pressure[CurRegion] = RPTracker.moveMaxPressure();
      if (CurRegion-- == RegionIdx)
        break;
    }
    RPTracker.advanceToNext();
    RPTracker.advanceBeforeNext();
  }

  if (OnlySucc) {
    if (I != MBB->end()) {
      RPTracker.advanceToNext();
      RPTracker.advance(MBB->end());
    }
    RPTracker.reset(*OnlySucc->begin(), &RPTracker.getLiveRegs());
    RPTracker.advanceBeforeNext();
    MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs();
  }
}

DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
GCNScheduleDAGMILive::getBBLiveInMap() const {
  assert(!Regions.empty());
  std::vector<MachineInstr *> BBStarters;
  BBStarters.reserve(Regions.size());
  auto I = Regions.rbegin(), E = Regions.rend();
  auto *BB = I->first->getParent();
  do {
    auto *MI = &*skipDebugInstructionsForward(I->first, I->second);
    BBStarters.push_back(MI);
    do {
      ++I;
    } while (I != E && I->first->getParent() == BB);
  } while (I != E);
  return
      getLiveRegMap(BBStarters, false /*After*/, *LIS);
}

void GCNScheduleDAGMILive::finalizeSchedule() {
  // Start actual scheduling here. This function is called by the base
  // MachineScheduler after all regions have been recorded by
  // GCNScheduleDAGMILive::schedule().
  LiveIns.resize(Regions.size());
  Pressure.resize(Regions.size());
  RescheduleRegions.resize(Regions.size());
  RegionsWithClusters.resize(Regions.size());
  RegionsWithHighRP.resize(Regions.size());
  RegionsWithMinOcc.resize(Regions.size());
  RescheduleRegions.set();
  RegionsWithClusters.reset();
  RegionsWithHighRP.reset();
  RegionsWithMinOcc.reset();

  runSchedStages();
}

void GCNScheduleDAGMILive::runSchedStages() {
  LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");
  InitialScheduleStage S0(GCNSchedStageID::InitialSchedule, *this);
  UnclusteredRescheduleStage S1(GCNSchedStageID::UnclusteredReschedule, *this);
  ClusteredLowOccStage S2(GCNSchedStageID::ClusteredLowOccupancyReschedule,
                          *this);
  PreRARematStage S3(GCNSchedStageID::PreRARematerialize, *this);
  GCNSchedStage *SchedStages[] = {&S0, &S1, &S2, &S3};

  if (!Regions.empty())
    BBLiveInMap = getBBLiveInMap();

  for (auto *Stage : SchedStages) {
    if (!Stage->initGCNSchedStage())
      continue;

    for (auto Region : Regions) {
      RegionBegin = Region.first;
      RegionEnd = Region.second;
      // Setup for scheduling the region and check whether it should be
      // skipped.
      if (!Stage->initGCNRegion()) {
        Stage->advanceRegion();
        exitRegion();
        continue;
      }

      ScheduleDAGMILive::schedule();
      Stage->finalizeGCNRegion();
    }

    Stage->finalizeGCNSchedStage();
  }
}

#ifndef NDEBUG
raw_ostream &llvm::operator<<(raw_ostream &OS, const GCNSchedStageID &StageID) {
  switch (StageID) {
  case GCNSchedStageID::InitialSchedule:
    OS << "Initial Schedule";
    break;
  case GCNSchedStageID::UnclusteredReschedule:
    OS << "Unclustered Reschedule";
    break;
  case GCNSchedStageID::ClusteredLowOccupancyReschedule:
    OS << "Clustered Low Occupancy Reschedule";
    break;
  case GCNSchedStageID::PreRARematerialize:
    OS << "Pre-RA Rematerialize";
    break;
  }
  return OS;
}
#endif

GCNSchedStage::GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
    : DAG(DAG), S(static_cast<GCNMaxOccupancySchedStrategy &>(*DAG.SchedImpl)),
      MF(DAG.MF), MFI(DAG.MFI), ST(DAG.ST), StageID(StageID) {}

bool GCNSchedStage::initGCNSchedStage() {
  if (!DAG.LIS)
    return false;

  LLVM_DEBUG(dbgs() << "Starting scheduling stage: " << StageID << "\n");
  return true;
}

bool UnclusteredRescheduleStage::initGCNSchedStage() {
  if (!GCNSchedStage::initGCNSchedStage())
    return false;

  if (DAG.RescheduleRegions.none())
    return false;

  SavedMutations.swap(DAG.Mutations);

  LLVM_DEBUG(dbgs() << "Retrying function scheduling without clustering.\n");
  return true;
}

bool ClusteredLowOccStage::initGCNSchedStage() {
  if (!GCNSchedStage::initGCNSchedStage())
    return false;

  // Don't bother trying to improve ILP in lower RP regions if occupancy has
  // not dropped. All regions will have already been scheduled with the ideal
  // occupancy targets.
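  //
  // MinOccupancy starts out equal to StartingOccupancy and, by this point, can
  // only have been lowered by checkScheduling(), so this check asks whether
  // any earlier stage had to drop occupancy.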
  if (DAG.StartingOccupancy <= DAG.MinOccupancy)
    return false;

  LLVM_DEBUG(
      dbgs() << "Retrying function scheduling with lowest recorded occupancy "
             << DAG.MinOccupancy << ".\n");
  return true;
}

bool PreRARematStage::initGCNSchedStage() {
  if (!GCNSchedStage::initGCNSchedStage())
    return false;

  if (DAG.RegionsWithMinOcc.none() || DAG.Regions.size() == 1)
    return false;

  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  // Check maximum occupancy: if the function is already capped at
  // DAG.MinOccupancy by its attributes or LDS usage, rematerialization cannot
  // improve it.
  if (ST.computeOccupancy(MF.getFunction(), MFI.getLDSSize()) ==
      DAG.MinOccupancy)
    return false;

  // FIXME: This pass will invalidate cached MBBLiveIns for regions
  // in between the defs and the region we sink the def to. Cached pressure
  // for regions where a def is sunk from will also be invalidated. This will
  // need to be fixed if there is another pass after this pass.

  collectRematerializableInstructions();
  if (RematerializableInsts.empty() || !sinkTriviallyRematInsts(ST, TII))
    return false;

  LLVM_DEBUG(
      dbgs() << "Retrying function scheduling with improved occupancy of "
             << DAG.MinOccupancy << " from rematerializing\n");
  return true;
}

void GCNSchedStage::finalizeGCNSchedStage() {
  DAG.finishBlock();
  LLVM_DEBUG(dbgs() << "Ending scheduling stage: " << StageID << "\n");
}

void UnclusteredRescheduleStage::finalizeGCNSchedStage() {
  SavedMutations.swap(DAG.Mutations);

  GCNSchedStage::finalizeGCNSchedStage();
}

bool GCNSchedStage::initGCNRegion() {
  // Check whether this new region is also a new block.
  if (DAG.RegionBegin->getParent() != CurrentMBB)
    setupNewBlock();

  unsigned NumRegionInstrs = std::distance(DAG.begin(), DAG.end());
  DAG.enterRegion(CurrentMBB, DAG.begin(), DAG.end(), NumRegionInstrs);

  // Skip empty scheduling regions (0 or 1 schedulable instructions).
  if (DAG.begin() == DAG.end() || DAG.begin() == std::prev(DAG.end()))
    return false;

  LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
  LLVM_DEBUG(dbgs() << MF.getName() << ":" << printMBBReference(*CurrentMBB)
                    << " " << CurrentMBB->getName()
                    << "\n From: " << *DAG.begin() << " To: ";
             if (DAG.RegionEnd != CurrentMBB->end()) dbgs() << *DAG.RegionEnd;
             else dbgs() << "End";
             dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');

  // Save original instruction order before scheduling for possible revert.
  Unsched.clear();
  Unsched.reserve(DAG.NumRegionInstrs);
  for (auto &I : DAG)
    Unsched.push_back(&I);

  PressureBefore = DAG.Pressure[RegionIdx];

  LLVM_DEBUG(
      dbgs() << "Pressure before scheduling:\nRegion live-ins:";
      GCNRPTracker::printLiveRegs(dbgs(), DAG.LiveIns[RegionIdx], DAG.MRI);
      dbgs() << "Region live-in pressure: ";
      llvm::getRegPressure(DAG.MRI, DAG.LiveIns[RegionIdx]).print(dbgs());
      dbgs() << "Region register pressure: "; PressureBefore.print(dbgs()));

  // Set HasClusteredNodes to true for late stages where we have already
  // collected it. That way pickNode() will not scan SDeps when it is not
  // needed.
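  // In the initial stage this leaves HasClusteredNodes false, so pickNode()
  // scans for cluster edges and InitialScheduleStage::finalizeGCNRegion()
  // records the result in RegionsWithClusters; later stages set it to true up
  // front so the scan is skipped.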
  S.HasClusteredNodes = StageID > GCNSchedStageID::InitialSchedule;
  S.HasExcessPressure = false;

  return true;
}

bool UnclusteredRescheduleStage::initGCNRegion() {
  if (!DAG.RescheduleRegions[RegionIdx])
    return false;

  return GCNSchedStage::initGCNRegion();
}

bool ClusteredLowOccStage::initGCNRegion() {
  // We may need to reschedule this region if it doesn't have clusters so it
  // wasn't rescheduled in the last stage, or if we found it was testing
  // critical register pressure limits in the unclustered reschedule stage. The
  // latter is because we may not have been able to raise the min occupancy in
  // the previous stage so the region may be overly constrained even if it was
  // already rescheduled.
  if (!DAG.RegionsWithClusters[RegionIdx] && !DAG.RegionsWithHighRP[RegionIdx])
    return false;

  return GCNSchedStage::initGCNRegion();
}

bool PreRARematStage::initGCNRegion() {
  if (!DAG.RescheduleRegions[RegionIdx])
    return false;

  return GCNSchedStage::initGCNRegion();
}

void GCNSchedStage::setupNewBlock() {
  if (CurrentMBB)
    DAG.finishBlock();

  CurrentMBB = DAG.RegionBegin->getParent();
  DAG.startBlock(CurrentMBB);
  // Get real RP for the region if it hasn't been calculated before. After the
  // initial schedule stage real RP will be collected after scheduling.
  if (StageID == GCNSchedStageID::InitialSchedule)
    DAG.computeBlockPressure(RegionIdx, CurrentMBB);
}

void GCNSchedStage::finalizeGCNRegion() {
  DAG.Regions[RegionIdx] = std::make_pair(DAG.RegionBegin, DAG.RegionEnd);
  DAG.RescheduleRegions[RegionIdx] = false;
  if (S.HasExcessPressure)
    DAG.RegionsWithHighRP[RegionIdx] = true;

  // Revert scheduling if we have dropped occupancy or there is some other
  // reason that the original schedule is better.
  checkScheduling();

  DAG.exitRegion();
  RegionIdx++;
}

void InitialScheduleStage::finalizeGCNRegion() {
  // Record which regions have clustered nodes for the next unclustered
  // reschedule stage.
  assert(nextStage(StageID) == GCNSchedStageID::UnclusteredReschedule);
  if (S.HasClusteredNodes)
    DAG.RegionsWithClusters[RegionIdx] = true;

  GCNSchedStage::finalizeGCNRegion();
}

void GCNSchedStage::checkScheduling() {
  // Check the results of scheduling.
  PressureAfter = DAG.getRealRegPressure(RegionIdx);
  LLVM_DEBUG(dbgs() << "Pressure after scheduling: ";
             PressureAfter.print(dbgs()));

  if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit &&
      PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) <= S.VGPRCriticalLimit) {
    DAG.Pressure[RegionIdx] = PressureAfter;
    DAG.RegionsWithMinOcc[RegionIdx] =
        PressureAfter.getOccupancy(ST) == DAG.MinOccupancy;

    // Early out if we have achieved the occupancy target.
    LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n");
    return;
  }

  unsigned WavesAfter =
      std::min(S.getTargetOccupancy(), PressureAfter.getOccupancy(ST));
  unsigned WavesBefore =
      std::min(S.getTargetOccupancy(), PressureBefore.getOccupancy(ST));
  LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore
                    << ", after " << WavesAfter << ".\n");

  // We may not be able to keep the current target occupancy because of the
  // just-scheduled region.
  // We might still be able to revert scheduling if the occupancy before was
  // higher, or if the current schedule has register pressure higher than the
  // excess limits, which could lead to more spilling.
  unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);

  // Allow memory bound functions to drop to 4 waves if not limited by an
  // attribute.
  if (WavesAfter < WavesBefore && WavesAfter < DAG.MinOccupancy &&
      WavesAfter >= MFI.getMinAllowedOccupancy()) {
    LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "
                      << MFI.getMinAllowedOccupancy() << " waves\n");
    NewOccupancy = WavesAfter;
  }

  if (NewOccupancy < DAG.MinOccupancy) {
    DAG.MinOccupancy = NewOccupancy;
    MFI.limitOccupancy(DAG.MinOccupancy);
    DAG.RegionsWithMinOcc.reset();
    LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
                      << DAG.MinOccupancy << ".\n");
  }

  unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
  unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
  if (PressureAfter.getVGPRNum(false) > MaxVGPRs ||
      PressureAfter.getAGPRNum() > MaxVGPRs ||
      PressureAfter.getSGPRNum() > MaxSGPRs) {
    DAG.RescheduleRegions[RegionIdx] = true;
    DAG.RegionsWithHighRP[RegionIdx] = true;
  }

  // Revert if this region's schedule would cause a drop in occupancy or
  // spilling.
  if (shouldRevertScheduling(WavesAfter)) {
    revertScheduling();
  } else {
    DAG.Pressure[RegionIdx] = PressureAfter;
    DAG.RegionsWithMinOcc[RegionIdx] =
        PressureAfter.getOccupancy(ST) == DAG.MinOccupancy;
  }
}

bool GCNSchedStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (WavesAfter < DAG.MinOccupancy)
    return true;

  return false;
}

bool InitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
    return true;

  if (mayCauseSpilling(WavesAfter))
    return true;

  assert(nextStage(StageID) == GCNSchedStageID::UnclusteredReschedule);
  // Don't reschedule the region in the next stage if it doesn't have clusters.
  if (!DAG.RegionsWithClusters[RegionIdx])
    DAG.RescheduleRegions[RegionIdx] = false;

  return false;
}

bool UnclusteredRescheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
    return true;

  // If RP is not reduced in the unclustered reschedule stage, revert to the
  // old schedule.
  if (!PressureAfter.less(ST, PressureBefore)) {
    LLVM_DEBUG(dbgs() << "Unclustered reschedule did not help.\n");
    return true;
  }

  return false;
}

bool ClusteredLowOccStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
    return true;

  if (mayCauseSpilling(WavesAfter))
    return true;

  return false;
}

bool PreRARematStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
    return true;

  if (mayCauseSpilling(WavesAfter))
    return true;

  return false;
}

bool GCNSchedStage::mayCauseSpilling(unsigned WavesAfter) {
  if (WavesAfter <= MFI.getMinWavesPerEU() &&
      !PressureAfter.less(ST, PressureBefore) &&
      DAG.RescheduleRegions[RegionIdx]) {
    LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
    return true;
  }

  return false;
}

void GCNSchedStage::revertScheduling() {
  DAG.RegionsWithMinOcc[RegionIdx] =
      PressureBefore.getOccupancy(ST) == DAG.MinOccupancy;
  LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
  DAG.RescheduleRegions[RegionIdx] =
      DAG.RegionsWithClusters[RegionIdx] ||
      (nextStage(StageID)) != GCNSchedStageID::UnclusteredReschedule;
  DAG.RegionEnd = DAG.RegionBegin;
  int SkippedDebugInstr = 0;
  for (MachineInstr *MI : Unsched) {
    if (MI->isDebugInstr()) {
      ++SkippedDebugInstr;
      continue;
    }

    if (MI->getIterator() != DAG.RegionEnd) {
      DAG.BB->remove(MI);
      DAG.BB->insert(DAG.RegionEnd, MI);
      if (!MI->isDebugInstr())
        DAG.LIS->handleMove(*MI, true);
    }

    // Reset read-undef flags and update them later.
    for (auto &Op : MI->operands())
      if (Op.isReg() && Op.isDef())
        Op.setIsUndef(false);
    RegisterOperands RegOpers;
    RegOpers.collect(*MI, *DAG.TRI, DAG.MRI, DAG.ShouldTrackLaneMasks, false);
    if (!MI->isDebugInstr()) {
      if (DAG.ShouldTrackLaneMasks) {
        // Adjust liveness and add missing dead+read-undef flags.
        SlotIndex SlotIdx = DAG.LIS->getInstructionIndex(*MI).getRegSlot();
        RegOpers.adjustLaneLiveness(*DAG.LIS, DAG.MRI, SlotIdx, MI);
      } else {
        // Adjust for missing dead-def flags.
        RegOpers.detectDeadDefs(*MI, *DAG.LIS);
      }
    }
    DAG.RegionEnd = MI->getIterator();
    ++DAG.RegionEnd;
    LLVM_DEBUG(dbgs() << "Scheduling " << *MI);
  }

  // After reverting the schedule, debug instrs will now be at the end of the
  // block and RegionEnd will point to the first debug instr. Increment
  // RegionEnd past the debug instrs to the actual end of the scheduling
  // region.
  while (SkippedDebugInstr-- > 0)
    ++DAG.RegionEnd;

  // If Unsched.front() is a debug instruction, this will actually shrink the
  // region since we moved all debug instructions to the end of the block. Find
  // the first instruction that is not a debug instruction.
  DAG.RegionBegin = Unsched.front()->getIterator();
  if (DAG.RegionBegin->isDebugInstr()) {
    for (MachineInstr *MI : Unsched) {
      if (MI->isDebugInstr())
        continue;
      DAG.RegionBegin = MI->getIterator();
      break;
    }
  }

  // Then move the debug instructions back into their correct place and set
  // RegionBegin and RegionEnd if needed.
  DAG.placeDebugValues();

  DAG.Regions[RegionIdx] = std::make_pair(DAG.RegionBegin, DAG.RegionEnd);
}

void PreRARematStage::collectRematerializableInstructions() {
  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(DAG.TRI);
  for (unsigned I = 0, E = DAG.MRI.getNumVirtRegs(); I != E; ++I) {
    Register Reg = Register::index2VirtReg(I);
    if (!DAG.LIS->hasInterval(Reg))
      continue;

    // TODO: Handle AGPR and SGPR rematerialization
    if (!SRI->isVGPRClass(DAG.MRI.getRegClass(Reg)) ||
        !DAG.MRI.hasOneDef(Reg) || !DAG.MRI.hasOneNonDBGUse(Reg))
      continue;

    MachineOperand *Op = DAG.MRI.getOneDef(Reg);
    MachineInstr *Def = Op->getParent();
    if (Op->getSubReg() != 0 || !isTriviallyReMaterializable(*Def))
      continue;

    MachineInstr *UseI = &*DAG.MRI.use_instr_nodbg_begin(Reg);
    if (Def->getParent() == UseI->getParent())
      continue;

    // We are only collecting defs that are defined in another block and are
    // live-through or used inside regions at MinOccupancy. This means that the
    // register must be in the live-in set for the region.
    bool AddedToRematList = false;
    for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
      auto It = DAG.LiveIns[I].find(Reg);
      if (It != DAG.LiveIns[I].end() && !It->second.none()) {
        if (DAG.RegionsWithMinOcc[I]) {
          RematerializableInsts[I][Def] = UseI;
          AddedToRematList = true;
        }

        // Collect regions with rematerializable reg as live-in to avoid
        // searching later when updating RP.
        RematDefToLiveInRegions[Def].push_back(I);
      }
    }
    if (!AddedToRematList)
      RematDefToLiveInRegions.erase(Def);
  }
}

bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
                                              const TargetInstrInfo *TII) {
  // Temporary copies of cached variables we will be modifying and replacing if
  // sinking succeeds.
  SmallVector<
      std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>, 32>
      NewRegions;
  DenseMap<unsigned, GCNRPTracker::LiveRegSet> NewLiveIns;
  DenseMap<unsigned, GCNRegPressure> NewPressure;
  BitVector NewRescheduleRegions;
  LiveIntervals *LIS = DAG.LIS;

  NewRegions.resize(DAG.Regions.size());
  NewRescheduleRegions.resize(DAG.Regions.size());

  // Collect only regions that have a rematerializable def as a live-in.
  SmallSet<unsigned, 16> ImpactedRegions;
  for (const auto &It : RematDefToLiveInRegions)
    ImpactedRegions.insert(It.second.begin(), It.second.end());

  // Make copies of register pressure and live-ins cache that will be updated
  // as we rematerialize.
  for (auto Idx : ImpactedRegions) {
    NewPressure[Idx] = DAG.Pressure[Idx];
    NewLiveIns[Idx] = DAG.LiveIns[Idx];
  }
  NewRegions = DAG.Regions;
  NewRescheduleRegions.reset();

  DenseMap<MachineInstr *, MachineInstr *> InsertedMIToOldDef;
  bool Improved = false;
  for (auto I : ImpactedRegions) {
    if (!DAG.RegionsWithMinOcc[I])
      continue;

    Improved = false;
    int VGPRUsage = NewPressure[I].getVGPRNum(ST.hasGFX90AInsts());
    int SGPRUsage = NewPressure[I].getSGPRNum();

    // TODO: Handle occupancy drop due to AGPR and SGPR.
    // Check if the cause of the occupancy drop is VGPR usage rather than SGPR
    // usage.
    if (ST.getOccupancyWithNumSGPRs(SGPRUsage) == DAG.MinOccupancy)
      break;

    // The occupancy of this region could have been improved by a previous
    // iteration's sinking of defs.
    if (NewPressure[I].getOccupancy(ST) > DAG.MinOccupancy) {
      NewRescheduleRegions[I] = true;
      Improved = true;
      continue;
    }

    // First check if we have enough trivially rematerializable instructions to
    // improve occupancy. Optimistically assume all instructions we are able to
    // sink decrease RP.
    int TotalSinkableRegs = 0;
    for (const auto &It : RematerializableInsts[I]) {
      MachineInstr *Def = It.first;
      Register DefReg = Def->getOperand(0).getReg();
      TotalSinkableRegs +=
          SIRegisterInfo::getNumCoveredRegs(NewLiveIns[I][DefReg]);
    }
    int VGPRsAfterSink = VGPRUsage - TotalSinkableRegs;
    unsigned OptimisticOccupancy = ST.getOccupancyWithNumVGPRs(VGPRsAfterSink);
    // If in the most optimistic scenario we cannot improve occupancy, then do
    // not attempt to sink any instructions.
    if (OptimisticOccupancy <= DAG.MinOccupancy)
      break;

    unsigned ImproveOccupancy = 0;
    SmallVector<MachineInstr *, 4> SinkedDefs;
    for (auto &It : RematerializableInsts[I]) {
      MachineInstr *Def = It.first;
      MachineBasicBlock::iterator InsertPos =
          MachineBasicBlock::iterator(It.second);
      Register Reg = Def->getOperand(0).getReg();
      // Rematerialize MI to its use block. Since we are only rematerializing
      // instructions that do not have any virtual reg uses, we do not need to
      // call LiveRangeEdit::allUsesAvailableAt() and
      // LiveRangeEdit::canRematerializeAt().
      TII->reMaterialize(*InsertPos->getParent(), InsertPos, Reg,
                         Def->getOperand(0).getSubReg(), *Def, *DAG.TRI);
      MachineInstr *NewMI = &*(--InsertPos);
      LIS->InsertMachineInstrInMaps(*NewMI);
      LIS->removeInterval(Reg);
      LIS->createAndComputeVirtRegInterval(Reg);
      InsertedMIToOldDef[NewMI] = Def;

      // Update region boundaries in the scheduling region we sunk from, since
      // we may sink an instruction that was at the beginning or end of its
      // region.
      DAG.updateRegionBoundaries(NewRegions, Def, /*NewMI =*/nullptr,
                                 /*Removing =*/true);

      // Update region boundaries in the region we sunk to.
      DAG.updateRegionBoundaries(NewRegions, InsertPos, NewMI);

      LaneBitmask PrevMask = NewLiveIns[I][Reg];
      // FIXME: Also update cached pressure for where the def was sunk from.
      // Update RP for all regions that have this reg as a live-in and remove
      // the reg from all regions as a live-in.
      for (auto Idx : RematDefToLiveInRegions[Def]) {
        NewLiveIns[Idx].erase(Reg);
        if (InsertPos->getParent() != DAG.Regions[Idx].first->getParent()) {
          // Def is live-through and not used in this block.
          NewPressure[Idx].inc(Reg, PrevMask, LaneBitmask::getNone(), DAG.MRI);
        } else {
          // Def is used and rematerialized into this block.
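          // Recompute the region's max pressure with a fresh downward tracker
          // so that the def, which is now defined inside the region rather
          // than being a live-in, is accounted for correctly.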
          GCNDownwardRPTracker RPT(*LIS);
          auto *NonDbgMI = &*skipDebugInstructionsForward(
              NewRegions[Idx].first, NewRegions[Idx].second);
          RPT.reset(*NonDbgMI, &NewLiveIns[Idx]);
          RPT.advance(NewRegions[Idx].second);
          NewPressure[Idx] = RPT.moveMaxPressure();
        }
      }

      SinkedDefs.push_back(Def);
      ImproveOccupancy = NewPressure[I].getOccupancy(ST);
      if (ImproveOccupancy > DAG.MinOccupancy)
        break;
    }

    // Remove defs we just sunk from all regions' list of sinkable defs.
    for (auto &Def : SinkedDefs)
      for (auto TrackedIdx : RematDefToLiveInRegions[Def])
        RematerializableInsts[TrackedIdx].erase(Def);

    if (ImproveOccupancy <= DAG.MinOccupancy)
      break;

    NewRescheduleRegions[I] = true;
    Improved = true;
  }

  if (!Improved) {
    // Occupancy was not improved for all regions that were at MinOccupancy.
    // Undo sinking and remove newly rematerialized instructions.
    for (auto &Entry : InsertedMIToOldDef) {
      MachineInstr *MI = Entry.first;
      MachineInstr *OldMI = Entry.second;
      Register Reg = MI->getOperand(0).getReg();
      LIS->RemoveMachineInstrFromMaps(*MI);
      MI->eraseFromParent();
      OldMI->clearRegisterDeads(Reg);
      LIS->removeInterval(Reg);
      LIS->createAndComputeVirtRegInterval(Reg);
    }
    return false;
  }

  // Occupancy was improved for all regions.
  for (auto &Entry : InsertedMIToOldDef) {
    MachineInstr *MI = Entry.first;
    MachineInstr *OldMI = Entry.second;

    // Remove OldMI from BBLiveInMap since we are sinking it from its MBB.
    DAG.BBLiveInMap.erase(OldMI);

    // Remove OldMI and update LIS.
    Register Reg = MI->getOperand(0).getReg();
    LIS->RemoveMachineInstrFromMaps(*OldMI);
    OldMI->eraseFromParent();
    LIS->removeInterval(Reg);
    LIS->createAndComputeVirtRegInterval(Reg);
  }

  // Update live-ins, register pressure, and regions caches.
  for (auto Idx : ImpactedRegions) {
    DAG.LiveIns[Idx] = NewLiveIns[Idx];
    DAG.Pressure[Idx] = NewPressure[Idx];
    DAG.MBBLiveIns.erase(DAG.Regions[Idx].first->getParent());
  }
  DAG.Regions = NewRegions;
  DAG.RescheduleRegions = NewRescheduleRegions;

  SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
  MFI.increaseOccupancy(MF, ++DAG.MinOccupancy);

  return true;
}

// Copied from MachineLICM
bool PreRARematStage::isTriviallyReMaterializable(const MachineInstr &MI) {
  if (!DAG.TII->isTriviallyReMaterializable(MI))
    return false;

  for (const MachineOperand &MO : MI.operands())
    if (MO.isReg() && MO.isUse() && MO.getReg().isVirtual())
      return false;

  return true;
}

// When removing, we will have to check both the beginning and the end of the
// region. When inserting, we only have to check if we are inserting NewMI in
// front of a scheduling region and do not need to check the end, since we will
// only ever be inserting before an already existing MI.
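//
// For example, if a sunk def was the first instruction of its region, the
// region's begin iterator is advanced past it; if it was both the first and
// the last instruction, the region becomes empty and both iterators are set to
// the end of the block.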
void GCNScheduleDAGMILive::updateRegionBoundaries(
    SmallVectorImpl<std::pair<MachineBasicBlock::iterator,
                              MachineBasicBlock::iterator>> &RegionBoundaries,
    MachineBasicBlock::iterator MI, MachineInstr *NewMI, bool Removing) {
  unsigned I = 0, E = RegionBoundaries.size();
  // Search for the first region of the block where MI is located.
  while (I != E && MI->getParent() != RegionBoundaries[I].first->getParent())
    ++I;

  for (; I != E; ++I) {
    if (MI->getParent() != RegionBoundaries[I].first->getParent())
      return;

    if (Removing && MI == RegionBoundaries[I].first &&
        MI == RegionBoundaries[I].second) {
      // MI is in a region with size 1; after removing, the region will be
      // size 0, so set RegionBegin and RegionEnd to the end-of-block iterator.
      RegionBoundaries[I] =
          std::make_pair(MI->getParent()->end(), MI->getParent()->end());
      return;
    }
    if (MI == RegionBoundaries[I].first) {
      if (Removing)
        RegionBoundaries[I] =
            std::make_pair(std::next(MI), RegionBoundaries[I].second);
      else
        // Inserted NewMI in front of region, set new RegionBegin to NewMI.
        RegionBoundaries[I] = std::make_pair(MachineBasicBlock::iterator(NewMI),
                                             RegionBoundaries[I].second);
      return;
    }
    if (Removing && MI == RegionBoundaries[I].second) {
      RegionBoundaries[I] =
          std::make_pair(RegionBoundaries[I].first, std::prev(MI));
      return;
    }
  }
}