1 //===-- GCNSchedStrategy.h - GCN Scheduler Strategy -*- C++ -*-------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 // 11 //===----------------------------------------------------------------------===// 12 13 #ifndef LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H 14 #define LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H 15 16 #include "GCNRegPressure.h" 17 #include "llvm/ADT/DenseMap.h" 18 #include "llvm/ADT/MapVector.h" 19 #include "llvm/CodeGen/MachineInstr.h" 20 #include "llvm/CodeGen/MachineScheduler.h" 21 22 namespace llvm { 23 24 class SIMachineFunctionInfo; 25 class SIRegisterInfo; 26 class GCNSubtarget; 27 class GCNSchedStage; 28 29 enum class GCNSchedStageID : unsigned { 30 OccInitialSchedule = 0, 31 UnclusteredHighRPReschedule = 1, 32 ClusteredLowOccupancyReschedule = 2, 33 PreRARematerialize = 3, 34 ILPInitialSchedule = 4, 35 MemoryClauseInitialSchedule = 5 36 }; 37 38 #ifndef NDEBUG 39 raw_ostream &operator<<(raw_ostream &OS, const GCNSchedStageID &StageID); 40 #endif 41 42 /// This is a minimal scheduler strategy. The main difference between this 43 /// and the GenericScheduler is that GCNSchedStrategy uses different 44 /// heuristics to determine excess/critical pressure sets. 45 class GCNSchedStrategy : public GenericScheduler { 46 protected: 47 SUnit *pickNodeBidirectional(bool &IsTopNode); 48 49 void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, 50 const RegPressureTracker &RPTracker, 51 SchedCandidate &Cand, bool IsBottomUp); 52 53 void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, 54 const RegPressureTracker &RPTracker, 55 const SIRegisterInfo *SRI, unsigned SGPRPressure, 56 unsigned VGPRPressure, bool IsBottomUp); 57 58 std::vector<unsigned> Pressure; 59 60 std::vector<unsigned> MaxPressure; 61 62 unsigned SGPRExcessLimit; 63 64 unsigned VGPRExcessLimit; 65 66 unsigned TargetOccupancy; 67 68 MachineFunction *MF; 69 70 // Scheduling stages for this strategy. 71 SmallVector<GCNSchedStageID, 4> SchedStages; 72 73 // Pointer to the current SchedStageID. 74 SmallVectorImpl<GCNSchedStageID>::iterator CurrentStage = nullptr; 75 76 // GCN RP Tracker for top-down scheduling 77 mutable GCNDownwardRPTracker DownwardTracker; 78 79 // GCN RP Tracker for botttom-up scheduling 80 mutable GCNUpwardRPTracker UpwardTracker; 81 82 public: 83 // schedule() have seen register pressure over the critical limits and had to 84 // track register pressure for actual scheduling heuristics. 85 bool HasHighPressure; 86 87 // Schedule known to have excess register pressure. Be more conservative in 88 // increasing ILP and preserving VGPRs. 89 bool KnownExcessRP = false; 90 91 // An error margin is necessary because of poor performance of the generic RP 92 // tracker and can be adjusted up for tuning heuristics to try and more 93 // aggressively reduce register pressure. 94 unsigned ErrorMargin = 3; 95 96 // Bias for SGPR limits under a high register pressure. 97 const unsigned HighRPSGPRBias = 7; 98 99 // Bias for VGPR limits under a high register pressure. 100 const unsigned HighRPVGPRBias = 7; 101 102 unsigned SGPRCriticalLimit; 103 104 unsigned VGPRCriticalLimit; 105 106 unsigned SGPRLimitBias = 0; 107 108 unsigned VGPRLimitBias = 0; 109 110 GCNSchedStrategy(const MachineSchedContext *C); 111 112 SUnit *pickNode(bool &IsTopNode) override; 113 114 void schedNode(SUnit *SU, bool IsTopNode) override; 115 116 void initialize(ScheduleDAGMI *DAG) override; 117 getTargetOccupancy()118 unsigned getTargetOccupancy() { return TargetOccupancy; } 119 setTargetOccupancy(unsigned Occ)120 void setTargetOccupancy(unsigned Occ) { TargetOccupancy = Occ; } 121 122 GCNSchedStageID getCurrentStage(); 123 124 // Advances stage. Returns true if there are remaining stages. 125 bool advanceStage(); 126 127 bool hasNextStage() const; 128 129 GCNSchedStageID getNextStage() const; 130 getDownwardTracker()131 GCNDownwardRPTracker *getDownwardTracker() { return &DownwardTracker; } 132 getUpwardTracker()133 GCNUpwardRPTracker *getUpwardTracker() { return &UpwardTracker; } 134 }; 135 136 /// The goal of this scheduling strategy is to maximize kernel occupancy (i.e. 137 /// maximum number of waves per simd). 138 class GCNMaxOccupancySchedStrategy final : public GCNSchedStrategy { 139 public: 140 GCNMaxOccupancySchedStrategy(const MachineSchedContext *C, 141 bool IsLegacyScheduler = false); 142 }; 143 144 /// The goal of this scheduling strategy is to maximize ILP for a single wave 145 /// (i.e. latency hiding). 146 class GCNMaxILPSchedStrategy final : public GCNSchedStrategy { 147 protected: 148 bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, 149 SchedBoundary *Zone) const override; 150 151 public: 152 GCNMaxILPSchedStrategy(const MachineSchedContext *C); 153 }; 154 155 /// The goal of this scheduling strategy is to maximize memory clause for a 156 /// single wave. 157 class GCNMaxMemoryClauseSchedStrategy final : public GCNSchedStrategy { 158 protected: 159 bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, 160 SchedBoundary *Zone) const override; 161 162 public: 163 GCNMaxMemoryClauseSchedStrategy(const MachineSchedContext *C); 164 }; 165 166 class ScheduleMetrics { 167 unsigned ScheduleLength; 168 unsigned BubbleCycles; 169 170 public: ScheduleMetrics()171 ScheduleMetrics() {} ScheduleMetrics(unsigned L,unsigned BC)172 ScheduleMetrics(unsigned L, unsigned BC) 173 : ScheduleLength(L), BubbleCycles(BC) {} getLength()174 unsigned getLength() const { return ScheduleLength; } getBubbles()175 unsigned getBubbles() const { return BubbleCycles; } getMetric()176 unsigned getMetric() const { 177 unsigned Metric = (BubbleCycles * ScaleFactor) / ScheduleLength; 178 // Metric is zero if the amount of bubbles is less than 1% which is too 179 // small. So, return 1. 180 return Metric ? Metric : 1; 181 } 182 static const unsigned ScaleFactor; 183 }; 184 185 inline raw_ostream &operator<<(raw_ostream &OS, const ScheduleMetrics &Sm) { 186 dbgs() << "\n Schedule Metric (scaled by " 187 << ScheduleMetrics::ScaleFactor 188 << " ) is: " << Sm.getMetric() << " [ " << Sm.getBubbles() << "/" 189 << Sm.getLength() << " ]\n"; 190 return OS; 191 } 192 193 class GCNScheduleDAGMILive; 194 class RegionPressureMap { 195 GCNScheduleDAGMILive *DAG; 196 // The live in/out pressure as indexed by the first or last MI in the region 197 // before scheduling. 198 DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> RegionLiveRegMap; 199 // The mapping of RegionIDx to key instruction 200 DenseMap<unsigned, MachineInstr *> IdxToInstruction; 201 // Whether we are calculating LiveOuts or LiveIns 202 bool IsLiveOut; 203 204 public: RegionPressureMap()205 RegionPressureMap() {} RegionPressureMap(GCNScheduleDAGMILive * GCNDAG,bool LiveOut)206 RegionPressureMap(GCNScheduleDAGMILive *GCNDAG, bool LiveOut) 207 : DAG(GCNDAG), IsLiveOut(LiveOut) {} 208 // Build the Instr->LiveReg and RegionIdx->Instr maps 209 void buildLiveRegMap(); 210 211 // Retrieve the LiveReg for a given RegionIdx getLiveRegsForRegionIdx(unsigned RegionIdx)212 GCNRPTracker::LiveRegSet &getLiveRegsForRegionIdx(unsigned RegionIdx) { 213 assert(IdxToInstruction.contains(RegionIdx)); 214 MachineInstr *Key = IdxToInstruction[RegionIdx]; 215 return RegionLiveRegMap[Key]; 216 } 217 }; 218 219 /// A region's boundaries i.e. a pair of instruction bundle iterators. The lower 220 /// boundary is inclusive, the upper boundary is exclusive. 221 using RegionBoundaries = 222 std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>; 223 224 class GCNScheduleDAGMILive final : public ScheduleDAGMILive { 225 friend class GCNSchedStage; 226 friend class OccInitialScheduleStage; 227 friend class UnclusteredHighRPStage; 228 friend class ClusteredLowOccStage; 229 friend class PreRARematStage; 230 friend class ILPInitialScheduleStage; 231 friend class RegionPressureMap; 232 233 const GCNSubtarget &ST; 234 235 SIMachineFunctionInfo &MFI; 236 237 // Occupancy target at the beginning of function scheduling cycle. 238 unsigned StartingOccupancy; 239 240 // Minimal real occupancy recorder for the function. 241 unsigned MinOccupancy; 242 243 // Vector of regions recorder for later rescheduling 244 SmallVector<RegionBoundaries, 32> Regions; 245 246 // Record regions with high register pressure. 247 BitVector RegionsWithHighRP; 248 249 // Record regions with excess register pressure over the physical register 250 // limit. Register pressure in these regions usually will result in spilling. 251 BitVector RegionsWithExcessRP; 252 253 // Regions that has the same occupancy as the latest MinOccupancy 254 BitVector RegionsWithMinOcc; 255 256 // Regions that have IGLP instructions (SCHED_GROUP_BARRIER or IGLP_OPT). 257 BitVector RegionsWithIGLPInstrs; 258 259 // Region live-in cache. 260 SmallVector<GCNRPTracker::LiveRegSet, 32> LiveIns; 261 262 // Region pressure cache. 263 SmallVector<GCNRegPressure, 32> Pressure; 264 265 // Temporary basic block live-in cache. 266 DenseMap<const MachineBasicBlock *, GCNRPTracker::LiveRegSet> MBBLiveIns; 267 268 // The map of the initial first region instruction to region live in registers 269 DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> BBLiveInMap; 270 271 // Calculate the map of the initial first region instruction to region live in 272 // registers 273 DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> getRegionLiveInMap() const; 274 275 // Calculate the map of the initial last region instruction to region live out 276 // registers 277 DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> 278 getRegionLiveOutMap() const; 279 280 // The live out registers per region. These are internally stored as a map of 281 // the initial last region instruction to region live out registers, but can 282 // be retreived with the regionIdx by calls to getLiveRegsForRegionIdx. 283 RegionPressureMap RegionLiveOuts; 284 285 // Return current region pressure. 286 GCNRegPressure getRealRegPressure(unsigned RegionIdx) const; 287 288 // Compute and cache live-ins and pressure for all regions in block. 289 void computeBlockPressure(unsigned RegionIdx, const MachineBasicBlock *MBB); 290 291 /// If necessary, updates a region's boundaries following insertion ( \p NewMI 292 /// != nullptr) or removal ( \p NewMI == nullptr) of a \p MI in the region. 293 /// For an MI removal, this must be called before the MI is actually erased 294 /// from its parent MBB. 295 void updateRegionBoundaries(RegionBoundaries &RegionBounds, 296 MachineBasicBlock::iterator MI, 297 MachineInstr *NewMI); 298 299 void runSchedStages(); 300 301 std::unique_ptr<GCNSchedStage> createSchedStage(GCNSchedStageID SchedStageID); 302 303 public: 304 GCNScheduleDAGMILive(MachineSchedContext *C, 305 std::unique_ptr<MachineSchedStrategy> S); 306 307 void schedule() override; 308 309 void finalizeSchedule() override; 310 }; 311 312 // GCNSchedStrategy applies multiple scheduling stages to a function. 313 class GCNSchedStage { 314 protected: 315 GCNScheduleDAGMILive &DAG; 316 317 GCNSchedStrategy &S; 318 319 MachineFunction &MF; 320 321 SIMachineFunctionInfo &MFI; 322 323 const GCNSubtarget &ST; 324 325 const GCNSchedStageID StageID; 326 327 // The current block being scheduled. 328 MachineBasicBlock *CurrentMBB = nullptr; 329 330 // Current region index. 331 unsigned RegionIdx = 0; 332 333 // Record the original order of instructions before scheduling. 334 std::vector<MachineInstr *> Unsched; 335 336 // RP before scheduling the current region. 337 GCNRegPressure PressureBefore; 338 339 // RP after scheduling the current region. 340 GCNRegPressure PressureAfter; 341 342 std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations; 343 344 GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG); 345 346 public: 347 // Initialize state for a scheduling stage. Returns false if the current stage 348 // should be skipped. 349 virtual bool initGCNSchedStage(); 350 351 // Finalize state after finishing a scheduling pass on the function. 352 virtual void finalizeGCNSchedStage(); 353 354 // Setup for scheduling a region. Returns false if the current region should 355 // be skipped. 356 virtual bool initGCNRegion(); 357 358 // Track whether a new region is also a new MBB. 359 void setupNewBlock(); 360 361 // Finalize state after scheudling a region. 362 void finalizeGCNRegion(); 363 364 // Check result of scheduling. 365 void checkScheduling(); 366 367 // computes the given schedule virtual execution time in clocks 368 ScheduleMetrics getScheduleMetrics(const std::vector<SUnit> &InputSchedule); 369 ScheduleMetrics getScheduleMetrics(const GCNScheduleDAGMILive &DAG); 370 unsigned computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle, 371 DenseMap<unsigned, unsigned> &ReadyCycles, 372 const TargetSchedModel &SM); 373 374 // Returns true if scheduling should be reverted. 375 virtual bool shouldRevertScheduling(unsigned WavesAfter); 376 377 // Returns true if current region has known excess pressure. isRegionWithExcessRP()378 bool isRegionWithExcessRP() const { 379 return DAG.RegionsWithExcessRP[RegionIdx]; 380 } 381 382 // The region number this stage is currently working on getRegionIdx()383 unsigned getRegionIdx() { return RegionIdx; } 384 385 // Returns true if the new schedule may result in more spilling. 386 bool mayCauseSpilling(unsigned WavesAfter); 387 388 // Attempt to revert scheduling for this region. 389 void revertScheduling(); 390 advanceRegion()391 void advanceRegion() { RegionIdx++; } 392 393 virtual ~GCNSchedStage() = default; 394 }; 395 396 class OccInitialScheduleStage : public GCNSchedStage { 397 public: 398 bool shouldRevertScheduling(unsigned WavesAfter) override; 399 OccInitialScheduleStage(GCNSchedStageID StageID,GCNScheduleDAGMILive & DAG)400 OccInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG) 401 : GCNSchedStage(StageID, DAG) {} 402 }; 403 404 class UnclusteredHighRPStage : public GCNSchedStage { 405 private: 406 // Save the initial occupancy before starting this stage. 407 unsigned InitialOccupancy; 408 409 public: 410 bool initGCNSchedStage() override; 411 412 void finalizeGCNSchedStage() override; 413 414 bool initGCNRegion() override; 415 416 bool shouldRevertScheduling(unsigned WavesAfter) override; 417 UnclusteredHighRPStage(GCNSchedStageID StageID,GCNScheduleDAGMILive & DAG)418 UnclusteredHighRPStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG) 419 : GCNSchedStage(StageID, DAG) {} 420 }; 421 422 // Retry function scheduling if we found resulting occupancy and it is 423 // lower than used for other scheduling passes. This will give more freedom 424 // to schedule low register pressure blocks. 425 class ClusteredLowOccStage : public GCNSchedStage { 426 public: 427 bool initGCNSchedStage() override; 428 429 bool initGCNRegion() override; 430 431 bool shouldRevertScheduling(unsigned WavesAfter) override; 432 ClusteredLowOccStage(GCNSchedStageID StageID,GCNScheduleDAGMILive & DAG)433 ClusteredLowOccStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG) 434 : GCNSchedStage(StageID, DAG) {} 435 }; 436 437 /// Attempts to reduce function spilling or, if there is no spilling, to 438 /// increase function occupancy by one with respect to ArchVGPR usage by sinking 439 /// trivially rematerializable instructions to their use. When the stage 440 /// estimates reducing spilling or increasing occupancy is possible, as few 441 /// instructions as possible are rematerialized to reduce potential negative 442 /// effects on function latency. 443 class PreRARematStage : public GCNSchedStage { 444 private: 445 /// Useful information about a rematerializable instruction. 446 struct RematInstruction { 447 /// Single use of the rematerializable instruction's defined register, 448 /// located in a different block. 449 MachineInstr *UseMI; 450 /// Rematerialized version of \p DefMI, set in 451 /// PreRARematStage::rematerialize. Used for reverting rematerializations. 452 MachineInstr *RematMI; 453 /// Set of regions in which the rematerializable instruction's defined 454 /// register is a live-in. 455 SmallDenseSet<unsigned, 4> LiveInRegions; 456 RematInstructionRematInstruction457 RematInstruction(MachineInstr *UseMI) : UseMI(UseMI) {} 458 }; 459 460 /// Maps all MIs to their parent region. MI terminators are considered to be 461 /// outside the region they delimitate, and as such are not stored in the map. 462 DenseMap<MachineInstr *, unsigned> MIRegion; 463 /// Parent MBB to each region, in region order. 464 SmallVector<MachineBasicBlock *> RegionBB; 465 /// Collects instructions to rematerialize. 466 MapVector<MachineInstr *, RematInstruction> Rematerializations; 467 /// Collects regions whose live-ins or register pressure will change due to 468 /// rematerializations. 469 DenseMap<unsigned, GCNRegPressure> ImpactedRegions; 470 /// In case we need to rollback rematerializations, save lane masks for all 471 /// rematerialized registers in all regions in which they are live-ins. 472 DenseMap<std::pair<unsigned, Register>, LaneBitmask> RegMasks; 473 /// After successful stage initialization, indicates which regions should be 474 /// rescheduled. 475 BitVector RescheduleRegions; 476 /// Target occupancy the stage estimates is reachable through 477 /// rematerialization. Greater than or equal to the pre-stage min occupancy. 478 unsigned TargetOcc; 479 /// Achieved occupancy *only* through rematerializations (pre-rescheduling). 480 /// Smaller than or equal to the target occupancy. 481 unsigned AchievedOcc; 482 /// Whether the stage is attempting to increase occupancy in the abscence of 483 /// spilling. 484 bool IncreaseOccupancy; 485 486 /// Returns whether remat can reduce spilling or increase function occupancy 487 /// by 1 through rematerialization. If it can do one, collects instructions in 488 /// PreRARematStage::Rematerializations and sets the target occupancy in 489 /// PreRARematStage::TargetOccupancy. 490 bool canIncreaseOccupancyOrReduceSpill(); 491 492 /// Whether the MI is trivially rematerializable and does not have any virtual 493 /// register use. 494 bool isTriviallyReMaterializable(const MachineInstr &MI); 495 496 /// Rematerializes all instructions in PreRARematStage::Rematerializations 497 /// and stores the achieved occupancy after remat in 498 /// PreRARematStage::AchievedOcc. 499 void rematerialize(); 500 501 /// If remat alone did not increase occupancy to the target one, rollbacks all 502 /// rematerializations and resets live-ins/RP in all regions impacted by the 503 /// stage to their pre-stage values. 504 void finalizeGCNSchedStage() override; 505 506 /// \p Returns true if all the uses in \p InstToRemat defined at \p 507 /// OriginalIdx are live at \p RematIdx. This only checks liveness of virtual 508 /// reg uses. 509 bool allUsesAvailableAt(const MachineInstr *InstToRemat, 510 SlotIndex OriginalIdx, SlotIndex RematIdx) const; 511 512 public: 513 bool initGCNSchedStage() override; 514 515 bool initGCNRegion() override; 516 517 bool shouldRevertScheduling(unsigned WavesAfter) override; 518 PreRARematStage(GCNSchedStageID StageID,GCNScheduleDAGMILive & DAG)519 PreRARematStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG) 520 : GCNSchedStage(StageID, DAG), RescheduleRegions(DAG.Regions.size()) {} 521 }; 522 523 class ILPInitialScheduleStage : public GCNSchedStage { 524 public: 525 bool shouldRevertScheduling(unsigned WavesAfter) override; 526 ILPInitialScheduleStage(GCNSchedStageID StageID,GCNScheduleDAGMILive & DAG)527 ILPInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG) 528 : GCNSchedStage(StageID, DAG) {} 529 }; 530 531 class MemoryClauseInitialScheduleStage : public GCNSchedStage { 532 public: 533 bool shouldRevertScheduling(unsigned WavesAfter) override; 534 MemoryClauseInitialScheduleStage(GCNSchedStageID StageID,GCNScheduleDAGMILive & DAG)535 MemoryClauseInitialScheduleStage(GCNSchedStageID StageID, 536 GCNScheduleDAGMILive &DAG) 537 : GCNSchedStage(StageID, DAG) {} 538 }; 539 540 class GCNPostScheduleDAGMILive final : public ScheduleDAGMI { 541 private: 542 std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations; 543 544 bool HasIGLPInstrs = false; 545 546 public: 547 void schedule() override; 548 549 void finalizeSchedule() override; 550 551 GCNPostScheduleDAGMILive(MachineSchedContext *C, 552 std::unique_ptr<MachineSchedStrategy> S, 553 bool RemoveKillFlags); 554 }; 555 556 } // End namespace llvm 557 558 #endif // LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H 559