//===-- GCNSchedStrategy.h - GCN Scheduler Strategy -*- C++ -*-------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file declares the GCN-specific machine scheduler strategies and the
/// multi-stage scheduling drivers built on top of them.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
#define LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H

#include "GCNRegPressure.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/CodeGen/MachineScheduler.h"

namespace llvm {

class SIMachineFunctionInfo;
class SIRegisterInfo;
class GCNSubtarget;
class GCNSchedStage;

enum class GCNSchedStageID : unsigned {
  OccInitialSchedule = 0,
  UnclusteredHighRPReschedule = 1,
  ClusteredLowOccupancyReschedule = 2,
  PreRARematerialize = 3,
  ILPInitialSchedule = 4
};

#ifndef NDEBUG
raw_ostream &operator<<(raw_ostream &OS, const GCNSchedStageID &StageID);
#endif

/// This is a minimal scheduler strategy. The main difference between this
/// and the GenericScheduler is that GCNSchedStrategy uses different
/// heuristics to determine excess/critical pressure sets.
class GCNSchedStrategy : public GenericScheduler {
protected:
  SUnit *pickNodeBidirectional(bool &IsTopNode);

  void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy,
                         const RegPressureTracker &RPTracker,
                         SchedCandidate &Cand, bool IsBottomUp);

  void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop,
                     const RegPressureTracker &RPTracker,
                     const SIRegisterInfo *SRI, unsigned SGPRPressure,
                     unsigned VGPRPressure, bool IsBottomUp);

  std::vector<unsigned> Pressure;

  std::vector<unsigned> MaxPressure;

  unsigned SGPRExcessLimit;

  unsigned VGPRExcessLimit;

  unsigned TargetOccupancy;

  MachineFunction *MF;

  // Scheduling stages for this strategy.
  SmallVector<GCNSchedStageID, 4> SchedStages;

  // Iterator pointing to the current stage in SchedStages.
  SmallVectorImpl<GCNSchedStageID>::iterator CurrentStage = nullptr;

public:
  // schedule() has seen register pressure over the critical limits and had to
  // track register pressure for actual scheduling heuristics.
  bool HasHighPressure;

  // Schedule known to have excess register pressure. Be more conservative in
  // increasing ILP and preserving VGPRs.
  bool KnownExcessRP = false;

  // An error margin is necessary because of the poor performance of the
  // generic RP tracker; it can be adjusted up when tuning heuristics to reduce
  // register pressure more aggressively.
  unsigned ErrorMargin = 3;

  // Bias for SGPR limits under a high register pressure.
  const unsigned HighRPSGPRBias = 7;

  // Bias for VGPR limits under a high register pressure.
  const unsigned HighRPVGPRBias = 7;

  unsigned SGPRCriticalLimit;

  unsigned VGPRCriticalLimit;

  unsigned SGPRLimitBias = 0;

  unsigned VGPRLimitBias = 0;

  GCNSchedStrategy(const MachineSchedContext *C);

  SUnit *pickNode(bool &IsTopNode) override;

  void initialize(ScheduleDAGMI *DAG) override;

  unsigned getTargetOccupancy() { return TargetOccupancy; }

  void setTargetOccupancy(unsigned Occ) { TargetOccupancy = Occ; }

  GCNSchedStageID getCurrentStage();

  // Advances to the next stage. Returns true if there are remaining stages.
  bool advanceStage();

  bool hasNextStage() const;

  GCNSchedStageID getNextStage() const;
};
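// Illustrative sketch (not part of the in-tree implementation): SchedStages is
// expected to be populated by a derived strategy's constructor, and the stage
// accessors above walk that list one entry at a time. MyStrategy below is
// hypothetical; the real constructors live in GCNSchedStrategy.cpp.
//
//   struct MyStrategy : GCNSchedStrategy {
//     MyStrategy(const MachineSchedContext *C) : GCNSchedStrategy(C) {
//       SchedStages.push_back(GCNSchedStageID::OccInitialSchedule);
//       SchedStages.push_back(GCNSchedStageID::PreRARematerialize);
//     }
//   };
//
// advanceStage() then moves CurrentStage forward and returns false once every
// entry in SchedStages has been consumed.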
/// The goal of this scheduling strategy is to maximize kernel occupancy (i.e.
/// maximum number of waves per SIMD).
class GCNMaxOccupancySchedStrategy final : public GCNSchedStrategy {
public:
  GCNMaxOccupancySchedStrategy(const MachineSchedContext *C);
};

/// The goal of this scheduling strategy is to maximize ILP for a single wave
/// (i.e. latency hiding).
class GCNMaxILPSchedStrategy final : public GCNSchedStrategy {
protected:
  bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
                    SchedBoundary *Zone) const override;

public:
  GCNMaxILPSchedStrategy(const MachineSchedContext *C);
};

class ScheduleMetrics {
  unsigned ScheduleLength;
  unsigned BubbleCycles;

public:
  ScheduleMetrics() {}
  ScheduleMetrics(unsigned L, unsigned BC)
      : ScheduleLength(L), BubbleCycles(BC) {}
  unsigned getLength() const { return ScheduleLength; }
  unsigned getBubbles() const { return BubbleCycles; }
  unsigned getMetric() const {
    unsigned Metric = (BubbleCycles * ScaleFactor) / ScheduleLength;
    // Metric is zero if the amount of bubbles is less than 1%, which is too
    // small, so return 1 instead.
    return Metric ? Metric : 1;
  }
  static const unsigned ScaleFactor;
};

inline raw_ostream &operator<<(raw_ostream &OS, const ScheduleMetrics &Sm) {
  OS << "\n Schedule Metric (scaled by " << ScheduleMetrics::ScaleFactor
     << " ) is: " << Sm.getMetric() << " [ " << Sm.getBubbles() << "/"
     << Sm.getLength() << " ]\n";
  return OS;
}
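// Worked example for getMetric() above, under the assumption that ScaleFactor
// is 100 (consistent with the "1%" comment; the actual value is defined in
// GCNSchedStrategy.cpp):
//
//   ScheduleMetrics SM(/*L=*/200, /*BC=*/30);
//   unsigned M = SM.getMetric(); // (30 * 100) / 200 == 15, i.e. bubbles
//                                // account for roughly 15% of the schedule.
//
// A schedule with fewer bubbles than 1% of its length still reports 1, so a
// metric of 1 does not necessarily mean the schedule has any stalls.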
class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
  friend class GCNSchedStage;
  friend class OccInitialScheduleStage;
  friend class UnclusteredHighRPStage;
  friend class ClusteredLowOccStage;
  friend class PreRARematStage;
  friend class ILPInitialScheduleStage;

  const GCNSubtarget &ST;

  SIMachineFunctionInfo &MFI;

  // Occupancy target at the beginning of the function scheduling cycle.
  unsigned StartingOccupancy;

  // Minimal real occupancy recorded for the function.
  unsigned MinOccupancy;

  // Vector of regions recorded for later rescheduling.
  SmallVector<std::pair<MachineBasicBlock::iterator,
                        MachineBasicBlock::iterator>, 32> Regions;

  // Records if a region is not yet scheduled, or its schedule has been
  // reverted, or we generally want to reschedule it.
  BitVector RescheduleRegions;

  // Record regions with high register pressure.
  BitVector RegionsWithHighRP;

  // Record regions with excess register pressure over the physical register
  // limit. Register pressure in these regions will usually result in spilling.
  BitVector RegionsWithExcessRP;

  // Regions that have the same occupancy as the latest MinOccupancy.
  BitVector RegionsWithMinOcc;

  // Regions that have IGLP instructions (SCHED_GROUP_BARRIER or IGLP_OPT).
  BitVector RegionsWithIGLPInstrs;

  // Region live-in cache.
  SmallVector<GCNRPTracker::LiveRegSet, 32> LiveIns;

  // Region pressure cache.
  SmallVector<GCNRegPressure, 32> Pressure;

  // Temporary basic block live-in cache.
  DenseMap<const MachineBasicBlock *, GCNRPTracker::LiveRegSet> MBBLiveIns;

  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> BBLiveInMap;

  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> getBBLiveInMap() const;

  // Return current region pressure.
  GCNRegPressure getRealRegPressure(unsigned RegionIdx) const;

  // Compute and cache live-ins and pressure for all regions in the block.
  void computeBlockPressure(unsigned RegionIdx, const MachineBasicBlock *MBB);

  // Update region boundaries when removing MI or inserting NewMI before MI.
  void updateRegionBoundaries(
      SmallVectorImpl<std::pair<MachineBasicBlock::iterator,
                                MachineBasicBlock::iterator>> &RegionBoundaries,
      MachineBasicBlock::iterator MI, MachineInstr *NewMI,
      bool Removing = false);

  void runSchedStages();

  std::unique_ptr<GCNSchedStage> createSchedStage(GCNSchedStageID SchedStageID);

public:
  GCNScheduleDAGMILive(MachineSchedContext *C,
                       std::unique_ptr<MachineSchedStrategy> S);

  void schedule() override;

  void finalizeSchedule() override;
};
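// Rough sketch of how runSchedStages() above is intended to drive the
// GCNSchedStage objects declared below. This is reconstructed from the
// interfaces in this header and is simplified; the real loop in
// GCNSchedStrategy.cpp also manages region boundaries, liveness updates and
// revert bookkeeping.
//
//   auto &S = static_cast<GCNSchedStrategy &>(*SchedImpl);
//   while (S.advanceStage()) {
//     auto Stage = createSchedStage(S.getCurrentStage());
//     if (!Stage->initGCNSchedStage())
//       continue; // stage asked to be skipped
//     for (auto &Region : Regions) {
//       // ... set RegionBegin/RegionEnd from Region ...
//       if (!Stage->initGCNRegion()) {
//         Stage->advanceRegion();
//         continue; // region asked to be skipped
//       }
//       ScheduleDAGMILive::schedule();
//       Stage->finalizeGCNRegion(); // checks RP and may revert the schedule
//     }
//     Stage->finalizeGCNSchedStage();
//   }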
// GCNSchedStrategy applies multiple scheduling stages to a function.
class GCNSchedStage {
protected:
  GCNScheduleDAGMILive &DAG;

  GCNSchedStrategy &S;

  MachineFunction &MF;

  SIMachineFunctionInfo &MFI;

  const GCNSubtarget &ST;

  const GCNSchedStageID StageID;

  // The current block being scheduled.
  MachineBasicBlock *CurrentMBB = nullptr;

  // Current region index.
  unsigned RegionIdx = 0;

  // Record the original order of instructions before scheduling.
  std::vector<MachineInstr *> Unsched;

  // RP before scheduling the current region.
  GCNRegPressure PressureBefore;

  // RP after scheduling the current region.
  GCNRegPressure PressureAfter;

  std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;

  GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG);

public:
  // Initialize state for a scheduling stage. Returns false if the current
  // stage should be skipped.
  virtual bool initGCNSchedStage();

  // Finalize state after finishing a scheduling pass on the function.
  virtual void finalizeGCNSchedStage();

  // Setup for scheduling a region. Returns false if the current region should
  // be skipped.
  virtual bool initGCNRegion();

  // Track whether a new region is also a new MBB.
  void setupNewBlock();

  // Finalize state after scheduling a region.
  void finalizeGCNRegion();

  // Check the result of scheduling.
  void checkScheduling();

  // Compute the virtual execution time of the given schedule, in clock cycles.
  ScheduleMetrics getScheduleMetrics(const std::vector<SUnit> &InputSchedule);
  ScheduleMetrics getScheduleMetrics(const GCNScheduleDAGMILive &DAG);
  unsigned computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle,
                                  DenseMap<unsigned, unsigned> &ReadyCycles,
                                  const TargetSchedModel &SM);

  // Returns true if scheduling should be reverted.
  virtual bool shouldRevertScheduling(unsigned WavesAfter);

  // Returns true if the current region has known excess pressure.
  bool isRegionWithExcessRP() const {
    return DAG.RegionsWithExcessRP[RegionIdx];
  }

  // Returns true if the new schedule may result in more spilling.
  bool mayCauseSpilling(unsigned WavesAfter);

  // Attempt to revert scheduling for this region.
  void revertScheduling();

  void advanceRegion() { RegionIdx++; }

  virtual ~GCNSchedStage() = default;
};

class OccInitialScheduleStage : public GCNSchedStage {
public:
  bool shouldRevertScheduling(unsigned WavesAfter) override;

  OccInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};

class UnclusteredHighRPStage : public GCNSchedStage {
private:
  // Save the initial occupancy before starting this stage.
  unsigned InitialOccupancy;

public:
  bool initGCNSchedStage() override;

  void finalizeGCNSchedStage() override;

  bool initGCNRegion() override;

  bool shouldRevertScheduling(unsigned WavesAfter) override;

  UnclusteredHighRPStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};

// Retry function scheduling if we found that the resulting occupancy is lower
// than the occupancy used for the other scheduling passes. This gives more
// freedom to schedule low register pressure blocks.
class ClusteredLowOccStage : public GCNSchedStage {
public:
  bool initGCNSchedStage() override;

  bool initGCNRegion() override;

  bool shouldRevertScheduling(unsigned WavesAfter) override;

  ClusteredLowOccStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};

class PreRARematStage : public GCNSchedStage {
private:
  // Each region at MinOccupancy has its own list of trivially rematerializable
  // instructions we can remat to reduce RP. The list maps an instruction to
  // the position we should remat before, usually the MI using the
  // rematerializable instruction.
  MapVector<unsigned, MapVector<MachineInstr *, MachineInstr *>>
      RematerializableInsts;

  // Map a trivially rematerializable def to a list of regions at MinOccupancy
  // that have the defined reg as a live-in.
  DenseMap<MachineInstr *, SmallVector<unsigned, 4>> RematDefToLiveInRegions;
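  // Hypothetical illustration of how the two maps above relate (instruction
  // and region numbers are made up): if DefMI is a trivially rematerializable
  // VGPR def in region 0 whose single use UseMI is in region 3, and the
  // defined reg is live-in to regions 1-3 which are all at MinOccupancy, then
  //   RematDefToLiveInRegions[DefMI] == {1, 2, 3}, and
  //   RematerializableInsts[R][DefMI] == UseMI for each such region R,
  // i.e. the def can be re-created right before its use, shrinking the live
  // range across those regions.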
  // Collect all trivially rematerializable VGPR instructions with a single def
  // and single use outside the defining block into RematerializableInsts.
  void collectRematerializableInstructions();

  bool isTriviallyReMaterializable(const MachineInstr &MI);

  // TODO: Should also attempt to reduce RP of SGPRs and AGPRs.
  // Attempt to reduce RP of VGPRs by sinking trivially rematerializable
  // instructions. Returns true if we were able to sink instruction(s).
  bool sinkTriviallyRematInsts(const GCNSubtarget &ST,
                               const TargetInstrInfo *TII);

public:
  bool initGCNSchedStage() override;

  bool initGCNRegion() override;

  bool shouldRevertScheduling(unsigned WavesAfter) override;

  PreRARematStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};

class ILPInitialScheduleStage : public GCNSchedStage {
public:
  bool shouldRevertScheduling(unsigned WavesAfter) override;

  ILPInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};

class GCNPostScheduleDAGMILive final : public ScheduleDAGMI {
private:
  std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;

  bool HasIGLPInstrs = false;

public:
  void schedule() override;

  void finalizeSchedule() override;

  GCNPostScheduleDAGMILive(MachineSchedContext *C,
                           std::unique_ptr<MachineSchedStrategy> S,
                           bool RemoveKillFlags);
};

} // End namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H