//===-- GCNSchedStrategy.h - GCN Scheduler Strategy -*- C++ -*-------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file declares the GCN-specific scheduler strategy and the multi-stage
/// scheduling driver used for AMDGPU pre-RA scheduling.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
#define LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H

#include "GCNRegPressure.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/CodeGen/MachineScheduler.h"

namespace llvm {

class SIMachineFunctionInfo;
class SIRegisterInfo;
class GCNSubtarget;

/// This is a minimal scheduler strategy. The main difference between this
/// and the GenericScheduler is that GCNSchedStrategy uses different
/// heuristics to determine excess/critical pressure sets. Its goal is to
/// maximize kernel occupancy (i.e. the maximum number of waves per SIMD).
class GCNMaxOccupancySchedStrategy final : public GenericScheduler {
  SUnit *pickNodeBidirectional(bool &IsTopNode);

  void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy,
                         const RegPressureTracker &RPTracker,
                         SchedCandidate &Cand);

  void initCandidate(SchedCandidate &Cand, SUnit *SU,
                     bool AtTop, const RegPressureTracker &RPTracker,
                     const SIRegisterInfo *SRI,
                     unsigned SGPRPressure, unsigned VGPRPressure);

  std::vector<unsigned> Pressure;

  std::vector<unsigned> MaxPressure;

  unsigned SGPRExcessLimit;

  unsigned VGPRExcessLimit;

  unsigned TargetOccupancy;

  MachineFunction *MF;

public:
  // Set to true when schedule() has seen a clustered memory operation. Reset
  // to false before scheduling each region so we know whether the region
  // contained such clusters.
  bool HasClusteredNodes;

  // Set to true when schedule() has seen excess register pressure and had to
  // track register pressure for the actual scheduling heuristics.
  bool HasExcessPressure;

  unsigned SGPRCriticalLimit;

  unsigned VGPRCriticalLimit;

  GCNMaxOccupancySchedStrategy(const MachineSchedContext *C);

  SUnit *pickNode(bool &IsTopNode) override;

  void initialize(ScheduleDAGMI *DAG) override;

  unsigned getTargetOccupancy() { return TargetOccupancy; }

  void setTargetOccupancy(unsigned Occ) { TargetOccupancy = Occ; }
};
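
// Illustrative only: a minimal sketch (not part of this header) of how this
// strategy is typically plugged into the pre-RA machine scheduler, e.g. from a
// TargetPassConfig::createMachineScheduler() override. The DAG mutation shown
// is just an example of what a target might add.
//
//   ScheduleDAGInstrs *createMachineScheduler(MachineSchedContext *C) {
//     ScheduleDAGMILive *DAG = new GCNScheduleDAGMILive(
//         C, std::make_unique<GCNMaxOccupancySchedStrategy>(C));
//     DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
//     return DAG;
//   }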

enum class GCNSchedStageID : unsigned {
  InitialSchedule = 0,
  UnclusteredReschedule = 1,
  ClusteredLowOccupancyReschedule = 2,
  PreRARematerialize = 3,
  LastStage = PreRARematerialize
};

#ifndef NDEBUG
raw_ostream &operator<<(raw_ostream &OS, const GCNSchedStageID &StageID);
#endif

inline GCNSchedStageID &operator++(GCNSchedStageID &Stage, int) {
  assert(Stage != GCNSchedStageID::PreRARematerialize);
  Stage = static_cast<GCNSchedStageID>(static_cast<unsigned>(Stage) + 1);
  return Stage;
}

inline GCNSchedStageID nextStage(const GCNSchedStageID Stage) {
  return static_cast<GCNSchedStageID>(static_cast<unsigned>(Stage) + 1);
}

inline bool operator>(GCNSchedStageID &LHS, GCNSchedStageID &RHS) {
  return static_cast<unsigned>(LHS) > static_cast<unsigned>(RHS);
}
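
// Illustrative only: a sketch of how the stage IDs and nextStage() above can
// be used to visit every stage in order. The real driver loop lives in
// GCNScheduleDAGMILive::runSchedStages().
//
//   GCNSchedStageID Stage = GCNSchedStageID::InitialSchedule;
//   while (true) {
//     // ... run the current stage over all scheduling regions ...
//     if (Stage == GCNSchedStageID::LastStage)
//       break;
//     Stage = nextStage(Stage);
//   }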

// ScheduleDAGMILive subclass that drives the multi-stage GCN scheduling over
// all scheduling regions of a function and caches per-region live-ins and
// register pressure.
class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
  friend class GCNSchedStage;
  friend class InitialScheduleStage;
  friend class UnclusteredRescheduleStage;
  friend class ClusteredLowOccStage;
  friend class PreRARematStage;

  const GCNSubtarget &ST;

  SIMachineFunctionInfo &MFI;

  // Occupancy target at the beginning of the function scheduling cycle.
  unsigned StartingOccupancy;

  // Minimal real occupancy recorded for the function.
  unsigned MinOccupancy;

  // Vector of regions recorded for later rescheduling.
  SmallVector<std::pair<MachineBasicBlock::iterator,
                        MachineBasicBlock::iterator>, 32> Regions;

  // Records whether a region is not yet scheduled, its schedule has been
  // reverted, or we generally want to reschedule it.
  BitVector RescheduleRegions;

  // Record regions which use clustered loads/stores.
  BitVector RegionsWithClusters;

  // Record regions with high register pressure.
  BitVector RegionsWithHighRP;

  // Regions that have the same occupancy as the latest MinOccupancy.
  BitVector RegionsWithMinOcc;

  // Region live-in cache.
  SmallVector<GCNRPTracker::LiveRegSet, 32> LiveIns;

  // Region pressure cache.
  SmallVector<GCNRegPressure, 32> Pressure;

  // Temporary basic block live-in cache.
  DenseMap<const MachineBasicBlock *, GCNRPTracker::LiveRegSet> MBBLiveIns;

  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> BBLiveInMap;

  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> getBBLiveInMap() const;

  // Return the current register pressure for the given region.
  GCNRegPressure getRealRegPressure(unsigned RegionIdx) const;

  // Compute and cache live-ins and pressure for all regions in the block.
  void computeBlockPressure(unsigned RegionIdx, const MachineBasicBlock *MBB);

  // Update region boundaries when removing MI or inserting NewMI before MI.
  void updateRegionBoundaries(
      SmallVectorImpl<std::pair<MachineBasicBlock::iterator,
                                MachineBasicBlock::iterator>> &RegionBoundaries,
      MachineBasicBlock::iterator MI, MachineInstr *NewMI,
      bool Removing = false);

  void runSchedStages();

public:
  GCNScheduleDAGMILive(MachineSchedContext *C,
                       std::unique_ptr<MachineSchedStrategy> S);

  void schedule() override;

  void finalizeSchedule() override;
};

// GCNSchedStrategy applies multiple scheduling stages to a function.
// GCNSchedStage is the common base class for a single stage.
class GCNSchedStage {
protected:
  GCNScheduleDAGMILive &DAG;

  GCNMaxOccupancySchedStrategy &S;

  MachineFunction &MF;

  SIMachineFunctionInfo &MFI;

  const GCNSubtarget &ST;

  const GCNSchedStageID StageID;

  // The current block being scheduled.
  MachineBasicBlock *CurrentMBB = nullptr;

  // Current region index.
  unsigned RegionIdx = 0;

  // Record the original order of instructions before scheduling.
  std::vector<MachineInstr *> Unsched;

  // RP before scheduling the current region.
  GCNRegPressure PressureBefore;

  // RP after scheduling the current region.
  GCNRegPressure PressureAfter;

  GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG);

public:
  // Initialize state for a scheduling stage. Returns false if the current
  // stage should be skipped.
  virtual bool initGCNSchedStage();

  // Finalize state after finishing a scheduling pass on the function.
  virtual void finalizeGCNSchedStage();

  // Set up for scheduling a region. Returns false if the current region
  // should be skipped.
  virtual bool initGCNRegion();

  // Track whether a new region is also a new MBB.
  void setupNewBlock();

  // Finalize state after scheduling a region.
  virtual void finalizeGCNRegion();

  // Check the result of scheduling.
  void checkScheduling();

  // Returns true if scheduling should be reverted.
  virtual bool shouldRevertScheduling(unsigned WavesAfter);

  // Returns true if the new schedule may result in more spilling.
  bool mayCauseSpilling(unsigned WavesAfter);

  // Attempt to revert scheduling for this region.
  void revertScheduling();

  void advanceRegion() { RegionIdx++; }

  virtual ~GCNSchedStage() = default;
};
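
// Illustrative only: a rough sketch of the per-stage flow the hooks above are
// designed for; the actual driver is GCNScheduleDAGMILive::runSchedStages() in
// the .cpp file, and details such as who advances the region may differ.
//
//   if (Stage->initGCNSchedStage()) {
//     for (auto &Region : regions) {
//       if (!Stage->initGCNRegion()) {
//         Stage->advanceRegion();
//         continue;
//       }
//       // ... schedule the region, check the resulting pressure, and
//       // possibly revert via shouldRevertScheduling()/revertScheduling() ...
//       Stage->finalizeGCNRegion();
//     }
//     Stage->finalizeGCNSchedStage();
//   }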

class InitialScheduleStage : public GCNSchedStage {
public:
  void finalizeGCNRegion() override;

  bool shouldRevertScheduling(unsigned WavesAfter) override;

  InitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};

class UnclusteredRescheduleStage : public GCNSchedStage {
private:
  std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;

public:
  bool initGCNSchedStage() override;

  void finalizeGCNSchedStage() override;

  bool initGCNRegion() override;

  bool shouldRevertScheduling(unsigned WavesAfter) override;

  UnclusteredRescheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};

// Retry function scheduling if we found that the resulting occupancy is lower
// than the occupancy used for the other scheduling passes. This will give more
// freedom to schedule low register pressure blocks.
class ClusteredLowOccStage : public GCNSchedStage {
public:
  bool initGCNSchedStage() override;

  bool initGCNRegion() override;

  bool shouldRevertScheduling(unsigned WavesAfter) override;

  ClusteredLowOccStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};

class PreRARematStage : public GCNSchedStage {
private:
  // Each region at MinOccupancy will have its own list of trivially
  // rematerializable instructions we can rematerialize to reduce RP. The list
  // maps an instruction to the position we should rematerialize it before,
  // usually the MI using the rematerializable instruction.
  MapVector<unsigned, MapVector<MachineInstr *, MachineInstr *>>
      RematerializableInsts;

  // Map a trivially rematerializable def to a list of regions at MinOccupancy
  // that have the defined reg as a live-in.
  DenseMap<MachineInstr *, SmallVector<unsigned, 4>> RematDefToLiveInRegions;

  // Collect all trivially rematerializable VGPR instructions with a single def
  // and single use outside the defining block into RematerializableInsts.
  void collectRematerializableInstructions();

  bool isTriviallyReMaterializable(const MachineInstr &MI);

  // TODO: Should also attempt to reduce RP of SGPRs and AGPRs.
  // Attempt to reduce RP of VGPRs by sinking trivially rematerializable
  // instructions. Returns true if we were able to sink instruction(s).
  bool sinkTriviallyRematInsts(const GCNSubtarget &ST,
                               const TargetInstrInfo *TII);

public:
  bool initGCNSchedStage() override;

  bool initGCNRegion() override;

  bool shouldRevertScheduling(unsigned WavesAfter) override;

  PreRARematStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};

} // End namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H