xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===-- GCNSchedStrategy.h - GCN Scheduler Strategy -*- C++ -*-------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
14 #define LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
15 
16 #include "GCNRegPressure.h"
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/MapVector.h"
19 #include "llvm/CodeGen/MachineInstr.h"
20 #include "llvm/CodeGen/MachineScheduler.h"
21 
22 namespace llvm {
23 
24 class SIMachineFunctionInfo;
25 class SIRegisterInfo;
26 class GCNSubtarget;
27 class GCNSchedStage;
28 
29 enum class GCNSchedStageID : unsigned {
30   OccInitialSchedule = 0,
31   UnclusteredHighRPReschedule = 1,
32   ClusteredLowOccupancyReschedule = 2,
33   PreRARematerialize = 3,
34   ILPInitialSchedule = 4,
35   MemoryClauseInitialSchedule = 5
36 };
37 
38 #ifndef NDEBUG
39 raw_ostream &operator<<(raw_ostream &OS, const GCNSchedStageID &StageID);
40 #endif
41 
42 /// This is a minimal scheduler strategy.  The main difference between this
43 /// and the GenericScheduler is that GCNSchedStrategy uses different
44 /// heuristics to determine excess/critical pressure sets.
45 class GCNSchedStrategy : public GenericScheduler {
46 protected:
47   SUnit *pickNodeBidirectional(bool &IsTopNode);
48 
49   void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy,
50                          const RegPressureTracker &RPTracker,
51                          SchedCandidate &Cand, bool IsBottomUp);
52 
53   void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop,
54                      const RegPressureTracker &RPTracker,
55                      const SIRegisterInfo *SRI, unsigned SGPRPressure,
56                      unsigned VGPRPressure, bool IsBottomUp);
57 
58   std::vector<unsigned> Pressure;
59 
60   std::vector<unsigned> MaxPressure;
61 
62   unsigned SGPRExcessLimit;
63 
64   unsigned VGPRExcessLimit;
65 
66   unsigned TargetOccupancy;
67 
68   MachineFunction *MF;
69 
70   // Scheduling stages for this strategy.
71   SmallVector<GCNSchedStageID, 4> SchedStages;
72 
73   // Pointer to the current SchedStageID.
74   SmallVectorImpl<GCNSchedStageID>::iterator CurrentStage = nullptr;
75 
76   // GCN RP Tracker for top-down scheduling
77   mutable GCNDownwardRPTracker DownwardTracker;
78 
79   // GCN RP Tracker for botttom-up scheduling
80   mutable GCNUpwardRPTracker UpwardTracker;
81 
82 public:
83   // schedule() have seen register pressure over the critical limits and had to
84   // track register pressure for actual scheduling heuristics.
85   bool HasHighPressure;
86 
87   // Schedule known to have excess register pressure. Be more conservative in
88   // increasing ILP and preserving VGPRs.
89   bool KnownExcessRP = false;
90 
91   // An error margin is necessary because of poor performance of the generic RP
92   // tracker and can be adjusted up for tuning heuristics to try and more
93   // aggressively reduce register pressure.
94   unsigned ErrorMargin = 3;
95 
96   // Bias for SGPR limits under a high register pressure.
97   const unsigned HighRPSGPRBias = 7;
98 
99   // Bias for VGPR limits under a high register pressure.
100   const unsigned HighRPVGPRBias = 7;
101 
102   unsigned SGPRCriticalLimit;
103 
104   unsigned VGPRCriticalLimit;
105 
106   unsigned SGPRLimitBias = 0;
107 
108   unsigned VGPRLimitBias = 0;
109 
110   GCNSchedStrategy(const MachineSchedContext *C);
111 
112   SUnit *pickNode(bool &IsTopNode) override;
113 
114   void schedNode(SUnit *SU, bool IsTopNode) override;
115 
116   void initialize(ScheduleDAGMI *DAG) override;
117 
getTargetOccupancy()118   unsigned getTargetOccupancy() { return TargetOccupancy; }
119 
setTargetOccupancy(unsigned Occ)120   void setTargetOccupancy(unsigned Occ) { TargetOccupancy = Occ; }
121 
122   GCNSchedStageID getCurrentStage();
123 
124   // Advances stage. Returns true if there are remaining stages.
125   bool advanceStage();
126 
127   bool hasNextStage() const;
128 
129   GCNSchedStageID getNextStage() const;
130 
getDownwardTracker()131   GCNDownwardRPTracker *getDownwardTracker() { return &DownwardTracker; }
132 
getUpwardTracker()133   GCNUpwardRPTracker *getUpwardTracker() { return &UpwardTracker; }
134 };
135 
136 /// The goal of this scheduling strategy is to maximize kernel occupancy (i.e.
137 /// maximum number of waves per simd).
138 class GCNMaxOccupancySchedStrategy final : public GCNSchedStrategy {
139 public:
140   GCNMaxOccupancySchedStrategy(const MachineSchedContext *C,
141                                bool IsLegacyScheduler = false);
142 };
143 
144 /// The goal of this scheduling strategy is to maximize ILP for a single wave
145 /// (i.e. latency hiding).
146 class GCNMaxILPSchedStrategy final : public GCNSchedStrategy {
147 protected:
148   bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
149                     SchedBoundary *Zone) const override;
150 
151 public:
152   GCNMaxILPSchedStrategy(const MachineSchedContext *C);
153 };
154 
155 /// The goal of this scheduling strategy is to maximize memory clause for a
156 /// single wave.
157 class GCNMaxMemoryClauseSchedStrategy final : public GCNSchedStrategy {
158 protected:
159   bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
160                     SchedBoundary *Zone) const override;
161 
162 public:
163   GCNMaxMemoryClauseSchedStrategy(const MachineSchedContext *C);
164 };
165 
166 class ScheduleMetrics {
167   unsigned ScheduleLength;
168   unsigned BubbleCycles;
169 
170 public:
ScheduleMetrics()171   ScheduleMetrics() {}
ScheduleMetrics(unsigned L,unsigned BC)172   ScheduleMetrics(unsigned L, unsigned BC)
173       : ScheduleLength(L), BubbleCycles(BC) {}
getLength()174   unsigned getLength() const { return ScheduleLength; }
getBubbles()175   unsigned getBubbles() const { return BubbleCycles; }
getMetric()176   unsigned getMetric() const {
177     unsigned Metric = (BubbleCycles * ScaleFactor) / ScheduleLength;
178     // Metric is zero if the amount of bubbles is less than 1% which is too
179     // small. So, return 1.
180     return Metric ? Metric : 1;
181   }
182   static const unsigned ScaleFactor;
183 };
184 
185 inline raw_ostream &operator<<(raw_ostream &OS, const ScheduleMetrics &Sm) {
186   dbgs() << "\n Schedule Metric (scaled by "
187          << ScheduleMetrics::ScaleFactor
188          << " ) is: " << Sm.getMetric() << " [ " << Sm.getBubbles() << "/"
189          << Sm.getLength() << " ]\n";
190   return OS;
191 }
192 
193 class GCNScheduleDAGMILive;
194 class RegionPressureMap {
195   GCNScheduleDAGMILive *DAG;
196   // The live in/out pressure as indexed by the first or last MI in the region
197   // before scheduling.
198   DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> RegionLiveRegMap;
199   // The mapping of RegionIDx to key instruction
200   DenseMap<unsigned, MachineInstr *> IdxToInstruction;
201   // Whether we are calculating LiveOuts or LiveIns
202   bool IsLiveOut;
203 
204 public:
RegionPressureMap()205   RegionPressureMap() {}
RegionPressureMap(GCNScheduleDAGMILive * GCNDAG,bool LiveOut)206   RegionPressureMap(GCNScheduleDAGMILive *GCNDAG, bool LiveOut)
207       : DAG(GCNDAG), IsLiveOut(LiveOut) {}
208   // Build the Instr->LiveReg and RegionIdx->Instr maps
209   void buildLiveRegMap();
210 
211   // Retrieve the LiveReg for a given RegionIdx
getLiveRegsForRegionIdx(unsigned RegionIdx)212   GCNRPTracker::LiveRegSet &getLiveRegsForRegionIdx(unsigned RegionIdx) {
213     assert(IdxToInstruction.contains(RegionIdx));
214     MachineInstr *Key = IdxToInstruction[RegionIdx];
215     return RegionLiveRegMap[Key];
216   }
217 };
218 
219 /// A region's boundaries i.e. a pair of instruction bundle iterators. The lower
220 /// boundary is inclusive, the upper boundary is exclusive.
221 using RegionBoundaries =
222     std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>;
223 
224 class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
225   friend class GCNSchedStage;
226   friend class OccInitialScheduleStage;
227   friend class UnclusteredHighRPStage;
228   friend class ClusteredLowOccStage;
229   friend class PreRARematStage;
230   friend class ILPInitialScheduleStage;
231   friend class RegionPressureMap;
232 
233   const GCNSubtarget &ST;
234 
235   SIMachineFunctionInfo &MFI;
236 
237   // Occupancy target at the beginning of function scheduling cycle.
238   unsigned StartingOccupancy;
239 
240   // Minimal real occupancy recorder for the function.
241   unsigned MinOccupancy;
242 
243   // Vector of regions recorder for later rescheduling
244   SmallVector<RegionBoundaries, 32> Regions;
245 
246   // Record regions with high register pressure.
247   BitVector RegionsWithHighRP;
248 
249   // Record regions with excess register pressure over the physical register
250   // limit. Register pressure in these regions usually will result in spilling.
251   BitVector RegionsWithExcessRP;
252 
253   // Regions that has the same occupancy as the latest MinOccupancy
254   BitVector RegionsWithMinOcc;
255 
256   // Regions that have IGLP instructions (SCHED_GROUP_BARRIER or IGLP_OPT).
257   BitVector RegionsWithIGLPInstrs;
258 
259   // Region live-in cache.
260   SmallVector<GCNRPTracker::LiveRegSet, 32> LiveIns;
261 
262   // Region pressure cache.
263   SmallVector<GCNRegPressure, 32> Pressure;
264 
265   // Temporary basic block live-in cache.
266   DenseMap<const MachineBasicBlock *, GCNRPTracker::LiveRegSet> MBBLiveIns;
267 
268   // The map of the initial first region instruction to region live in registers
269   DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> BBLiveInMap;
270 
271   // Calculate the map of the initial first region instruction to region live in
272   // registers
273   DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> getRegionLiveInMap() const;
274 
275   // Calculate the map of the initial last region instruction to region live out
276   // registers
277   DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
278   getRegionLiveOutMap() const;
279 
280   // The live out registers per region. These are internally stored as a map of
281   // the initial last region instruction to region live out registers, but can
282   // be retreived with the regionIdx by calls to getLiveRegsForRegionIdx.
283   RegionPressureMap RegionLiveOuts;
284 
285   // Return current region pressure.
286   GCNRegPressure getRealRegPressure(unsigned RegionIdx) const;
287 
288   // Compute and cache live-ins and pressure for all regions in block.
289   void computeBlockPressure(unsigned RegionIdx, const MachineBasicBlock *MBB);
290 
291   /// If necessary, updates a region's boundaries following insertion ( \p NewMI
292   /// != nullptr) or removal ( \p NewMI == nullptr) of a \p MI in the region.
293   /// For an MI removal, this must be called before the MI is actually erased
294   /// from its parent MBB.
295   void updateRegionBoundaries(RegionBoundaries &RegionBounds,
296                               MachineBasicBlock::iterator MI,
297                               MachineInstr *NewMI);
298 
299   void runSchedStages();
300 
301   std::unique_ptr<GCNSchedStage> createSchedStage(GCNSchedStageID SchedStageID);
302 
303 public:
304   GCNScheduleDAGMILive(MachineSchedContext *C,
305                        std::unique_ptr<MachineSchedStrategy> S);
306 
307   void schedule() override;
308 
309   void finalizeSchedule() override;
310 };
311 
312 // GCNSchedStrategy applies multiple scheduling stages to a function.
313 class GCNSchedStage {
314 protected:
315   GCNScheduleDAGMILive &DAG;
316 
317   GCNSchedStrategy &S;
318 
319   MachineFunction &MF;
320 
321   SIMachineFunctionInfo &MFI;
322 
323   const GCNSubtarget &ST;
324 
325   const GCNSchedStageID StageID;
326 
327   // The current block being scheduled.
328   MachineBasicBlock *CurrentMBB = nullptr;
329 
330   // Current region index.
331   unsigned RegionIdx = 0;
332 
333   // Record the original order of instructions before scheduling.
334   std::vector<MachineInstr *> Unsched;
335 
336   // RP before scheduling the current region.
337   GCNRegPressure PressureBefore;
338 
339   // RP after scheduling the current region.
340   GCNRegPressure PressureAfter;
341 
342   std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;
343 
344   GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG);
345 
346 public:
347   // Initialize state for a scheduling stage. Returns false if the current stage
348   // should be skipped.
349   virtual bool initGCNSchedStage();
350 
351   // Finalize state after finishing a scheduling pass on the function.
352   virtual void finalizeGCNSchedStage();
353 
354   // Setup for scheduling a region. Returns false if the current region should
355   // be skipped.
356   virtual bool initGCNRegion();
357 
358   // Track whether a new region is also a new MBB.
359   void setupNewBlock();
360 
361   // Finalize state after scheudling a region.
362   void finalizeGCNRegion();
363 
364   // Check result of scheduling.
365   void checkScheduling();
366 
367   // computes the given schedule virtual execution time in clocks
368   ScheduleMetrics getScheduleMetrics(const std::vector<SUnit> &InputSchedule);
369   ScheduleMetrics getScheduleMetrics(const GCNScheduleDAGMILive &DAG);
370   unsigned computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle,
371                                   DenseMap<unsigned, unsigned> &ReadyCycles,
372                                   const TargetSchedModel &SM);
373 
374   // Returns true if scheduling should be reverted.
375   virtual bool shouldRevertScheduling(unsigned WavesAfter);
376 
377   // Returns true if current region has known excess pressure.
isRegionWithExcessRP()378   bool isRegionWithExcessRP() const {
379     return DAG.RegionsWithExcessRP[RegionIdx];
380   }
381 
382   // The region number this stage is currently working on
getRegionIdx()383   unsigned getRegionIdx() { return RegionIdx; }
384 
385   // Returns true if the new schedule may result in more spilling.
386   bool mayCauseSpilling(unsigned WavesAfter);
387 
388   // Attempt to revert scheduling for this region.
389   void revertScheduling();
390 
advanceRegion()391   void advanceRegion() { RegionIdx++; }
392 
393   virtual ~GCNSchedStage() = default;
394 };
395 
396 class OccInitialScheduleStage : public GCNSchedStage {
397 public:
398   bool shouldRevertScheduling(unsigned WavesAfter) override;
399 
OccInitialScheduleStage(GCNSchedStageID StageID,GCNScheduleDAGMILive & DAG)400   OccInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
401       : GCNSchedStage(StageID, DAG) {}
402 };
403 
404 class UnclusteredHighRPStage : public GCNSchedStage {
405 private:
406   // Save the initial occupancy before starting this stage.
407   unsigned InitialOccupancy;
408 
409 public:
410   bool initGCNSchedStage() override;
411 
412   void finalizeGCNSchedStage() override;
413 
414   bool initGCNRegion() override;
415 
416   bool shouldRevertScheduling(unsigned WavesAfter) override;
417 
UnclusteredHighRPStage(GCNSchedStageID StageID,GCNScheduleDAGMILive & DAG)418   UnclusteredHighRPStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
419       : GCNSchedStage(StageID, DAG) {}
420 };
421 
422 // Retry function scheduling if we found resulting occupancy and it is
423 // lower than used for other scheduling passes. This will give more freedom
424 // to schedule low register pressure blocks.
425 class ClusteredLowOccStage : public GCNSchedStage {
426 public:
427   bool initGCNSchedStage() override;
428 
429   bool initGCNRegion() override;
430 
431   bool shouldRevertScheduling(unsigned WavesAfter) override;
432 
ClusteredLowOccStage(GCNSchedStageID StageID,GCNScheduleDAGMILive & DAG)433   ClusteredLowOccStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
434       : GCNSchedStage(StageID, DAG) {}
435 };
436 
437 /// Attempts to reduce function spilling or, if there is no spilling, to
438 /// increase function occupancy by one with respect to ArchVGPR usage by sinking
439 /// trivially rematerializable instructions to their use. When the stage
440 /// estimates reducing spilling or increasing occupancy is possible, as few
441 /// instructions as possible are rematerialized to reduce potential negative
442 /// effects on function latency.
443 class PreRARematStage : public GCNSchedStage {
444 private:
445   /// Useful information about a rematerializable instruction.
446   struct RematInstruction {
447     /// Single use of the rematerializable instruction's defined register,
448     /// located in a different block.
449     MachineInstr *UseMI;
450     /// Rematerialized version of \p DefMI, set in
451     /// PreRARematStage::rematerialize. Used for reverting rematerializations.
452     MachineInstr *RematMI;
453     /// Set of regions in which the rematerializable instruction's defined
454     /// register is a live-in.
455     SmallDenseSet<unsigned, 4> LiveInRegions;
456 
RematInstructionRematInstruction457     RematInstruction(MachineInstr *UseMI) : UseMI(UseMI) {}
458   };
459 
460   /// Maps all MIs to their parent region. MI terminators are considered to be
461   /// outside the region they delimitate, and as such are not stored in the map.
462   DenseMap<MachineInstr *, unsigned> MIRegion;
463   /// Parent MBB to each region, in region order.
464   SmallVector<MachineBasicBlock *> RegionBB;
465   /// Collects instructions to rematerialize.
466   MapVector<MachineInstr *, RematInstruction> Rematerializations;
467   /// Collects regions whose live-ins or register pressure will change due to
468   /// rematerializations.
469   DenseMap<unsigned, GCNRegPressure> ImpactedRegions;
470   /// In case we need to rollback rematerializations, save lane masks for all
471   /// rematerialized registers in all regions in which they are live-ins.
472   DenseMap<std::pair<unsigned, Register>, LaneBitmask> RegMasks;
473   /// After successful stage initialization, indicates which regions should be
474   /// rescheduled.
475   BitVector RescheduleRegions;
476   /// Target occupancy the stage estimates is reachable through
477   /// rematerialization. Greater than or equal to the pre-stage min occupancy.
478   unsigned TargetOcc;
479   /// Achieved occupancy *only* through rematerializations (pre-rescheduling).
480   /// Smaller than or equal to the target occupancy.
481   unsigned AchievedOcc;
482   /// Whether the stage is attempting to increase occupancy in the abscence of
483   /// spilling.
484   bool IncreaseOccupancy;
485 
486   /// Returns whether remat can reduce spilling or increase function occupancy
487   /// by 1 through rematerialization. If it can do one, collects instructions in
488   /// PreRARematStage::Rematerializations and sets the target occupancy in
489   /// PreRARematStage::TargetOccupancy.
490   bool canIncreaseOccupancyOrReduceSpill();
491 
492   /// Whether the MI is trivially rematerializable and does not have any virtual
493   /// register use.
494   bool isTriviallyReMaterializable(const MachineInstr &MI);
495 
496   /// Rematerializes all instructions in PreRARematStage::Rematerializations
497   /// and stores the achieved occupancy after remat in
498   /// PreRARematStage::AchievedOcc.
499   void rematerialize();
500 
501   /// If remat alone did not increase occupancy to the target one, rollbacks all
502   /// rematerializations and resets live-ins/RP in all regions impacted by the
503   /// stage to their pre-stage values.
504   void finalizeGCNSchedStage() override;
505 
506   /// \p Returns true if all the uses in \p InstToRemat defined at \p
507   /// OriginalIdx are live at \p RematIdx. This only checks liveness of virtual
508   /// reg uses.
509   bool allUsesAvailableAt(const MachineInstr *InstToRemat,
510                           SlotIndex OriginalIdx, SlotIndex RematIdx) const;
511 
512 public:
513   bool initGCNSchedStage() override;
514 
515   bool initGCNRegion() override;
516 
517   bool shouldRevertScheduling(unsigned WavesAfter) override;
518 
PreRARematStage(GCNSchedStageID StageID,GCNScheduleDAGMILive & DAG)519   PreRARematStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
520       : GCNSchedStage(StageID, DAG), RescheduleRegions(DAG.Regions.size()) {}
521 };
522 
523 class ILPInitialScheduleStage : public GCNSchedStage {
524 public:
525   bool shouldRevertScheduling(unsigned WavesAfter) override;
526 
ILPInitialScheduleStage(GCNSchedStageID StageID,GCNScheduleDAGMILive & DAG)527   ILPInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
528       : GCNSchedStage(StageID, DAG) {}
529 };
530 
531 class MemoryClauseInitialScheduleStage : public GCNSchedStage {
532 public:
533   bool shouldRevertScheduling(unsigned WavesAfter) override;
534 
MemoryClauseInitialScheduleStage(GCNSchedStageID StageID,GCNScheduleDAGMILive & DAG)535   MemoryClauseInitialScheduleStage(GCNSchedStageID StageID,
536                                    GCNScheduleDAGMILive &DAG)
537       : GCNSchedStage(StageID, DAG) {}
538 };
539 
540 class GCNPostScheduleDAGMILive final : public ScheduleDAGMI {
541 private:
542   std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;
543 
544   bool HasIGLPInstrs = false;
545 
546 public:
547   void schedule() override;
548 
549   void finalizeSchedule() override;
550 
551   GCNPostScheduleDAGMILive(MachineSchedContext *C,
552                            std::unique_ptr<MachineSchedStrategy> S,
553                            bool RemoveKillFlags);
554 };
555 
556 } // End namespace llvm
557 
558 #endif // LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
559