xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h (revision 972a253a57b6f144b0e4a3e2080a2a0076ec55a0)
10b57cec5SDimitry Andric //===-- GCNSchedStrategy.h - GCN Scheduler Strategy -*- C++ -*-------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric /// \file
100b57cec5SDimitry Andric //
110b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric 
130b57cec5SDimitry Andric #ifndef LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
140b57cec5SDimitry Andric #define LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
150b57cec5SDimitry Andric 
160b57cec5SDimitry Andric #include "GCNRegPressure.h"
1781ad6265SDimitry Andric #include "llvm/ADT/MapVector.h"
180b57cec5SDimitry Andric #include "llvm/CodeGen/MachineScheduler.h"
190b57cec5SDimitry Andric 
200b57cec5SDimitry Andric namespace llvm {
210b57cec5SDimitry Andric 
// Forward declarations: these types are only named through pointers and
// references in this header, so their full definitions are not required here.
class GCNSubtarget;
class SIMachineFunctionInfo;
class SIRegisterInfo;
250b57cec5SDimitry Andric 
260b57cec5SDimitry Andric /// This is a minimal scheduler strategy.  The main difference between this
270b57cec5SDimitry Andric /// and the GenericScheduler is that GCNSchedStrategy uses different
280b57cec5SDimitry Andric /// heuristics to determine excess/critical pressure sets.  Its goal is to
290b57cec5SDimitry Andric /// maximize kernel occupancy (i.e. maximum number of waves per simd).
300b57cec5SDimitry Andric class GCNMaxOccupancySchedStrategy final : public GenericScheduler {
310b57cec5SDimitry Andric   SUnit *pickNodeBidirectional(bool &IsTopNode);
320b57cec5SDimitry Andric 
330b57cec5SDimitry Andric   void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy,
340b57cec5SDimitry Andric                          const RegPressureTracker &RPTracker,
350b57cec5SDimitry Andric                          SchedCandidate &Cand);
360b57cec5SDimitry Andric 
370b57cec5SDimitry Andric   void initCandidate(SchedCandidate &Cand, SUnit *SU,
380b57cec5SDimitry Andric                      bool AtTop, const RegPressureTracker &RPTracker,
390b57cec5SDimitry Andric                      const SIRegisterInfo *SRI,
400b57cec5SDimitry Andric                      unsigned SGPRPressure, unsigned VGPRPressure);
410b57cec5SDimitry Andric 
428bcb0991SDimitry Andric   std::vector<unsigned> Pressure;
43*972a253aSDimitry Andric 
448bcb0991SDimitry Andric   std::vector<unsigned> MaxPressure;
458bcb0991SDimitry Andric 
460b57cec5SDimitry Andric   unsigned SGPRExcessLimit;
47*972a253aSDimitry Andric 
480b57cec5SDimitry Andric   unsigned VGPRExcessLimit;
490b57cec5SDimitry Andric 
500b57cec5SDimitry Andric   unsigned TargetOccupancy;
510b57cec5SDimitry Andric 
52*972a253aSDimitry Andric   MachineFunction *MF;
53*972a253aSDimitry Andric 
54*972a253aSDimitry Andric public:
55fe6060f1SDimitry Andric   // schedule() have seen a clustered memory operation. Set it to false
56fe6060f1SDimitry Andric   // before a region scheduling to know if the region had such clusters.
57fe6060f1SDimitry Andric   bool HasClusteredNodes;
58fe6060f1SDimitry Andric 
59349cc55cSDimitry Andric   // schedule() have seen an excess register pressure and had to track
60fe6060f1SDimitry Andric   // register pressure for actual scheduling heuristics.
61fe6060f1SDimitry Andric   bool HasExcessPressure;
62fe6060f1SDimitry Andric 
63*972a253aSDimitry Andric   unsigned SGPRCriticalLimit;
640b57cec5SDimitry Andric 
65*972a253aSDimitry Andric   unsigned VGPRCriticalLimit;
66*972a253aSDimitry Andric 
670b57cec5SDimitry Andric   GCNMaxOccupancySchedStrategy(const MachineSchedContext *C);
680b57cec5SDimitry Andric 
690b57cec5SDimitry Andric   SUnit *pickNode(bool &IsTopNode) override;
700b57cec5SDimitry Andric 
710b57cec5SDimitry Andric   void initialize(ScheduleDAGMI *DAG) override;
720b57cec5SDimitry Andric 
73*972a253aSDimitry Andric   unsigned getTargetOccupancy() { return TargetOccupancy; }
74*972a253aSDimitry Andric 
750b57cec5SDimitry Andric   void setTargetOccupancy(unsigned Occ) { TargetOccupancy = Occ; }
760b57cec5SDimitry Andric };
770b57cec5SDimitry Andric 
// Identifies the scheduling stages. Enumerators are numbered consecutively
// from zero in declaration order, so a stage can be advanced by adding one to
// its underlying value. LastStage aliases the final real stage.
enum class GCNSchedStageID : unsigned {
  InitialSchedule,                 // 0
  UnclusteredReschedule,           // 1
  ClusteredLowOccupancyReschedule, // 2
  PreRARematerialize,              // 3
  LastStage = PreRARematerialize
};
855ffd83dbSDimitry Andric 
86*972a253aSDimitry Andric #ifndef NDEBUG
87*972a253aSDimitry Andric raw_ostream &operator<<(raw_ostream &OS, const GCNSchedStageID &StageID);
88*972a253aSDimitry Andric #endif
89*972a253aSDimitry Andric 
90*972a253aSDimitry Andric inline GCNSchedStageID &operator++(GCNSchedStageID &Stage, int) {
91*972a253aSDimitry Andric   assert(Stage != GCNSchedStageID::PreRARematerialize);
92*972a253aSDimitry Andric   Stage = static_cast<GCNSchedStageID>(static_cast<unsigned>(Stage) + 1);
93*972a253aSDimitry Andric   return Stage;
94*972a253aSDimitry Andric }
95*972a253aSDimitry Andric 
96*972a253aSDimitry Andric inline GCNSchedStageID nextStage(const GCNSchedStageID Stage) {
97*972a253aSDimitry Andric   return static_cast<GCNSchedStageID>(static_cast<unsigned>(Stage) + 1);
98*972a253aSDimitry Andric }
99*972a253aSDimitry Andric 
100*972a253aSDimitry Andric inline bool operator>(GCNSchedStageID &LHS, GCNSchedStageID &RHS) {
101*972a253aSDimitry Andric   return static_cast<unsigned>(LHS) > static_cast<unsigned>(RHS);
102*972a253aSDimitry Andric }
103*972a253aSDimitry Andric 
104*972a253aSDimitry Andric class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
105*972a253aSDimitry Andric   friend class GCNSchedStage;
106*972a253aSDimitry Andric   friend class InitialScheduleStage;
107*972a253aSDimitry Andric   friend class UnclusteredRescheduleStage;
108*972a253aSDimitry Andric   friend class ClusteredLowOccStage;
109*972a253aSDimitry Andric   friend class PreRARematStage;
110*972a253aSDimitry Andric 
1110b57cec5SDimitry Andric   const GCNSubtarget &ST;
1120b57cec5SDimitry Andric 
1130b57cec5SDimitry Andric   SIMachineFunctionInfo &MFI;
1140b57cec5SDimitry Andric 
1150b57cec5SDimitry Andric   // Occupancy target at the beginning of function scheduling cycle.
1160b57cec5SDimitry Andric   unsigned StartingOccupancy;
1170b57cec5SDimitry Andric 
1180b57cec5SDimitry Andric   // Minimal real occupancy recorder for the function.
1190b57cec5SDimitry Andric   unsigned MinOccupancy;
1200b57cec5SDimitry Andric 
1210b57cec5SDimitry Andric   // Vector of regions recorder for later rescheduling
1220b57cec5SDimitry Andric   SmallVector<std::pair<MachineBasicBlock::iterator,
1230b57cec5SDimitry Andric                         MachineBasicBlock::iterator>, 32> Regions;
1240b57cec5SDimitry Andric 
1255ffd83dbSDimitry Andric   // Records if a region is not yet scheduled, or schedule has been reverted,
1265ffd83dbSDimitry Andric   // or we generally desire to reschedule it.
1275ffd83dbSDimitry Andric   BitVector RescheduleRegions;
1285ffd83dbSDimitry Andric 
129fe6060f1SDimitry Andric   // Record regions which use clustered loads/stores.
130fe6060f1SDimitry Andric   BitVector RegionsWithClusters;
131fe6060f1SDimitry Andric 
132fe6060f1SDimitry Andric   // Record regions with high register pressure.
133fe6060f1SDimitry Andric   BitVector RegionsWithHighRP;
134fe6060f1SDimitry Andric 
13581ad6265SDimitry Andric   // Regions that has the same occupancy as the latest MinOccupancy
13681ad6265SDimitry Andric   BitVector RegionsWithMinOcc;
13781ad6265SDimitry Andric 
1380b57cec5SDimitry Andric   // Region live-in cache.
1390b57cec5SDimitry Andric   SmallVector<GCNRPTracker::LiveRegSet, 32> LiveIns;
1400b57cec5SDimitry Andric 
1410b57cec5SDimitry Andric   // Region pressure cache.
1420b57cec5SDimitry Andric   SmallVector<GCNRegPressure, 32> Pressure;
1430b57cec5SDimitry Andric 
144*972a253aSDimitry Andric   // Temporary basic block live-in cache.
145*972a253aSDimitry Andric   DenseMap<const MachineBasicBlock *, GCNRPTracker::LiveRegSet> MBBLiveIns;
146*972a253aSDimitry Andric 
147*972a253aSDimitry Andric   DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> BBLiveInMap;
148*972a253aSDimitry Andric 
149*972a253aSDimitry Andric   DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> getBBLiveInMap() const;
150*972a253aSDimitry Andric 
151*972a253aSDimitry Andric   // Return current region pressure.
152*972a253aSDimitry Andric   GCNRegPressure getRealRegPressure(unsigned RegionIdx) const;
153*972a253aSDimitry Andric 
154*972a253aSDimitry Andric   // Compute and cache live-ins and pressure for all regions in block.
155*972a253aSDimitry Andric   void computeBlockPressure(unsigned RegionIdx, const MachineBasicBlock *MBB);
156*972a253aSDimitry Andric 
157*972a253aSDimitry Andric   // Update region boundaries when removing MI or inserting NewMI before MI.
158*972a253aSDimitry Andric   void updateRegionBoundaries(
159*972a253aSDimitry Andric       SmallVectorImpl<std::pair<MachineBasicBlock::iterator,
160*972a253aSDimitry Andric                                 MachineBasicBlock::iterator>> &RegionBoundaries,
161*972a253aSDimitry Andric       MachineBasicBlock::iterator MI, MachineInstr *NewMI,
162*972a253aSDimitry Andric       bool Removing = false);
163*972a253aSDimitry Andric 
164*972a253aSDimitry Andric   void runSchedStages();
165*972a253aSDimitry Andric 
166*972a253aSDimitry Andric public:
167*972a253aSDimitry Andric   GCNScheduleDAGMILive(MachineSchedContext *C,
168*972a253aSDimitry Andric                        std::unique_ptr<MachineSchedStrategy> S);
169*972a253aSDimitry Andric 
170*972a253aSDimitry Andric   void schedule() override;
171*972a253aSDimitry Andric 
172*972a253aSDimitry Andric   void finalizeSchedule() override;
173*972a253aSDimitry Andric };
174*972a253aSDimitry Andric 
175*972a253aSDimitry Andric // GCNSchedStrategy applies multiple scheduling stages to a function.
176*972a253aSDimitry Andric class GCNSchedStage {
177*972a253aSDimitry Andric protected:
178*972a253aSDimitry Andric   GCNScheduleDAGMILive &DAG;
179*972a253aSDimitry Andric 
180*972a253aSDimitry Andric   GCNMaxOccupancySchedStrategy &S;
181*972a253aSDimitry Andric 
182*972a253aSDimitry Andric   MachineFunction &MF;
183*972a253aSDimitry Andric 
184*972a253aSDimitry Andric   SIMachineFunctionInfo &MFI;
185*972a253aSDimitry Andric 
186*972a253aSDimitry Andric   const GCNSubtarget &ST;
187*972a253aSDimitry Andric 
188*972a253aSDimitry Andric   const GCNSchedStageID StageID;
189*972a253aSDimitry Andric 
190*972a253aSDimitry Andric   // The current block being scheduled.
191*972a253aSDimitry Andric   MachineBasicBlock *CurrentMBB = nullptr;
192*972a253aSDimitry Andric 
193*972a253aSDimitry Andric   // Current region index.
194*972a253aSDimitry Andric   unsigned RegionIdx = 0;
195*972a253aSDimitry Andric 
196*972a253aSDimitry Andric   // Record the original order of instructions before scheduling.
197*972a253aSDimitry Andric   std::vector<MachineInstr *> Unsched;
198*972a253aSDimitry Andric 
199*972a253aSDimitry Andric   // RP before scheduling the current region.
200*972a253aSDimitry Andric   GCNRegPressure PressureBefore;
201*972a253aSDimitry Andric 
202*972a253aSDimitry Andric   // RP after scheduling the current region.
203*972a253aSDimitry Andric   GCNRegPressure PressureAfter;
204*972a253aSDimitry Andric 
205*972a253aSDimitry Andric   GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG);
206*972a253aSDimitry Andric 
207*972a253aSDimitry Andric public:
208*972a253aSDimitry Andric   // Initialize state for a scheduling stage. Returns false if the current stage
209*972a253aSDimitry Andric   // should be skipped.
210*972a253aSDimitry Andric   virtual bool initGCNSchedStage();
211*972a253aSDimitry Andric 
212*972a253aSDimitry Andric   // Finalize state after finishing a scheduling pass on the function.
213*972a253aSDimitry Andric   virtual void finalizeGCNSchedStage();
214*972a253aSDimitry Andric 
215*972a253aSDimitry Andric   // Setup for scheduling a region. Returns false if the current region should
216*972a253aSDimitry Andric   // be skipped.
217*972a253aSDimitry Andric   virtual bool initGCNRegion();
218*972a253aSDimitry Andric 
219*972a253aSDimitry Andric   // Track whether a new region is also a new MBB.
220*972a253aSDimitry Andric   void setupNewBlock();
221*972a253aSDimitry Andric 
222*972a253aSDimitry Andric   // Finalize state after scheudling a region.
223*972a253aSDimitry Andric   virtual void finalizeGCNRegion();
224*972a253aSDimitry Andric 
225*972a253aSDimitry Andric   // Check result of scheduling.
226*972a253aSDimitry Andric   void checkScheduling();
227*972a253aSDimitry Andric 
228*972a253aSDimitry Andric   // Returns true if scheduling should be reverted.
229*972a253aSDimitry Andric   virtual bool shouldRevertScheduling(unsigned WavesAfter);
230*972a253aSDimitry Andric 
231*972a253aSDimitry Andric   // Returns true if the new schedule may result in more spilling.
232*972a253aSDimitry Andric   bool mayCauseSpilling(unsigned WavesAfter);
233*972a253aSDimitry Andric 
234*972a253aSDimitry Andric   // Attempt to revert scheduling for this region.
235*972a253aSDimitry Andric   void revertScheduling();
236*972a253aSDimitry Andric 
237*972a253aSDimitry Andric   void advanceRegion() { RegionIdx++; }
238*972a253aSDimitry Andric 
239*972a253aSDimitry Andric   virtual ~GCNSchedStage() = default;
240*972a253aSDimitry Andric };
241*972a253aSDimitry Andric 
242*972a253aSDimitry Andric class InitialScheduleStage : public GCNSchedStage {
243*972a253aSDimitry Andric public:
244*972a253aSDimitry Andric   void finalizeGCNRegion() override;
245*972a253aSDimitry Andric 
246*972a253aSDimitry Andric   bool shouldRevertScheduling(unsigned WavesAfter) override;
247*972a253aSDimitry Andric 
248*972a253aSDimitry Andric   InitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
249*972a253aSDimitry Andric       : GCNSchedStage(StageID, DAG) {}
250*972a253aSDimitry Andric };
251*972a253aSDimitry Andric 
252*972a253aSDimitry Andric class UnclusteredRescheduleStage : public GCNSchedStage {
253*972a253aSDimitry Andric private:
254*972a253aSDimitry Andric   std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;
255*972a253aSDimitry Andric 
256*972a253aSDimitry Andric public:
257*972a253aSDimitry Andric   bool initGCNSchedStage() override;
258*972a253aSDimitry Andric 
259*972a253aSDimitry Andric   void finalizeGCNSchedStage() override;
260*972a253aSDimitry Andric 
261*972a253aSDimitry Andric   bool initGCNRegion() override;
262*972a253aSDimitry Andric 
263*972a253aSDimitry Andric   bool shouldRevertScheduling(unsigned WavesAfter) override;
264*972a253aSDimitry Andric 
265*972a253aSDimitry Andric   UnclusteredRescheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
266*972a253aSDimitry Andric       : GCNSchedStage(StageID, DAG) {}
267*972a253aSDimitry Andric };
268*972a253aSDimitry Andric 
269*972a253aSDimitry Andric // Retry function scheduling if we found resulting occupancy and it is
270*972a253aSDimitry Andric // lower than used for other scheduling passes. This will give more freedom
271*972a253aSDimitry Andric // to schedule low register pressure blocks.
272*972a253aSDimitry Andric class ClusteredLowOccStage : public GCNSchedStage {
273*972a253aSDimitry Andric public:
274*972a253aSDimitry Andric   bool initGCNSchedStage() override;
275*972a253aSDimitry Andric 
276*972a253aSDimitry Andric   bool initGCNRegion() override;
277*972a253aSDimitry Andric 
278*972a253aSDimitry Andric   bool shouldRevertScheduling(unsigned WavesAfter) override;
279*972a253aSDimitry Andric 
280*972a253aSDimitry Andric   ClusteredLowOccStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
281*972a253aSDimitry Andric       : GCNSchedStage(StageID, DAG) {}
282*972a253aSDimitry Andric };
283*972a253aSDimitry Andric 
284*972a253aSDimitry Andric class PreRARematStage : public GCNSchedStage {
285*972a253aSDimitry Andric private:
28681ad6265SDimitry Andric   // Each region at MinOccupancy will have their own list of trivially
28781ad6265SDimitry Andric   // rematerializable instructions we can remat to reduce RP. The list maps an
28881ad6265SDimitry Andric   // instruction to the position we should remat before, usually the MI using
28981ad6265SDimitry Andric   // the rematerializable instruction.
29081ad6265SDimitry Andric   MapVector<unsigned, MapVector<MachineInstr *, MachineInstr *>>
29181ad6265SDimitry Andric       RematerializableInsts;
29281ad6265SDimitry Andric 
29381ad6265SDimitry Andric   // Map a trivially remateriazable def to a list of regions at MinOccupancy
29481ad6265SDimitry Andric   // that has the defined reg as a live-in.
29581ad6265SDimitry Andric   DenseMap<MachineInstr *, SmallVector<unsigned, 4>> RematDefToLiveInRegions;
29681ad6265SDimitry Andric 
29781ad6265SDimitry Andric   // Collect all trivially rematerializable VGPR instructions with a single def
29881ad6265SDimitry Andric   // and single use outside the defining block into RematerializableInsts.
29981ad6265SDimitry Andric   void collectRematerializableInstructions();
30081ad6265SDimitry Andric 
301fcaf7f86SDimitry Andric   bool isTriviallyReMaterializable(const MachineInstr &MI);
30281ad6265SDimitry Andric 
30381ad6265SDimitry Andric   // TODO: Should also attempt to reduce RP of SGPRs and AGPRs
30481ad6265SDimitry Andric   // Attempt to reduce RP of VGPR by sinking trivially rematerializable
30581ad6265SDimitry Andric   // instructions. Returns true if we were able to sink instruction(s).
30681ad6265SDimitry Andric   bool sinkTriviallyRematInsts(const GCNSubtarget &ST,
30781ad6265SDimitry Andric                                const TargetInstrInfo *TII);
30881ad6265SDimitry Andric 
3090b57cec5SDimitry Andric public:
310*972a253aSDimitry Andric   bool initGCNSchedStage() override;
3110b57cec5SDimitry Andric 
312*972a253aSDimitry Andric   bool initGCNRegion() override;
3130b57cec5SDimitry Andric 
314*972a253aSDimitry Andric   bool shouldRevertScheduling(unsigned WavesAfter) override;
315*972a253aSDimitry Andric 
316*972a253aSDimitry Andric   PreRARematStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
317*972a253aSDimitry Andric       : GCNSchedStage(StageID, DAG) {}
3180b57cec5SDimitry Andric };
3190b57cec5SDimitry Andric 
3200b57cec5SDimitry Andric } // End namespace llvm
3210b57cec5SDimitry Andric 
32204eeddc0SDimitry Andric #endif // LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
323