10b57cec5SDimitry Andric //===-- GCNSchedStrategy.h - GCN Scheduler Strategy -*- C++ -*-------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric /// \file 100b57cec5SDimitry Andric // 110b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 120b57cec5SDimitry Andric 130b57cec5SDimitry Andric #ifndef LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H 140b57cec5SDimitry Andric #define LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H 150b57cec5SDimitry Andric 160b57cec5SDimitry Andric #include "GCNRegPressure.h" 1781ad6265SDimitry Andric #include "llvm/ADT/MapVector.h" 180b57cec5SDimitry Andric #include "llvm/CodeGen/MachineScheduler.h" 190b57cec5SDimitry Andric 200b57cec5SDimitry Andric namespace llvm { 210b57cec5SDimitry Andric 220b57cec5SDimitry Andric class SIMachineFunctionInfo; 230b57cec5SDimitry Andric class SIRegisterInfo; 240b57cec5SDimitry Andric class GCNSubtarget; 250b57cec5SDimitry Andric 260b57cec5SDimitry Andric /// This is a minimal scheduler strategy. The main difference between this 270b57cec5SDimitry Andric /// and the GenericScheduler is that GCNSchedStrategy uses different 280b57cec5SDimitry Andric /// heuristics to determine excess/critical pressure sets. Its goal is to 290b57cec5SDimitry Andric /// maximize kernel occupancy (i.e. maximum number of waves per simd). 300b57cec5SDimitry Andric class GCNMaxOccupancySchedStrategy final : public GenericScheduler { 310b57cec5SDimitry Andric SUnit *pickNodeBidirectional(bool &IsTopNode); 320b57cec5SDimitry Andric 330b57cec5SDimitry Andric void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, 340b57cec5SDimitry Andric const RegPressureTracker &RPTracker, 350b57cec5SDimitry Andric SchedCandidate &Cand); 360b57cec5SDimitry Andric 370b57cec5SDimitry Andric void initCandidate(SchedCandidate &Cand, SUnit *SU, 380b57cec5SDimitry Andric bool AtTop, const RegPressureTracker &RPTracker, 390b57cec5SDimitry Andric const SIRegisterInfo *SRI, 400b57cec5SDimitry Andric unsigned SGPRPressure, unsigned VGPRPressure); 410b57cec5SDimitry Andric 428bcb0991SDimitry Andric std::vector<unsigned> Pressure; 43*972a253aSDimitry Andric 448bcb0991SDimitry Andric std::vector<unsigned> MaxPressure; 458bcb0991SDimitry Andric 460b57cec5SDimitry Andric unsigned SGPRExcessLimit; 47*972a253aSDimitry Andric 480b57cec5SDimitry Andric unsigned VGPRExcessLimit; 490b57cec5SDimitry Andric 500b57cec5SDimitry Andric unsigned TargetOccupancy; 510b57cec5SDimitry Andric 52*972a253aSDimitry Andric MachineFunction *MF; 53*972a253aSDimitry Andric 54*972a253aSDimitry Andric public: 55fe6060f1SDimitry Andric // schedule() have seen a clustered memory operation. Set it to false 56fe6060f1SDimitry Andric // before a region scheduling to know if the region had such clusters. 57fe6060f1SDimitry Andric bool HasClusteredNodes; 58fe6060f1SDimitry Andric 59349cc55cSDimitry Andric // schedule() have seen an excess register pressure and had to track 60fe6060f1SDimitry Andric // register pressure for actual scheduling heuristics. 61fe6060f1SDimitry Andric bool HasExcessPressure; 62fe6060f1SDimitry Andric 63*972a253aSDimitry Andric unsigned SGPRCriticalLimit; 640b57cec5SDimitry Andric 65*972a253aSDimitry Andric unsigned VGPRCriticalLimit; 66*972a253aSDimitry Andric 670b57cec5SDimitry Andric GCNMaxOccupancySchedStrategy(const MachineSchedContext *C); 680b57cec5SDimitry Andric 690b57cec5SDimitry Andric SUnit *pickNode(bool &IsTopNode) override; 700b57cec5SDimitry Andric 710b57cec5SDimitry Andric void initialize(ScheduleDAGMI *DAG) override; 720b57cec5SDimitry Andric 73*972a253aSDimitry Andric unsigned getTargetOccupancy() { return TargetOccupancy; } 74*972a253aSDimitry Andric 750b57cec5SDimitry Andric void setTargetOccupancy(unsigned Occ) { TargetOccupancy = Occ; } 760b57cec5SDimitry Andric }; 770b57cec5SDimitry Andric 78*972a253aSDimitry Andric enum class GCNSchedStageID : unsigned { 79*972a253aSDimitry Andric InitialSchedule = 0, 80*972a253aSDimitry Andric UnclusteredReschedule = 1, 81*972a253aSDimitry Andric ClusteredLowOccupancyReschedule = 2, 82*972a253aSDimitry Andric PreRARematerialize = 3, 8381ad6265SDimitry Andric LastStage = PreRARematerialize 845ffd83dbSDimitry Andric }; 855ffd83dbSDimitry Andric 86*972a253aSDimitry Andric #ifndef NDEBUG 87*972a253aSDimitry Andric raw_ostream &operator<<(raw_ostream &OS, const GCNSchedStageID &StageID); 88*972a253aSDimitry Andric #endif 89*972a253aSDimitry Andric 90*972a253aSDimitry Andric inline GCNSchedStageID &operator++(GCNSchedStageID &Stage, int) { 91*972a253aSDimitry Andric assert(Stage != GCNSchedStageID::PreRARematerialize); 92*972a253aSDimitry Andric Stage = static_cast<GCNSchedStageID>(static_cast<unsigned>(Stage) + 1); 93*972a253aSDimitry Andric return Stage; 94*972a253aSDimitry Andric } 95*972a253aSDimitry Andric 96*972a253aSDimitry Andric inline GCNSchedStageID nextStage(const GCNSchedStageID Stage) { 97*972a253aSDimitry Andric return static_cast<GCNSchedStageID>(static_cast<unsigned>(Stage) + 1); 98*972a253aSDimitry Andric } 99*972a253aSDimitry Andric 100*972a253aSDimitry Andric inline bool operator>(GCNSchedStageID &LHS, GCNSchedStageID &RHS) { 101*972a253aSDimitry Andric return static_cast<unsigned>(LHS) > static_cast<unsigned>(RHS); 102*972a253aSDimitry Andric } 103*972a253aSDimitry Andric 104*972a253aSDimitry Andric class GCNScheduleDAGMILive final : public ScheduleDAGMILive { 105*972a253aSDimitry Andric friend class GCNSchedStage; 106*972a253aSDimitry Andric friend class InitialScheduleStage; 107*972a253aSDimitry Andric friend class UnclusteredRescheduleStage; 108*972a253aSDimitry Andric friend class ClusteredLowOccStage; 109*972a253aSDimitry Andric friend class PreRARematStage; 110*972a253aSDimitry Andric 1110b57cec5SDimitry Andric const GCNSubtarget &ST; 1120b57cec5SDimitry Andric 1130b57cec5SDimitry Andric SIMachineFunctionInfo &MFI; 1140b57cec5SDimitry Andric 1150b57cec5SDimitry Andric // Occupancy target at the beginning of function scheduling cycle. 1160b57cec5SDimitry Andric unsigned StartingOccupancy; 1170b57cec5SDimitry Andric 1180b57cec5SDimitry Andric // Minimal real occupancy recorder for the function. 1190b57cec5SDimitry Andric unsigned MinOccupancy; 1200b57cec5SDimitry Andric 1210b57cec5SDimitry Andric // Vector of regions recorder for later rescheduling 1220b57cec5SDimitry Andric SmallVector<std::pair<MachineBasicBlock::iterator, 1230b57cec5SDimitry Andric MachineBasicBlock::iterator>, 32> Regions; 1240b57cec5SDimitry Andric 1255ffd83dbSDimitry Andric // Records if a region is not yet scheduled, or schedule has been reverted, 1265ffd83dbSDimitry Andric // or we generally desire to reschedule it. 1275ffd83dbSDimitry Andric BitVector RescheduleRegions; 1285ffd83dbSDimitry Andric 129fe6060f1SDimitry Andric // Record regions which use clustered loads/stores. 130fe6060f1SDimitry Andric BitVector RegionsWithClusters; 131fe6060f1SDimitry Andric 132fe6060f1SDimitry Andric // Record regions with high register pressure. 133fe6060f1SDimitry Andric BitVector RegionsWithHighRP; 134fe6060f1SDimitry Andric 13581ad6265SDimitry Andric // Regions that has the same occupancy as the latest MinOccupancy 13681ad6265SDimitry Andric BitVector RegionsWithMinOcc; 13781ad6265SDimitry Andric 1380b57cec5SDimitry Andric // Region live-in cache. 1390b57cec5SDimitry Andric SmallVector<GCNRPTracker::LiveRegSet, 32> LiveIns; 1400b57cec5SDimitry Andric 1410b57cec5SDimitry Andric // Region pressure cache. 1420b57cec5SDimitry Andric SmallVector<GCNRegPressure, 32> Pressure; 1430b57cec5SDimitry Andric 144*972a253aSDimitry Andric // Temporary basic block live-in cache. 145*972a253aSDimitry Andric DenseMap<const MachineBasicBlock *, GCNRPTracker::LiveRegSet> MBBLiveIns; 146*972a253aSDimitry Andric 147*972a253aSDimitry Andric DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> BBLiveInMap; 148*972a253aSDimitry Andric 149*972a253aSDimitry Andric DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> getBBLiveInMap() const; 150*972a253aSDimitry Andric 151*972a253aSDimitry Andric // Return current region pressure. 152*972a253aSDimitry Andric GCNRegPressure getRealRegPressure(unsigned RegionIdx) const; 153*972a253aSDimitry Andric 154*972a253aSDimitry Andric // Compute and cache live-ins and pressure for all regions in block. 155*972a253aSDimitry Andric void computeBlockPressure(unsigned RegionIdx, const MachineBasicBlock *MBB); 156*972a253aSDimitry Andric 157*972a253aSDimitry Andric // Update region boundaries when removing MI or inserting NewMI before MI. 158*972a253aSDimitry Andric void updateRegionBoundaries( 159*972a253aSDimitry Andric SmallVectorImpl<std::pair<MachineBasicBlock::iterator, 160*972a253aSDimitry Andric MachineBasicBlock::iterator>> &RegionBoundaries, 161*972a253aSDimitry Andric MachineBasicBlock::iterator MI, MachineInstr *NewMI, 162*972a253aSDimitry Andric bool Removing = false); 163*972a253aSDimitry Andric 164*972a253aSDimitry Andric void runSchedStages(); 165*972a253aSDimitry Andric 166*972a253aSDimitry Andric public: 167*972a253aSDimitry Andric GCNScheduleDAGMILive(MachineSchedContext *C, 168*972a253aSDimitry Andric std::unique_ptr<MachineSchedStrategy> S); 169*972a253aSDimitry Andric 170*972a253aSDimitry Andric void schedule() override; 171*972a253aSDimitry Andric 172*972a253aSDimitry Andric void finalizeSchedule() override; 173*972a253aSDimitry Andric }; 174*972a253aSDimitry Andric 175*972a253aSDimitry Andric // GCNSchedStrategy applies multiple scheduling stages to a function. 176*972a253aSDimitry Andric class GCNSchedStage { 177*972a253aSDimitry Andric protected: 178*972a253aSDimitry Andric GCNScheduleDAGMILive &DAG; 179*972a253aSDimitry Andric 180*972a253aSDimitry Andric GCNMaxOccupancySchedStrategy &S; 181*972a253aSDimitry Andric 182*972a253aSDimitry Andric MachineFunction &MF; 183*972a253aSDimitry Andric 184*972a253aSDimitry Andric SIMachineFunctionInfo &MFI; 185*972a253aSDimitry Andric 186*972a253aSDimitry Andric const GCNSubtarget &ST; 187*972a253aSDimitry Andric 188*972a253aSDimitry Andric const GCNSchedStageID StageID; 189*972a253aSDimitry Andric 190*972a253aSDimitry Andric // The current block being scheduled. 191*972a253aSDimitry Andric MachineBasicBlock *CurrentMBB = nullptr; 192*972a253aSDimitry Andric 193*972a253aSDimitry Andric // Current region index. 194*972a253aSDimitry Andric unsigned RegionIdx = 0; 195*972a253aSDimitry Andric 196*972a253aSDimitry Andric // Record the original order of instructions before scheduling. 197*972a253aSDimitry Andric std::vector<MachineInstr *> Unsched; 198*972a253aSDimitry Andric 199*972a253aSDimitry Andric // RP before scheduling the current region. 200*972a253aSDimitry Andric GCNRegPressure PressureBefore; 201*972a253aSDimitry Andric 202*972a253aSDimitry Andric // RP after scheduling the current region. 203*972a253aSDimitry Andric GCNRegPressure PressureAfter; 204*972a253aSDimitry Andric 205*972a253aSDimitry Andric GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG); 206*972a253aSDimitry Andric 207*972a253aSDimitry Andric public: 208*972a253aSDimitry Andric // Initialize state for a scheduling stage. Returns false if the current stage 209*972a253aSDimitry Andric // should be skipped. 210*972a253aSDimitry Andric virtual bool initGCNSchedStage(); 211*972a253aSDimitry Andric 212*972a253aSDimitry Andric // Finalize state after finishing a scheduling pass on the function. 213*972a253aSDimitry Andric virtual void finalizeGCNSchedStage(); 214*972a253aSDimitry Andric 215*972a253aSDimitry Andric // Setup for scheduling a region. Returns false if the current region should 216*972a253aSDimitry Andric // be skipped. 217*972a253aSDimitry Andric virtual bool initGCNRegion(); 218*972a253aSDimitry Andric 219*972a253aSDimitry Andric // Track whether a new region is also a new MBB. 220*972a253aSDimitry Andric void setupNewBlock(); 221*972a253aSDimitry Andric 222*972a253aSDimitry Andric // Finalize state after scheudling a region. 223*972a253aSDimitry Andric virtual void finalizeGCNRegion(); 224*972a253aSDimitry Andric 225*972a253aSDimitry Andric // Check result of scheduling. 226*972a253aSDimitry Andric void checkScheduling(); 227*972a253aSDimitry Andric 228*972a253aSDimitry Andric // Returns true if scheduling should be reverted. 229*972a253aSDimitry Andric virtual bool shouldRevertScheduling(unsigned WavesAfter); 230*972a253aSDimitry Andric 231*972a253aSDimitry Andric // Returns true if the new schedule may result in more spilling. 232*972a253aSDimitry Andric bool mayCauseSpilling(unsigned WavesAfter); 233*972a253aSDimitry Andric 234*972a253aSDimitry Andric // Attempt to revert scheduling for this region. 235*972a253aSDimitry Andric void revertScheduling(); 236*972a253aSDimitry Andric 237*972a253aSDimitry Andric void advanceRegion() { RegionIdx++; } 238*972a253aSDimitry Andric 239*972a253aSDimitry Andric virtual ~GCNSchedStage() = default; 240*972a253aSDimitry Andric }; 241*972a253aSDimitry Andric 242*972a253aSDimitry Andric class InitialScheduleStage : public GCNSchedStage { 243*972a253aSDimitry Andric public: 244*972a253aSDimitry Andric void finalizeGCNRegion() override; 245*972a253aSDimitry Andric 246*972a253aSDimitry Andric bool shouldRevertScheduling(unsigned WavesAfter) override; 247*972a253aSDimitry Andric 248*972a253aSDimitry Andric InitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG) 249*972a253aSDimitry Andric : GCNSchedStage(StageID, DAG) {} 250*972a253aSDimitry Andric }; 251*972a253aSDimitry Andric 252*972a253aSDimitry Andric class UnclusteredRescheduleStage : public GCNSchedStage { 253*972a253aSDimitry Andric private: 254*972a253aSDimitry Andric std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations; 255*972a253aSDimitry Andric 256*972a253aSDimitry Andric public: 257*972a253aSDimitry Andric bool initGCNSchedStage() override; 258*972a253aSDimitry Andric 259*972a253aSDimitry Andric void finalizeGCNSchedStage() override; 260*972a253aSDimitry Andric 261*972a253aSDimitry Andric bool initGCNRegion() override; 262*972a253aSDimitry Andric 263*972a253aSDimitry Andric bool shouldRevertScheduling(unsigned WavesAfter) override; 264*972a253aSDimitry Andric 265*972a253aSDimitry Andric UnclusteredRescheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG) 266*972a253aSDimitry Andric : GCNSchedStage(StageID, DAG) {} 267*972a253aSDimitry Andric }; 268*972a253aSDimitry Andric 269*972a253aSDimitry Andric // Retry function scheduling if we found resulting occupancy and it is 270*972a253aSDimitry Andric // lower than used for other scheduling passes. This will give more freedom 271*972a253aSDimitry Andric // to schedule low register pressure blocks. 272*972a253aSDimitry Andric class ClusteredLowOccStage : public GCNSchedStage { 273*972a253aSDimitry Andric public: 274*972a253aSDimitry Andric bool initGCNSchedStage() override; 275*972a253aSDimitry Andric 276*972a253aSDimitry Andric bool initGCNRegion() override; 277*972a253aSDimitry Andric 278*972a253aSDimitry Andric bool shouldRevertScheduling(unsigned WavesAfter) override; 279*972a253aSDimitry Andric 280*972a253aSDimitry Andric ClusteredLowOccStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG) 281*972a253aSDimitry Andric : GCNSchedStage(StageID, DAG) {} 282*972a253aSDimitry Andric }; 283*972a253aSDimitry Andric 284*972a253aSDimitry Andric class PreRARematStage : public GCNSchedStage { 285*972a253aSDimitry Andric private: 28681ad6265SDimitry Andric // Each region at MinOccupancy will have their own list of trivially 28781ad6265SDimitry Andric // rematerializable instructions we can remat to reduce RP. The list maps an 28881ad6265SDimitry Andric // instruction to the position we should remat before, usually the MI using 28981ad6265SDimitry Andric // the rematerializable instruction. 29081ad6265SDimitry Andric MapVector<unsigned, MapVector<MachineInstr *, MachineInstr *>> 29181ad6265SDimitry Andric RematerializableInsts; 29281ad6265SDimitry Andric 29381ad6265SDimitry Andric // Map a trivially remateriazable def to a list of regions at MinOccupancy 29481ad6265SDimitry Andric // that has the defined reg as a live-in. 29581ad6265SDimitry Andric DenseMap<MachineInstr *, SmallVector<unsigned, 4>> RematDefToLiveInRegions; 29681ad6265SDimitry Andric 29781ad6265SDimitry Andric // Collect all trivially rematerializable VGPR instructions with a single def 29881ad6265SDimitry Andric // and single use outside the defining block into RematerializableInsts. 29981ad6265SDimitry Andric void collectRematerializableInstructions(); 30081ad6265SDimitry Andric 301fcaf7f86SDimitry Andric bool isTriviallyReMaterializable(const MachineInstr &MI); 30281ad6265SDimitry Andric 30381ad6265SDimitry Andric // TODO: Should also attempt to reduce RP of SGPRs and AGPRs 30481ad6265SDimitry Andric // Attempt to reduce RP of VGPR by sinking trivially rematerializable 30581ad6265SDimitry Andric // instructions. Returns true if we were able to sink instruction(s). 30681ad6265SDimitry Andric bool sinkTriviallyRematInsts(const GCNSubtarget &ST, 30781ad6265SDimitry Andric const TargetInstrInfo *TII); 30881ad6265SDimitry Andric 3090b57cec5SDimitry Andric public: 310*972a253aSDimitry Andric bool initGCNSchedStage() override; 3110b57cec5SDimitry Andric 312*972a253aSDimitry Andric bool initGCNRegion() override; 3130b57cec5SDimitry Andric 314*972a253aSDimitry Andric bool shouldRevertScheduling(unsigned WavesAfter) override; 315*972a253aSDimitry Andric 316*972a253aSDimitry Andric PreRARematStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG) 317*972a253aSDimitry Andric : GCNSchedStage(StageID, DAG) {} 3180b57cec5SDimitry Andric }; 3190b57cec5SDimitry Andric 3200b57cec5SDimitry Andric } // End namespace llvm 3210b57cec5SDimitry Andric 32204eeddc0SDimitry Andric #endif // LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H 323