xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSetWavePriority.cpp (revision e3f4a63af63bea70bc86b6c790b14aa5ee99fcd0)
1 //===- AMDGPUSetWavePriority.cpp - Set wave priority ----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Pass to temporarily raise the wave priority beginning the start of
11 /// the shader function until its last VMEM instructions to allow younger
12 /// waves to issue their VMEM instructions as well.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "AMDGPU.h"
17 #include "GCNSubtarget.h"
18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19 #include "SIInstrInfo.h"
20 #include "llvm/ADT/PostOrderIterator.h"
21 #include "llvm/CodeGen/MachineFunctionPass.h"
22 #include "llvm/CodeGen/MachinePassManager.h"
23 
24 using namespace llvm;
25 
26 #define DEBUG_TYPE "amdgpu-set-wave-priority"
27 
28 static cl::opt<unsigned> DefaultVALUInstsThreshold(
29     "amdgpu-set-wave-priority-valu-insts-threshold",
30     cl::desc("VALU instruction count threshold for adjusting wave priority"),
31     cl::init(100), cl::Hidden);
32 
33 namespace {
34 
35 struct MBBInfo {
36   MBBInfo() = default;
37   unsigned NumVALUInstsAtStart = 0;
38   bool MayReachVMEMLoad = false;
39   MachineInstr *LastVMEMLoad = nullptr;
40 };
41 
42 using MBBInfoSet = DenseMap<const MachineBasicBlock *, MBBInfo>;
43 
44 class AMDGPUSetWavePriority {
45 public:
46   bool run(MachineFunction &MF);
47 
48 private:
49   MachineInstr *BuildSetprioMI(MachineBasicBlock &MBB,
50                                MachineBasicBlock::iterator I,
51                                unsigned priority) const;
52 
53   const SIInstrInfo *TII;
54 };
55 
56 class AMDGPUSetWavePriorityLegacy : public MachineFunctionPass {
57 public:
58   static char ID;
59 
60   AMDGPUSetWavePriorityLegacy() : MachineFunctionPass(ID) {}
61 
62   StringRef getPassName() const override { return "Set wave priority"; }
63 
64   bool runOnMachineFunction(MachineFunction &MF) override {
65     if (skipFunction(MF.getFunction()))
66       return false;
67 
68     return AMDGPUSetWavePriority().run(MF);
69   }
70 };
71 
72 } // End anonymous namespace.
73 
74 INITIALIZE_PASS(AMDGPUSetWavePriorityLegacy, DEBUG_TYPE, "Set wave priority",
75                 false, false)
76 
77 char AMDGPUSetWavePriorityLegacy::ID = 0;
78 
79 FunctionPass *llvm::createAMDGPUSetWavePriorityPass() {
80   return new AMDGPUSetWavePriorityLegacy();
81 }
82 
83 MachineInstr *
84 AMDGPUSetWavePriority::BuildSetprioMI(MachineBasicBlock &MBB,
85                                       MachineBasicBlock::iterator I,
86                                       unsigned priority) const {
87   return BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_SETPRIO))
88       .addImm(priority);
89 }
90 
91 // Checks that for every predecessor Pred that can reach a VMEM load,
92 // none of Pred's successors can reach a VMEM load.
93 static bool CanLowerPriorityDirectlyInPredecessors(const MachineBasicBlock &MBB,
94                                                    MBBInfoSet &MBBInfos) {
95   for (const MachineBasicBlock *Pred : MBB.predecessors()) {
96     if (!MBBInfos[Pred].MayReachVMEMLoad)
97       continue;
98     for (const MachineBasicBlock *Succ : Pred->successors()) {
99       if (MBBInfos[Succ].MayReachVMEMLoad)
100         return false;
101     }
102   }
103   return true;
104 }
105 
106 static bool isVMEMLoad(const MachineInstr &MI) {
107   return SIInstrInfo::isVMEM(MI) && MI.mayLoad();
108 }
109 
110 PreservedAnalyses
111 llvm::AMDGPUSetWavePriorityPass::run(MachineFunction &MF,
112                                      MachineFunctionAnalysisManager &MFAM) {
113   if (!AMDGPUSetWavePriority().run(MF))
114     return PreservedAnalyses::all();
115 
116   return getMachineFunctionPassPreservedAnalyses();
117 }
118 
119 bool AMDGPUSetWavePriority::run(MachineFunction &MF) {
120   const unsigned HighPriority = 3;
121   const unsigned LowPriority = 0;
122 
123   Function &F = MF.getFunction();
124   if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
125     return false;
126 
127   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
128   TII = ST.getInstrInfo();
129 
130   unsigned VALUInstsThreshold = DefaultVALUInstsThreshold;
131   Attribute A = F.getFnAttribute("amdgpu-wave-priority-threshold");
132   if (A.isValid())
133     A.getValueAsString().getAsInteger(0, VALUInstsThreshold);
134 
135   // Find VMEM loads that may be executed before long-enough sequences of
136   // VALU instructions. We currently assume that backedges/loops, branch
137   // probabilities and other details can be ignored, so we essentially
138   // determine the largest number of VALU instructions along every
139   // possible path from the start of the function that may potentially be
140   // executed provided no backedge is ever taken.
141   MBBInfoSet MBBInfos;
142   for (MachineBasicBlock *MBB : post_order(&MF)) {
143     bool AtStart = true;
144     unsigned MaxNumVALUInstsInMiddle = 0;
145     unsigned NumVALUInstsAtEnd = 0;
146     for (MachineInstr &MI : *MBB) {
147       if (isVMEMLoad(MI)) {
148         AtStart = false;
149         MBBInfo &Info = MBBInfos[MBB];
150         Info.NumVALUInstsAtStart = 0;
151         MaxNumVALUInstsInMiddle = 0;
152         NumVALUInstsAtEnd = 0;
153         Info.LastVMEMLoad = &MI;
154       } else if (SIInstrInfo::isDS(MI)) {
155         AtStart = false;
156         MaxNumVALUInstsInMiddle =
157             std::max(MaxNumVALUInstsInMiddle, NumVALUInstsAtEnd);
158         NumVALUInstsAtEnd = 0;
159       } else if (SIInstrInfo::isVALU(MI)) {
160         if (AtStart)
161           ++MBBInfos[MBB].NumVALUInstsAtStart;
162         ++NumVALUInstsAtEnd;
163       }
164     }
165 
166     bool SuccsMayReachVMEMLoad = false;
167     unsigned NumFollowingVALUInsts = 0;
168     for (const MachineBasicBlock *Succ : MBB->successors()) {
169       const MBBInfo &SuccInfo = MBBInfos[Succ];
170       SuccsMayReachVMEMLoad |= SuccInfo.MayReachVMEMLoad;
171       NumFollowingVALUInsts =
172           std::max(NumFollowingVALUInsts, SuccInfo.NumVALUInstsAtStart);
173     }
174     MBBInfo &Info = MBBInfos[MBB];
175     if (AtStart)
176       Info.NumVALUInstsAtStart += NumFollowingVALUInsts;
177     NumVALUInstsAtEnd += NumFollowingVALUInsts;
178 
179     unsigned MaxNumVALUInsts =
180         std::max(MaxNumVALUInstsInMiddle, NumVALUInstsAtEnd);
181     Info.MayReachVMEMLoad =
182         SuccsMayReachVMEMLoad ||
183         (Info.LastVMEMLoad && MaxNumVALUInsts >= VALUInstsThreshold);
184   }
185 
186   MachineBasicBlock &Entry = MF.front();
187   if (!MBBInfos[&Entry].MayReachVMEMLoad)
188     return false;
189 
190   // Raise the priority at the beginning of the shader.
191   MachineBasicBlock::iterator I = Entry.begin(), E = Entry.end();
192   while (I != E && !SIInstrInfo::isVALU(*I) && !I->isTerminator())
193     ++I;
194   BuildSetprioMI(Entry, I, HighPriority);
195 
196   // Lower the priority on edges where control leaves blocks from which
197   // the VMEM loads are reachable.
198   SmallSet<MachineBasicBlock *, 16> PriorityLoweringBlocks;
199   for (MachineBasicBlock &MBB : MF) {
200     if (MBBInfos[&MBB].MayReachVMEMLoad) {
201       if (MBB.succ_empty())
202         PriorityLoweringBlocks.insert(&MBB);
203       continue;
204     }
205 
206     if (CanLowerPriorityDirectlyInPredecessors(MBB, MBBInfos)) {
207       for (MachineBasicBlock *Pred : MBB.predecessors()) {
208         if (MBBInfos[Pred].MayReachVMEMLoad)
209           PriorityLoweringBlocks.insert(Pred);
210       }
211       continue;
212     }
213 
214     // Where lowering the priority in predecessors is not possible, the
215     // block receiving control either was not part of a loop in the first
216     // place or the loop simplification/canonicalization pass should have
217     // already tried to split the edge and insert a preheader, and if for
218     // whatever reason it failed to do so, then this leaves us with the
219     // only option of lowering the priority within the loop.
220     PriorityLoweringBlocks.insert(&MBB);
221   }
222 
223   for (MachineBasicBlock *MBB : PriorityLoweringBlocks) {
224     MachineInstr *LastVMEMLoad = MBBInfos[MBB].LastVMEMLoad;
225     BuildSetprioMI(*MBB,
226                    LastVMEMLoad
227                        ? std::next(MachineBasicBlock::iterator(LastVMEMLoad))
228                        : MBB->begin(),
229                    LowPriority);
230   }
231 
232   return true;
233 }
234