1 //===- AMDGPUSetWavePriority.cpp - Set wave priority ----------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// Pass to temporarily raise the wave priority beginning the start of 11 /// the shader function until its last VMEM instructions to allow younger 12 /// waves to issue their VMEM instructions as well. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "AMDGPU.h" 17 #include "GCNSubtarget.h" 18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 19 #include "SIInstrInfo.h" 20 #include "llvm/ADT/PostOrderIterator.h" 21 #include "llvm/CodeGen/MachineFunctionPass.h" 22 #include "llvm/InitializePasses.h" 23 #include "llvm/Support/Allocator.h" 24 25 using namespace llvm; 26 27 #define DEBUG_TYPE "amdgpu-set-wave-priority" 28 29 static cl::opt<unsigned> DefaultVALUInstsThreshold( 30 "amdgpu-set-wave-priority-valu-insts-threshold", 31 cl::desc("VALU instruction count threshold for adjusting wave priority"), 32 cl::init(100), cl::Hidden); 33 34 namespace { 35 36 struct MBBInfo { 37 MBBInfo() = default; 38 unsigned NumVALUInstsAtStart = 0; 39 bool MayReachVMEMLoad = false; 40 MachineInstr *LastVMEMLoad = nullptr; 41 }; 42 43 using MBBInfoSet = DenseMap<const MachineBasicBlock *, MBBInfo>; 44 45 class AMDGPUSetWavePriority : public MachineFunctionPass { 46 public: 47 static char ID; 48 49 AMDGPUSetWavePriority() : MachineFunctionPass(ID) {} 50 51 StringRef getPassName() const override { return "Set wave priority"; } 52 53 bool runOnMachineFunction(MachineFunction &MF) override; 54 55 private: 56 MachineInstr *BuildSetprioMI(MachineBasicBlock &MBB, 57 MachineBasicBlock::iterator I, 58 unsigned priority) const; 59 60 const SIInstrInfo *TII; 61 }; 62 63 } // End anonymous namespace. 64 65 INITIALIZE_PASS(AMDGPUSetWavePriority, DEBUG_TYPE, "Set wave priority", false, 66 false) 67 68 char AMDGPUSetWavePriority::ID = 0; 69 70 FunctionPass *llvm::createAMDGPUSetWavePriorityPass() { 71 return new AMDGPUSetWavePriority(); 72 } 73 74 MachineInstr * 75 AMDGPUSetWavePriority::BuildSetprioMI(MachineBasicBlock &MBB, 76 MachineBasicBlock::iterator I, 77 unsigned priority) const { 78 return BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_SETPRIO)) 79 .addImm(priority); 80 } 81 82 // Checks that for every predecessor Pred that can reach a VMEM load, 83 // none of Pred's successors can reach a VMEM load. 84 static bool CanLowerPriorityDirectlyInPredecessors(const MachineBasicBlock &MBB, 85 MBBInfoSet &MBBInfos) { 86 for (const MachineBasicBlock *Pred : MBB.predecessors()) { 87 if (!MBBInfos[Pred].MayReachVMEMLoad) 88 continue; 89 for (const MachineBasicBlock *Succ : Pred->successors()) { 90 if (MBBInfos[Succ].MayReachVMEMLoad) 91 return false; 92 } 93 } 94 return true; 95 } 96 97 static bool isVMEMLoad(const MachineInstr &MI) { 98 return SIInstrInfo::isVMEM(MI) && MI.mayLoad(); 99 } 100 101 bool AMDGPUSetWavePriority::runOnMachineFunction(MachineFunction &MF) { 102 const unsigned HighPriority = 3; 103 const unsigned LowPriority = 0; 104 105 Function &F = MF.getFunction(); 106 if (skipFunction(F) || !AMDGPU::isEntryFunctionCC(F.getCallingConv())) 107 return false; 108 109 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 110 TII = ST.getInstrInfo(); 111 112 unsigned VALUInstsThreshold = DefaultVALUInstsThreshold; 113 Attribute A = F.getFnAttribute("amdgpu-wave-priority-threshold"); 114 if (A.isValid()) 115 A.getValueAsString().getAsInteger(0, VALUInstsThreshold); 116 117 // Find VMEM loads that may be executed before long-enough sequences of 118 // VALU instructions. We currently assume that backedges/loops, branch 119 // probabilities and other details can be ignored, so we essentially 120 // determine the largest number of VALU instructions along every 121 // possible path from the start of the function that may potentially be 122 // executed provided no backedge is ever taken. 123 MBBInfoSet MBBInfos; 124 for (MachineBasicBlock *MBB : post_order(&MF)) { 125 bool AtStart = true; 126 unsigned MaxNumVALUInstsInMiddle = 0; 127 unsigned NumVALUInstsAtEnd = 0; 128 for (MachineInstr &MI : *MBB) { 129 if (isVMEMLoad(MI)) { 130 AtStart = false; 131 MBBInfo &Info = MBBInfos[MBB]; 132 Info.NumVALUInstsAtStart = 0; 133 MaxNumVALUInstsInMiddle = 0; 134 NumVALUInstsAtEnd = 0; 135 Info.LastVMEMLoad = &MI; 136 } else if (SIInstrInfo::isDS(MI)) { 137 AtStart = false; 138 MaxNumVALUInstsInMiddle = 139 std::max(MaxNumVALUInstsInMiddle, NumVALUInstsAtEnd); 140 NumVALUInstsAtEnd = 0; 141 } else if (SIInstrInfo::isVALU(MI)) { 142 if (AtStart) 143 ++MBBInfos[MBB].NumVALUInstsAtStart; 144 ++NumVALUInstsAtEnd; 145 } 146 } 147 148 bool SuccsMayReachVMEMLoad = false; 149 unsigned NumFollowingVALUInsts = 0; 150 for (const MachineBasicBlock *Succ : MBB->successors()) { 151 SuccsMayReachVMEMLoad |= MBBInfos[Succ].MayReachVMEMLoad; 152 NumFollowingVALUInsts = 153 std::max(NumFollowingVALUInsts, MBBInfos[Succ].NumVALUInstsAtStart); 154 } 155 MBBInfo &Info = MBBInfos[MBB]; 156 if (AtStart) 157 Info.NumVALUInstsAtStart += NumFollowingVALUInsts; 158 NumVALUInstsAtEnd += NumFollowingVALUInsts; 159 160 unsigned MaxNumVALUInsts = 161 std::max(MaxNumVALUInstsInMiddle, NumVALUInstsAtEnd); 162 Info.MayReachVMEMLoad = 163 SuccsMayReachVMEMLoad || 164 (Info.LastVMEMLoad && MaxNumVALUInsts >= VALUInstsThreshold); 165 } 166 167 MachineBasicBlock &Entry = MF.front(); 168 if (!MBBInfos[&Entry].MayReachVMEMLoad) 169 return false; 170 171 // Raise the priority at the beginning of the shader. 172 MachineBasicBlock::iterator I = Entry.begin(), E = Entry.end(); 173 while (I != E && !SIInstrInfo::isVALU(*I) && !I->isTerminator()) 174 ++I; 175 BuildSetprioMI(Entry, I, HighPriority); 176 177 // Lower the priority on edges where control leaves blocks from which 178 // the VMEM loads are reachable. 179 SmallSet<MachineBasicBlock *, 16> PriorityLoweringBlocks; 180 for (MachineBasicBlock &MBB : MF) { 181 if (MBBInfos[&MBB].MayReachVMEMLoad) { 182 if (MBB.succ_empty()) 183 PriorityLoweringBlocks.insert(&MBB); 184 continue; 185 } 186 187 if (CanLowerPriorityDirectlyInPredecessors(MBB, MBBInfos)) { 188 for (MachineBasicBlock *Pred : MBB.predecessors()) { 189 if (MBBInfos[Pred].MayReachVMEMLoad) 190 PriorityLoweringBlocks.insert(Pred); 191 } 192 continue; 193 } 194 195 // Where lowering the priority in predecessors is not possible, the 196 // block receiving control either was not part of a loop in the first 197 // place or the loop simplification/canonicalization pass should have 198 // already tried to split the edge and insert a preheader, and if for 199 // whatever reason it failed to do so, then this leaves us with the 200 // only option of lowering the priority within the loop. 201 PriorityLoweringBlocks.insert(&MBB); 202 } 203 204 for (MachineBasicBlock *MBB : PriorityLoweringBlocks) { 205 BuildSetprioMI( 206 *MBB, 207 MBBInfos[MBB].LastVMEMLoad 208 ? std::next(MachineBasicBlock::iterator(MBBInfos[MBB].LastVMEMLoad)) 209 : MBB->begin(), 210 LowPriority); 211 } 212 213 return true; 214 } 215