//===- AMDGPUSetWavePriority.cpp - Set wave priority ----------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // /// \file /// Pass to temporarily raise the wave priority beginning the start of /// the shader function until its last VMEM instructions to allow younger /// waves to issue their VMEM instructions as well. // //===----------------------------------------------------------------------===// #include "AMDGPU.h" #include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIInstrInfo.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/InitializePasses.h" #include "llvm/Support/Allocator.h" using namespace llvm; #define DEBUG_TYPE "amdgpu-set-wave-priority" static cl::opt DefaultVALUInstsThreshold( "amdgpu-set-wave-priority-valu-insts-threshold", cl::desc("VALU instruction count threshold for adjusting wave priority"), cl::init(100), cl::Hidden); namespace { struct MBBInfo { MBBInfo() = default; unsigned NumVALUInstsAtStart = 0; bool MayReachVMEMLoad = false; MachineInstr *LastVMEMLoad = nullptr; }; using MBBInfoSet = DenseMap; class AMDGPUSetWavePriority : public MachineFunctionPass { public: static char ID; AMDGPUSetWavePriority() : MachineFunctionPass(ID) {} StringRef getPassName() const override { return "Set wave priority"; } bool runOnMachineFunction(MachineFunction &MF) override; private: MachineInstr *BuildSetprioMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned priority) const; const SIInstrInfo *TII; }; } // End anonymous namespace. INITIALIZE_PASS(AMDGPUSetWavePriority, DEBUG_TYPE, "Set wave priority", false, false) char AMDGPUSetWavePriority::ID = 0; FunctionPass *llvm::createAMDGPUSetWavePriorityPass() { return new AMDGPUSetWavePriority(); } MachineInstr * AMDGPUSetWavePriority::BuildSetprioMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned priority) const { return BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_SETPRIO)) .addImm(priority); } // Checks that for every predecessor Pred that can reach a VMEM load, // none of Pred's successors can reach a VMEM load. static bool CanLowerPriorityDirectlyInPredecessors(const MachineBasicBlock &MBB, MBBInfoSet &MBBInfos) { for (const MachineBasicBlock *Pred : MBB.predecessors()) { if (!MBBInfos[Pred].MayReachVMEMLoad) continue; for (const MachineBasicBlock *Succ : Pred->successors()) { if (MBBInfos[Succ].MayReachVMEMLoad) return false; } } return true; } static bool isVMEMLoad(const MachineInstr &MI) { return SIInstrInfo::isVMEM(MI) && MI.mayLoad(); } bool AMDGPUSetWavePriority::runOnMachineFunction(MachineFunction &MF) { const unsigned HighPriority = 3; const unsigned LowPriority = 0; Function &F = MF.getFunction(); if (skipFunction(F) || !AMDGPU::isEntryFunctionCC(F.getCallingConv())) return false; const GCNSubtarget &ST = MF.getSubtarget(); TII = ST.getInstrInfo(); unsigned VALUInstsThreshold = DefaultVALUInstsThreshold; Attribute A = F.getFnAttribute("amdgpu-wave-priority-threshold"); if (A.isValid()) A.getValueAsString().getAsInteger(0, VALUInstsThreshold); // Find VMEM loads that may be executed before long-enough sequences of // VALU instructions. We currently assume that backedges/loops, branch // probabilities and other details can be ignored, so we essentially // determine the largest number of VALU instructions along every // possible path from the start of the function that may potentially be // executed provided no backedge is ever taken. MBBInfoSet MBBInfos; for (MachineBasicBlock *MBB : post_order(&MF)) { bool AtStart = true; unsigned MaxNumVALUInstsInMiddle = 0; unsigned NumVALUInstsAtEnd = 0; for (MachineInstr &MI : *MBB) { if (isVMEMLoad(MI)) { AtStart = false; MBBInfo &Info = MBBInfos[MBB]; Info.NumVALUInstsAtStart = 0; MaxNumVALUInstsInMiddle = 0; NumVALUInstsAtEnd = 0; Info.LastVMEMLoad = &MI; } else if (SIInstrInfo::isDS(MI)) { AtStart = false; MaxNumVALUInstsInMiddle = std::max(MaxNumVALUInstsInMiddle, NumVALUInstsAtEnd); NumVALUInstsAtEnd = 0; } else if (SIInstrInfo::isVALU(MI)) { if (AtStart) ++MBBInfos[MBB].NumVALUInstsAtStart; ++NumVALUInstsAtEnd; } } bool SuccsMayReachVMEMLoad = false; unsigned NumFollowingVALUInsts = 0; for (const MachineBasicBlock *Succ : MBB->successors()) { SuccsMayReachVMEMLoad |= MBBInfos[Succ].MayReachVMEMLoad; NumFollowingVALUInsts = std::max(NumFollowingVALUInsts, MBBInfos[Succ].NumVALUInstsAtStart); } MBBInfo &Info = MBBInfos[MBB]; if (AtStart) Info.NumVALUInstsAtStart += NumFollowingVALUInsts; NumVALUInstsAtEnd += NumFollowingVALUInsts; unsigned MaxNumVALUInsts = std::max(MaxNumVALUInstsInMiddle, NumVALUInstsAtEnd); Info.MayReachVMEMLoad = SuccsMayReachVMEMLoad || (Info.LastVMEMLoad && MaxNumVALUInsts >= VALUInstsThreshold); } MachineBasicBlock &Entry = MF.front(); if (!MBBInfos[&Entry].MayReachVMEMLoad) return false; // Raise the priority at the beginning of the shader. MachineBasicBlock::iterator I = Entry.begin(), E = Entry.end(); while (I != E && !SIInstrInfo::isVALU(*I) && !I->isTerminator()) ++I; BuildSetprioMI(Entry, I, HighPriority); // Lower the priority on edges where control leaves blocks from which // the VMEM loads are reachable. SmallSet PriorityLoweringBlocks; for (MachineBasicBlock &MBB : MF) { if (MBBInfos[&MBB].MayReachVMEMLoad) { if (MBB.succ_empty()) PriorityLoweringBlocks.insert(&MBB); continue; } if (CanLowerPriorityDirectlyInPredecessors(MBB, MBBInfos)) { for (MachineBasicBlock *Pred : MBB.predecessors()) { if (MBBInfos[Pred].MayReachVMEMLoad) PriorityLoweringBlocks.insert(Pred); } continue; } // Where lowering the priority in predecessors is not possible, the // block receiving control either was not part of a loop in the first // place or the loop simplification/canonicalization pass should have // already tried to split the edge and insert a preheader, and if for // whatever reason it failed to do so, then this leaves us with the // only option of lowering the priority within the loop. PriorityLoweringBlocks.insert(&MBB); } for (MachineBasicBlock *MBB : PriorityLoweringBlocks) { BuildSetprioMI( *MBB, MBBInfos[MBB].LastVMEMLoad ? std::next(MachineBasicBlock::iterator(MBBInfos[MBB].LastVMEMLoad)) : MBB->begin(), LowPriority); } return true; }