xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1 //===-- SILateBranchLowering.cpp - Final preparation of branches ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This pass mainly lowers early terminate pseudo instructions.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPU.h"
15 #include "GCNSubtarget.h"
16 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
17 #include "SIMachineFunctionInfo.h"
18 #include "llvm/CodeGen/MachineDominators.h"
19 #include "llvm/InitializePasses.h"
20 
21 using namespace llvm;
22 
23 #define DEBUG_TYPE "si-late-branch-lowering"
24 
25 namespace {
26 
27 class SILateBranchLowering : public MachineFunctionPass {
28 private:
29   const SIRegisterInfo *TRI = nullptr;
30   const SIInstrInfo *TII = nullptr;
31   MachineDominatorTree *MDT = nullptr;
32 
33   void expandChainCall(MachineInstr &MI);
34   void earlyTerm(MachineInstr &MI, MachineBasicBlock *EarlyExitBlock);
35 
36 public:
37   static char ID;
38 
39   unsigned MovOpc;
40   Register ExecReg;
41 
SILateBranchLowering()42   SILateBranchLowering() : MachineFunctionPass(ID) {}
43 
44   bool runOnMachineFunction(MachineFunction &MF) override;
45 
getPassName() const46   StringRef getPassName() const override {
47     return "SI Final Branch Preparation";
48   }
49 
getAnalysisUsage(AnalysisUsage & AU) const50   void getAnalysisUsage(AnalysisUsage &AU) const override {
51     AU.addRequired<MachineDominatorTreeWrapperPass>();
52     AU.addPreserved<MachineDominatorTreeWrapperPass>();
53     MachineFunctionPass::getAnalysisUsage(AU);
54   }
55 };
56 
57 } // end anonymous namespace
58 
59 char SILateBranchLowering::ID = 0;
60 
61 INITIALIZE_PASS_BEGIN(SILateBranchLowering, DEBUG_TYPE,
62                       "SI insert s_cbranch_execz instructions", false, false)
63 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
64 INITIALIZE_PASS_END(SILateBranchLowering, DEBUG_TYPE,
65                     "SI insert s_cbranch_execz instructions", false, false)
66 
67 char &llvm::SILateBranchLoweringPassID = SILateBranchLowering::ID;
68 
generateEndPgm(MachineBasicBlock & MBB,MachineBasicBlock::iterator I,DebugLoc DL,const SIInstrInfo * TII,MachineFunction & MF)69 static void generateEndPgm(MachineBasicBlock &MBB,
70                            MachineBasicBlock::iterator I, DebugLoc DL,
71                            const SIInstrInfo *TII, MachineFunction &MF) {
72   const Function &F = MF.getFunction();
73   bool IsPS = F.getCallingConv() == CallingConv::AMDGPU_PS;
74 
75   // Check if hardware has been configured to expect color or depth exports.
76   bool HasColorExports = AMDGPU::getHasColorExport(F);
77   bool HasDepthExports = AMDGPU::getHasDepthExport(F);
78   bool HasExports = HasColorExports || HasDepthExports;
79 
80   // Prior to GFX10, hardware always expects at least one export for PS.
81   bool MustExport = !AMDGPU::isGFX10Plus(TII->getSubtarget());
82 
83   if (IsPS && (HasExports || MustExport)) {
84     // Generate "null export" if hardware is expecting PS to export.
85     const GCNSubtarget &ST = MBB.getParent()->getSubtarget<GCNSubtarget>();
86     int Target =
87         ST.hasNullExportTarget()
88             ? AMDGPU::Exp::ET_NULL
89             : (HasColorExports ? AMDGPU::Exp::ET_MRT0 : AMDGPU::Exp::ET_MRTZ);
90     BuildMI(MBB, I, DL, TII->get(AMDGPU::EXP_DONE))
91         .addImm(Target)
92         .addReg(AMDGPU::VGPR0, RegState::Undef)
93         .addReg(AMDGPU::VGPR0, RegState::Undef)
94         .addReg(AMDGPU::VGPR0, RegState::Undef)
95         .addReg(AMDGPU::VGPR0, RegState::Undef)
96         .addImm(1)  // vm
97         .addImm(0)  // compr
98         .addImm(0); // en
99   }
100 
101   // s_endpgm
102   BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ENDPGM)).addImm(0);
103 }
104 
splitBlock(MachineBasicBlock & MBB,MachineInstr & MI,MachineDominatorTree * MDT)105 static void splitBlock(MachineBasicBlock &MBB, MachineInstr &MI,
106                        MachineDominatorTree *MDT) {
107   MachineBasicBlock *SplitBB = MBB.splitAt(MI, /*UpdateLiveIns*/ true);
108 
109   // Update dominator tree
110   using DomTreeT = DomTreeBase<MachineBasicBlock>;
111   SmallVector<DomTreeT::UpdateType, 16> DTUpdates;
112   for (MachineBasicBlock *Succ : SplitBB->successors()) {
113     DTUpdates.push_back({DomTreeT::Insert, SplitBB, Succ});
114     DTUpdates.push_back({DomTreeT::Delete, &MBB, Succ});
115   }
116   DTUpdates.push_back({DomTreeT::Insert, &MBB, SplitBB});
117   MDT->getBase().applyUpdates(DTUpdates);
118 }
119 
expandChainCall(MachineInstr & MI)120 void SILateBranchLowering::expandChainCall(MachineInstr &MI) {
121   // This is a tail call that needs to be expanded into at least
122   // 2 instructions, one for setting EXEC and one for the actual tail call.
123   constexpr unsigned ExecIdx = 3;
124 
125   BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(MovOpc), ExecReg)
126       ->addOperand(MI.getOperand(ExecIdx));
127   MI.removeOperand(ExecIdx);
128 
129   MI.setDesc(TII->get(AMDGPU::SI_TCRETURN));
130 }
131 
earlyTerm(MachineInstr & MI,MachineBasicBlock * EarlyExitBlock)132 void SILateBranchLowering::earlyTerm(MachineInstr &MI,
133                                      MachineBasicBlock *EarlyExitBlock) {
134   MachineBasicBlock &MBB = *MI.getParent();
135   const DebugLoc DL = MI.getDebugLoc();
136 
137   auto BranchMI = BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_CBRANCH_SCC0))
138                       .addMBB(EarlyExitBlock);
139   auto Next = std::next(MI.getIterator());
140 
141   if (Next != MBB.end() && !Next->isTerminator())
142     splitBlock(MBB, *BranchMI, MDT);
143 
144   MBB.addSuccessor(EarlyExitBlock);
145   MDT->getBase().insertEdge(&MBB, EarlyExitBlock);
146 }
147 
runOnMachineFunction(MachineFunction & MF)148 bool SILateBranchLowering::runOnMachineFunction(MachineFunction &MF) {
149   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
150   TII = ST.getInstrInfo();
151   TRI = &TII->getRegisterInfo();
152   MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
153 
154   MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
155   ExecReg = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
156 
157   SmallVector<MachineInstr *, 4> EarlyTermInstrs;
158   SmallVector<MachineInstr *, 1> EpilogInstrs;
159   bool MadeChange = false;
160 
161   for (MachineBasicBlock &MBB : MF) {
162     for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
163       switch (MI.getOpcode()) {
164       case AMDGPU::S_BRANCH:
165         // Optimize out branches to the next block.
166         // This only occurs in -O0 when BranchFolding is not executed.
167         if (MBB.isLayoutSuccessor(MI.getOperand(0).getMBB())) {
168           assert(&MI == &MBB.back());
169           MI.eraseFromParent();
170           MadeChange = true;
171         }
172         break;
173 
174       case AMDGPU::SI_CS_CHAIN_TC_W32:
175       case AMDGPU::SI_CS_CHAIN_TC_W64:
176         expandChainCall(MI);
177         MadeChange = true;
178         break;
179 
180       case AMDGPU::SI_EARLY_TERMINATE_SCC0:
181         EarlyTermInstrs.push_back(&MI);
182         break;
183 
184       case AMDGPU::SI_RETURN_TO_EPILOG:
185         EpilogInstrs.push_back(&MI);
186         break;
187 
188       default:
189         break;
190       }
191     }
192   }
193 
194   // Lower any early exit branches first
195   if (!EarlyTermInstrs.empty()) {
196     MachineBasicBlock *EarlyExitBlock = MF.CreateMachineBasicBlock();
197     DebugLoc DL;
198 
199     MF.insert(MF.end(), EarlyExitBlock);
200     BuildMI(*EarlyExitBlock, EarlyExitBlock->end(), DL, TII->get(MovOpc),
201             ExecReg)
202         .addImm(0);
203     generateEndPgm(*EarlyExitBlock, EarlyExitBlock->end(), DL, TII, MF);
204 
205     for (MachineInstr *Instr : EarlyTermInstrs) {
206       // Early termination in GS does nothing
207       if (MF.getFunction().getCallingConv() != CallingConv::AMDGPU_GS)
208         earlyTerm(*Instr, EarlyExitBlock);
209       Instr->eraseFromParent();
210     }
211 
212     EarlyTermInstrs.clear();
213     MadeChange = true;
214   }
215 
216   // Now check return to epilog instructions occur at function end
217   if (!EpilogInstrs.empty()) {
218     MachineBasicBlock *EmptyMBBAtEnd = nullptr;
219     assert(!MF.getInfo<SIMachineFunctionInfo>()->returnsVoid());
220 
221     // If there are multiple returns to epilog then all will
222     // become jumps to new empty end block.
223     if (EpilogInstrs.size() > 1) {
224       EmptyMBBAtEnd = MF.CreateMachineBasicBlock();
225       MF.insert(MF.end(), EmptyMBBAtEnd);
226     }
227 
228     for (auto *MI : EpilogInstrs) {
229       auto MBB = MI->getParent();
230       if (MBB == &MF.back() && MI == &MBB->back())
231         continue;
232 
233       // SI_RETURN_TO_EPILOG is not the last instruction.
234       // Jump to empty block at function end.
235       if (!EmptyMBBAtEnd) {
236         EmptyMBBAtEnd = MF.CreateMachineBasicBlock();
237         MF.insert(MF.end(), EmptyMBBAtEnd);
238       }
239 
240       MBB->addSuccessor(EmptyMBBAtEnd);
241       MDT->getBase().insertEdge(MBB, EmptyMBBAtEnd);
242       BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::S_BRANCH))
243           .addMBB(EmptyMBBAtEnd);
244       MI->eraseFromParent();
245       MadeChange = true;
246     }
247 
248     EpilogInstrs.clear();
249   }
250 
251   return MadeChange;
252 }
253