//===-- SILateBranchLowering.cpp - Final preparation of branches ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass mainly lowers early terminate pseudo instructions.
//
//===----------------------------------------------------------------------===//
13fe6060f1SDimitry Andric
14fe6060f1SDimitry Andric #include "AMDGPU.h"
15fe6060f1SDimitry Andric #include "GCNSubtarget.h"
16fe6060f1SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
17fe6060f1SDimitry Andric #include "SIMachineFunctionInfo.h"
18fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineDominators.h"
19fe6060f1SDimitry Andric #include "llvm/InitializePasses.h"
20fe6060f1SDimitry Andric
21fe6060f1SDimitry Andric using namespace llvm;
22fe6060f1SDimitry Andric
23fe6060f1SDimitry Andric #define DEBUG_TYPE "si-late-branch-lowering"
24fe6060f1SDimitry Andric
25fe6060f1SDimitry Andric namespace {
26fe6060f1SDimitry Andric
27fe6060f1SDimitry Andric class SILateBranchLowering : public MachineFunctionPass {
28fe6060f1SDimitry Andric private:
29fe6060f1SDimitry Andric const SIRegisterInfo *TRI = nullptr;
30fe6060f1SDimitry Andric const SIInstrInfo *TII = nullptr;
31fe6060f1SDimitry Andric MachineDominatorTree *MDT = nullptr;
32fe6060f1SDimitry Andric
335f757f3fSDimitry Andric void expandChainCall(MachineInstr &MI);
34fe6060f1SDimitry Andric void earlyTerm(MachineInstr &MI, MachineBasicBlock *EarlyExitBlock);
35fe6060f1SDimitry Andric
36fe6060f1SDimitry Andric public:
37fe6060f1SDimitry Andric static char ID;
38fe6060f1SDimitry Andric
39fe6060f1SDimitry Andric unsigned MovOpc;
40fe6060f1SDimitry Andric Register ExecReg;
41fe6060f1SDimitry Andric
SILateBranchLowering()42fe6060f1SDimitry Andric SILateBranchLowering() : MachineFunctionPass(ID) {}
43fe6060f1SDimitry Andric
44fe6060f1SDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override;
45fe6060f1SDimitry Andric
getPassName() const46fe6060f1SDimitry Andric StringRef getPassName() const override {
47fe6060f1SDimitry Andric return "SI Final Branch Preparation";
48fe6060f1SDimitry Andric }
49fe6060f1SDimitry Andric
getAnalysisUsage(AnalysisUsage & AU) const50fe6060f1SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override {
51*0fca6ea1SDimitry Andric AU.addRequired<MachineDominatorTreeWrapperPass>();
52*0fca6ea1SDimitry Andric AU.addPreserved<MachineDominatorTreeWrapperPass>();
53fe6060f1SDimitry Andric MachineFunctionPass::getAnalysisUsage(AU);
54fe6060f1SDimitry Andric }
55fe6060f1SDimitry Andric };
56fe6060f1SDimitry Andric
57fe6060f1SDimitry Andric } // end anonymous namespace
58fe6060f1SDimitry Andric
59fe6060f1SDimitry Andric char SILateBranchLowering::ID = 0;
60fe6060f1SDimitry Andric
61fe6060f1SDimitry Andric INITIALIZE_PASS_BEGIN(SILateBranchLowering, DEBUG_TYPE,
62fe6060f1SDimitry Andric "SI insert s_cbranch_execz instructions", false, false)
63*0fca6ea1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
64fe6060f1SDimitry Andric INITIALIZE_PASS_END(SILateBranchLowering, DEBUG_TYPE,
65fe6060f1SDimitry Andric "SI insert s_cbranch_execz instructions", false, false)
66fe6060f1SDimitry Andric
67fe6060f1SDimitry Andric char &llvm::SILateBranchLoweringPassID = SILateBranchLowering::ID;
68fe6060f1SDimitry Andric
generateEndPgm(MachineBasicBlock & MBB,MachineBasicBlock::iterator I,DebugLoc DL,const SIInstrInfo * TII,MachineFunction & MF)69fe6060f1SDimitry Andric static void generateEndPgm(MachineBasicBlock &MBB,
70fe6060f1SDimitry Andric MachineBasicBlock::iterator I, DebugLoc DL,
71fe6060f1SDimitry Andric const SIInstrInfo *TII, MachineFunction &MF) {
72fe6060f1SDimitry Andric const Function &F = MF.getFunction();
73fe6060f1SDimitry Andric bool IsPS = F.getCallingConv() == CallingConv::AMDGPU_PS;
74fe6060f1SDimitry Andric
75fe6060f1SDimitry Andric // Check if hardware has been configured to expect color or depth exports.
7681ad6265SDimitry Andric bool HasColorExports = AMDGPU::getHasColorExport(F);
7781ad6265SDimitry Andric bool HasDepthExports = AMDGPU::getHasDepthExport(F);
7881ad6265SDimitry Andric bool HasExports = HasColorExports || HasDepthExports;
79fe6060f1SDimitry Andric
80fe6060f1SDimitry Andric // Prior to GFX10, hardware always expects at least one export for PS.
81fe6060f1SDimitry Andric bool MustExport = !AMDGPU::isGFX10Plus(TII->getSubtarget());
82fe6060f1SDimitry Andric
83fe6060f1SDimitry Andric if (IsPS && (HasExports || MustExport)) {
84fe6060f1SDimitry Andric // Generate "null export" if hardware is expecting PS to export.
8581ad6265SDimitry Andric const GCNSubtarget &ST = MBB.getParent()->getSubtarget<GCNSubtarget>();
8681ad6265SDimitry Andric int Target =
8781ad6265SDimitry Andric ST.hasNullExportTarget()
8881ad6265SDimitry Andric ? AMDGPU::Exp::ET_NULL
8981ad6265SDimitry Andric : (HasColorExports ? AMDGPU::Exp::ET_MRT0 : AMDGPU::Exp::ET_MRTZ);
90fe6060f1SDimitry Andric BuildMI(MBB, I, DL, TII->get(AMDGPU::EXP_DONE))
9181ad6265SDimitry Andric .addImm(Target)
92fe6060f1SDimitry Andric .addReg(AMDGPU::VGPR0, RegState::Undef)
93fe6060f1SDimitry Andric .addReg(AMDGPU::VGPR0, RegState::Undef)
94fe6060f1SDimitry Andric .addReg(AMDGPU::VGPR0, RegState::Undef)
95fe6060f1SDimitry Andric .addReg(AMDGPU::VGPR0, RegState::Undef)
96fe6060f1SDimitry Andric .addImm(1) // vm
97fe6060f1SDimitry Andric .addImm(0) // compr
98fe6060f1SDimitry Andric .addImm(0); // en
99fe6060f1SDimitry Andric }
100fe6060f1SDimitry Andric
101fe6060f1SDimitry Andric // s_endpgm
102fe6060f1SDimitry Andric BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ENDPGM)).addImm(0);
103fe6060f1SDimitry Andric }
104fe6060f1SDimitry Andric
splitBlock(MachineBasicBlock & MBB,MachineInstr & MI,MachineDominatorTree * MDT)105fe6060f1SDimitry Andric static void splitBlock(MachineBasicBlock &MBB, MachineInstr &MI,
106fe6060f1SDimitry Andric MachineDominatorTree *MDT) {
107fe6060f1SDimitry Andric MachineBasicBlock *SplitBB = MBB.splitAt(MI, /*UpdateLiveIns*/ true);
108fe6060f1SDimitry Andric
109fe6060f1SDimitry Andric // Update dominator tree
110fe6060f1SDimitry Andric using DomTreeT = DomTreeBase<MachineBasicBlock>;
111fe6060f1SDimitry Andric SmallVector<DomTreeT::UpdateType, 16> DTUpdates;
112fe6060f1SDimitry Andric for (MachineBasicBlock *Succ : SplitBB->successors()) {
113fe6060f1SDimitry Andric DTUpdates.push_back({DomTreeT::Insert, SplitBB, Succ});
114fe6060f1SDimitry Andric DTUpdates.push_back({DomTreeT::Delete, &MBB, Succ});
115fe6060f1SDimitry Andric }
116fe6060f1SDimitry Andric DTUpdates.push_back({DomTreeT::Insert, &MBB, SplitBB});
117fe6060f1SDimitry Andric MDT->getBase().applyUpdates(DTUpdates);
118fe6060f1SDimitry Andric }
119fe6060f1SDimitry Andric
expandChainCall(MachineInstr & MI)1205f757f3fSDimitry Andric void SILateBranchLowering::expandChainCall(MachineInstr &MI) {
1215f757f3fSDimitry Andric // This is a tail call that needs to be expanded into at least
1225f757f3fSDimitry Andric // 2 instructions, one for setting EXEC and one for the actual tail call.
1235f757f3fSDimitry Andric constexpr unsigned ExecIdx = 3;
1245f757f3fSDimitry Andric
1255f757f3fSDimitry Andric BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(MovOpc), ExecReg)
1265f757f3fSDimitry Andric ->addOperand(MI.getOperand(ExecIdx));
1275f757f3fSDimitry Andric MI.removeOperand(ExecIdx);
1285f757f3fSDimitry Andric
1295f757f3fSDimitry Andric MI.setDesc(TII->get(AMDGPU::SI_TCRETURN));
1305f757f3fSDimitry Andric }
1315f757f3fSDimitry Andric
earlyTerm(MachineInstr & MI,MachineBasicBlock * EarlyExitBlock)132fe6060f1SDimitry Andric void SILateBranchLowering::earlyTerm(MachineInstr &MI,
133fe6060f1SDimitry Andric MachineBasicBlock *EarlyExitBlock) {
134fe6060f1SDimitry Andric MachineBasicBlock &MBB = *MI.getParent();
135fe6060f1SDimitry Andric const DebugLoc DL = MI.getDebugLoc();
136fe6060f1SDimitry Andric
137fe6060f1SDimitry Andric auto BranchMI = BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_CBRANCH_SCC0))
138fe6060f1SDimitry Andric .addMBB(EarlyExitBlock);
139fe6060f1SDimitry Andric auto Next = std::next(MI.getIterator());
140fe6060f1SDimitry Andric
141fe6060f1SDimitry Andric if (Next != MBB.end() && !Next->isTerminator())
142fe6060f1SDimitry Andric splitBlock(MBB, *BranchMI, MDT);
143fe6060f1SDimitry Andric
144fe6060f1SDimitry Andric MBB.addSuccessor(EarlyExitBlock);
145fe6060f1SDimitry Andric MDT->getBase().insertEdge(&MBB, EarlyExitBlock);
146fe6060f1SDimitry Andric }
147fe6060f1SDimitry Andric
runOnMachineFunction(MachineFunction & MF)148fe6060f1SDimitry Andric bool SILateBranchLowering::runOnMachineFunction(MachineFunction &MF) {
149fe6060f1SDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
150fe6060f1SDimitry Andric TII = ST.getInstrInfo();
151fe6060f1SDimitry Andric TRI = &TII->getRegisterInfo();
152*0fca6ea1SDimitry Andric MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
153fe6060f1SDimitry Andric
154fe6060f1SDimitry Andric MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
155fe6060f1SDimitry Andric ExecReg = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
156fe6060f1SDimitry Andric
157fe6060f1SDimitry Andric SmallVector<MachineInstr *, 4> EarlyTermInstrs;
158fe6060f1SDimitry Andric SmallVector<MachineInstr *, 1> EpilogInstrs;
159fe6060f1SDimitry Andric bool MadeChange = false;
160fe6060f1SDimitry Andric
161fe6060f1SDimitry Andric for (MachineBasicBlock &MBB : MF) {
162349cc55cSDimitry Andric for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
163fe6060f1SDimitry Andric switch (MI.getOpcode()) {
164fe6060f1SDimitry Andric case AMDGPU::S_BRANCH:
165fe6060f1SDimitry Andric // Optimize out branches to the next block.
166fe6060f1SDimitry Andric // This only occurs in -O0 when BranchFolding is not executed.
167fe6060f1SDimitry Andric if (MBB.isLayoutSuccessor(MI.getOperand(0).getMBB())) {
168fe6060f1SDimitry Andric assert(&MI == &MBB.back());
169fe6060f1SDimitry Andric MI.eraseFromParent();
170fe6060f1SDimitry Andric MadeChange = true;
171fe6060f1SDimitry Andric }
172fe6060f1SDimitry Andric break;
173fe6060f1SDimitry Andric
1745f757f3fSDimitry Andric case AMDGPU::SI_CS_CHAIN_TC_W32:
1755f757f3fSDimitry Andric case AMDGPU::SI_CS_CHAIN_TC_W64:
1765f757f3fSDimitry Andric expandChainCall(MI);
1775f757f3fSDimitry Andric MadeChange = true;
1785f757f3fSDimitry Andric break;
1795f757f3fSDimitry Andric
180fe6060f1SDimitry Andric case AMDGPU::SI_EARLY_TERMINATE_SCC0:
181fe6060f1SDimitry Andric EarlyTermInstrs.push_back(&MI);
182fe6060f1SDimitry Andric break;
183fe6060f1SDimitry Andric
184fe6060f1SDimitry Andric case AMDGPU::SI_RETURN_TO_EPILOG:
185fe6060f1SDimitry Andric EpilogInstrs.push_back(&MI);
186fe6060f1SDimitry Andric break;
187fe6060f1SDimitry Andric
188fe6060f1SDimitry Andric default:
189fe6060f1SDimitry Andric break;
190fe6060f1SDimitry Andric }
191fe6060f1SDimitry Andric }
192fe6060f1SDimitry Andric }
193fe6060f1SDimitry Andric
194fe6060f1SDimitry Andric // Lower any early exit branches first
195fe6060f1SDimitry Andric if (!EarlyTermInstrs.empty()) {
196fe6060f1SDimitry Andric MachineBasicBlock *EarlyExitBlock = MF.CreateMachineBasicBlock();
197fe6060f1SDimitry Andric DebugLoc DL;
198fe6060f1SDimitry Andric
199fe6060f1SDimitry Andric MF.insert(MF.end(), EarlyExitBlock);
200fe6060f1SDimitry Andric BuildMI(*EarlyExitBlock, EarlyExitBlock->end(), DL, TII->get(MovOpc),
201fe6060f1SDimitry Andric ExecReg)
202fe6060f1SDimitry Andric .addImm(0);
203fe6060f1SDimitry Andric generateEndPgm(*EarlyExitBlock, EarlyExitBlock->end(), DL, TII, MF);
204fe6060f1SDimitry Andric
205fe6060f1SDimitry Andric for (MachineInstr *Instr : EarlyTermInstrs) {
206fe6060f1SDimitry Andric // Early termination in GS does nothing
207fe6060f1SDimitry Andric if (MF.getFunction().getCallingConv() != CallingConv::AMDGPU_GS)
208fe6060f1SDimitry Andric earlyTerm(*Instr, EarlyExitBlock);
209fe6060f1SDimitry Andric Instr->eraseFromParent();
210fe6060f1SDimitry Andric }
211fe6060f1SDimitry Andric
212fe6060f1SDimitry Andric EarlyTermInstrs.clear();
213fe6060f1SDimitry Andric MadeChange = true;
214fe6060f1SDimitry Andric }
215fe6060f1SDimitry Andric
216fe6060f1SDimitry Andric // Now check return to epilog instructions occur at function end
217fe6060f1SDimitry Andric if (!EpilogInstrs.empty()) {
218fe6060f1SDimitry Andric MachineBasicBlock *EmptyMBBAtEnd = nullptr;
219fe6060f1SDimitry Andric assert(!MF.getInfo<SIMachineFunctionInfo>()->returnsVoid());
220fe6060f1SDimitry Andric
221fe6060f1SDimitry Andric // If there are multiple returns to epilog then all will
222fe6060f1SDimitry Andric // become jumps to new empty end block.
223fe6060f1SDimitry Andric if (EpilogInstrs.size() > 1) {
224fe6060f1SDimitry Andric EmptyMBBAtEnd = MF.CreateMachineBasicBlock();
225fe6060f1SDimitry Andric MF.insert(MF.end(), EmptyMBBAtEnd);
226fe6060f1SDimitry Andric }
227fe6060f1SDimitry Andric
228bdd1243dSDimitry Andric for (auto *MI : EpilogInstrs) {
229fe6060f1SDimitry Andric auto MBB = MI->getParent();
230fe6060f1SDimitry Andric if (MBB == &MF.back() && MI == &MBB->back())
231fe6060f1SDimitry Andric continue;
232fe6060f1SDimitry Andric
233fe6060f1SDimitry Andric // SI_RETURN_TO_EPILOG is not the last instruction.
234fe6060f1SDimitry Andric // Jump to empty block at function end.
235fe6060f1SDimitry Andric if (!EmptyMBBAtEnd) {
236fe6060f1SDimitry Andric EmptyMBBAtEnd = MF.CreateMachineBasicBlock();
237fe6060f1SDimitry Andric MF.insert(MF.end(), EmptyMBBAtEnd);
238fe6060f1SDimitry Andric }
239fe6060f1SDimitry Andric
240fe6060f1SDimitry Andric MBB->addSuccessor(EmptyMBBAtEnd);
241fe6060f1SDimitry Andric MDT->getBase().insertEdge(MBB, EmptyMBBAtEnd);
242fe6060f1SDimitry Andric BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::S_BRANCH))
243fe6060f1SDimitry Andric .addMBB(EmptyMBBAtEnd);
244fe6060f1SDimitry Andric MI->eraseFromParent();
245fe6060f1SDimitry Andric MadeChange = true;
246fe6060f1SDimitry Andric }
247fe6060f1SDimitry Andric
248fe6060f1SDimitry Andric EpilogInstrs.clear();
249fe6060f1SDimitry Andric }
250fe6060f1SDimitry Andric
251fe6060f1SDimitry Andric return MadeChange;
252fe6060f1SDimitry Andric }
253