1fe6060f1SDimitry Andric //===--------------------- SIOptimizeVGPRLiveRange.cpp -------------------===//
2fe6060f1SDimitry Andric //
3fe6060f1SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4fe6060f1SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5fe6060f1SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6fe6060f1SDimitry Andric //
7fe6060f1SDimitry Andric //===----------------------------------------------------------------------===//
8fe6060f1SDimitry Andric //
9fe6060f1SDimitry Andric /// \file
10fe6060f1SDimitry Andric /// This pass tries to remove unnecessary VGPR live ranges in divergent if-else
11fe6060f1SDimitry Andric /// structures and waterfall loops.
12fe6060f1SDimitry Andric ///
13fe6060f1SDimitry Andric /// When we do structurization, we usually transform an if-else into two
14349cc55cSDimitry Andric /// successive if-then (with a flow block to do predicate inversion). Consider a
15fe6060f1SDimitry Andric /// simple case after structurization: A divergent value %a was defined before
16fe6060f1SDimitry Andric /// if-else and used in both THEN (use in THEN is optional) and ELSE part:
17fe6060f1SDimitry Andric /// bb.if:
18fe6060f1SDimitry Andric /// %a = ...
19fe6060f1SDimitry Andric /// ...
20fe6060f1SDimitry Andric /// bb.then:
21fe6060f1SDimitry Andric /// ... = op %a
22fe6060f1SDimitry Andric /// ... // %a can be dead here
23fe6060f1SDimitry Andric /// bb.flow:
24fe6060f1SDimitry Andric /// ...
25fe6060f1SDimitry Andric /// bb.else:
26fe6060f1SDimitry Andric /// ... = %a
27fe6060f1SDimitry Andric /// ...
28fe6060f1SDimitry Andric /// bb.endif
29fe6060f1SDimitry Andric ///
30fe6060f1SDimitry Andric /// As register allocator has no idea of the thread-control-flow, it will just
31fe6060f1SDimitry Andric /// assume %a would be alive in the whole range of bb.then because of a later
32fe6060f1SDimitry Andric /// use in bb.else. On AMDGPU architecture, the VGPR is accessed with respect
33fe6060f1SDimitry Andric /// to exec mask. For this if-else case, the lanes active in bb.then will be
34fe6060f1SDimitry Andric /// inactive in bb.else, and vice-versa. So we are safe to say that %a was dead
35fe6060f1SDimitry Andric /// after the last use in bb.then until the end of the block. The reason is
36fe6060f1SDimitry Andric /// the instructions in bb.then will only overwrite lanes that will never be
37fe6060f1SDimitry Andric /// accessed in bb.else.
38fe6060f1SDimitry Andric ///
39bdd1243dSDimitry Andric /// This pass aims to tell register allocator that %a is in-fact dead,
40fe6060f1SDimitry Andric /// through inserting a phi-node in bb.flow saying that %a is undef when coming
41fe6060f1SDimitry Andric /// from bb.then, and then replace the uses in the bb.else with the result of
42fe6060f1SDimitry Andric /// newly inserted phi.
43fe6060f1SDimitry Andric ///
44fe6060f1SDimitry Andric /// Two key conditions must be met to ensure correctness:
45fe6060f1SDimitry Andric /// 1.) The def-point should be in the same loop-level as if-else-endif to make
46fe6060f1SDimitry Andric /// sure the second loop iteration still get correct data.
47fe6060f1SDimitry Andric /// 2.) There should be no further uses after the IF-ELSE region.
48fe6060f1SDimitry Andric ///
49fe6060f1SDimitry Andric ///
50fe6060f1SDimitry Andric /// Waterfall loops get inserted around instructions that use divergent values
51fe6060f1SDimitry Andric /// but can only be executed with a uniform value. For example an indirect call
52fe6060f1SDimitry Andric /// to a divergent address:
53fe6060f1SDimitry Andric /// bb.start:
54fe6060f1SDimitry Andric /// %a = ...
55fe6060f1SDimitry Andric /// %fun = ...
56fe6060f1SDimitry Andric /// ...
57fe6060f1SDimitry Andric /// bb.loop:
58fe6060f1SDimitry Andric /// call %fun (%a)
59fe6060f1SDimitry Andric /// ... // %a can be dead here
60fe6060f1SDimitry Andric /// loop %bb.loop
61fe6060f1SDimitry Andric ///
62fe6060f1SDimitry Andric /// The loop block is executed multiple times, but it is run exactly once for
63fe6060f1SDimitry Andric /// each active lane. Similar to the if-else case, the register allocator
64fe6060f1SDimitry Andric /// assumes that %a is live throughout the loop as it is used again in the next
65fe6060f1SDimitry Andric /// iteration. If %a is a VGPR that is unused after the loop, it does not need
66fe6060f1SDimitry Andric /// to be live after its last use in the loop block. By inserting a phi-node at
67fe6060f1SDimitry Andric /// the start of bb.loop that is undef when coming from bb.loop, the register
68fe6060f1SDimitry Andric /// allocation knows that the value of %a does not need to be preserved through
69fe6060f1SDimitry Andric /// iterations of the loop.
70fe6060f1SDimitry Andric ///
71fe6060f1SDimitry Andric //
72fe6060f1SDimitry Andric //===----------------------------------------------------------------------===//
73fe6060f1SDimitry Andric
74fe6060f1SDimitry Andric #include "AMDGPU.h"
75fe6060f1SDimitry Andric #include "GCNSubtarget.h"
76fe6060f1SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
77fe6060f1SDimitry Andric #include "SIMachineFunctionInfo.h"
78fe6060f1SDimitry Andric #include "llvm/CodeGen/LiveVariables.h"
79fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineDominators.h"
80fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineLoopInfo.h"
81fe6060f1SDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h"
82fe6060f1SDimitry Andric #include "llvm/InitializePasses.h"
83fe6060f1SDimitry Andric
84fe6060f1SDimitry Andric using namespace llvm;
85fe6060f1SDimitry Andric
86fe6060f1SDimitry Andric #define DEBUG_TYPE "si-opt-vgpr-liverange"
87fe6060f1SDimitry Andric
88fe6060f1SDimitry Andric namespace {
89fe6060f1SDimitry Andric
90fe6060f1SDimitry Andric class SIOptimizeVGPRLiveRange : public MachineFunctionPass {
91fe6060f1SDimitry Andric private:
92fe6060f1SDimitry Andric const SIRegisterInfo *TRI = nullptr;
93fe6060f1SDimitry Andric const SIInstrInfo *TII = nullptr;
94fe6060f1SDimitry Andric LiveVariables *LV = nullptr;
95fe6060f1SDimitry Andric MachineDominatorTree *MDT = nullptr;
96fe6060f1SDimitry Andric const MachineLoopInfo *Loops = nullptr;
97fe6060f1SDimitry Andric MachineRegisterInfo *MRI = nullptr;
98fe6060f1SDimitry Andric
99fe6060f1SDimitry Andric public:
100fe6060f1SDimitry Andric static char ID;
101fe6060f1SDimitry Andric
102fe6060f1SDimitry Andric MachineBasicBlock *getElseTarget(MachineBasicBlock *MBB) const;
103fe6060f1SDimitry Andric
104fe6060f1SDimitry Andric void collectElseRegionBlocks(MachineBasicBlock *Flow,
105fe6060f1SDimitry Andric MachineBasicBlock *Endif,
106fe6060f1SDimitry Andric SmallSetVector<MachineBasicBlock *, 16> &) const;
107fe6060f1SDimitry Andric
108fe6060f1SDimitry Andric void
109fe6060f1SDimitry Andric collectCandidateRegisters(MachineBasicBlock *If, MachineBasicBlock *Flow,
110fe6060f1SDimitry Andric MachineBasicBlock *Endif,
111fe6060f1SDimitry Andric SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks,
112fe6060f1SDimitry Andric SmallVectorImpl<Register> &CandidateRegs) const;
113fe6060f1SDimitry Andric
114fe6060f1SDimitry Andric void collectWaterfallCandidateRegisters(
11581ad6265SDimitry Andric MachineBasicBlock *LoopHeader, MachineBasicBlock *LoopEnd,
11681ad6265SDimitry Andric SmallSetVector<Register, 16> &CandidateRegs,
11781ad6265SDimitry Andric SmallSetVector<MachineBasicBlock *, 2> &Blocks,
11881ad6265SDimitry Andric SmallVectorImpl<MachineInstr *> &Instructions) const;
119fe6060f1SDimitry Andric
120fe6060f1SDimitry Andric void findNonPHIUsesInBlock(Register Reg, MachineBasicBlock *MBB,
121fe6060f1SDimitry Andric SmallVectorImpl<MachineInstr *> &Uses) const;
122fe6060f1SDimitry Andric
123fe6060f1SDimitry Andric void updateLiveRangeInThenRegion(Register Reg, MachineBasicBlock *If,
124fe6060f1SDimitry Andric MachineBasicBlock *Flow) const;
125fe6060f1SDimitry Andric
126fe6060f1SDimitry Andric void updateLiveRangeInElseRegion(
127fe6060f1SDimitry Andric Register Reg, Register NewReg, MachineBasicBlock *Flow,
128fe6060f1SDimitry Andric MachineBasicBlock *Endif,
129fe6060f1SDimitry Andric SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const;
130fe6060f1SDimitry Andric
131fe6060f1SDimitry Andric void
132fe6060f1SDimitry Andric optimizeLiveRange(Register Reg, MachineBasicBlock *If,
133fe6060f1SDimitry Andric MachineBasicBlock *Flow, MachineBasicBlock *Endif,
134fe6060f1SDimitry Andric SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const;
135fe6060f1SDimitry Andric
13681ad6265SDimitry Andric void optimizeWaterfallLiveRange(
13781ad6265SDimitry Andric Register Reg, MachineBasicBlock *LoopHeader,
13881ad6265SDimitry Andric SmallSetVector<MachineBasicBlock *, 2> &LoopBlocks,
13981ad6265SDimitry Andric SmallVectorImpl<MachineInstr *> &Instructions) const;
140fe6060f1SDimitry Andric
SIOptimizeVGPRLiveRange()141fe6060f1SDimitry Andric SIOptimizeVGPRLiveRange() : MachineFunctionPass(ID) {}
142fe6060f1SDimitry Andric
143fe6060f1SDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override;
144fe6060f1SDimitry Andric
getPassName() const145fe6060f1SDimitry Andric StringRef getPassName() const override {
146fe6060f1SDimitry Andric return "SI Optimize VGPR LiveRange";
147fe6060f1SDimitry Andric }
148fe6060f1SDimitry Andric
getAnalysisUsage(AnalysisUsage & AU) const149fe6060f1SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override {
150*0fca6ea1SDimitry Andric AU.addRequired<LiveVariablesWrapperPass>();
151*0fca6ea1SDimitry Andric AU.addRequired<MachineDominatorTreeWrapperPass>();
152*0fca6ea1SDimitry Andric AU.addRequired<MachineLoopInfoWrapperPass>();
153*0fca6ea1SDimitry Andric AU.addPreserved<LiveVariablesWrapperPass>();
154*0fca6ea1SDimitry Andric AU.addPreserved<MachineDominatorTreeWrapperPass>();
155*0fca6ea1SDimitry Andric AU.addPreserved<MachineLoopInfoWrapperPass>();
156fe6060f1SDimitry Andric MachineFunctionPass::getAnalysisUsage(AU);
157fe6060f1SDimitry Andric }
158fe6060f1SDimitry Andric
getRequiredProperties() const159fe6060f1SDimitry Andric MachineFunctionProperties getRequiredProperties() const override {
160fe6060f1SDimitry Andric return MachineFunctionProperties().set(
161fe6060f1SDimitry Andric MachineFunctionProperties::Property::IsSSA);
162fe6060f1SDimitry Andric }
16304eeddc0SDimitry Andric
getClearedProperties() const16404eeddc0SDimitry Andric MachineFunctionProperties getClearedProperties() const override {
16504eeddc0SDimitry Andric return MachineFunctionProperties().set(
16604eeddc0SDimitry Andric MachineFunctionProperties::Property::NoPHIs);
16704eeddc0SDimitry Andric }
168fe6060f1SDimitry Andric };
169fe6060f1SDimitry Andric
170fe6060f1SDimitry Andric } // end anonymous namespace
171fe6060f1SDimitry Andric
172fe6060f1SDimitry Andric // Check whether the MBB is a else flow block and get the branching target which
173fe6060f1SDimitry Andric // is the Endif block
174fe6060f1SDimitry Andric MachineBasicBlock *
getElseTarget(MachineBasicBlock * MBB) const175fe6060f1SDimitry Andric SIOptimizeVGPRLiveRange::getElseTarget(MachineBasicBlock *MBB) const {
176fe6060f1SDimitry Andric for (auto &BR : MBB->terminators()) {
177fe6060f1SDimitry Andric if (BR.getOpcode() == AMDGPU::SI_ELSE)
178fe6060f1SDimitry Andric return BR.getOperand(2).getMBB();
179fe6060f1SDimitry Andric }
180fe6060f1SDimitry Andric return nullptr;
181fe6060f1SDimitry Andric }
182fe6060f1SDimitry Andric
collectElseRegionBlocks(MachineBasicBlock * Flow,MachineBasicBlock * Endif,SmallSetVector<MachineBasicBlock *,16> & Blocks) const183fe6060f1SDimitry Andric void SIOptimizeVGPRLiveRange::collectElseRegionBlocks(
184fe6060f1SDimitry Andric MachineBasicBlock *Flow, MachineBasicBlock *Endif,
185fe6060f1SDimitry Andric SmallSetVector<MachineBasicBlock *, 16> &Blocks) const {
186fe6060f1SDimitry Andric assert(Flow != Endif);
187fe6060f1SDimitry Andric
188fe6060f1SDimitry Andric MachineBasicBlock *MBB = Endif;
189fe6060f1SDimitry Andric unsigned Cur = 0;
190fe6060f1SDimitry Andric while (MBB) {
191fe6060f1SDimitry Andric for (auto *Pred : MBB->predecessors()) {
192fe6060f1SDimitry Andric if (Pred != Flow && !Blocks.contains(Pred))
193fe6060f1SDimitry Andric Blocks.insert(Pred);
194fe6060f1SDimitry Andric }
195fe6060f1SDimitry Andric
196fe6060f1SDimitry Andric if (Cur < Blocks.size())
197fe6060f1SDimitry Andric MBB = Blocks[Cur++];
198fe6060f1SDimitry Andric else
199fe6060f1SDimitry Andric MBB = nullptr;
200fe6060f1SDimitry Andric }
201fe6060f1SDimitry Andric
202fe6060f1SDimitry Andric LLVM_DEBUG({
203fe6060f1SDimitry Andric dbgs() << "Found Else blocks: ";
204fe6060f1SDimitry Andric for (auto *MBB : Blocks)
205fe6060f1SDimitry Andric dbgs() << printMBBReference(*MBB) << ' ';
206fe6060f1SDimitry Andric dbgs() << '\n';
207fe6060f1SDimitry Andric });
208fe6060f1SDimitry Andric }
209fe6060f1SDimitry Andric
210fe6060f1SDimitry Andric /// Find the instructions(excluding phi) in \p MBB that uses the \p Reg.
findNonPHIUsesInBlock(Register Reg,MachineBasicBlock * MBB,SmallVectorImpl<MachineInstr * > & Uses) const211fe6060f1SDimitry Andric void SIOptimizeVGPRLiveRange::findNonPHIUsesInBlock(
212fe6060f1SDimitry Andric Register Reg, MachineBasicBlock *MBB,
213fe6060f1SDimitry Andric SmallVectorImpl<MachineInstr *> &Uses) const {
214fe6060f1SDimitry Andric for (auto &UseMI : MRI->use_nodbg_instructions(Reg)) {
215fe6060f1SDimitry Andric if (UseMI.getParent() == MBB && !UseMI.isPHI())
216fe6060f1SDimitry Andric Uses.push_back(&UseMI);
217fe6060f1SDimitry Andric }
218fe6060f1SDimitry Andric }
219fe6060f1SDimitry Andric
220fe6060f1SDimitry Andric /// Collect the killed registers in the ELSE region which are not alive through
221fe6060f1SDimitry Andric /// the whole THEN region.
collectCandidateRegisters(MachineBasicBlock * If,MachineBasicBlock * Flow,MachineBasicBlock * Endif,SmallSetVector<MachineBasicBlock *,16> & ElseBlocks,SmallVectorImpl<Register> & CandidateRegs) const222fe6060f1SDimitry Andric void SIOptimizeVGPRLiveRange::collectCandidateRegisters(
223fe6060f1SDimitry Andric MachineBasicBlock *If, MachineBasicBlock *Flow, MachineBasicBlock *Endif,
224fe6060f1SDimitry Andric SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks,
225fe6060f1SDimitry Andric SmallVectorImpl<Register> &CandidateRegs) const {
226fe6060f1SDimitry Andric
227fe6060f1SDimitry Andric SmallSet<Register, 8> KillsInElse;
228fe6060f1SDimitry Andric
229fe6060f1SDimitry Andric for (auto *Else : ElseBlocks) {
230fe6060f1SDimitry Andric for (auto &MI : Else->instrs()) {
231fe6060f1SDimitry Andric if (MI.isDebugInstr())
232fe6060f1SDimitry Andric continue;
233fe6060f1SDimitry Andric
234fe6060f1SDimitry Andric for (auto &MO : MI.operands()) {
235fe6060f1SDimitry Andric if (!MO.isReg() || !MO.getReg() || MO.isDef())
236fe6060f1SDimitry Andric continue;
237fe6060f1SDimitry Andric
238fe6060f1SDimitry Andric Register MOReg = MO.getReg();
239fe6060f1SDimitry Andric // We can only optimize AGPR/VGPR virtual register
240fe6060f1SDimitry Andric if (MOReg.isPhysical() || !TRI->isVectorRegister(*MRI, MOReg))
241fe6060f1SDimitry Andric continue;
242fe6060f1SDimitry Andric
243fe6060f1SDimitry Andric if (MO.readsReg()) {
244fe6060f1SDimitry Andric LiveVariables::VarInfo &VI = LV->getVarInfo(MOReg);
245fe6060f1SDimitry Andric const MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent();
246fe6060f1SDimitry Andric // Make sure two conditions are met:
247fe6060f1SDimitry Andric // a.) the value is defined before/in the IF block
248fe6060f1SDimitry Andric // b.) should be defined in the same loop-level.
249fe6060f1SDimitry Andric if ((VI.AliveBlocks.test(If->getNumber()) || DefMBB == If) &&
250fe6060f1SDimitry Andric Loops->getLoopFor(DefMBB) == Loops->getLoopFor(If)) {
251fe6060f1SDimitry Andric // Check if the register is live into the endif block. If not,
252fe6060f1SDimitry Andric // consider it killed in the else region.
253fe6060f1SDimitry Andric LiveVariables::VarInfo &VI = LV->getVarInfo(MOReg);
254fe6060f1SDimitry Andric if (!VI.isLiveIn(*Endif, MOReg, *MRI)) {
255fe6060f1SDimitry Andric KillsInElse.insert(MOReg);
256fe6060f1SDimitry Andric } else {
257fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Excluding " << printReg(MOReg, TRI)
258fe6060f1SDimitry Andric << " as Live in Endif\n");
259fe6060f1SDimitry Andric }
260fe6060f1SDimitry Andric }
261fe6060f1SDimitry Andric }
262fe6060f1SDimitry Andric }
263fe6060f1SDimitry Andric }
264fe6060f1SDimitry Andric }
265fe6060f1SDimitry Andric
266fe6060f1SDimitry Andric // Check the phis in the Endif, looking for value coming from the ELSE
267fe6060f1SDimitry Andric // region. Make sure the phi-use is the last use.
268fe6060f1SDimitry Andric for (auto &MI : Endif->phis()) {
269fe6060f1SDimitry Andric for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
270fe6060f1SDimitry Andric auto &MO = MI.getOperand(Idx);
271fe6060f1SDimitry Andric auto *Pred = MI.getOperand(Idx + 1).getMBB();
272fe6060f1SDimitry Andric if (Pred == Flow)
273fe6060f1SDimitry Andric continue;
274fe6060f1SDimitry Andric assert(ElseBlocks.contains(Pred) && "Should be from Else region\n");
275fe6060f1SDimitry Andric
276fe6060f1SDimitry Andric if (!MO.isReg() || !MO.getReg() || MO.isUndef())
277fe6060f1SDimitry Andric continue;
278fe6060f1SDimitry Andric
279fe6060f1SDimitry Andric Register Reg = MO.getReg();
280fe6060f1SDimitry Andric if (Reg.isPhysical() || !TRI->isVectorRegister(*MRI, Reg))
281fe6060f1SDimitry Andric continue;
282fe6060f1SDimitry Andric
283fe6060f1SDimitry Andric LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
284fe6060f1SDimitry Andric
285fe6060f1SDimitry Andric if (VI.isLiveIn(*Endif, Reg, *MRI)) {
286fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Excluding " << printReg(Reg, TRI)
287fe6060f1SDimitry Andric << " as Live in Endif\n");
288fe6060f1SDimitry Andric continue;
289fe6060f1SDimitry Andric }
290fe6060f1SDimitry Andric // Make sure two conditions are met:
291fe6060f1SDimitry Andric // a.) the value is defined before/in the IF block
292fe6060f1SDimitry Andric // b.) should be defined in the same loop-level.
293fe6060f1SDimitry Andric const MachineBasicBlock *DefMBB = MRI->getVRegDef(Reg)->getParent();
294fe6060f1SDimitry Andric if ((VI.AliveBlocks.test(If->getNumber()) || DefMBB == If) &&
295fe6060f1SDimitry Andric Loops->getLoopFor(DefMBB) == Loops->getLoopFor(If))
296fe6060f1SDimitry Andric KillsInElse.insert(Reg);
297fe6060f1SDimitry Andric }
298fe6060f1SDimitry Andric }
299fe6060f1SDimitry Andric
300fe6060f1SDimitry Andric auto IsLiveThroughThen = [&](Register Reg) {
301fe6060f1SDimitry Andric for (auto I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E;
302fe6060f1SDimitry Andric ++I) {
303fe6060f1SDimitry Andric if (!I->readsReg())
304fe6060f1SDimitry Andric continue;
305fe6060f1SDimitry Andric auto *UseMI = I->getParent();
306fe6060f1SDimitry Andric auto *UseMBB = UseMI->getParent();
307fe6060f1SDimitry Andric if (UseMBB == Flow || UseMBB == Endif) {
308fe6060f1SDimitry Andric if (!UseMI->isPHI())
309fe6060f1SDimitry Andric return true;
310fe6060f1SDimitry Andric
311fe6060f1SDimitry Andric auto *IncomingMBB = UseMI->getOperand(I.getOperandNo() + 1).getMBB();
312fe6060f1SDimitry Andric // The register is live through the path If->Flow or Flow->Endif.
313fe6060f1SDimitry Andric // we should not optimize for such cases.
314fe6060f1SDimitry Andric if ((UseMBB == Flow && IncomingMBB != If) ||
315fe6060f1SDimitry Andric (UseMBB == Endif && IncomingMBB == Flow))
316fe6060f1SDimitry Andric return true;
317fe6060f1SDimitry Andric }
318fe6060f1SDimitry Andric }
319fe6060f1SDimitry Andric return false;
320fe6060f1SDimitry Andric };
321fe6060f1SDimitry Andric
322fe6060f1SDimitry Andric for (auto Reg : KillsInElse) {
323fe6060f1SDimitry Andric if (!IsLiveThroughThen(Reg))
324fe6060f1SDimitry Andric CandidateRegs.push_back(Reg);
325fe6060f1SDimitry Andric }
326fe6060f1SDimitry Andric }
327fe6060f1SDimitry Andric
328fe6060f1SDimitry Andric /// Collect the registers used in the waterfall loop block that are defined
329fe6060f1SDimitry Andric /// before.
collectWaterfallCandidateRegisters(MachineBasicBlock * LoopHeader,MachineBasicBlock * LoopEnd,SmallSetVector<Register,16> & CandidateRegs,SmallSetVector<MachineBasicBlock *,2> & Blocks,SmallVectorImpl<MachineInstr * > & Instructions) const330fe6060f1SDimitry Andric void SIOptimizeVGPRLiveRange::collectWaterfallCandidateRegisters(
33181ad6265SDimitry Andric MachineBasicBlock *LoopHeader, MachineBasicBlock *LoopEnd,
33281ad6265SDimitry Andric SmallSetVector<Register, 16> &CandidateRegs,
33381ad6265SDimitry Andric SmallSetVector<MachineBasicBlock *, 2> &Blocks,
33481ad6265SDimitry Andric SmallVectorImpl<MachineInstr *> &Instructions) const {
335fe6060f1SDimitry Andric
33681ad6265SDimitry Andric // Collect loop instructions, potentially spanning multiple blocks
33781ad6265SDimitry Andric auto *MBB = LoopHeader;
33881ad6265SDimitry Andric for (;;) {
33981ad6265SDimitry Andric Blocks.insert(MBB);
34081ad6265SDimitry Andric for (auto &MI : *MBB) {
341fe6060f1SDimitry Andric if (MI.isDebugInstr())
342fe6060f1SDimitry Andric continue;
34381ad6265SDimitry Andric Instructions.push_back(&MI);
34481ad6265SDimitry Andric }
34581ad6265SDimitry Andric if (MBB == LoopEnd)
34681ad6265SDimitry Andric break;
34781ad6265SDimitry Andric
34881ad6265SDimitry Andric if ((MBB != LoopHeader && MBB->pred_size() != 1) ||
34981ad6265SDimitry Andric (MBB == LoopHeader && MBB->pred_size() != 2) || MBB->succ_size() != 1) {
35081ad6265SDimitry Andric LLVM_DEBUG(dbgs() << "Unexpected edges in CFG, ignoring loop\n");
35181ad6265SDimitry Andric return;
35281ad6265SDimitry Andric }
35381ad6265SDimitry Andric
35481ad6265SDimitry Andric MBB = *MBB->succ_begin();
35581ad6265SDimitry Andric }
35681ad6265SDimitry Andric
35781ad6265SDimitry Andric for (auto *I : Instructions) {
35881ad6265SDimitry Andric auto &MI = *I;
359fe6060f1SDimitry Andric
36006c3fb27SDimitry Andric for (auto &MO : MI.all_uses()) {
36106c3fb27SDimitry Andric if (!MO.getReg())
362fe6060f1SDimitry Andric continue;
363fe6060f1SDimitry Andric
364fe6060f1SDimitry Andric Register MOReg = MO.getReg();
365fe6060f1SDimitry Andric // We can only optimize AGPR/VGPR virtual register
366fe6060f1SDimitry Andric if (MOReg.isPhysical() || !TRI->isVectorRegister(*MRI, MOReg))
367fe6060f1SDimitry Andric continue;
368fe6060f1SDimitry Andric
369fe6060f1SDimitry Andric if (MO.readsReg()) {
37081ad6265SDimitry Andric MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent();
371fe6060f1SDimitry Andric // Make sure the value is defined before the LOOP block
37281ad6265SDimitry Andric if (!Blocks.contains(DefMBB) && !CandidateRegs.contains(MOReg)) {
373fe6060f1SDimitry Andric // If the variable is used after the loop, the register coalescer will
374fe6060f1SDimitry Andric // merge the newly created register and remove the phi node again.
375fe6060f1SDimitry Andric // Just do nothing in that case.
376fe6060f1SDimitry Andric LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(MOReg);
377fe6060f1SDimitry Andric bool IsUsed = false;
37881ad6265SDimitry Andric for (auto *Succ : LoopEnd->successors()) {
37981ad6265SDimitry Andric if (!Blocks.contains(Succ) &&
38081ad6265SDimitry Andric OldVarInfo.isLiveIn(*Succ, MOReg, *MRI)) {
381fe6060f1SDimitry Andric IsUsed = true;
382fe6060f1SDimitry Andric break;
383fe6060f1SDimitry Andric }
384fe6060f1SDimitry Andric }
385fe6060f1SDimitry Andric if (!IsUsed) {
386fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Found candidate reg: "
387fe6060f1SDimitry Andric << printReg(MOReg, TRI, 0, MRI) << '\n');
388fe6060f1SDimitry Andric CandidateRegs.insert(MOReg);
389fe6060f1SDimitry Andric } else {
390fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Reg is used after loop, ignoring: "
391fe6060f1SDimitry Andric << printReg(MOReg, TRI, 0, MRI) << '\n');
392fe6060f1SDimitry Andric }
393fe6060f1SDimitry Andric }
394fe6060f1SDimitry Andric }
395fe6060f1SDimitry Andric }
396fe6060f1SDimitry Andric }
397fe6060f1SDimitry Andric }
398fe6060f1SDimitry Andric
399fe6060f1SDimitry Andric // Re-calculate the liveness of \p Reg in the THEN-region
updateLiveRangeInThenRegion(Register Reg,MachineBasicBlock * If,MachineBasicBlock * Flow) const400fe6060f1SDimitry Andric void SIOptimizeVGPRLiveRange::updateLiveRangeInThenRegion(
401fe6060f1SDimitry Andric Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow) const {
40204eeddc0SDimitry Andric SetVector<MachineBasicBlock *> Blocks;
40304eeddc0SDimitry Andric SmallVector<MachineBasicBlock *> WorkList({If});
404fe6060f1SDimitry Andric
40504eeddc0SDimitry Andric // Collect all successors until we see the flow block, where we should
40604eeddc0SDimitry Andric // reconverge.
40704eeddc0SDimitry Andric while (!WorkList.empty()) {
40804eeddc0SDimitry Andric auto *MBB = WorkList.pop_back_val();
40904eeddc0SDimitry Andric for (auto *Succ : MBB->successors()) {
41004eeddc0SDimitry Andric if (Succ != Flow && !Blocks.contains(Succ)) {
41104eeddc0SDimitry Andric WorkList.push_back(Succ);
41204eeddc0SDimitry Andric Blocks.insert(Succ);
413fe6060f1SDimitry Andric }
414fe6060f1SDimitry Andric }
41504eeddc0SDimitry Andric }
416fe6060f1SDimitry Andric
417fe6060f1SDimitry Andric LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
41804eeddc0SDimitry Andric for (MachineBasicBlock *MBB : Blocks) {
419fe6060f1SDimitry Andric // Clear Live bit, as we will recalculate afterwards
420fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Clear AliveBlock " << printMBBReference(*MBB)
421fe6060f1SDimitry Andric << '\n');
422fe6060f1SDimitry Andric OldVarInfo.AliveBlocks.reset(MBB->getNumber());
423fe6060f1SDimitry Andric }
424fe6060f1SDimitry Andric
42504eeddc0SDimitry Andric SmallPtrSet<MachineBasicBlock *, 4> PHIIncoming;
42604eeddc0SDimitry Andric
427fe6060f1SDimitry Andric // Get the blocks the Reg should be alive through
428fe6060f1SDimitry Andric for (auto I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E;
429fe6060f1SDimitry Andric ++I) {
430fe6060f1SDimitry Andric auto *UseMI = I->getParent();
431fe6060f1SDimitry Andric if (UseMI->isPHI() && I->readsReg()) {
43204eeddc0SDimitry Andric if (Blocks.contains(UseMI->getParent()))
433fe6060f1SDimitry Andric PHIIncoming.insert(UseMI->getOperand(I.getOperandNo() + 1).getMBB());
434fe6060f1SDimitry Andric }
435fe6060f1SDimitry Andric }
436fe6060f1SDimitry Andric
43704eeddc0SDimitry Andric for (MachineBasicBlock *MBB : Blocks) {
438fe6060f1SDimitry Andric SmallVector<MachineInstr *> Uses;
439fe6060f1SDimitry Andric // PHI instructions has been processed before.
440fe6060f1SDimitry Andric findNonPHIUsesInBlock(Reg, MBB, Uses);
441fe6060f1SDimitry Andric
442fe6060f1SDimitry Andric if (Uses.size() == 1) {
443fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Found one Non-PHI use in "
444fe6060f1SDimitry Andric << printMBBReference(*MBB) << '\n');
445fe6060f1SDimitry Andric LV->HandleVirtRegUse(Reg, MBB, *(*Uses.begin()));
446fe6060f1SDimitry Andric } else if (Uses.size() > 1) {
447fe6060f1SDimitry Andric // Process the instructions in-order
448fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Found " << Uses.size() << " Non-PHI uses in "
449fe6060f1SDimitry Andric << printMBBReference(*MBB) << '\n');
450fe6060f1SDimitry Andric for (MachineInstr &MI : *MBB) {
451fe6060f1SDimitry Andric if (llvm::is_contained(Uses, &MI))
452fe6060f1SDimitry Andric LV->HandleVirtRegUse(Reg, MBB, MI);
453fe6060f1SDimitry Andric }
454fe6060f1SDimitry Andric }
455fe6060f1SDimitry Andric
456fe6060f1SDimitry Andric // Mark Reg alive through the block if this is a PHI incoming block
457fe6060f1SDimitry Andric if (PHIIncoming.contains(MBB))
458fe6060f1SDimitry Andric LV->MarkVirtRegAliveInBlock(OldVarInfo, MRI->getVRegDef(Reg)->getParent(),
459fe6060f1SDimitry Andric MBB);
460fe6060f1SDimitry Andric }
461fe6060f1SDimitry Andric
462fe6060f1SDimitry Andric // Set the isKilled flag if we get new Kills in the THEN region.
463fe6060f1SDimitry Andric for (auto *MI : OldVarInfo.Kills) {
46404eeddc0SDimitry Andric if (Blocks.contains(MI->getParent()))
465fe6060f1SDimitry Andric MI->addRegisterKilled(Reg, TRI);
466fe6060f1SDimitry Andric }
467fe6060f1SDimitry Andric }
468fe6060f1SDimitry Andric
updateLiveRangeInElseRegion(Register Reg,Register NewReg,MachineBasicBlock * Flow,MachineBasicBlock * Endif,SmallSetVector<MachineBasicBlock *,16> & ElseBlocks) const469fe6060f1SDimitry Andric void SIOptimizeVGPRLiveRange::updateLiveRangeInElseRegion(
470fe6060f1SDimitry Andric Register Reg, Register NewReg, MachineBasicBlock *Flow,
471fe6060f1SDimitry Andric MachineBasicBlock *Endif,
472fe6060f1SDimitry Andric SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const {
473fe6060f1SDimitry Andric LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(NewReg);
474fe6060f1SDimitry Andric LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
475fe6060f1SDimitry Andric
476fe6060f1SDimitry Andric // Transfer aliveBlocks from Reg to NewReg
477fe6060f1SDimitry Andric for (auto *MBB : ElseBlocks) {
478fe6060f1SDimitry Andric unsigned BBNum = MBB->getNumber();
479fe6060f1SDimitry Andric if (OldVarInfo.AliveBlocks.test(BBNum)) {
480fe6060f1SDimitry Andric NewVarInfo.AliveBlocks.set(BBNum);
481fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Removing AliveBlock " << printMBBReference(*MBB)
482fe6060f1SDimitry Andric << '\n');
483fe6060f1SDimitry Andric OldVarInfo.AliveBlocks.reset(BBNum);
484fe6060f1SDimitry Andric }
485fe6060f1SDimitry Andric }
486fe6060f1SDimitry Andric
487fe6060f1SDimitry Andric // Transfer the possible Kills in ElseBlocks from Reg to NewReg
488fe6060f1SDimitry Andric auto I = OldVarInfo.Kills.begin();
489fe6060f1SDimitry Andric while (I != OldVarInfo.Kills.end()) {
490fe6060f1SDimitry Andric if (ElseBlocks.contains((*I)->getParent())) {
491fe6060f1SDimitry Andric NewVarInfo.Kills.push_back(*I);
492fe6060f1SDimitry Andric I = OldVarInfo.Kills.erase(I);
493fe6060f1SDimitry Andric } else {
494fe6060f1SDimitry Andric ++I;
495fe6060f1SDimitry Andric }
496fe6060f1SDimitry Andric }
497fe6060f1SDimitry Andric }
498fe6060f1SDimitry Andric
optimizeLiveRange(Register Reg,MachineBasicBlock * If,MachineBasicBlock * Flow,MachineBasicBlock * Endif,SmallSetVector<MachineBasicBlock *,16> & ElseBlocks) const499fe6060f1SDimitry Andric void SIOptimizeVGPRLiveRange::optimizeLiveRange(
500fe6060f1SDimitry Andric Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow,
501fe6060f1SDimitry Andric MachineBasicBlock *Endif,
502fe6060f1SDimitry Andric SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const {
503fe6060f1SDimitry Andric // Insert a new PHI, marking the value from the THEN region being
504fe6060f1SDimitry Andric // undef.
505fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n');
506fe6060f1SDimitry Andric const auto *RC = MRI->getRegClass(Reg);
507fe6060f1SDimitry Andric Register NewReg = MRI->createVirtualRegister(RC);
508fe6060f1SDimitry Andric Register UndefReg = MRI->createVirtualRegister(RC);
509fe6060f1SDimitry Andric MachineInstrBuilder PHI = BuildMI(*Flow, Flow->getFirstNonPHI(), DebugLoc(),
510fe6060f1SDimitry Andric TII->get(TargetOpcode::PHI), NewReg);
511fe6060f1SDimitry Andric for (auto *Pred : Flow->predecessors()) {
512fe6060f1SDimitry Andric if (Pred == If)
513fe6060f1SDimitry Andric PHI.addReg(Reg).addMBB(Pred);
514fe6060f1SDimitry Andric else
515fe6060f1SDimitry Andric PHI.addReg(UndefReg, RegState::Undef).addMBB(Pred);
516fe6060f1SDimitry Andric }
517fe6060f1SDimitry Andric
518fe6060f1SDimitry Andric // Replace all uses in the ELSE region or the PHIs in ENDIF block
519fe6060f1SDimitry Andric // Use early increment range because setReg() will update the linked list.
520fe6060f1SDimitry Andric for (auto &O : make_early_inc_range(MRI->use_operands(Reg))) {
521fe6060f1SDimitry Andric auto *UseMI = O.getParent();
522fe6060f1SDimitry Andric auto *UseBlock = UseMI->getParent();
523fe6060f1SDimitry Andric // Replace uses in Endif block
524fe6060f1SDimitry Andric if (UseBlock == Endif) {
5255f757f3fSDimitry Andric if (UseMI->isPHI())
526fe6060f1SDimitry Andric O.setReg(NewReg);
5275f757f3fSDimitry Andric else if (UseMI->isDebugInstr())
5285f757f3fSDimitry Andric continue;
5295f757f3fSDimitry Andric else {
53006c3fb27SDimitry Andric // DetectDeadLanes may mark register uses as undef without removing
53106c3fb27SDimitry Andric // them, in which case a non-phi instruction using the original register
53206c3fb27SDimitry Andric // may exist in the Endif block even though the register is not live
53306c3fb27SDimitry Andric // into it.
53406c3fb27SDimitry Andric assert(!O.readsReg());
53506c3fb27SDimitry Andric }
536fe6060f1SDimitry Andric continue;
537fe6060f1SDimitry Andric }
538fe6060f1SDimitry Andric
539fe6060f1SDimitry Andric // Replace uses in Else region
540fe6060f1SDimitry Andric if (ElseBlocks.contains(UseBlock))
541fe6060f1SDimitry Andric O.setReg(NewReg);
542fe6060f1SDimitry Andric }
543fe6060f1SDimitry Andric
544fe6060f1SDimitry Andric // The optimized Reg is not alive through Flow blocks anymore.
545fe6060f1SDimitry Andric LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
546fe6060f1SDimitry Andric OldVarInfo.AliveBlocks.reset(Flow->getNumber());
547fe6060f1SDimitry Andric
548fe6060f1SDimitry Andric updateLiveRangeInElseRegion(Reg, NewReg, Flow, Endif, ElseBlocks);
549fe6060f1SDimitry Andric updateLiveRangeInThenRegion(Reg, If, Flow);
550fe6060f1SDimitry Andric }
551fe6060f1SDimitry Andric
optimizeWaterfallLiveRange(Register Reg,MachineBasicBlock * LoopHeader,SmallSetVector<MachineBasicBlock *,2> & Blocks,SmallVectorImpl<MachineInstr * > & Instructions) const552fe6060f1SDimitry Andric void SIOptimizeVGPRLiveRange::optimizeWaterfallLiveRange(
55381ad6265SDimitry Andric Register Reg, MachineBasicBlock *LoopHeader,
55481ad6265SDimitry Andric SmallSetVector<MachineBasicBlock *, 2> &Blocks,
55581ad6265SDimitry Andric SmallVectorImpl<MachineInstr *> &Instructions) const {
556fe6060f1SDimitry Andric // Insert a new PHI, marking the value from the last loop iteration undef.
557fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n');
558fe6060f1SDimitry Andric const auto *RC = MRI->getRegClass(Reg);
559fe6060f1SDimitry Andric Register NewReg = MRI->createVirtualRegister(RC);
560fe6060f1SDimitry Andric Register UndefReg = MRI->createVirtualRegister(RC);
561fe6060f1SDimitry Andric
562fe6060f1SDimitry Andric // Replace all uses in the LOOP region
563fe6060f1SDimitry Andric // Use early increment range because setReg() will update the linked list.
564fe6060f1SDimitry Andric for (auto &O : make_early_inc_range(MRI->use_operands(Reg))) {
565fe6060f1SDimitry Andric auto *UseMI = O.getParent();
566fe6060f1SDimitry Andric auto *UseBlock = UseMI->getParent();
56781ad6265SDimitry Andric // Replace uses in Loop blocks
56881ad6265SDimitry Andric if (Blocks.contains(UseBlock))
569fe6060f1SDimitry Andric O.setReg(NewReg);
570fe6060f1SDimitry Andric }
571fe6060f1SDimitry Andric
57281ad6265SDimitry Andric MachineInstrBuilder PHI =
57381ad6265SDimitry Andric BuildMI(*LoopHeader, LoopHeader->getFirstNonPHI(), DebugLoc(),
574fe6060f1SDimitry Andric TII->get(TargetOpcode::PHI), NewReg);
57581ad6265SDimitry Andric for (auto *Pred : LoopHeader->predecessors()) {
57681ad6265SDimitry Andric if (Blocks.contains(Pred))
577fe6060f1SDimitry Andric PHI.addReg(UndefReg, RegState::Undef).addMBB(Pred);
578fe6060f1SDimitry Andric else
579fe6060f1SDimitry Andric PHI.addReg(Reg).addMBB(Pred);
580fe6060f1SDimitry Andric }
581fe6060f1SDimitry Andric
582fe6060f1SDimitry Andric LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(NewReg);
583fe6060f1SDimitry Andric LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
584fe6060f1SDimitry Andric
58581ad6265SDimitry Andric // Find last use and mark as kill
58681ad6265SDimitry Andric MachineInstr *Kill = nullptr;
58781ad6265SDimitry Andric for (auto *MI : reverse(Instructions)) {
58881ad6265SDimitry Andric if (MI->readsRegister(NewReg, TRI)) {
58981ad6265SDimitry Andric MI->addRegisterKilled(NewReg, TRI);
59081ad6265SDimitry Andric NewVarInfo.Kills.push_back(MI);
59181ad6265SDimitry Andric Kill = MI;
592fe6060f1SDimitry Andric break;
593fe6060f1SDimitry Andric }
594fe6060f1SDimitry Andric }
59581ad6265SDimitry Andric assert(Kill && "Failed to find last usage of register in loop");
59681ad6265SDimitry Andric
59781ad6265SDimitry Andric MachineBasicBlock *KillBlock = Kill->getParent();
59881ad6265SDimitry Andric bool PostKillBlock = false;
59981ad6265SDimitry Andric for (auto *Block : Blocks) {
60081ad6265SDimitry Andric auto BBNum = Block->getNumber();
60181ad6265SDimitry Andric
60281ad6265SDimitry Andric // collectWaterfallCandidateRegisters only collects registers that are dead
60381ad6265SDimitry Andric // after the loop. So we know that the old reg is no longer live throughout
60481ad6265SDimitry Andric // the waterfall loop.
60581ad6265SDimitry Andric OldVarInfo.AliveBlocks.reset(BBNum);
60681ad6265SDimitry Andric
60781ad6265SDimitry Andric // The new register is live up to (and including) the block that kills it.
60881ad6265SDimitry Andric PostKillBlock |= (Block == KillBlock);
60981ad6265SDimitry Andric if (PostKillBlock) {
61081ad6265SDimitry Andric NewVarInfo.AliveBlocks.reset(BBNum);
61181ad6265SDimitry Andric } else if (Block != LoopHeader) {
61281ad6265SDimitry Andric NewVarInfo.AliveBlocks.set(BBNum);
61381ad6265SDimitry Andric }
61481ad6265SDimitry Andric }
615fe6060f1SDimitry Andric }
616fe6060f1SDimitry Andric
617fe6060f1SDimitry Andric char SIOptimizeVGPRLiveRange::ID = 0;
618fe6060f1SDimitry Andric
619fe6060f1SDimitry Andric INITIALIZE_PASS_BEGIN(SIOptimizeVGPRLiveRange, DEBUG_TYPE,
620fe6060f1SDimitry Andric "SI Optimize VGPR LiveRange", false, false)
621*0fca6ea1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
622*0fca6ea1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
623*0fca6ea1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LiveVariablesWrapperPass)
624fe6060f1SDimitry Andric INITIALIZE_PASS_END(SIOptimizeVGPRLiveRange, DEBUG_TYPE,
625fe6060f1SDimitry Andric "SI Optimize VGPR LiveRange", false, false)
626fe6060f1SDimitry Andric
627fe6060f1SDimitry Andric char &llvm::SIOptimizeVGPRLiveRangeID = SIOptimizeVGPRLiveRange::ID;
628fe6060f1SDimitry Andric
createSIOptimizeVGPRLiveRangePass()629fe6060f1SDimitry Andric FunctionPass *llvm::createSIOptimizeVGPRLiveRangePass() {
630fe6060f1SDimitry Andric return new SIOptimizeVGPRLiveRange();
631fe6060f1SDimitry Andric }
632fe6060f1SDimitry Andric
runOnMachineFunction(MachineFunction & MF)633fe6060f1SDimitry Andric bool SIOptimizeVGPRLiveRange::runOnMachineFunction(MachineFunction &MF) {
634fe6060f1SDimitry Andric
635fe6060f1SDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
636fe6060f1SDimitry Andric TII = ST.getInstrInfo();
637fe6060f1SDimitry Andric TRI = &TII->getRegisterInfo();
638*0fca6ea1SDimitry Andric MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
639*0fca6ea1SDimitry Andric Loops = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
640*0fca6ea1SDimitry Andric LV = &getAnalysis<LiveVariablesWrapperPass>().getLV();
641fe6060f1SDimitry Andric MRI = &MF.getRegInfo();
642fe6060f1SDimitry Andric
643fe6060f1SDimitry Andric if (skipFunction(MF.getFunction()))
644fe6060f1SDimitry Andric return false;
645fe6060f1SDimitry Andric
646fe6060f1SDimitry Andric bool MadeChange = false;
647fe6060f1SDimitry Andric
648fe6060f1SDimitry Andric // TODO: we need to think about the order of visiting the blocks to get
649fe6060f1SDimitry Andric // optimal result for nesting if-else cases.
650fe6060f1SDimitry Andric for (MachineBasicBlock &MBB : MF) {
651fe6060f1SDimitry Andric for (auto &MI : MBB.terminators()) {
652fe6060f1SDimitry Andric // Detect the if-else blocks
653fe6060f1SDimitry Andric if (MI.getOpcode() == AMDGPU::SI_IF) {
654fe6060f1SDimitry Andric MachineBasicBlock *IfTarget = MI.getOperand(2).getMBB();
655fe6060f1SDimitry Andric auto *Endif = getElseTarget(IfTarget);
656fe6060f1SDimitry Andric if (!Endif)
657fe6060f1SDimitry Andric continue;
658fe6060f1SDimitry Andric
65981ad6265SDimitry Andric // Skip unexpected control flow.
66081ad6265SDimitry Andric if (!MDT->dominates(&MBB, IfTarget) || !MDT->dominates(IfTarget, Endif))
66181ad6265SDimitry Andric continue;
66281ad6265SDimitry Andric
663fe6060f1SDimitry Andric SmallSetVector<MachineBasicBlock *, 16> ElseBlocks;
664fe6060f1SDimitry Andric SmallVector<Register> CandidateRegs;
665fe6060f1SDimitry Andric
666fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Checking IF-ELSE-ENDIF: "
667fe6060f1SDimitry Andric << printMBBReference(MBB) << ' '
668fe6060f1SDimitry Andric << printMBBReference(*IfTarget) << ' '
669fe6060f1SDimitry Andric << printMBBReference(*Endif) << '\n');
670fe6060f1SDimitry Andric
671fe6060f1SDimitry Andric // Collect all the blocks in the ELSE region
672fe6060f1SDimitry Andric collectElseRegionBlocks(IfTarget, Endif, ElseBlocks);
673fe6060f1SDimitry Andric
674fe6060f1SDimitry Andric // Collect the registers can be optimized
675fe6060f1SDimitry Andric collectCandidateRegisters(&MBB, IfTarget, Endif, ElseBlocks,
676fe6060f1SDimitry Andric CandidateRegs);
677fe6060f1SDimitry Andric MadeChange |= !CandidateRegs.empty();
678fe6060f1SDimitry Andric // Now we are safe to optimize.
679fe6060f1SDimitry Andric for (auto Reg : CandidateRegs)
680fe6060f1SDimitry Andric optimizeLiveRange(Reg, &MBB, IfTarget, Endif, ElseBlocks);
681fe6060f1SDimitry Andric } else if (MI.getOpcode() == AMDGPU::SI_WATERFALL_LOOP) {
68281ad6265SDimitry Andric auto *LoopHeader = MI.getOperand(0).getMBB();
68381ad6265SDimitry Andric auto *LoopEnd = &MBB;
68481ad6265SDimitry Andric
685fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Checking Waterfall loop: "
68681ad6265SDimitry Andric << printMBBReference(*LoopHeader) << '\n');
687fe6060f1SDimitry Andric
688fe6060f1SDimitry Andric SmallSetVector<Register, 16> CandidateRegs;
68981ad6265SDimitry Andric SmallVector<MachineInstr *, 16> Instructions;
69081ad6265SDimitry Andric SmallSetVector<MachineBasicBlock *, 2> Blocks;
69181ad6265SDimitry Andric
69281ad6265SDimitry Andric collectWaterfallCandidateRegisters(LoopHeader, LoopEnd, CandidateRegs,
69381ad6265SDimitry Andric Blocks, Instructions);
694fe6060f1SDimitry Andric MadeChange |= !CandidateRegs.empty();
695fe6060f1SDimitry Andric // Now we are safe to optimize.
696fe6060f1SDimitry Andric for (auto Reg : CandidateRegs)
69781ad6265SDimitry Andric optimizeWaterfallLiveRange(Reg, LoopHeader, Blocks, Instructions);
698fe6060f1SDimitry Andric }
699fe6060f1SDimitry Andric }
700fe6060f1SDimitry Andric }
701fe6060f1SDimitry Andric
702fe6060f1SDimitry Andric return MadeChange;
703fe6060f1SDimitry Andric }
704