xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp (revision fe6060f10f634930ff71b7c50291ddc610da2475)
1*fe6060f1SDimitry Andric //===--------------------- SIOptimizeVGPRLiveRange.cpp  -------------------===//
2*fe6060f1SDimitry Andric //
3*fe6060f1SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*fe6060f1SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*fe6060f1SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*fe6060f1SDimitry Andric //
7*fe6060f1SDimitry Andric //===----------------------------------------------------------------------===//
8*fe6060f1SDimitry Andric //
9*fe6060f1SDimitry Andric /// \file
10*fe6060f1SDimitry Andric /// This pass tries to remove unnecessary VGPR live ranges in divergent if-else
11*fe6060f1SDimitry Andric /// structures and waterfall loops.
12*fe6060f1SDimitry Andric ///
13*fe6060f1SDimitry Andric /// When we do structurization, we usually transform an if-else into two
14*fe6060f1SDimitry Andric /// sucessive if-then (with a flow block to do predicate inversion). Consider a
15*fe6060f1SDimitry Andric /// simple case after structurization: A divergent value %a was defined before
16*fe6060f1SDimitry Andric /// if-else and used in both THEN (use in THEN is optional) and ELSE part:
17*fe6060f1SDimitry Andric ///    bb.if:
18*fe6060f1SDimitry Andric ///      %a = ...
19*fe6060f1SDimitry Andric ///      ...
20*fe6060f1SDimitry Andric ///    bb.then:
21*fe6060f1SDimitry Andric ///      ... = op %a
22*fe6060f1SDimitry Andric ///      ... // %a can be dead here
23*fe6060f1SDimitry Andric ///    bb.flow:
24*fe6060f1SDimitry Andric ///      ...
25*fe6060f1SDimitry Andric ///    bb.else:
26*fe6060f1SDimitry Andric ///      ... = %a
27*fe6060f1SDimitry Andric ///      ...
28*fe6060f1SDimitry Andric ///    bb.endif
29*fe6060f1SDimitry Andric ///
30*fe6060f1SDimitry Andric ///  As register allocator has no idea of the thread-control-flow, it will just
31*fe6060f1SDimitry Andric ///  assume %a would be alive in the whole range of bb.then because of a later
32*fe6060f1SDimitry Andric ///  use in bb.else. On AMDGPU architecture, the VGPR is accessed with respect
33*fe6060f1SDimitry Andric ///  to exec mask. For this if-else case, the lanes active in bb.then will be
34*fe6060f1SDimitry Andric ///  inactive in bb.else, and vice-versa. So we are safe to say that %a was dead
35*fe6060f1SDimitry Andric ///  after the last use in bb.then until the end of the block. The reason is
36*fe6060f1SDimitry Andric ///  the instructions in bb.then will only overwrite lanes that will never be
37*fe6060f1SDimitry Andric ///  accessed in bb.else.
38*fe6060f1SDimitry Andric ///
39*fe6060f1SDimitry Andric ///  This pass aims to to tell register allocator that %a is in-fact dead,
40*fe6060f1SDimitry Andric ///  through inserting a phi-node in bb.flow saying that %a is undef when coming
41*fe6060f1SDimitry Andric ///  from bb.then, and then replace the uses in the bb.else with the result of
42*fe6060f1SDimitry Andric ///  newly inserted phi.
43*fe6060f1SDimitry Andric ///
44*fe6060f1SDimitry Andric ///  Two key conditions must be met to ensure correctness:
45*fe6060f1SDimitry Andric ///  1.) The def-point should be in the same loop-level as if-else-endif to make
46*fe6060f1SDimitry Andric ///      sure the second loop iteration still get correct data.
47*fe6060f1SDimitry Andric ///  2.) There should be no further uses after the IF-ELSE region.
48*fe6060f1SDimitry Andric ///
49*fe6060f1SDimitry Andric ///
50*fe6060f1SDimitry Andric /// Waterfall loops get inserted around instructions that use divergent values
51*fe6060f1SDimitry Andric /// but can only be executed with a uniform value. For example an indirect call
52*fe6060f1SDimitry Andric /// to a divergent address:
53*fe6060f1SDimitry Andric ///    bb.start:
54*fe6060f1SDimitry Andric ///      %a = ...
55*fe6060f1SDimitry Andric ///      %fun = ...
56*fe6060f1SDimitry Andric ///      ...
57*fe6060f1SDimitry Andric ///    bb.loop:
58*fe6060f1SDimitry Andric ///      call %fun (%a)
59*fe6060f1SDimitry Andric ///      ... // %a can be dead here
60*fe6060f1SDimitry Andric ///      loop %bb.loop
61*fe6060f1SDimitry Andric ///
62*fe6060f1SDimitry Andric ///  The loop block is executed multiple times, but it is run exactly once for
63*fe6060f1SDimitry Andric ///  each active lane. Similar to the if-else case, the register allocator
64*fe6060f1SDimitry Andric ///  assumes that %a is live throughout the loop as it is used again in the next
65*fe6060f1SDimitry Andric ///  iteration. If %a is a VGPR that is unused after the loop, it does not need
66*fe6060f1SDimitry Andric ///  to be live after its last use in the loop block. By inserting a phi-node at
67*fe6060f1SDimitry Andric ///  the start of bb.loop that is undef when coming from bb.loop, the register
68*fe6060f1SDimitry Andric ///  allocation knows that the value of %a does not need to be preserved through
69*fe6060f1SDimitry Andric ///  iterations of the loop.
70*fe6060f1SDimitry Andric ///
71*fe6060f1SDimitry Andric //
72*fe6060f1SDimitry Andric //===----------------------------------------------------------------------===//
73*fe6060f1SDimitry Andric 
74*fe6060f1SDimitry Andric #include "AMDGPU.h"
75*fe6060f1SDimitry Andric #include "GCNSubtarget.h"
76*fe6060f1SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
77*fe6060f1SDimitry Andric #include "SIMachineFunctionInfo.h"
78*fe6060f1SDimitry Andric #include "llvm/CodeGen/LiveVariables.h"
79*fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineDominators.h"
80*fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineLoopInfo.h"
81*fe6060f1SDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h"
82*fe6060f1SDimitry Andric #include "llvm/InitializePasses.h"
83*fe6060f1SDimitry Andric 
84*fe6060f1SDimitry Andric using namespace llvm;
85*fe6060f1SDimitry Andric 
86*fe6060f1SDimitry Andric #define DEBUG_TYPE "si-opt-vgpr-liverange"
87*fe6060f1SDimitry Andric 
88*fe6060f1SDimitry Andric namespace {
89*fe6060f1SDimitry Andric 
90*fe6060f1SDimitry Andric class SIOptimizeVGPRLiveRange : public MachineFunctionPass {
91*fe6060f1SDimitry Andric private:
92*fe6060f1SDimitry Andric   const SIRegisterInfo *TRI = nullptr;
93*fe6060f1SDimitry Andric   const SIInstrInfo *TII = nullptr;
94*fe6060f1SDimitry Andric   LiveVariables *LV = nullptr;
95*fe6060f1SDimitry Andric   MachineDominatorTree *MDT = nullptr;
96*fe6060f1SDimitry Andric   const MachineLoopInfo *Loops = nullptr;
97*fe6060f1SDimitry Andric   MachineRegisterInfo *MRI = nullptr;
98*fe6060f1SDimitry Andric 
99*fe6060f1SDimitry Andric public:
100*fe6060f1SDimitry Andric   static char ID;
101*fe6060f1SDimitry Andric 
102*fe6060f1SDimitry Andric   MachineBasicBlock *getElseTarget(MachineBasicBlock *MBB) const;
103*fe6060f1SDimitry Andric 
104*fe6060f1SDimitry Andric   void collectElseRegionBlocks(MachineBasicBlock *Flow,
105*fe6060f1SDimitry Andric                                MachineBasicBlock *Endif,
106*fe6060f1SDimitry Andric                                SmallSetVector<MachineBasicBlock *, 16> &) const;
107*fe6060f1SDimitry Andric 
108*fe6060f1SDimitry Andric   void
109*fe6060f1SDimitry Andric   collectCandidateRegisters(MachineBasicBlock *If, MachineBasicBlock *Flow,
110*fe6060f1SDimitry Andric                             MachineBasicBlock *Endif,
111*fe6060f1SDimitry Andric                             SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks,
112*fe6060f1SDimitry Andric                             SmallVectorImpl<Register> &CandidateRegs) const;
113*fe6060f1SDimitry Andric 
114*fe6060f1SDimitry Andric   void collectWaterfallCandidateRegisters(
115*fe6060f1SDimitry Andric       MachineBasicBlock *Loop,
116*fe6060f1SDimitry Andric       SmallSetVector<Register, 16> &CandidateRegs) const;
117*fe6060f1SDimitry Andric 
118*fe6060f1SDimitry Andric   void findNonPHIUsesInBlock(Register Reg, MachineBasicBlock *MBB,
119*fe6060f1SDimitry Andric                              SmallVectorImpl<MachineInstr *> &Uses) const;
120*fe6060f1SDimitry Andric 
121*fe6060f1SDimitry Andric   void updateLiveRangeInThenRegion(Register Reg, MachineBasicBlock *If,
122*fe6060f1SDimitry Andric                                    MachineBasicBlock *Flow) const;
123*fe6060f1SDimitry Andric 
124*fe6060f1SDimitry Andric   void updateLiveRangeInElseRegion(
125*fe6060f1SDimitry Andric       Register Reg, Register NewReg, MachineBasicBlock *Flow,
126*fe6060f1SDimitry Andric       MachineBasicBlock *Endif,
127*fe6060f1SDimitry Andric       SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const;
128*fe6060f1SDimitry Andric 
129*fe6060f1SDimitry Andric   void
130*fe6060f1SDimitry Andric   optimizeLiveRange(Register Reg, MachineBasicBlock *If,
131*fe6060f1SDimitry Andric                     MachineBasicBlock *Flow, MachineBasicBlock *Endif,
132*fe6060f1SDimitry Andric                     SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const;
133*fe6060f1SDimitry Andric 
134*fe6060f1SDimitry Andric   void optimizeWaterfallLiveRange(Register Reg, MachineBasicBlock *If) const;
135*fe6060f1SDimitry Andric 
136*fe6060f1SDimitry Andric   SIOptimizeVGPRLiveRange() : MachineFunctionPass(ID) {}
137*fe6060f1SDimitry Andric 
138*fe6060f1SDimitry Andric   bool runOnMachineFunction(MachineFunction &MF) override;
139*fe6060f1SDimitry Andric 
140*fe6060f1SDimitry Andric   StringRef getPassName() const override {
141*fe6060f1SDimitry Andric     return "SI Optimize VGPR LiveRange";
142*fe6060f1SDimitry Andric   }
143*fe6060f1SDimitry Andric 
144*fe6060f1SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
145*fe6060f1SDimitry Andric     AU.addRequired<LiveVariables>();
146*fe6060f1SDimitry Andric     AU.addRequired<MachineDominatorTree>();
147*fe6060f1SDimitry Andric     AU.addRequired<MachineLoopInfo>();
148*fe6060f1SDimitry Andric     AU.addPreserved<LiveVariables>();
149*fe6060f1SDimitry Andric     AU.addPreserved<MachineDominatorTree>();
150*fe6060f1SDimitry Andric     AU.addPreserved<MachineLoopInfo>();
151*fe6060f1SDimitry Andric     MachineFunctionPass::getAnalysisUsage(AU);
152*fe6060f1SDimitry Andric   }
153*fe6060f1SDimitry Andric 
154*fe6060f1SDimitry Andric   MachineFunctionProperties getRequiredProperties() const override {
155*fe6060f1SDimitry Andric     return MachineFunctionProperties().set(
156*fe6060f1SDimitry Andric         MachineFunctionProperties::Property::IsSSA);
157*fe6060f1SDimitry Andric   }
158*fe6060f1SDimitry Andric };
159*fe6060f1SDimitry Andric 
160*fe6060f1SDimitry Andric } // end anonymous namespace
161*fe6060f1SDimitry Andric 
162*fe6060f1SDimitry Andric // Check whether the MBB is a else flow block and get the branching target which
163*fe6060f1SDimitry Andric // is the Endif block
164*fe6060f1SDimitry Andric MachineBasicBlock *
165*fe6060f1SDimitry Andric SIOptimizeVGPRLiveRange::getElseTarget(MachineBasicBlock *MBB) const {
166*fe6060f1SDimitry Andric   for (auto &BR : MBB->terminators()) {
167*fe6060f1SDimitry Andric     if (BR.getOpcode() == AMDGPU::SI_ELSE)
168*fe6060f1SDimitry Andric       return BR.getOperand(2).getMBB();
169*fe6060f1SDimitry Andric   }
170*fe6060f1SDimitry Andric   return nullptr;
171*fe6060f1SDimitry Andric }
172*fe6060f1SDimitry Andric 
173*fe6060f1SDimitry Andric void SIOptimizeVGPRLiveRange::collectElseRegionBlocks(
174*fe6060f1SDimitry Andric     MachineBasicBlock *Flow, MachineBasicBlock *Endif,
175*fe6060f1SDimitry Andric     SmallSetVector<MachineBasicBlock *, 16> &Blocks) const {
176*fe6060f1SDimitry Andric   assert(Flow != Endif);
177*fe6060f1SDimitry Andric 
178*fe6060f1SDimitry Andric   MachineBasicBlock *MBB = Endif;
179*fe6060f1SDimitry Andric   unsigned Cur = 0;
180*fe6060f1SDimitry Andric   while (MBB) {
181*fe6060f1SDimitry Andric     for (auto *Pred : MBB->predecessors()) {
182*fe6060f1SDimitry Andric       if (Pred != Flow && !Blocks.contains(Pred))
183*fe6060f1SDimitry Andric         Blocks.insert(Pred);
184*fe6060f1SDimitry Andric     }
185*fe6060f1SDimitry Andric 
186*fe6060f1SDimitry Andric     if (Cur < Blocks.size())
187*fe6060f1SDimitry Andric       MBB = Blocks[Cur++];
188*fe6060f1SDimitry Andric     else
189*fe6060f1SDimitry Andric       MBB = nullptr;
190*fe6060f1SDimitry Andric   }
191*fe6060f1SDimitry Andric 
192*fe6060f1SDimitry Andric   LLVM_DEBUG({
193*fe6060f1SDimitry Andric     dbgs() << "Found Else blocks: ";
194*fe6060f1SDimitry Andric     for (auto *MBB : Blocks)
195*fe6060f1SDimitry Andric       dbgs() << printMBBReference(*MBB) << ' ';
196*fe6060f1SDimitry Andric     dbgs() << '\n';
197*fe6060f1SDimitry Andric   });
198*fe6060f1SDimitry Andric }
199*fe6060f1SDimitry Andric 
200*fe6060f1SDimitry Andric /// Find the instructions(excluding phi) in \p MBB that uses the \p Reg.
201*fe6060f1SDimitry Andric void SIOptimizeVGPRLiveRange::findNonPHIUsesInBlock(
202*fe6060f1SDimitry Andric     Register Reg, MachineBasicBlock *MBB,
203*fe6060f1SDimitry Andric     SmallVectorImpl<MachineInstr *> &Uses) const {
204*fe6060f1SDimitry Andric   for (auto &UseMI : MRI->use_nodbg_instructions(Reg)) {
205*fe6060f1SDimitry Andric     if (UseMI.getParent() == MBB && !UseMI.isPHI())
206*fe6060f1SDimitry Andric       Uses.push_back(&UseMI);
207*fe6060f1SDimitry Andric   }
208*fe6060f1SDimitry Andric }
209*fe6060f1SDimitry Andric 
210*fe6060f1SDimitry Andric /// Collect the killed registers in the ELSE region which are not alive through
211*fe6060f1SDimitry Andric /// the whole THEN region.
212*fe6060f1SDimitry Andric void SIOptimizeVGPRLiveRange::collectCandidateRegisters(
213*fe6060f1SDimitry Andric     MachineBasicBlock *If, MachineBasicBlock *Flow, MachineBasicBlock *Endif,
214*fe6060f1SDimitry Andric     SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks,
215*fe6060f1SDimitry Andric     SmallVectorImpl<Register> &CandidateRegs) const {
216*fe6060f1SDimitry Andric 
217*fe6060f1SDimitry Andric   SmallSet<Register, 8> KillsInElse;
218*fe6060f1SDimitry Andric 
219*fe6060f1SDimitry Andric   for (auto *Else : ElseBlocks) {
220*fe6060f1SDimitry Andric     for (auto &MI : Else->instrs()) {
221*fe6060f1SDimitry Andric       if (MI.isDebugInstr())
222*fe6060f1SDimitry Andric         continue;
223*fe6060f1SDimitry Andric 
224*fe6060f1SDimitry Andric       for (auto &MO : MI.operands()) {
225*fe6060f1SDimitry Andric         if (!MO.isReg() || !MO.getReg() || MO.isDef())
226*fe6060f1SDimitry Andric           continue;
227*fe6060f1SDimitry Andric 
228*fe6060f1SDimitry Andric         Register MOReg = MO.getReg();
229*fe6060f1SDimitry Andric         // We can only optimize AGPR/VGPR virtual register
230*fe6060f1SDimitry Andric         if (MOReg.isPhysical() || !TRI->isVectorRegister(*MRI, MOReg))
231*fe6060f1SDimitry Andric           continue;
232*fe6060f1SDimitry Andric 
233*fe6060f1SDimitry Andric         if (MO.readsReg()) {
234*fe6060f1SDimitry Andric           LiveVariables::VarInfo &VI = LV->getVarInfo(MOReg);
235*fe6060f1SDimitry Andric           const MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent();
236*fe6060f1SDimitry Andric           // Make sure two conditions are met:
237*fe6060f1SDimitry Andric           // a.) the value is defined before/in the IF block
238*fe6060f1SDimitry Andric           // b.) should be defined in the same loop-level.
239*fe6060f1SDimitry Andric           if ((VI.AliveBlocks.test(If->getNumber()) || DefMBB == If) &&
240*fe6060f1SDimitry Andric               Loops->getLoopFor(DefMBB) == Loops->getLoopFor(If)) {
241*fe6060f1SDimitry Andric             // Check if the register is live into the endif block. If not,
242*fe6060f1SDimitry Andric             // consider it killed in the else region.
243*fe6060f1SDimitry Andric             LiveVariables::VarInfo &VI = LV->getVarInfo(MOReg);
244*fe6060f1SDimitry Andric             if (!VI.isLiveIn(*Endif, MOReg, *MRI)) {
245*fe6060f1SDimitry Andric               KillsInElse.insert(MOReg);
246*fe6060f1SDimitry Andric             } else {
247*fe6060f1SDimitry Andric               LLVM_DEBUG(dbgs() << "Excluding " << printReg(MOReg, TRI)
248*fe6060f1SDimitry Andric                                 << " as Live in Endif\n");
249*fe6060f1SDimitry Andric             }
250*fe6060f1SDimitry Andric           }
251*fe6060f1SDimitry Andric         }
252*fe6060f1SDimitry Andric       }
253*fe6060f1SDimitry Andric     }
254*fe6060f1SDimitry Andric   }
255*fe6060f1SDimitry Andric 
256*fe6060f1SDimitry Andric   // Check the phis in the Endif, looking for value coming from the ELSE
257*fe6060f1SDimitry Andric   // region. Make sure the phi-use is the last use.
258*fe6060f1SDimitry Andric   for (auto &MI : Endif->phis()) {
259*fe6060f1SDimitry Andric     for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
260*fe6060f1SDimitry Andric       auto &MO = MI.getOperand(Idx);
261*fe6060f1SDimitry Andric       auto *Pred = MI.getOperand(Idx + 1).getMBB();
262*fe6060f1SDimitry Andric       if (Pred == Flow)
263*fe6060f1SDimitry Andric         continue;
264*fe6060f1SDimitry Andric       assert(ElseBlocks.contains(Pred) && "Should be from Else region\n");
265*fe6060f1SDimitry Andric 
266*fe6060f1SDimitry Andric       if (!MO.isReg() || !MO.getReg() || MO.isUndef())
267*fe6060f1SDimitry Andric         continue;
268*fe6060f1SDimitry Andric 
269*fe6060f1SDimitry Andric       Register Reg = MO.getReg();
270*fe6060f1SDimitry Andric       if (Reg.isPhysical() || !TRI->isVectorRegister(*MRI, Reg))
271*fe6060f1SDimitry Andric         continue;
272*fe6060f1SDimitry Andric 
273*fe6060f1SDimitry Andric       LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
274*fe6060f1SDimitry Andric 
275*fe6060f1SDimitry Andric       if (VI.isLiveIn(*Endif, Reg, *MRI)) {
276*fe6060f1SDimitry Andric         LLVM_DEBUG(dbgs() << "Excluding " << printReg(Reg, TRI)
277*fe6060f1SDimitry Andric                           << " as Live in Endif\n");
278*fe6060f1SDimitry Andric         continue;
279*fe6060f1SDimitry Andric       }
280*fe6060f1SDimitry Andric       // Make sure two conditions are met:
281*fe6060f1SDimitry Andric       // a.) the value is defined before/in the IF block
282*fe6060f1SDimitry Andric       // b.) should be defined in the same loop-level.
283*fe6060f1SDimitry Andric       const MachineBasicBlock *DefMBB = MRI->getVRegDef(Reg)->getParent();
284*fe6060f1SDimitry Andric       if ((VI.AliveBlocks.test(If->getNumber()) || DefMBB == If) &&
285*fe6060f1SDimitry Andric           Loops->getLoopFor(DefMBB) == Loops->getLoopFor(If))
286*fe6060f1SDimitry Andric         KillsInElse.insert(Reg);
287*fe6060f1SDimitry Andric     }
288*fe6060f1SDimitry Andric   }
289*fe6060f1SDimitry Andric 
290*fe6060f1SDimitry Andric   auto IsLiveThroughThen = [&](Register Reg) {
291*fe6060f1SDimitry Andric     for (auto I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E;
292*fe6060f1SDimitry Andric          ++I) {
293*fe6060f1SDimitry Andric       if (!I->readsReg())
294*fe6060f1SDimitry Andric         continue;
295*fe6060f1SDimitry Andric       auto *UseMI = I->getParent();
296*fe6060f1SDimitry Andric       auto *UseMBB = UseMI->getParent();
297*fe6060f1SDimitry Andric       if (UseMBB == Flow || UseMBB == Endif) {
298*fe6060f1SDimitry Andric         if (!UseMI->isPHI())
299*fe6060f1SDimitry Andric           return true;
300*fe6060f1SDimitry Andric 
301*fe6060f1SDimitry Andric         auto *IncomingMBB = UseMI->getOperand(I.getOperandNo() + 1).getMBB();
302*fe6060f1SDimitry Andric         // The register is live through the path If->Flow or Flow->Endif.
303*fe6060f1SDimitry Andric         // we should not optimize for such cases.
304*fe6060f1SDimitry Andric         if ((UseMBB == Flow && IncomingMBB != If) ||
305*fe6060f1SDimitry Andric             (UseMBB == Endif && IncomingMBB == Flow))
306*fe6060f1SDimitry Andric           return true;
307*fe6060f1SDimitry Andric       }
308*fe6060f1SDimitry Andric     }
309*fe6060f1SDimitry Andric     return false;
310*fe6060f1SDimitry Andric   };
311*fe6060f1SDimitry Andric 
312*fe6060f1SDimitry Andric   for (auto Reg : KillsInElse) {
313*fe6060f1SDimitry Andric     if (!IsLiveThroughThen(Reg))
314*fe6060f1SDimitry Andric       CandidateRegs.push_back(Reg);
315*fe6060f1SDimitry Andric   }
316*fe6060f1SDimitry Andric }
317*fe6060f1SDimitry Andric 
318*fe6060f1SDimitry Andric /// Collect the registers used in the waterfall loop block that are defined
319*fe6060f1SDimitry Andric /// before.
320*fe6060f1SDimitry Andric void SIOptimizeVGPRLiveRange::collectWaterfallCandidateRegisters(
321*fe6060f1SDimitry Andric     MachineBasicBlock *Loop,
322*fe6060f1SDimitry Andric     SmallSetVector<Register, 16> &CandidateRegs) const {
323*fe6060f1SDimitry Andric 
324*fe6060f1SDimitry Andric   for (auto &MI : Loop->instrs()) {
325*fe6060f1SDimitry Andric     if (MI.isDebugInstr())
326*fe6060f1SDimitry Andric       continue;
327*fe6060f1SDimitry Andric 
328*fe6060f1SDimitry Andric     for (auto &MO : MI.operands()) {
329*fe6060f1SDimitry Andric       if (!MO.isReg() || !MO.getReg() || MO.isDef())
330*fe6060f1SDimitry Andric         continue;
331*fe6060f1SDimitry Andric 
332*fe6060f1SDimitry Andric       Register MOReg = MO.getReg();
333*fe6060f1SDimitry Andric       // We can only optimize AGPR/VGPR virtual register
334*fe6060f1SDimitry Andric       if (MOReg.isPhysical() || !TRI->isVectorRegister(*MRI, MOReg))
335*fe6060f1SDimitry Andric         continue;
336*fe6060f1SDimitry Andric 
337*fe6060f1SDimitry Andric       if (MO.readsReg()) {
338*fe6060f1SDimitry Andric         const MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent();
339*fe6060f1SDimitry Andric         // Make sure the value is defined before the LOOP block
340*fe6060f1SDimitry Andric         if (DefMBB != Loop && !CandidateRegs.contains(MOReg)) {
341*fe6060f1SDimitry Andric           // If the variable is used after the loop, the register coalescer will
342*fe6060f1SDimitry Andric           // merge the newly created register and remove the phi node again.
343*fe6060f1SDimitry Andric           // Just do nothing in that case.
344*fe6060f1SDimitry Andric           LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(MOReg);
345*fe6060f1SDimitry Andric           bool IsUsed = false;
346*fe6060f1SDimitry Andric           for (auto *Succ : Loop->successors()) {
347*fe6060f1SDimitry Andric             if (Succ != Loop && OldVarInfo.isLiveIn(*Succ, MOReg, *MRI)) {
348*fe6060f1SDimitry Andric               IsUsed = true;
349*fe6060f1SDimitry Andric               break;
350*fe6060f1SDimitry Andric             }
351*fe6060f1SDimitry Andric           }
352*fe6060f1SDimitry Andric           if (!IsUsed) {
353*fe6060f1SDimitry Andric             LLVM_DEBUG(dbgs() << "Found candidate reg: "
354*fe6060f1SDimitry Andric                               << printReg(MOReg, TRI, 0, MRI) << '\n');
355*fe6060f1SDimitry Andric             CandidateRegs.insert(MOReg);
356*fe6060f1SDimitry Andric           } else {
357*fe6060f1SDimitry Andric             LLVM_DEBUG(dbgs() << "Reg is used after loop, ignoring: "
358*fe6060f1SDimitry Andric                               << printReg(MOReg, TRI, 0, MRI) << '\n');
359*fe6060f1SDimitry Andric           }
360*fe6060f1SDimitry Andric         }
361*fe6060f1SDimitry Andric       }
362*fe6060f1SDimitry Andric     }
363*fe6060f1SDimitry Andric   }
364*fe6060f1SDimitry Andric }
365*fe6060f1SDimitry Andric 
366*fe6060f1SDimitry Andric // Re-calculate the liveness of \p Reg in the THEN-region
367*fe6060f1SDimitry Andric void SIOptimizeVGPRLiveRange::updateLiveRangeInThenRegion(
368*fe6060f1SDimitry Andric     Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow) const {
369*fe6060f1SDimitry Andric 
370*fe6060f1SDimitry Andric   SmallPtrSet<MachineBasicBlock *, 16> PHIIncoming;
371*fe6060f1SDimitry Andric 
372*fe6060f1SDimitry Andric   MachineBasicBlock *ThenEntry = nullptr;
373*fe6060f1SDimitry Andric   for (auto *Succ : If->successors()) {
374*fe6060f1SDimitry Andric     if (Succ != Flow) {
375*fe6060f1SDimitry Andric       ThenEntry = Succ;
376*fe6060f1SDimitry Andric       break;
377*fe6060f1SDimitry Andric     }
378*fe6060f1SDimitry Andric   }
379*fe6060f1SDimitry Andric   assert(ThenEntry && "No successor in Then region?");
380*fe6060f1SDimitry Andric 
381*fe6060f1SDimitry Andric   LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
382*fe6060f1SDimitry Andric   df_iterator_default_set<MachineBasicBlock *, 16> Visited;
383*fe6060f1SDimitry Andric 
384*fe6060f1SDimitry Andric   for (MachineBasicBlock *MBB : depth_first_ext(ThenEntry, Visited)) {
385*fe6060f1SDimitry Andric     if (MBB == Flow)
386*fe6060f1SDimitry Andric       break;
387*fe6060f1SDimitry Andric 
388*fe6060f1SDimitry Andric     // Clear Live bit, as we will recalculate afterwards
389*fe6060f1SDimitry Andric     LLVM_DEBUG(dbgs() << "Clear AliveBlock " << printMBBReference(*MBB)
390*fe6060f1SDimitry Andric                       << '\n');
391*fe6060f1SDimitry Andric     OldVarInfo.AliveBlocks.reset(MBB->getNumber());
392*fe6060f1SDimitry Andric   }
393*fe6060f1SDimitry Andric 
394*fe6060f1SDimitry Andric   // Get the blocks the Reg should be alive through
395*fe6060f1SDimitry Andric   for (auto I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E;
396*fe6060f1SDimitry Andric        ++I) {
397*fe6060f1SDimitry Andric     auto *UseMI = I->getParent();
398*fe6060f1SDimitry Andric     if (UseMI->isPHI() && I->readsReg()) {
399*fe6060f1SDimitry Andric       if (Visited.contains(UseMI->getParent()))
400*fe6060f1SDimitry Andric         PHIIncoming.insert(UseMI->getOperand(I.getOperandNo() + 1).getMBB());
401*fe6060f1SDimitry Andric     }
402*fe6060f1SDimitry Andric   }
403*fe6060f1SDimitry Andric 
404*fe6060f1SDimitry Andric   Visited.clear();
405*fe6060f1SDimitry Andric 
406*fe6060f1SDimitry Andric   for (MachineBasicBlock *MBB : depth_first_ext(ThenEntry, Visited)) {
407*fe6060f1SDimitry Andric     if (MBB == Flow)
408*fe6060f1SDimitry Andric       break;
409*fe6060f1SDimitry Andric 
410*fe6060f1SDimitry Andric     SmallVector<MachineInstr *> Uses;
411*fe6060f1SDimitry Andric     // PHI instructions has been processed before.
412*fe6060f1SDimitry Andric     findNonPHIUsesInBlock(Reg, MBB, Uses);
413*fe6060f1SDimitry Andric 
414*fe6060f1SDimitry Andric     if (Uses.size() == 1) {
415*fe6060f1SDimitry Andric       LLVM_DEBUG(dbgs() << "Found one Non-PHI use in "
416*fe6060f1SDimitry Andric                         << printMBBReference(*MBB) << '\n');
417*fe6060f1SDimitry Andric       LV->HandleVirtRegUse(Reg, MBB, *(*Uses.begin()));
418*fe6060f1SDimitry Andric     } else if (Uses.size() > 1) {
419*fe6060f1SDimitry Andric       // Process the instructions in-order
420*fe6060f1SDimitry Andric       LLVM_DEBUG(dbgs() << "Found " << Uses.size() << " Non-PHI uses in "
421*fe6060f1SDimitry Andric                         << printMBBReference(*MBB) << '\n');
422*fe6060f1SDimitry Andric       for (MachineInstr &MI : *MBB) {
423*fe6060f1SDimitry Andric         if (llvm::is_contained(Uses, &MI))
424*fe6060f1SDimitry Andric           LV->HandleVirtRegUse(Reg, MBB, MI);
425*fe6060f1SDimitry Andric       }
426*fe6060f1SDimitry Andric     }
427*fe6060f1SDimitry Andric 
428*fe6060f1SDimitry Andric     // Mark Reg alive through the block if this is a PHI incoming block
429*fe6060f1SDimitry Andric     if (PHIIncoming.contains(MBB))
430*fe6060f1SDimitry Andric       LV->MarkVirtRegAliveInBlock(OldVarInfo, MRI->getVRegDef(Reg)->getParent(),
431*fe6060f1SDimitry Andric                                   MBB);
432*fe6060f1SDimitry Andric   }
433*fe6060f1SDimitry Andric 
434*fe6060f1SDimitry Andric   // Set the isKilled flag if we get new Kills in the THEN region.
435*fe6060f1SDimitry Andric   for (auto *MI : OldVarInfo.Kills) {
436*fe6060f1SDimitry Andric     if (Visited.contains(MI->getParent()))
437*fe6060f1SDimitry Andric       MI->addRegisterKilled(Reg, TRI);
438*fe6060f1SDimitry Andric   }
439*fe6060f1SDimitry Andric }
440*fe6060f1SDimitry Andric 
441*fe6060f1SDimitry Andric void SIOptimizeVGPRLiveRange::updateLiveRangeInElseRegion(
442*fe6060f1SDimitry Andric     Register Reg, Register NewReg, MachineBasicBlock *Flow,
443*fe6060f1SDimitry Andric     MachineBasicBlock *Endif,
444*fe6060f1SDimitry Andric     SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const {
445*fe6060f1SDimitry Andric   LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(NewReg);
446*fe6060f1SDimitry Andric   LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
447*fe6060f1SDimitry Andric 
448*fe6060f1SDimitry Andric   // Transfer aliveBlocks from Reg to NewReg
449*fe6060f1SDimitry Andric   for (auto *MBB : ElseBlocks) {
450*fe6060f1SDimitry Andric     unsigned BBNum = MBB->getNumber();
451*fe6060f1SDimitry Andric     if (OldVarInfo.AliveBlocks.test(BBNum)) {
452*fe6060f1SDimitry Andric       NewVarInfo.AliveBlocks.set(BBNum);
453*fe6060f1SDimitry Andric       LLVM_DEBUG(dbgs() << "Removing AliveBlock " << printMBBReference(*MBB)
454*fe6060f1SDimitry Andric                         << '\n');
455*fe6060f1SDimitry Andric       OldVarInfo.AliveBlocks.reset(BBNum);
456*fe6060f1SDimitry Andric     }
457*fe6060f1SDimitry Andric   }
458*fe6060f1SDimitry Andric 
459*fe6060f1SDimitry Andric   // Transfer the possible Kills in ElseBlocks from Reg to NewReg
460*fe6060f1SDimitry Andric   auto I = OldVarInfo.Kills.begin();
461*fe6060f1SDimitry Andric   while (I != OldVarInfo.Kills.end()) {
462*fe6060f1SDimitry Andric     if (ElseBlocks.contains((*I)->getParent())) {
463*fe6060f1SDimitry Andric       NewVarInfo.Kills.push_back(*I);
464*fe6060f1SDimitry Andric       I = OldVarInfo.Kills.erase(I);
465*fe6060f1SDimitry Andric     } else {
466*fe6060f1SDimitry Andric       ++I;
467*fe6060f1SDimitry Andric     }
468*fe6060f1SDimitry Andric   }
469*fe6060f1SDimitry Andric }
470*fe6060f1SDimitry Andric 
471*fe6060f1SDimitry Andric void SIOptimizeVGPRLiveRange::optimizeLiveRange(
472*fe6060f1SDimitry Andric     Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow,
473*fe6060f1SDimitry Andric     MachineBasicBlock *Endif,
474*fe6060f1SDimitry Andric     SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const {
475*fe6060f1SDimitry Andric   // Insert a new PHI, marking the value from the THEN region being
476*fe6060f1SDimitry Andric   // undef.
477*fe6060f1SDimitry Andric   LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n');
478*fe6060f1SDimitry Andric   const auto *RC = MRI->getRegClass(Reg);
479*fe6060f1SDimitry Andric   Register NewReg = MRI->createVirtualRegister(RC);
480*fe6060f1SDimitry Andric   Register UndefReg = MRI->createVirtualRegister(RC);
481*fe6060f1SDimitry Andric   MachineInstrBuilder PHI = BuildMI(*Flow, Flow->getFirstNonPHI(), DebugLoc(),
482*fe6060f1SDimitry Andric                                     TII->get(TargetOpcode::PHI), NewReg);
483*fe6060f1SDimitry Andric   for (auto *Pred : Flow->predecessors()) {
484*fe6060f1SDimitry Andric     if (Pred == If)
485*fe6060f1SDimitry Andric       PHI.addReg(Reg).addMBB(Pred);
486*fe6060f1SDimitry Andric     else
487*fe6060f1SDimitry Andric       PHI.addReg(UndefReg, RegState::Undef).addMBB(Pred);
488*fe6060f1SDimitry Andric   }
489*fe6060f1SDimitry Andric 
490*fe6060f1SDimitry Andric   // Replace all uses in the ELSE region or the PHIs in ENDIF block
491*fe6060f1SDimitry Andric   // Use early increment range because setReg() will update the linked list.
492*fe6060f1SDimitry Andric   for (auto &O : make_early_inc_range(MRI->use_operands(Reg))) {
493*fe6060f1SDimitry Andric     auto *UseMI = O.getParent();
494*fe6060f1SDimitry Andric     auto *UseBlock = UseMI->getParent();
495*fe6060f1SDimitry Andric     // Replace uses in Endif block
496*fe6060f1SDimitry Andric     if (UseBlock == Endif) {
497*fe6060f1SDimitry Andric       assert(UseMI->isPHI() && "Uses should be PHI in Endif block");
498*fe6060f1SDimitry Andric       O.setReg(NewReg);
499*fe6060f1SDimitry Andric       continue;
500*fe6060f1SDimitry Andric     }
501*fe6060f1SDimitry Andric 
502*fe6060f1SDimitry Andric     // Replace uses in Else region
503*fe6060f1SDimitry Andric     if (ElseBlocks.contains(UseBlock))
504*fe6060f1SDimitry Andric       O.setReg(NewReg);
505*fe6060f1SDimitry Andric   }
506*fe6060f1SDimitry Andric 
507*fe6060f1SDimitry Andric   // The optimized Reg is not alive through Flow blocks anymore.
508*fe6060f1SDimitry Andric   LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
509*fe6060f1SDimitry Andric   OldVarInfo.AliveBlocks.reset(Flow->getNumber());
510*fe6060f1SDimitry Andric 
511*fe6060f1SDimitry Andric   updateLiveRangeInElseRegion(Reg, NewReg, Flow, Endif, ElseBlocks);
512*fe6060f1SDimitry Andric   updateLiveRangeInThenRegion(Reg, If, Flow);
513*fe6060f1SDimitry Andric }
514*fe6060f1SDimitry Andric 
515*fe6060f1SDimitry Andric void SIOptimizeVGPRLiveRange::optimizeWaterfallLiveRange(
516*fe6060f1SDimitry Andric     Register Reg, MachineBasicBlock *Loop) const {
517*fe6060f1SDimitry Andric   // Insert a new PHI, marking the value from the last loop iteration undef.
518*fe6060f1SDimitry Andric   LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n');
519*fe6060f1SDimitry Andric   const auto *RC = MRI->getRegClass(Reg);
520*fe6060f1SDimitry Andric   Register NewReg = MRI->createVirtualRegister(RC);
521*fe6060f1SDimitry Andric   Register UndefReg = MRI->createVirtualRegister(RC);
522*fe6060f1SDimitry Andric 
523*fe6060f1SDimitry Andric   // Replace all uses in the LOOP region
524*fe6060f1SDimitry Andric   // Use early increment range because setReg() will update the linked list.
525*fe6060f1SDimitry Andric   for (auto &O : make_early_inc_range(MRI->use_operands(Reg))) {
526*fe6060f1SDimitry Andric     auto *UseMI = O.getParent();
527*fe6060f1SDimitry Andric     auto *UseBlock = UseMI->getParent();
528*fe6060f1SDimitry Andric     // Replace uses in Loop block
529*fe6060f1SDimitry Andric     if (UseBlock == Loop)
530*fe6060f1SDimitry Andric       O.setReg(NewReg);
531*fe6060f1SDimitry Andric   }
532*fe6060f1SDimitry Andric 
533*fe6060f1SDimitry Andric   MachineInstrBuilder PHI = BuildMI(*Loop, Loop->getFirstNonPHI(), DebugLoc(),
534*fe6060f1SDimitry Andric                                     TII->get(TargetOpcode::PHI), NewReg);
535*fe6060f1SDimitry Andric   for (auto *Pred : Loop->predecessors()) {
536*fe6060f1SDimitry Andric     if (Pred == Loop)
537*fe6060f1SDimitry Andric       PHI.addReg(UndefReg, RegState::Undef).addMBB(Pred);
538*fe6060f1SDimitry Andric     else
539*fe6060f1SDimitry Andric       PHI.addReg(Reg).addMBB(Pred);
540*fe6060f1SDimitry Andric   }
541*fe6060f1SDimitry Andric 
542*fe6060f1SDimitry Andric   LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(NewReg);
543*fe6060f1SDimitry Andric   LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
544*fe6060f1SDimitry Andric 
545*fe6060f1SDimitry Andric   // collectWaterfallCandidateRegisters only collects registers that are dead
546*fe6060f1SDimitry Andric   // after the loop. So we know that the old reg is not live throughout the
547*fe6060f1SDimitry Andric   // whole block anymore.
548*fe6060f1SDimitry Andric   OldVarInfo.AliveBlocks.reset(Loop->getNumber());
549*fe6060f1SDimitry Andric 
550*fe6060f1SDimitry Andric   // Mark the last use as kill
551*fe6060f1SDimitry Andric   for (auto &MI : reverse(Loop->instrs())) {
552*fe6060f1SDimitry Andric     if (MI.readsRegister(NewReg, TRI)) {
553*fe6060f1SDimitry Andric       MI.addRegisterKilled(NewReg, TRI);
554*fe6060f1SDimitry Andric       NewVarInfo.Kills.push_back(&MI);
555*fe6060f1SDimitry Andric       break;
556*fe6060f1SDimitry Andric     }
557*fe6060f1SDimitry Andric   }
558*fe6060f1SDimitry Andric   assert(!NewVarInfo.Kills.empty() &&
559*fe6060f1SDimitry Andric          "Failed to find last usage of register in loop");
560*fe6060f1SDimitry Andric }
561*fe6060f1SDimitry Andric 
562*fe6060f1SDimitry Andric char SIOptimizeVGPRLiveRange::ID = 0;
563*fe6060f1SDimitry Andric 
564*fe6060f1SDimitry Andric INITIALIZE_PASS_BEGIN(SIOptimizeVGPRLiveRange, DEBUG_TYPE,
565*fe6060f1SDimitry Andric                       "SI Optimize VGPR LiveRange", false, false)
566*fe6060f1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
567*fe6060f1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
568*fe6060f1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LiveVariables)
569*fe6060f1SDimitry Andric INITIALIZE_PASS_END(SIOptimizeVGPRLiveRange, DEBUG_TYPE,
570*fe6060f1SDimitry Andric                     "SI Optimize VGPR LiveRange", false, false)
571*fe6060f1SDimitry Andric 
572*fe6060f1SDimitry Andric char &llvm::SIOptimizeVGPRLiveRangeID = SIOptimizeVGPRLiveRange::ID;
573*fe6060f1SDimitry Andric 
574*fe6060f1SDimitry Andric FunctionPass *llvm::createSIOptimizeVGPRLiveRangePass() {
575*fe6060f1SDimitry Andric   return new SIOptimizeVGPRLiveRange();
576*fe6060f1SDimitry Andric }
577*fe6060f1SDimitry Andric 
578*fe6060f1SDimitry Andric bool SIOptimizeVGPRLiveRange::runOnMachineFunction(MachineFunction &MF) {
579*fe6060f1SDimitry Andric 
580*fe6060f1SDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
581*fe6060f1SDimitry Andric   TII = ST.getInstrInfo();
582*fe6060f1SDimitry Andric   TRI = &TII->getRegisterInfo();
583*fe6060f1SDimitry Andric   MDT = &getAnalysis<MachineDominatorTree>();
584*fe6060f1SDimitry Andric   Loops = &getAnalysis<MachineLoopInfo>();
585*fe6060f1SDimitry Andric   LV = &getAnalysis<LiveVariables>();
586*fe6060f1SDimitry Andric   MRI = &MF.getRegInfo();
587*fe6060f1SDimitry Andric 
588*fe6060f1SDimitry Andric   if (skipFunction(MF.getFunction()))
589*fe6060f1SDimitry Andric     return false;
590*fe6060f1SDimitry Andric 
591*fe6060f1SDimitry Andric   bool MadeChange = false;
592*fe6060f1SDimitry Andric 
593*fe6060f1SDimitry Andric   // TODO: we need to think about the order of visiting the blocks to get
594*fe6060f1SDimitry Andric   // optimal result for nesting if-else cases.
595*fe6060f1SDimitry Andric   for (MachineBasicBlock &MBB : MF) {
596*fe6060f1SDimitry Andric     for (auto &MI : MBB.terminators()) {
597*fe6060f1SDimitry Andric       // Detect the if-else blocks
598*fe6060f1SDimitry Andric       if (MI.getOpcode() == AMDGPU::SI_IF) {
599*fe6060f1SDimitry Andric         MachineBasicBlock *IfTarget = MI.getOperand(2).getMBB();
600*fe6060f1SDimitry Andric         auto *Endif = getElseTarget(IfTarget);
601*fe6060f1SDimitry Andric         if (!Endif)
602*fe6060f1SDimitry Andric           continue;
603*fe6060f1SDimitry Andric 
604*fe6060f1SDimitry Andric         SmallSetVector<MachineBasicBlock *, 16> ElseBlocks;
605*fe6060f1SDimitry Andric         SmallVector<Register> CandidateRegs;
606*fe6060f1SDimitry Andric 
607*fe6060f1SDimitry Andric         LLVM_DEBUG(dbgs() << "Checking IF-ELSE-ENDIF: "
608*fe6060f1SDimitry Andric                           << printMBBReference(MBB) << ' '
609*fe6060f1SDimitry Andric                           << printMBBReference(*IfTarget) << ' '
610*fe6060f1SDimitry Andric                           << printMBBReference(*Endif) << '\n');
611*fe6060f1SDimitry Andric 
612*fe6060f1SDimitry Andric         // Collect all the blocks in the ELSE region
613*fe6060f1SDimitry Andric         collectElseRegionBlocks(IfTarget, Endif, ElseBlocks);
614*fe6060f1SDimitry Andric 
615*fe6060f1SDimitry Andric         // Collect the registers can be optimized
616*fe6060f1SDimitry Andric         collectCandidateRegisters(&MBB, IfTarget, Endif, ElseBlocks,
617*fe6060f1SDimitry Andric                                   CandidateRegs);
618*fe6060f1SDimitry Andric         MadeChange |= !CandidateRegs.empty();
619*fe6060f1SDimitry Andric         // Now we are safe to optimize.
620*fe6060f1SDimitry Andric         for (auto Reg : CandidateRegs)
621*fe6060f1SDimitry Andric           optimizeLiveRange(Reg, &MBB, IfTarget, Endif, ElseBlocks);
622*fe6060f1SDimitry Andric       } else if (MI.getOpcode() == AMDGPU::SI_WATERFALL_LOOP) {
623*fe6060f1SDimitry Andric         LLVM_DEBUG(dbgs() << "Checking Waterfall loop: "
624*fe6060f1SDimitry Andric                           << printMBBReference(MBB) << '\n');
625*fe6060f1SDimitry Andric 
626*fe6060f1SDimitry Andric         SmallSetVector<Register, 16> CandidateRegs;
627*fe6060f1SDimitry Andric         collectWaterfallCandidateRegisters(&MBB, CandidateRegs);
628*fe6060f1SDimitry Andric         MadeChange |= !CandidateRegs.empty();
629*fe6060f1SDimitry Andric         // Now we are safe to optimize.
630*fe6060f1SDimitry Andric         for (auto Reg : CandidateRegs)
631*fe6060f1SDimitry Andric           optimizeWaterfallLiveRange(Reg, &MBB);
632*fe6060f1SDimitry Andric       }
633*fe6060f1SDimitry Andric     }
634*fe6060f1SDimitry Andric   }
635*fe6060f1SDimitry Andric 
636*fe6060f1SDimitry Andric   return MadeChange;
637*fe6060f1SDimitry Andric }
638