xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1fe6060f1SDimitry Andric //===--------------------- SIOptimizeVGPRLiveRange.cpp  -------------------===//
2fe6060f1SDimitry Andric //
3fe6060f1SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4fe6060f1SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5fe6060f1SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6fe6060f1SDimitry Andric //
7fe6060f1SDimitry Andric //===----------------------------------------------------------------------===//
8fe6060f1SDimitry Andric //
9fe6060f1SDimitry Andric /// \file
10fe6060f1SDimitry Andric /// This pass tries to remove unnecessary VGPR live ranges in divergent if-else
11fe6060f1SDimitry Andric /// structures and waterfall loops.
12fe6060f1SDimitry Andric ///
13fe6060f1SDimitry Andric /// When we do structurization, we usually transform an if-else into two
14349cc55cSDimitry Andric /// successive if-then (with a flow block to do predicate inversion). Consider a
15fe6060f1SDimitry Andric /// simple case after structurization: A divergent value %a was defined before
16fe6060f1SDimitry Andric /// if-else and used in both THEN (use in THEN is optional) and ELSE part:
17fe6060f1SDimitry Andric ///    bb.if:
18fe6060f1SDimitry Andric ///      %a = ...
19fe6060f1SDimitry Andric ///      ...
20fe6060f1SDimitry Andric ///    bb.then:
21fe6060f1SDimitry Andric ///      ... = op %a
22fe6060f1SDimitry Andric ///      ... // %a can be dead here
23fe6060f1SDimitry Andric ///    bb.flow:
24fe6060f1SDimitry Andric ///      ...
25fe6060f1SDimitry Andric ///    bb.else:
26fe6060f1SDimitry Andric ///      ... = %a
27fe6060f1SDimitry Andric ///      ...
28fe6060f1SDimitry Andric ///    bb.endif
29fe6060f1SDimitry Andric ///
30fe6060f1SDimitry Andric ///  As register allocator has no idea of the thread-control-flow, it will just
31fe6060f1SDimitry Andric ///  assume %a would be alive in the whole range of bb.then because of a later
32fe6060f1SDimitry Andric ///  use in bb.else. On AMDGPU architecture, the VGPR is accessed with respect
33fe6060f1SDimitry Andric ///  to exec mask. For this if-else case, the lanes active in bb.then will be
34fe6060f1SDimitry Andric ///  inactive in bb.else, and vice-versa. So we are safe to say that %a was dead
35fe6060f1SDimitry Andric ///  after the last use in bb.then until the end of the block. The reason is
36fe6060f1SDimitry Andric ///  the instructions in bb.then will only overwrite lanes that will never be
37fe6060f1SDimitry Andric ///  accessed in bb.else.
38fe6060f1SDimitry Andric ///
39bdd1243dSDimitry Andric ///  This pass aims to tell register allocator that %a is in-fact dead,
40fe6060f1SDimitry Andric ///  through inserting a phi-node in bb.flow saying that %a is undef when coming
41fe6060f1SDimitry Andric ///  from bb.then, and then replace the uses in the bb.else with the result of
42fe6060f1SDimitry Andric ///  newly inserted phi.
43fe6060f1SDimitry Andric ///
44fe6060f1SDimitry Andric ///  Two key conditions must be met to ensure correctness:
45fe6060f1SDimitry Andric ///  1.) The def-point should be in the same loop-level as if-else-endif to make
46fe6060f1SDimitry Andric ///      sure the second loop iteration still get correct data.
47fe6060f1SDimitry Andric ///  2.) There should be no further uses after the IF-ELSE region.
48fe6060f1SDimitry Andric ///
49fe6060f1SDimitry Andric ///
50fe6060f1SDimitry Andric /// Waterfall loops get inserted around instructions that use divergent values
51fe6060f1SDimitry Andric /// but can only be executed with a uniform value. For example an indirect call
52fe6060f1SDimitry Andric /// to a divergent address:
53fe6060f1SDimitry Andric ///    bb.start:
54fe6060f1SDimitry Andric ///      %a = ...
55fe6060f1SDimitry Andric ///      %fun = ...
56fe6060f1SDimitry Andric ///      ...
57fe6060f1SDimitry Andric ///    bb.loop:
58fe6060f1SDimitry Andric ///      call %fun (%a)
59fe6060f1SDimitry Andric ///      ... // %a can be dead here
60fe6060f1SDimitry Andric ///      loop %bb.loop
61fe6060f1SDimitry Andric ///
62fe6060f1SDimitry Andric ///  The loop block is executed multiple times, but it is run exactly once for
63fe6060f1SDimitry Andric ///  each active lane. Similar to the if-else case, the register allocator
64fe6060f1SDimitry Andric ///  assumes that %a is live throughout the loop as it is used again in the next
65fe6060f1SDimitry Andric ///  iteration. If %a is a VGPR that is unused after the loop, it does not need
66fe6060f1SDimitry Andric ///  to be live after its last use in the loop block. By inserting a phi-node at
67fe6060f1SDimitry Andric ///  the start of bb.loop that is undef when coming from bb.loop, the register
68fe6060f1SDimitry Andric ///  allocation knows that the value of %a does not need to be preserved through
69fe6060f1SDimitry Andric ///  iterations of the loop.
70fe6060f1SDimitry Andric ///
71fe6060f1SDimitry Andric //
72fe6060f1SDimitry Andric //===----------------------------------------------------------------------===//
73fe6060f1SDimitry Andric 
74fe6060f1SDimitry Andric #include "AMDGPU.h"
75fe6060f1SDimitry Andric #include "GCNSubtarget.h"
76fe6060f1SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
77fe6060f1SDimitry Andric #include "SIMachineFunctionInfo.h"
78fe6060f1SDimitry Andric #include "llvm/CodeGen/LiveVariables.h"
79fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineDominators.h"
80fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineLoopInfo.h"
81fe6060f1SDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h"
82fe6060f1SDimitry Andric #include "llvm/InitializePasses.h"
83fe6060f1SDimitry Andric 
84fe6060f1SDimitry Andric using namespace llvm;
85fe6060f1SDimitry Andric 
86fe6060f1SDimitry Andric #define DEBUG_TYPE "si-opt-vgpr-liverange"
87fe6060f1SDimitry Andric 
88fe6060f1SDimitry Andric namespace {
89fe6060f1SDimitry Andric 
90fe6060f1SDimitry Andric class SIOptimizeVGPRLiveRange : public MachineFunctionPass {
91fe6060f1SDimitry Andric private:
92fe6060f1SDimitry Andric   const SIRegisterInfo *TRI = nullptr;
93fe6060f1SDimitry Andric   const SIInstrInfo *TII = nullptr;
94fe6060f1SDimitry Andric   LiveVariables *LV = nullptr;
95fe6060f1SDimitry Andric   MachineDominatorTree *MDT = nullptr;
96fe6060f1SDimitry Andric   const MachineLoopInfo *Loops = nullptr;
97fe6060f1SDimitry Andric   MachineRegisterInfo *MRI = nullptr;
98fe6060f1SDimitry Andric 
99fe6060f1SDimitry Andric public:
100fe6060f1SDimitry Andric   static char ID;
101fe6060f1SDimitry Andric 
102fe6060f1SDimitry Andric   MachineBasicBlock *getElseTarget(MachineBasicBlock *MBB) const;
103fe6060f1SDimitry Andric 
104fe6060f1SDimitry Andric   void collectElseRegionBlocks(MachineBasicBlock *Flow,
105fe6060f1SDimitry Andric                                MachineBasicBlock *Endif,
106fe6060f1SDimitry Andric                                SmallSetVector<MachineBasicBlock *, 16> &) const;
107fe6060f1SDimitry Andric 
108fe6060f1SDimitry Andric   void
109fe6060f1SDimitry Andric   collectCandidateRegisters(MachineBasicBlock *If, MachineBasicBlock *Flow,
110fe6060f1SDimitry Andric                             MachineBasicBlock *Endif,
111fe6060f1SDimitry Andric                             SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks,
112fe6060f1SDimitry Andric                             SmallVectorImpl<Register> &CandidateRegs) const;
113fe6060f1SDimitry Andric 
114fe6060f1SDimitry Andric   void collectWaterfallCandidateRegisters(
11581ad6265SDimitry Andric       MachineBasicBlock *LoopHeader, MachineBasicBlock *LoopEnd,
11681ad6265SDimitry Andric       SmallSetVector<Register, 16> &CandidateRegs,
11781ad6265SDimitry Andric       SmallSetVector<MachineBasicBlock *, 2> &Blocks,
11881ad6265SDimitry Andric       SmallVectorImpl<MachineInstr *> &Instructions) const;
119fe6060f1SDimitry Andric 
120fe6060f1SDimitry Andric   void findNonPHIUsesInBlock(Register Reg, MachineBasicBlock *MBB,
121fe6060f1SDimitry Andric                              SmallVectorImpl<MachineInstr *> &Uses) const;
122fe6060f1SDimitry Andric 
123fe6060f1SDimitry Andric   void updateLiveRangeInThenRegion(Register Reg, MachineBasicBlock *If,
124fe6060f1SDimitry Andric                                    MachineBasicBlock *Flow) const;
125fe6060f1SDimitry Andric 
126fe6060f1SDimitry Andric   void updateLiveRangeInElseRegion(
127fe6060f1SDimitry Andric       Register Reg, Register NewReg, MachineBasicBlock *Flow,
128fe6060f1SDimitry Andric       MachineBasicBlock *Endif,
129fe6060f1SDimitry Andric       SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const;
130fe6060f1SDimitry Andric 
131fe6060f1SDimitry Andric   void
132fe6060f1SDimitry Andric   optimizeLiveRange(Register Reg, MachineBasicBlock *If,
133fe6060f1SDimitry Andric                     MachineBasicBlock *Flow, MachineBasicBlock *Endif,
134fe6060f1SDimitry Andric                     SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const;
135fe6060f1SDimitry Andric 
13681ad6265SDimitry Andric   void optimizeWaterfallLiveRange(
13781ad6265SDimitry Andric       Register Reg, MachineBasicBlock *LoopHeader,
13881ad6265SDimitry Andric       SmallSetVector<MachineBasicBlock *, 2> &LoopBlocks,
13981ad6265SDimitry Andric       SmallVectorImpl<MachineInstr *> &Instructions) const;
140fe6060f1SDimitry Andric 
SIOptimizeVGPRLiveRange()141fe6060f1SDimitry Andric   SIOptimizeVGPRLiveRange() : MachineFunctionPass(ID) {}
142fe6060f1SDimitry Andric 
143fe6060f1SDimitry Andric   bool runOnMachineFunction(MachineFunction &MF) override;
144fe6060f1SDimitry Andric 
getPassName() const145fe6060f1SDimitry Andric   StringRef getPassName() const override {
146fe6060f1SDimitry Andric     return "SI Optimize VGPR LiveRange";
147fe6060f1SDimitry Andric   }
148fe6060f1SDimitry Andric 
getAnalysisUsage(AnalysisUsage & AU) const149fe6060f1SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
150*0fca6ea1SDimitry Andric     AU.addRequired<LiveVariablesWrapperPass>();
151*0fca6ea1SDimitry Andric     AU.addRequired<MachineDominatorTreeWrapperPass>();
152*0fca6ea1SDimitry Andric     AU.addRequired<MachineLoopInfoWrapperPass>();
153*0fca6ea1SDimitry Andric     AU.addPreserved<LiveVariablesWrapperPass>();
154*0fca6ea1SDimitry Andric     AU.addPreserved<MachineDominatorTreeWrapperPass>();
155*0fca6ea1SDimitry Andric     AU.addPreserved<MachineLoopInfoWrapperPass>();
156fe6060f1SDimitry Andric     MachineFunctionPass::getAnalysisUsage(AU);
157fe6060f1SDimitry Andric   }
158fe6060f1SDimitry Andric 
getRequiredProperties() const159fe6060f1SDimitry Andric   MachineFunctionProperties getRequiredProperties() const override {
160fe6060f1SDimitry Andric     return MachineFunctionProperties().set(
161fe6060f1SDimitry Andric         MachineFunctionProperties::Property::IsSSA);
162fe6060f1SDimitry Andric   }
16304eeddc0SDimitry Andric 
getClearedProperties() const16404eeddc0SDimitry Andric   MachineFunctionProperties getClearedProperties() const override {
16504eeddc0SDimitry Andric     return MachineFunctionProperties().set(
16604eeddc0SDimitry Andric         MachineFunctionProperties::Property::NoPHIs);
16704eeddc0SDimitry Andric   }
168fe6060f1SDimitry Andric };
169fe6060f1SDimitry Andric 
170fe6060f1SDimitry Andric } // end anonymous namespace
171fe6060f1SDimitry Andric 
172fe6060f1SDimitry Andric // Check whether the MBB is a else flow block and get the branching target which
173fe6060f1SDimitry Andric // is the Endif block
174fe6060f1SDimitry Andric MachineBasicBlock *
getElseTarget(MachineBasicBlock * MBB) const175fe6060f1SDimitry Andric SIOptimizeVGPRLiveRange::getElseTarget(MachineBasicBlock *MBB) const {
176fe6060f1SDimitry Andric   for (auto &BR : MBB->terminators()) {
177fe6060f1SDimitry Andric     if (BR.getOpcode() == AMDGPU::SI_ELSE)
178fe6060f1SDimitry Andric       return BR.getOperand(2).getMBB();
179fe6060f1SDimitry Andric   }
180fe6060f1SDimitry Andric   return nullptr;
181fe6060f1SDimitry Andric }
182fe6060f1SDimitry Andric 
collectElseRegionBlocks(MachineBasicBlock * Flow,MachineBasicBlock * Endif,SmallSetVector<MachineBasicBlock *,16> & Blocks) const183fe6060f1SDimitry Andric void SIOptimizeVGPRLiveRange::collectElseRegionBlocks(
184fe6060f1SDimitry Andric     MachineBasicBlock *Flow, MachineBasicBlock *Endif,
185fe6060f1SDimitry Andric     SmallSetVector<MachineBasicBlock *, 16> &Blocks) const {
186fe6060f1SDimitry Andric   assert(Flow != Endif);
187fe6060f1SDimitry Andric 
188fe6060f1SDimitry Andric   MachineBasicBlock *MBB = Endif;
189fe6060f1SDimitry Andric   unsigned Cur = 0;
190fe6060f1SDimitry Andric   while (MBB) {
191fe6060f1SDimitry Andric     for (auto *Pred : MBB->predecessors()) {
192fe6060f1SDimitry Andric       if (Pred != Flow && !Blocks.contains(Pred))
193fe6060f1SDimitry Andric         Blocks.insert(Pred);
194fe6060f1SDimitry Andric     }
195fe6060f1SDimitry Andric 
196fe6060f1SDimitry Andric     if (Cur < Blocks.size())
197fe6060f1SDimitry Andric       MBB = Blocks[Cur++];
198fe6060f1SDimitry Andric     else
199fe6060f1SDimitry Andric       MBB = nullptr;
200fe6060f1SDimitry Andric   }
201fe6060f1SDimitry Andric 
202fe6060f1SDimitry Andric   LLVM_DEBUG({
203fe6060f1SDimitry Andric     dbgs() << "Found Else blocks: ";
204fe6060f1SDimitry Andric     for (auto *MBB : Blocks)
205fe6060f1SDimitry Andric       dbgs() << printMBBReference(*MBB) << ' ';
206fe6060f1SDimitry Andric     dbgs() << '\n';
207fe6060f1SDimitry Andric   });
208fe6060f1SDimitry Andric }
209fe6060f1SDimitry Andric 
210fe6060f1SDimitry Andric /// Find the instructions(excluding phi) in \p MBB that uses the \p Reg.
findNonPHIUsesInBlock(Register Reg,MachineBasicBlock * MBB,SmallVectorImpl<MachineInstr * > & Uses) const211fe6060f1SDimitry Andric void SIOptimizeVGPRLiveRange::findNonPHIUsesInBlock(
212fe6060f1SDimitry Andric     Register Reg, MachineBasicBlock *MBB,
213fe6060f1SDimitry Andric     SmallVectorImpl<MachineInstr *> &Uses) const {
214fe6060f1SDimitry Andric   for (auto &UseMI : MRI->use_nodbg_instructions(Reg)) {
215fe6060f1SDimitry Andric     if (UseMI.getParent() == MBB && !UseMI.isPHI())
216fe6060f1SDimitry Andric       Uses.push_back(&UseMI);
217fe6060f1SDimitry Andric   }
218fe6060f1SDimitry Andric }
219fe6060f1SDimitry Andric 
220fe6060f1SDimitry Andric /// Collect the killed registers in the ELSE region which are not alive through
221fe6060f1SDimitry Andric /// the whole THEN region.
collectCandidateRegisters(MachineBasicBlock * If,MachineBasicBlock * Flow,MachineBasicBlock * Endif,SmallSetVector<MachineBasicBlock *,16> & ElseBlocks,SmallVectorImpl<Register> & CandidateRegs) const222fe6060f1SDimitry Andric void SIOptimizeVGPRLiveRange::collectCandidateRegisters(
223fe6060f1SDimitry Andric     MachineBasicBlock *If, MachineBasicBlock *Flow, MachineBasicBlock *Endif,
224fe6060f1SDimitry Andric     SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks,
225fe6060f1SDimitry Andric     SmallVectorImpl<Register> &CandidateRegs) const {
226fe6060f1SDimitry Andric 
227fe6060f1SDimitry Andric   SmallSet<Register, 8> KillsInElse;
228fe6060f1SDimitry Andric 
229fe6060f1SDimitry Andric   for (auto *Else : ElseBlocks) {
230fe6060f1SDimitry Andric     for (auto &MI : Else->instrs()) {
231fe6060f1SDimitry Andric       if (MI.isDebugInstr())
232fe6060f1SDimitry Andric         continue;
233fe6060f1SDimitry Andric 
234fe6060f1SDimitry Andric       for (auto &MO : MI.operands()) {
235fe6060f1SDimitry Andric         if (!MO.isReg() || !MO.getReg() || MO.isDef())
236fe6060f1SDimitry Andric           continue;
237fe6060f1SDimitry Andric 
238fe6060f1SDimitry Andric         Register MOReg = MO.getReg();
239fe6060f1SDimitry Andric         // We can only optimize AGPR/VGPR virtual register
240fe6060f1SDimitry Andric         if (MOReg.isPhysical() || !TRI->isVectorRegister(*MRI, MOReg))
241fe6060f1SDimitry Andric           continue;
242fe6060f1SDimitry Andric 
243fe6060f1SDimitry Andric         if (MO.readsReg()) {
244fe6060f1SDimitry Andric           LiveVariables::VarInfo &VI = LV->getVarInfo(MOReg);
245fe6060f1SDimitry Andric           const MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent();
246fe6060f1SDimitry Andric           // Make sure two conditions are met:
247fe6060f1SDimitry Andric           // a.) the value is defined before/in the IF block
248fe6060f1SDimitry Andric           // b.) should be defined in the same loop-level.
249fe6060f1SDimitry Andric           if ((VI.AliveBlocks.test(If->getNumber()) || DefMBB == If) &&
250fe6060f1SDimitry Andric               Loops->getLoopFor(DefMBB) == Loops->getLoopFor(If)) {
251fe6060f1SDimitry Andric             // Check if the register is live into the endif block. If not,
252fe6060f1SDimitry Andric             // consider it killed in the else region.
253fe6060f1SDimitry Andric             LiveVariables::VarInfo &VI = LV->getVarInfo(MOReg);
254fe6060f1SDimitry Andric             if (!VI.isLiveIn(*Endif, MOReg, *MRI)) {
255fe6060f1SDimitry Andric               KillsInElse.insert(MOReg);
256fe6060f1SDimitry Andric             } else {
257fe6060f1SDimitry Andric               LLVM_DEBUG(dbgs() << "Excluding " << printReg(MOReg, TRI)
258fe6060f1SDimitry Andric                                 << " as Live in Endif\n");
259fe6060f1SDimitry Andric             }
260fe6060f1SDimitry Andric           }
261fe6060f1SDimitry Andric         }
262fe6060f1SDimitry Andric       }
263fe6060f1SDimitry Andric     }
264fe6060f1SDimitry Andric   }
265fe6060f1SDimitry Andric 
266fe6060f1SDimitry Andric   // Check the phis in the Endif, looking for value coming from the ELSE
267fe6060f1SDimitry Andric   // region. Make sure the phi-use is the last use.
268fe6060f1SDimitry Andric   for (auto &MI : Endif->phis()) {
269fe6060f1SDimitry Andric     for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
270fe6060f1SDimitry Andric       auto &MO = MI.getOperand(Idx);
271fe6060f1SDimitry Andric       auto *Pred = MI.getOperand(Idx + 1).getMBB();
272fe6060f1SDimitry Andric       if (Pred == Flow)
273fe6060f1SDimitry Andric         continue;
274fe6060f1SDimitry Andric       assert(ElseBlocks.contains(Pred) && "Should be from Else region\n");
275fe6060f1SDimitry Andric 
276fe6060f1SDimitry Andric       if (!MO.isReg() || !MO.getReg() || MO.isUndef())
277fe6060f1SDimitry Andric         continue;
278fe6060f1SDimitry Andric 
279fe6060f1SDimitry Andric       Register Reg = MO.getReg();
280fe6060f1SDimitry Andric       if (Reg.isPhysical() || !TRI->isVectorRegister(*MRI, Reg))
281fe6060f1SDimitry Andric         continue;
282fe6060f1SDimitry Andric 
283fe6060f1SDimitry Andric       LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
284fe6060f1SDimitry Andric 
285fe6060f1SDimitry Andric       if (VI.isLiveIn(*Endif, Reg, *MRI)) {
286fe6060f1SDimitry Andric         LLVM_DEBUG(dbgs() << "Excluding " << printReg(Reg, TRI)
287fe6060f1SDimitry Andric                           << " as Live in Endif\n");
288fe6060f1SDimitry Andric         continue;
289fe6060f1SDimitry Andric       }
290fe6060f1SDimitry Andric       // Make sure two conditions are met:
291fe6060f1SDimitry Andric       // a.) the value is defined before/in the IF block
292fe6060f1SDimitry Andric       // b.) should be defined in the same loop-level.
293fe6060f1SDimitry Andric       const MachineBasicBlock *DefMBB = MRI->getVRegDef(Reg)->getParent();
294fe6060f1SDimitry Andric       if ((VI.AliveBlocks.test(If->getNumber()) || DefMBB == If) &&
295fe6060f1SDimitry Andric           Loops->getLoopFor(DefMBB) == Loops->getLoopFor(If))
296fe6060f1SDimitry Andric         KillsInElse.insert(Reg);
297fe6060f1SDimitry Andric     }
298fe6060f1SDimitry Andric   }
299fe6060f1SDimitry Andric 
300fe6060f1SDimitry Andric   auto IsLiveThroughThen = [&](Register Reg) {
301fe6060f1SDimitry Andric     for (auto I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E;
302fe6060f1SDimitry Andric          ++I) {
303fe6060f1SDimitry Andric       if (!I->readsReg())
304fe6060f1SDimitry Andric         continue;
305fe6060f1SDimitry Andric       auto *UseMI = I->getParent();
306fe6060f1SDimitry Andric       auto *UseMBB = UseMI->getParent();
307fe6060f1SDimitry Andric       if (UseMBB == Flow || UseMBB == Endif) {
308fe6060f1SDimitry Andric         if (!UseMI->isPHI())
309fe6060f1SDimitry Andric           return true;
310fe6060f1SDimitry Andric 
311fe6060f1SDimitry Andric         auto *IncomingMBB = UseMI->getOperand(I.getOperandNo() + 1).getMBB();
312fe6060f1SDimitry Andric         // The register is live through the path If->Flow or Flow->Endif.
313fe6060f1SDimitry Andric         // we should not optimize for such cases.
314fe6060f1SDimitry Andric         if ((UseMBB == Flow && IncomingMBB != If) ||
315fe6060f1SDimitry Andric             (UseMBB == Endif && IncomingMBB == Flow))
316fe6060f1SDimitry Andric           return true;
317fe6060f1SDimitry Andric       }
318fe6060f1SDimitry Andric     }
319fe6060f1SDimitry Andric     return false;
320fe6060f1SDimitry Andric   };
321fe6060f1SDimitry Andric 
322fe6060f1SDimitry Andric   for (auto Reg : KillsInElse) {
323fe6060f1SDimitry Andric     if (!IsLiveThroughThen(Reg))
324fe6060f1SDimitry Andric       CandidateRegs.push_back(Reg);
325fe6060f1SDimitry Andric   }
326fe6060f1SDimitry Andric }
327fe6060f1SDimitry Andric 
328fe6060f1SDimitry Andric /// Collect the registers used in the waterfall loop block that are defined
329fe6060f1SDimitry Andric /// before.
collectWaterfallCandidateRegisters(MachineBasicBlock * LoopHeader,MachineBasicBlock * LoopEnd,SmallSetVector<Register,16> & CandidateRegs,SmallSetVector<MachineBasicBlock *,2> & Blocks,SmallVectorImpl<MachineInstr * > & Instructions) const330fe6060f1SDimitry Andric void SIOptimizeVGPRLiveRange::collectWaterfallCandidateRegisters(
33181ad6265SDimitry Andric     MachineBasicBlock *LoopHeader, MachineBasicBlock *LoopEnd,
33281ad6265SDimitry Andric     SmallSetVector<Register, 16> &CandidateRegs,
33381ad6265SDimitry Andric     SmallSetVector<MachineBasicBlock *, 2> &Blocks,
33481ad6265SDimitry Andric     SmallVectorImpl<MachineInstr *> &Instructions) const {
335fe6060f1SDimitry Andric 
33681ad6265SDimitry Andric   // Collect loop instructions, potentially spanning multiple blocks
33781ad6265SDimitry Andric   auto *MBB = LoopHeader;
33881ad6265SDimitry Andric   for (;;) {
33981ad6265SDimitry Andric     Blocks.insert(MBB);
34081ad6265SDimitry Andric     for (auto &MI : *MBB) {
341fe6060f1SDimitry Andric       if (MI.isDebugInstr())
342fe6060f1SDimitry Andric         continue;
34381ad6265SDimitry Andric       Instructions.push_back(&MI);
34481ad6265SDimitry Andric     }
34581ad6265SDimitry Andric     if (MBB == LoopEnd)
34681ad6265SDimitry Andric       break;
34781ad6265SDimitry Andric 
34881ad6265SDimitry Andric     if ((MBB != LoopHeader && MBB->pred_size() != 1) ||
34981ad6265SDimitry Andric         (MBB == LoopHeader && MBB->pred_size() != 2) || MBB->succ_size() != 1) {
35081ad6265SDimitry Andric       LLVM_DEBUG(dbgs() << "Unexpected edges in CFG, ignoring loop\n");
35181ad6265SDimitry Andric       return;
35281ad6265SDimitry Andric     }
35381ad6265SDimitry Andric 
35481ad6265SDimitry Andric     MBB = *MBB->succ_begin();
35581ad6265SDimitry Andric   }
35681ad6265SDimitry Andric 
35781ad6265SDimitry Andric   for (auto *I : Instructions) {
35881ad6265SDimitry Andric     auto &MI = *I;
359fe6060f1SDimitry Andric 
36006c3fb27SDimitry Andric     for (auto &MO : MI.all_uses()) {
36106c3fb27SDimitry Andric       if (!MO.getReg())
362fe6060f1SDimitry Andric         continue;
363fe6060f1SDimitry Andric 
364fe6060f1SDimitry Andric       Register MOReg = MO.getReg();
365fe6060f1SDimitry Andric       // We can only optimize AGPR/VGPR virtual register
366fe6060f1SDimitry Andric       if (MOReg.isPhysical() || !TRI->isVectorRegister(*MRI, MOReg))
367fe6060f1SDimitry Andric         continue;
368fe6060f1SDimitry Andric 
369fe6060f1SDimitry Andric       if (MO.readsReg()) {
37081ad6265SDimitry Andric         MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent();
371fe6060f1SDimitry Andric         // Make sure the value is defined before the LOOP block
37281ad6265SDimitry Andric         if (!Blocks.contains(DefMBB) && !CandidateRegs.contains(MOReg)) {
373fe6060f1SDimitry Andric           // If the variable is used after the loop, the register coalescer will
374fe6060f1SDimitry Andric           // merge the newly created register and remove the phi node again.
375fe6060f1SDimitry Andric           // Just do nothing in that case.
376fe6060f1SDimitry Andric           LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(MOReg);
377fe6060f1SDimitry Andric           bool IsUsed = false;
37881ad6265SDimitry Andric           for (auto *Succ : LoopEnd->successors()) {
37981ad6265SDimitry Andric             if (!Blocks.contains(Succ) &&
38081ad6265SDimitry Andric                 OldVarInfo.isLiveIn(*Succ, MOReg, *MRI)) {
381fe6060f1SDimitry Andric               IsUsed = true;
382fe6060f1SDimitry Andric               break;
383fe6060f1SDimitry Andric             }
384fe6060f1SDimitry Andric           }
385fe6060f1SDimitry Andric           if (!IsUsed) {
386fe6060f1SDimitry Andric             LLVM_DEBUG(dbgs() << "Found candidate reg: "
387fe6060f1SDimitry Andric                               << printReg(MOReg, TRI, 0, MRI) << '\n');
388fe6060f1SDimitry Andric             CandidateRegs.insert(MOReg);
389fe6060f1SDimitry Andric           } else {
390fe6060f1SDimitry Andric             LLVM_DEBUG(dbgs() << "Reg is used after loop, ignoring: "
391fe6060f1SDimitry Andric                               << printReg(MOReg, TRI, 0, MRI) << '\n');
392fe6060f1SDimitry Andric           }
393fe6060f1SDimitry Andric         }
394fe6060f1SDimitry Andric       }
395fe6060f1SDimitry Andric     }
396fe6060f1SDimitry Andric   }
397fe6060f1SDimitry Andric }
398fe6060f1SDimitry Andric 
399fe6060f1SDimitry Andric // Re-calculate the liveness of \p Reg in the THEN-region
updateLiveRangeInThenRegion(Register Reg,MachineBasicBlock * If,MachineBasicBlock * Flow) const400fe6060f1SDimitry Andric void SIOptimizeVGPRLiveRange::updateLiveRangeInThenRegion(
401fe6060f1SDimitry Andric     Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow) const {
40204eeddc0SDimitry Andric   SetVector<MachineBasicBlock *> Blocks;
40304eeddc0SDimitry Andric   SmallVector<MachineBasicBlock *> WorkList({If});
404fe6060f1SDimitry Andric 
40504eeddc0SDimitry Andric   // Collect all successors until we see the flow block, where we should
40604eeddc0SDimitry Andric   // reconverge.
40704eeddc0SDimitry Andric   while (!WorkList.empty()) {
40804eeddc0SDimitry Andric     auto *MBB = WorkList.pop_back_val();
40904eeddc0SDimitry Andric     for (auto *Succ : MBB->successors()) {
41004eeddc0SDimitry Andric       if (Succ != Flow && !Blocks.contains(Succ)) {
41104eeddc0SDimitry Andric         WorkList.push_back(Succ);
41204eeddc0SDimitry Andric         Blocks.insert(Succ);
413fe6060f1SDimitry Andric       }
414fe6060f1SDimitry Andric     }
41504eeddc0SDimitry Andric   }
416fe6060f1SDimitry Andric 
417fe6060f1SDimitry Andric   LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
41804eeddc0SDimitry Andric   for (MachineBasicBlock *MBB : Blocks) {
419fe6060f1SDimitry Andric     // Clear Live bit, as we will recalculate afterwards
420fe6060f1SDimitry Andric     LLVM_DEBUG(dbgs() << "Clear AliveBlock " << printMBBReference(*MBB)
421fe6060f1SDimitry Andric                       << '\n');
422fe6060f1SDimitry Andric     OldVarInfo.AliveBlocks.reset(MBB->getNumber());
423fe6060f1SDimitry Andric   }
424fe6060f1SDimitry Andric 
42504eeddc0SDimitry Andric   SmallPtrSet<MachineBasicBlock *, 4> PHIIncoming;
42604eeddc0SDimitry Andric 
427fe6060f1SDimitry Andric   // Get the blocks the Reg should be alive through
428fe6060f1SDimitry Andric   for (auto I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E;
429fe6060f1SDimitry Andric        ++I) {
430fe6060f1SDimitry Andric     auto *UseMI = I->getParent();
431fe6060f1SDimitry Andric     if (UseMI->isPHI() && I->readsReg()) {
43204eeddc0SDimitry Andric       if (Blocks.contains(UseMI->getParent()))
433fe6060f1SDimitry Andric         PHIIncoming.insert(UseMI->getOperand(I.getOperandNo() + 1).getMBB());
434fe6060f1SDimitry Andric     }
435fe6060f1SDimitry Andric   }
436fe6060f1SDimitry Andric 
43704eeddc0SDimitry Andric   for (MachineBasicBlock *MBB : Blocks) {
438fe6060f1SDimitry Andric     SmallVector<MachineInstr *> Uses;
439fe6060f1SDimitry Andric     // PHI instructions has been processed before.
440fe6060f1SDimitry Andric     findNonPHIUsesInBlock(Reg, MBB, Uses);
441fe6060f1SDimitry Andric 
442fe6060f1SDimitry Andric     if (Uses.size() == 1) {
443fe6060f1SDimitry Andric       LLVM_DEBUG(dbgs() << "Found one Non-PHI use in "
444fe6060f1SDimitry Andric                         << printMBBReference(*MBB) << '\n');
445fe6060f1SDimitry Andric       LV->HandleVirtRegUse(Reg, MBB, *(*Uses.begin()));
446fe6060f1SDimitry Andric     } else if (Uses.size() > 1) {
447fe6060f1SDimitry Andric       // Process the instructions in-order
448fe6060f1SDimitry Andric       LLVM_DEBUG(dbgs() << "Found " << Uses.size() << " Non-PHI uses in "
449fe6060f1SDimitry Andric                         << printMBBReference(*MBB) << '\n');
450fe6060f1SDimitry Andric       for (MachineInstr &MI : *MBB) {
451fe6060f1SDimitry Andric         if (llvm::is_contained(Uses, &MI))
452fe6060f1SDimitry Andric           LV->HandleVirtRegUse(Reg, MBB, MI);
453fe6060f1SDimitry Andric       }
454fe6060f1SDimitry Andric     }
455fe6060f1SDimitry Andric 
456fe6060f1SDimitry Andric     // Mark Reg alive through the block if this is a PHI incoming block
457fe6060f1SDimitry Andric     if (PHIIncoming.contains(MBB))
458fe6060f1SDimitry Andric       LV->MarkVirtRegAliveInBlock(OldVarInfo, MRI->getVRegDef(Reg)->getParent(),
459fe6060f1SDimitry Andric                                   MBB);
460fe6060f1SDimitry Andric   }
461fe6060f1SDimitry Andric 
462fe6060f1SDimitry Andric   // Set the isKilled flag if we get new Kills in the THEN region.
463fe6060f1SDimitry Andric   for (auto *MI : OldVarInfo.Kills) {
46404eeddc0SDimitry Andric     if (Blocks.contains(MI->getParent()))
465fe6060f1SDimitry Andric       MI->addRegisterKilled(Reg, TRI);
466fe6060f1SDimitry Andric   }
467fe6060f1SDimitry Andric }
468fe6060f1SDimitry Andric 
updateLiveRangeInElseRegion(Register Reg,Register NewReg,MachineBasicBlock * Flow,MachineBasicBlock * Endif,SmallSetVector<MachineBasicBlock *,16> & ElseBlocks) const469fe6060f1SDimitry Andric void SIOptimizeVGPRLiveRange::updateLiveRangeInElseRegion(
470fe6060f1SDimitry Andric     Register Reg, Register NewReg, MachineBasicBlock *Flow,
471fe6060f1SDimitry Andric     MachineBasicBlock *Endif,
472fe6060f1SDimitry Andric     SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const {
473fe6060f1SDimitry Andric   LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(NewReg);
474fe6060f1SDimitry Andric   LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
475fe6060f1SDimitry Andric 
476fe6060f1SDimitry Andric   // Transfer aliveBlocks from Reg to NewReg
477fe6060f1SDimitry Andric   for (auto *MBB : ElseBlocks) {
478fe6060f1SDimitry Andric     unsigned BBNum = MBB->getNumber();
479fe6060f1SDimitry Andric     if (OldVarInfo.AliveBlocks.test(BBNum)) {
480fe6060f1SDimitry Andric       NewVarInfo.AliveBlocks.set(BBNum);
481fe6060f1SDimitry Andric       LLVM_DEBUG(dbgs() << "Removing AliveBlock " << printMBBReference(*MBB)
482fe6060f1SDimitry Andric                         << '\n');
483fe6060f1SDimitry Andric       OldVarInfo.AliveBlocks.reset(BBNum);
484fe6060f1SDimitry Andric     }
485fe6060f1SDimitry Andric   }
486fe6060f1SDimitry Andric 
487fe6060f1SDimitry Andric   // Transfer the possible Kills in ElseBlocks from Reg to NewReg
488fe6060f1SDimitry Andric   auto I = OldVarInfo.Kills.begin();
489fe6060f1SDimitry Andric   while (I != OldVarInfo.Kills.end()) {
490fe6060f1SDimitry Andric     if (ElseBlocks.contains((*I)->getParent())) {
491fe6060f1SDimitry Andric       NewVarInfo.Kills.push_back(*I);
492fe6060f1SDimitry Andric       I = OldVarInfo.Kills.erase(I);
493fe6060f1SDimitry Andric     } else {
494fe6060f1SDimitry Andric       ++I;
495fe6060f1SDimitry Andric     }
496fe6060f1SDimitry Andric   }
497fe6060f1SDimitry Andric }
498fe6060f1SDimitry Andric 
optimizeLiveRange(Register Reg,MachineBasicBlock * If,MachineBasicBlock * Flow,MachineBasicBlock * Endif,SmallSetVector<MachineBasicBlock *,16> & ElseBlocks) const499fe6060f1SDimitry Andric void SIOptimizeVGPRLiveRange::optimizeLiveRange(
500fe6060f1SDimitry Andric     Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow,
501fe6060f1SDimitry Andric     MachineBasicBlock *Endif,
502fe6060f1SDimitry Andric     SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const {
503fe6060f1SDimitry Andric   // Insert a new PHI, marking the value from the THEN region being
504fe6060f1SDimitry Andric   // undef.
505fe6060f1SDimitry Andric   LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n');
506fe6060f1SDimitry Andric   const auto *RC = MRI->getRegClass(Reg);
507fe6060f1SDimitry Andric   Register NewReg = MRI->createVirtualRegister(RC);
508fe6060f1SDimitry Andric   Register UndefReg = MRI->createVirtualRegister(RC);
509fe6060f1SDimitry Andric   MachineInstrBuilder PHI = BuildMI(*Flow, Flow->getFirstNonPHI(), DebugLoc(),
510fe6060f1SDimitry Andric                                     TII->get(TargetOpcode::PHI), NewReg);
511fe6060f1SDimitry Andric   for (auto *Pred : Flow->predecessors()) {
512fe6060f1SDimitry Andric     if (Pred == If)
513fe6060f1SDimitry Andric       PHI.addReg(Reg).addMBB(Pred);
514fe6060f1SDimitry Andric     else
515fe6060f1SDimitry Andric       PHI.addReg(UndefReg, RegState::Undef).addMBB(Pred);
516fe6060f1SDimitry Andric   }
517fe6060f1SDimitry Andric 
518fe6060f1SDimitry Andric   // Replace all uses in the ELSE region or the PHIs in ENDIF block
519fe6060f1SDimitry Andric   // Use early increment range because setReg() will update the linked list.
520fe6060f1SDimitry Andric   for (auto &O : make_early_inc_range(MRI->use_operands(Reg))) {
521fe6060f1SDimitry Andric     auto *UseMI = O.getParent();
522fe6060f1SDimitry Andric     auto *UseBlock = UseMI->getParent();
523fe6060f1SDimitry Andric     // Replace uses in Endif block
524fe6060f1SDimitry Andric     if (UseBlock == Endif) {
5255f757f3fSDimitry Andric       if (UseMI->isPHI())
526fe6060f1SDimitry Andric         O.setReg(NewReg);
5275f757f3fSDimitry Andric       else if (UseMI->isDebugInstr())
5285f757f3fSDimitry Andric         continue;
5295f757f3fSDimitry Andric       else {
53006c3fb27SDimitry Andric         // DetectDeadLanes may mark register uses as undef without removing
53106c3fb27SDimitry Andric         // them, in which case a non-phi instruction using the original register
53206c3fb27SDimitry Andric         // may exist in the Endif block even though the register is not live
53306c3fb27SDimitry Andric         // into it.
53406c3fb27SDimitry Andric         assert(!O.readsReg());
53506c3fb27SDimitry Andric       }
536fe6060f1SDimitry Andric       continue;
537fe6060f1SDimitry Andric     }
538fe6060f1SDimitry Andric 
539fe6060f1SDimitry Andric     // Replace uses in Else region
540fe6060f1SDimitry Andric     if (ElseBlocks.contains(UseBlock))
541fe6060f1SDimitry Andric       O.setReg(NewReg);
542fe6060f1SDimitry Andric   }
543fe6060f1SDimitry Andric 
544fe6060f1SDimitry Andric   // The optimized Reg is not alive through Flow blocks anymore.
545fe6060f1SDimitry Andric   LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
546fe6060f1SDimitry Andric   OldVarInfo.AliveBlocks.reset(Flow->getNumber());
547fe6060f1SDimitry Andric 
548fe6060f1SDimitry Andric   updateLiveRangeInElseRegion(Reg, NewReg, Flow, Endif, ElseBlocks);
549fe6060f1SDimitry Andric   updateLiveRangeInThenRegion(Reg, If, Flow);
550fe6060f1SDimitry Andric }
551fe6060f1SDimitry Andric 
optimizeWaterfallLiveRange(Register Reg,MachineBasicBlock * LoopHeader,SmallSetVector<MachineBasicBlock *,2> & Blocks,SmallVectorImpl<MachineInstr * > & Instructions) const552fe6060f1SDimitry Andric void SIOptimizeVGPRLiveRange::optimizeWaterfallLiveRange(
55381ad6265SDimitry Andric     Register Reg, MachineBasicBlock *LoopHeader,
55481ad6265SDimitry Andric     SmallSetVector<MachineBasicBlock *, 2> &Blocks,
55581ad6265SDimitry Andric     SmallVectorImpl<MachineInstr *> &Instructions) const {
556fe6060f1SDimitry Andric   // Insert a new PHI, marking the value from the last loop iteration undef.
557fe6060f1SDimitry Andric   LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n');
558fe6060f1SDimitry Andric   const auto *RC = MRI->getRegClass(Reg);
559fe6060f1SDimitry Andric   Register NewReg = MRI->createVirtualRegister(RC);
560fe6060f1SDimitry Andric   Register UndefReg = MRI->createVirtualRegister(RC);
561fe6060f1SDimitry Andric 
562fe6060f1SDimitry Andric   // Replace all uses in the LOOP region
563fe6060f1SDimitry Andric   // Use early increment range because setReg() will update the linked list.
564fe6060f1SDimitry Andric   for (auto &O : make_early_inc_range(MRI->use_operands(Reg))) {
565fe6060f1SDimitry Andric     auto *UseMI = O.getParent();
566fe6060f1SDimitry Andric     auto *UseBlock = UseMI->getParent();
56781ad6265SDimitry Andric     // Replace uses in Loop blocks
56881ad6265SDimitry Andric     if (Blocks.contains(UseBlock))
569fe6060f1SDimitry Andric       O.setReg(NewReg);
570fe6060f1SDimitry Andric   }
571fe6060f1SDimitry Andric 
57281ad6265SDimitry Andric   MachineInstrBuilder PHI =
57381ad6265SDimitry Andric       BuildMI(*LoopHeader, LoopHeader->getFirstNonPHI(), DebugLoc(),
574fe6060f1SDimitry Andric               TII->get(TargetOpcode::PHI), NewReg);
57581ad6265SDimitry Andric   for (auto *Pred : LoopHeader->predecessors()) {
57681ad6265SDimitry Andric     if (Blocks.contains(Pred))
577fe6060f1SDimitry Andric       PHI.addReg(UndefReg, RegState::Undef).addMBB(Pred);
578fe6060f1SDimitry Andric     else
579fe6060f1SDimitry Andric       PHI.addReg(Reg).addMBB(Pred);
580fe6060f1SDimitry Andric   }
581fe6060f1SDimitry Andric 
582fe6060f1SDimitry Andric   LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(NewReg);
583fe6060f1SDimitry Andric   LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
584fe6060f1SDimitry Andric 
58581ad6265SDimitry Andric   // Find last use and mark as kill
58681ad6265SDimitry Andric   MachineInstr *Kill = nullptr;
58781ad6265SDimitry Andric   for (auto *MI : reverse(Instructions)) {
58881ad6265SDimitry Andric     if (MI->readsRegister(NewReg, TRI)) {
58981ad6265SDimitry Andric       MI->addRegisterKilled(NewReg, TRI);
59081ad6265SDimitry Andric       NewVarInfo.Kills.push_back(MI);
59181ad6265SDimitry Andric       Kill = MI;
592fe6060f1SDimitry Andric       break;
593fe6060f1SDimitry Andric     }
594fe6060f1SDimitry Andric   }
59581ad6265SDimitry Andric   assert(Kill && "Failed to find last usage of register in loop");
59681ad6265SDimitry Andric 
59781ad6265SDimitry Andric   MachineBasicBlock *KillBlock = Kill->getParent();
59881ad6265SDimitry Andric   bool PostKillBlock = false;
59981ad6265SDimitry Andric   for (auto *Block : Blocks) {
60081ad6265SDimitry Andric     auto BBNum = Block->getNumber();
60181ad6265SDimitry Andric 
60281ad6265SDimitry Andric     // collectWaterfallCandidateRegisters only collects registers that are dead
60381ad6265SDimitry Andric     // after the loop. So we know that the old reg is no longer live throughout
60481ad6265SDimitry Andric     // the waterfall loop.
60581ad6265SDimitry Andric     OldVarInfo.AliveBlocks.reset(BBNum);
60681ad6265SDimitry Andric 
60781ad6265SDimitry Andric     // The new register is live up to (and including) the block that kills it.
60881ad6265SDimitry Andric     PostKillBlock |= (Block == KillBlock);
60981ad6265SDimitry Andric     if (PostKillBlock) {
61081ad6265SDimitry Andric       NewVarInfo.AliveBlocks.reset(BBNum);
61181ad6265SDimitry Andric     } else if (Block != LoopHeader) {
61281ad6265SDimitry Andric       NewVarInfo.AliveBlocks.set(BBNum);
61381ad6265SDimitry Andric     }
61481ad6265SDimitry Andric   }
615fe6060f1SDimitry Andric }
616fe6060f1SDimitry Andric 
617fe6060f1SDimitry Andric char SIOptimizeVGPRLiveRange::ID = 0;
618fe6060f1SDimitry Andric 
619fe6060f1SDimitry Andric INITIALIZE_PASS_BEGIN(SIOptimizeVGPRLiveRange, DEBUG_TYPE,
620fe6060f1SDimitry Andric                       "SI Optimize VGPR LiveRange", false, false)
621*0fca6ea1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
622*0fca6ea1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
623*0fca6ea1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LiveVariablesWrapperPass)
624fe6060f1SDimitry Andric INITIALIZE_PASS_END(SIOptimizeVGPRLiveRange, DEBUG_TYPE,
625fe6060f1SDimitry Andric                     "SI Optimize VGPR LiveRange", false, false)
626fe6060f1SDimitry Andric 
627fe6060f1SDimitry Andric char &llvm::SIOptimizeVGPRLiveRangeID = SIOptimizeVGPRLiveRange::ID;
628fe6060f1SDimitry Andric 
createSIOptimizeVGPRLiveRangePass()629fe6060f1SDimitry Andric FunctionPass *llvm::createSIOptimizeVGPRLiveRangePass() {
630fe6060f1SDimitry Andric   return new SIOptimizeVGPRLiveRange();
631fe6060f1SDimitry Andric }
632fe6060f1SDimitry Andric 
runOnMachineFunction(MachineFunction & MF)633fe6060f1SDimitry Andric bool SIOptimizeVGPRLiveRange::runOnMachineFunction(MachineFunction &MF) {
634fe6060f1SDimitry Andric 
635fe6060f1SDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
636fe6060f1SDimitry Andric   TII = ST.getInstrInfo();
637fe6060f1SDimitry Andric   TRI = &TII->getRegisterInfo();
638*0fca6ea1SDimitry Andric   MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
639*0fca6ea1SDimitry Andric   Loops = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
640*0fca6ea1SDimitry Andric   LV = &getAnalysis<LiveVariablesWrapperPass>().getLV();
641fe6060f1SDimitry Andric   MRI = &MF.getRegInfo();
642fe6060f1SDimitry Andric 
643fe6060f1SDimitry Andric   if (skipFunction(MF.getFunction()))
644fe6060f1SDimitry Andric     return false;
645fe6060f1SDimitry Andric 
646fe6060f1SDimitry Andric   bool MadeChange = false;
647fe6060f1SDimitry Andric 
648fe6060f1SDimitry Andric   // TODO: we need to think about the order of visiting the blocks to get
649fe6060f1SDimitry Andric   // optimal result for nesting if-else cases.
650fe6060f1SDimitry Andric   for (MachineBasicBlock &MBB : MF) {
651fe6060f1SDimitry Andric     for (auto &MI : MBB.terminators()) {
652fe6060f1SDimitry Andric       // Detect the if-else blocks
653fe6060f1SDimitry Andric       if (MI.getOpcode() == AMDGPU::SI_IF) {
654fe6060f1SDimitry Andric         MachineBasicBlock *IfTarget = MI.getOperand(2).getMBB();
655fe6060f1SDimitry Andric         auto *Endif = getElseTarget(IfTarget);
656fe6060f1SDimitry Andric         if (!Endif)
657fe6060f1SDimitry Andric           continue;
658fe6060f1SDimitry Andric 
65981ad6265SDimitry Andric         // Skip unexpected control flow.
66081ad6265SDimitry Andric         if (!MDT->dominates(&MBB, IfTarget) || !MDT->dominates(IfTarget, Endif))
66181ad6265SDimitry Andric           continue;
66281ad6265SDimitry Andric 
663fe6060f1SDimitry Andric         SmallSetVector<MachineBasicBlock *, 16> ElseBlocks;
664fe6060f1SDimitry Andric         SmallVector<Register> CandidateRegs;
665fe6060f1SDimitry Andric 
666fe6060f1SDimitry Andric         LLVM_DEBUG(dbgs() << "Checking IF-ELSE-ENDIF: "
667fe6060f1SDimitry Andric                           << printMBBReference(MBB) << ' '
668fe6060f1SDimitry Andric                           << printMBBReference(*IfTarget) << ' '
669fe6060f1SDimitry Andric                           << printMBBReference(*Endif) << '\n');
670fe6060f1SDimitry Andric 
671fe6060f1SDimitry Andric         // Collect all the blocks in the ELSE region
672fe6060f1SDimitry Andric         collectElseRegionBlocks(IfTarget, Endif, ElseBlocks);
673fe6060f1SDimitry Andric 
674fe6060f1SDimitry Andric         // Collect the registers can be optimized
675fe6060f1SDimitry Andric         collectCandidateRegisters(&MBB, IfTarget, Endif, ElseBlocks,
676fe6060f1SDimitry Andric                                   CandidateRegs);
677fe6060f1SDimitry Andric         MadeChange |= !CandidateRegs.empty();
678fe6060f1SDimitry Andric         // Now we are safe to optimize.
679fe6060f1SDimitry Andric         for (auto Reg : CandidateRegs)
680fe6060f1SDimitry Andric           optimizeLiveRange(Reg, &MBB, IfTarget, Endif, ElseBlocks);
681fe6060f1SDimitry Andric       } else if (MI.getOpcode() == AMDGPU::SI_WATERFALL_LOOP) {
68281ad6265SDimitry Andric         auto *LoopHeader = MI.getOperand(0).getMBB();
68381ad6265SDimitry Andric         auto *LoopEnd = &MBB;
68481ad6265SDimitry Andric 
685fe6060f1SDimitry Andric         LLVM_DEBUG(dbgs() << "Checking Waterfall loop: "
68681ad6265SDimitry Andric                           << printMBBReference(*LoopHeader) << '\n');
687fe6060f1SDimitry Andric 
688fe6060f1SDimitry Andric         SmallSetVector<Register, 16> CandidateRegs;
68981ad6265SDimitry Andric         SmallVector<MachineInstr *, 16> Instructions;
69081ad6265SDimitry Andric         SmallSetVector<MachineBasicBlock *, 2> Blocks;
69181ad6265SDimitry Andric 
69281ad6265SDimitry Andric         collectWaterfallCandidateRegisters(LoopHeader, LoopEnd, CandidateRegs,
69381ad6265SDimitry Andric                                            Blocks, Instructions);
694fe6060f1SDimitry Andric         MadeChange |= !CandidateRegs.empty();
695fe6060f1SDimitry Andric         // Now we are safe to optimize.
696fe6060f1SDimitry Andric         for (auto Reg : CandidateRegs)
69781ad6265SDimitry Andric           optimizeWaterfallLiveRange(Reg, LoopHeader, Blocks, Instructions);
698fe6060f1SDimitry Andric       }
699fe6060f1SDimitry Andric     }
700fe6060f1SDimitry Andric   }
701fe6060f1SDimitry Andric 
702fe6060f1SDimitry Andric   return MadeChange;
703fe6060f1SDimitry Andric }
704