xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp (revision 963f5dc7a30624e95d72fb7f87b8892651164e46)
1 //===--------------------- SIOptimizeVGPRLiveRange.cpp  -------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This pass tries to remove unnecessary VGPR live ranges in divergent if-else
11 /// structures and waterfall loops.
12 ///
13 /// When we do structurization, we usually transform an if-else into two
14 /// sucessive if-then (with a flow block to do predicate inversion). Consider a
15 /// simple case after structurization: A divergent value %a was defined before
16 /// if-else and used in both THEN (use in THEN is optional) and ELSE part:
17 ///    bb.if:
18 ///      %a = ...
19 ///      ...
20 ///    bb.then:
21 ///      ... = op %a
22 ///      ... // %a can be dead here
23 ///    bb.flow:
24 ///      ...
25 ///    bb.else:
26 ///      ... = %a
27 ///      ...
28 ///    bb.endif
29 ///
30 ///  As register allocator has no idea of the thread-control-flow, it will just
31 ///  assume %a would be alive in the whole range of bb.then because of a later
32 ///  use in bb.else. On AMDGPU architecture, the VGPR is accessed with respect
33 ///  to exec mask. For this if-else case, the lanes active in bb.then will be
34 ///  inactive in bb.else, and vice-versa. So we are safe to say that %a was dead
35 ///  after the last use in bb.then until the end of the block. The reason is
36 ///  the instructions in bb.then will only overwrite lanes that will never be
37 ///  accessed in bb.else.
38 ///
39 ///  This pass aims to to tell register allocator that %a is in-fact dead,
40 ///  through inserting a phi-node in bb.flow saying that %a is undef when coming
41 ///  from bb.then, and then replace the uses in the bb.else with the result of
42 ///  newly inserted phi.
43 ///
44 ///  Two key conditions must be met to ensure correctness:
45 ///  1.) The def-point should be in the same loop-level as if-else-endif to make
46 ///      sure the second loop iteration still get correct data.
47 ///  2.) There should be no further uses after the IF-ELSE region.
48 ///
49 ///
50 /// Waterfall loops get inserted around instructions that use divergent values
51 /// but can only be executed with a uniform value. For example an indirect call
52 /// to a divergent address:
53 ///    bb.start:
54 ///      %a = ...
55 ///      %fun = ...
56 ///      ...
57 ///    bb.loop:
58 ///      call %fun (%a)
59 ///      ... // %a can be dead here
60 ///      loop %bb.loop
61 ///
62 ///  The loop block is executed multiple times, but it is run exactly once for
63 ///  each active lane. Similar to the if-else case, the register allocator
64 ///  assumes that %a is live throughout the loop as it is used again in the next
65 ///  iteration. If %a is a VGPR that is unused after the loop, it does not need
66 ///  to be live after its last use in the loop block. By inserting a phi-node at
67 ///  the start of bb.loop that is undef when coming from bb.loop, the register
68 ///  allocation knows that the value of %a does not need to be preserved through
69 ///  iterations of the loop.
70 ///
71 //
72 //===----------------------------------------------------------------------===//
73 
74 #include "AMDGPU.h"
75 #include "GCNSubtarget.h"
76 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
77 #include "SIMachineFunctionInfo.h"
78 #include "llvm/CodeGen/LiveVariables.h"
79 #include "llvm/CodeGen/MachineDominators.h"
80 #include "llvm/CodeGen/MachineLoopInfo.h"
81 #include "llvm/CodeGen/TargetRegisterInfo.h"
82 #include "llvm/InitializePasses.h"
83 
84 using namespace llvm;
85 
86 #define DEBUG_TYPE "si-opt-vgpr-liverange"
87 
88 namespace {
89 
90 class SIOptimizeVGPRLiveRange : public MachineFunctionPass {
91 private:
92   const SIRegisterInfo *TRI = nullptr;
93   const SIInstrInfo *TII = nullptr;
94   LiveVariables *LV = nullptr;
95   MachineDominatorTree *MDT = nullptr;
96   const MachineLoopInfo *Loops = nullptr;
97   MachineRegisterInfo *MRI = nullptr;
98 
99 public:
100   static char ID;
101 
102   MachineBasicBlock *getElseTarget(MachineBasicBlock *MBB) const;
103 
104   void collectElseRegionBlocks(MachineBasicBlock *Flow,
105                                MachineBasicBlock *Endif,
106                                SmallSetVector<MachineBasicBlock *, 16> &) const;
107 
108   void
109   collectCandidateRegisters(MachineBasicBlock *If, MachineBasicBlock *Flow,
110                             MachineBasicBlock *Endif,
111                             SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks,
112                             SmallVectorImpl<Register> &CandidateRegs) const;
113 
114   void collectWaterfallCandidateRegisters(
115       MachineBasicBlock *Loop,
116       SmallSetVector<Register, 16> &CandidateRegs) const;
117 
118   void findNonPHIUsesInBlock(Register Reg, MachineBasicBlock *MBB,
119                              SmallVectorImpl<MachineInstr *> &Uses) const;
120 
121   void updateLiveRangeInThenRegion(Register Reg, MachineBasicBlock *If,
122                                    MachineBasicBlock *Flow) const;
123 
124   void updateLiveRangeInElseRegion(
125       Register Reg, Register NewReg, MachineBasicBlock *Flow,
126       MachineBasicBlock *Endif,
127       SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const;
128 
129   void
130   optimizeLiveRange(Register Reg, MachineBasicBlock *If,
131                     MachineBasicBlock *Flow, MachineBasicBlock *Endif,
132                     SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const;
133 
134   void optimizeWaterfallLiveRange(Register Reg, MachineBasicBlock *If) const;
135 
136   SIOptimizeVGPRLiveRange() : MachineFunctionPass(ID) {}
137 
138   bool runOnMachineFunction(MachineFunction &MF) override;
139 
140   StringRef getPassName() const override {
141     return "SI Optimize VGPR LiveRange";
142   }
143 
144   void getAnalysisUsage(AnalysisUsage &AU) const override {
145     AU.addRequired<LiveVariables>();
146     AU.addRequired<MachineDominatorTree>();
147     AU.addRequired<MachineLoopInfo>();
148     AU.addPreserved<LiveVariables>();
149     AU.addPreserved<MachineDominatorTree>();
150     AU.addPreserved<MachineLoopInfo>();
151     MachineFunctionPass::getAnalysisUsage(AU);
152   }
153 
154   MachineFunctionProperties getRequiredProperties() const override {
155     return MachineFunctionProperties().set(
156         MachineFunctionProperties::Property::IsSSA);
157   }
158 };
159 
160 } // end anonymous namespace
161 
162 // Check whether the MBB is a else flow block and get the branching target which
163 // is the Endif block
164 MachineBasicBlock *
165 SIOptimizeVGPRLiveRange::getElseTarget(MachineBasicBlock *MBB) const {
166   for (auto &BR : MBB->terminators()) {
167     if (BR.getOpcode() == AMDGPU::SI_ELSE)
168       return BR.getOperand(2).getMBB();
169   }
170   return nullptr;
171 }
172 
173 void SIOptimizeVGPRLiveRange::collectElseRegionBlocks(
174     MachineBasicBlock *Flow, MachineBasicBlock *Endif,
175     SmallSetVector<MachineBasicBlock *, 16> &Blocks) const {
176   assert(Flow != Endif);
177 
178   MachineBasicBlock *MBB = Endif;
179   unsigned Cur = 0;
180   while (MBB) {
181     for (auto *Pred : MBB->predecessors()) {
182       if (Pred != Flow && !Blocks.contains(Pred))
183         Blocks.insert(Pred);
184     }
185 
186     if (Cur < Blocks.size())
187       MBB = Blocks[Cur++];
188     else
189       MBB = nullptr;
190   }
191 
192   LLVM_DEBUG({
193     dbgs() << "Found Else blocks: ";
194     for (auto *MBB : Blocks)
195       dbgs() << printMBBReference(*MBB) << ' ';
196     dbgs() << '\n';
197   });
198 }
199 
200 /// Find the instructions(excluding phi) in \p MBB that uses the \p Reg.
201 void SIOptimizeVGPRLiveRange::findNonPHIUsesInBlock(
202     Register Reg, MachineBasicBlock *MBB,
203     SmallVectorImpl<MachineInstr *> &Uses) const {
204   for (auto &UseMI : MRI->use_nodbg_instructions(Reg)) {
205     if (UseMI.getParent() == MBB && !UseMI.isPHI())
206       Uses.push_back(&UseMI);
207   }
208 }
209 
210 /// Collect the killed registers in the ELSE region which are not alive through
211 /// the whole THEN region.
212 void SIOptimizeVGPRLiveRange::collectCandidateRegisters(
213     MachineBasicBlock *If, MachineBasicBlock *Flow, MachineBasicBlock *Endif,
214     SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks,
215     SmallVectorImpl<Register> &CandidateRegs) const {
216 
217   SmallSet<Register, 8> KillsInElse;
218 
219   for (auto *Else : ElseBlocks) {
220     for (auto &MI : Else->instrs()) {
221       if (MI.isDebugInstr())
222         continue;
223 
224       for (auto &MO : MI.operands()) {
225         if (!MO.isReg() || !MO.getReg() || MO.isDef())
226           continue;
227 
228         Register MOReg = MO.getReg();
229         // We can only optimize AGPR/VGPR virtual register
230         if (MOReg.isPhysical() || !TRI->isVectorRegister(*MRI, MOReg))
231           continue;
232 
233         if (MO.readsReg()) {
234           LiveVariables::VarInfo &VI = LV->getVarInfo(MOReg);
235           const MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent();
236           // Make sure two conditions are met:
237           // a.) the value is defined before/in the IF block
238           // b.) should be defined in the same loop-level.
239           if ((VI.AliveBlocks.test(If->getNumber()) || DefMBB == If) &&
240               Loops->getLoopFor(DefMBB) == Loops->getLoopFor(If)) {
241             // Check if the register is live into the endif block. If not,
242             // consider it killed in the else region.
243             LiveVariables::VarInfo &VI = LV->getVarInfo(MOReg);
244             if (!VI.isLiveIn(*Endif, MOReg, *MRI)) {
245               KillsInElse.insert(MOReg);
246             } else {
247               LLVM_DEBUG(dbgs() << "Excluding " << printReg(MOReg, TRI)
248                                 << " as Live in Endif\n");
249             }
250           }
251         }
252       }
253     }
254   }
255 
256   // Check the phis in the Endif, looking for value coming from the ELSE
257   // region. Make sure the phi-use is the last use.
258   for (auto &MI : Endif->phis()) {
259     for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
260       auto &MO = MI.getOperand(Idx);
261       auto *Pred = MI.getOperand(Idx + 1).getMBB();
262       if (Pred == Flow)
263         continue;
264       assert(ElseBlocks.contains(Pred) && "Should be from Else region\n");
265 
266       if (!MO.isReg() || !MO.getReg() || MO.isUndef())
267         continue;
268 
269       Register Reg = MO.getReg();
270       if (Reg.isPhysical() || !TRI->isVectorRegister(*MRI, Reg))
271         continue;
272 
273       LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
274 
275       if (VI.isLiveIn(*Endif, Reg, *MRI)) {
276         LLVM_DEBUG(dbgs() << "Excluding " << printReg(Reg, TRI)
277                           << " as Live in Endif\n");
278         continue;
279       }
280       // Make sure two conditions are met:
281       // a.) the value is defined before/in the IF block
282       // b.) should be defined in the same loop-level.
283       const MachineBasicBlock *DefMBB = MRI->getVRegDef(Reg)->getParent();
284       if ((VI.AliveBlocks.test(If->getNumber()) || DefMBB == If) &&
285           Loops->getLoopFor(DefMBB) == Loops->getLoopFor(If))
286         KillsInElse.insert(Reg);
287     }
288   }
289 
290   auto IsLiveThroughThen = [&](Register Reg) {
291     for (auto I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E;
292          ++I) {
293       if (!I->readsReg())
294         continue;
295       auto *UseMI = I->getParent();
296       auto *UseMBB = UseMI->getParent();
297       if (UseMBB == Flow || UseMBB == Endif) {
298         if (!UseMI->isPHI())
299           return true;
300 
301         auto *IncomingMBB = UseMI->getOperand(I.getOperandNo() + 1).getMBB();
302         // The register is live through the path If->Flow or Flow->Endif.
303         // we should not optimize for such cases.
304         if ((UseMBB == Flow && IncomingMBB != If) ||
305             (UseMBB == Endif && IncomingMBB == Flow))
306           return true;
307       }
308     }
309     return false;
310   };
311 
312   for (auto Reg : KillsInElse) {
313     if (!IsLiveThroughThen(Reg))
314       CandidateRegs.push_back(Reg);
315   }
316 }
317 
318 /// Collect the registers used in the waterfall loop block that are defined
319 /// before.
320 void SIOptimizeVGPRLiveRange::collectWaterfallCandidateRegisters(
321     MachineBasicBlock *Loop,
322     SmallSetVector<Register, 16> &CandidateRegs) const {
323 
324   for (auto &MI : Loop->instrs()) {
325     if (MI.isDebugInstr())
326       continue;
327 
328     for (auto &MO : MI.operands()) {
329       if (!MO.isReg() || !MO.getReg() || MO.isDef())
330         continue;
331 
332       Register MOReg = MO.getReg();
333       // We can only optimize AGPR/VGPR virtual register
334       if (MOReg.isPhysical() || !TRI->isVectorRegister(*MRI, MOReg))
335         continue;
336 
337       if (MO.readsReg()) {
338         const MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent();
339         // Make sure the value is defined before the LOOP block
340         if (DefMBB != Loop && !CandidateRegs.contains(MOReg)) {
341           // If the variable is used after the loop, the register coalescer will
342           // merge the newly created register and remove the phi node again.
343           // Just do nothing in that case.
344           LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(MOReg);
345           bool IsUsed = false;
346           for (auto *Succ : Loop->successors()) {
347             if (Succ != Loop && OldVarInfo.isLiveIn(*Succ, MOReg, *MRI)) {
348               IsUsed = true;
349               break;
350             }
351           }
352           if (!IsUsed) {
353             LLVM_DEBUG(dbgs() << "Found candidate reg: "
354                               << printReg(MOReg, TRI, 0, MRI) << '\n');
355             CandidateRegs.insert(MOReg);
356           } else {
357             LLVM_DEBUG(dbgs() << "Reg is used after loop, ignoring: "
358                               << printReg(MOReg, TRI, 0, MRI) << '\n');
359           }
360         }
361       }
362     }
363   }
364 }
365 
366 // Re-calculate the liveness of \p Reg in the THEN-region
367 void SIOptimizeVGPRLiveRange::updateLiveRangeInThenRegion(
368     Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow) const {
369 
370   SmallPtrSet<MachineBasicBlock *, 16> PHIIncoming;
371 
372   MachineBasicBlock *ThenEntry = nullptr;
373   for (auto *Succ : If->successors()) {
374     if (Succ != Flow) {
375       ThenEntry = Succ;
376       break;
377     }
378   }
379   assert(ThenEntry && "No successor in Then region?");
380 
381   LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
382   df_iterator_default_set<MachineBasicBlock *, 16> Visited;
383 
384   for (MachineBasicBlock *MBB : depth_first_ext(ThenEntry, Visited)) {
385     if (MBB == Flow)
386       break;
387 
388     // Clear Live bit, as we will recalculate afterwards
389     LLVM_DEBUG(dbgs() << "Clear AliveBlock " << printMBBReference(*MBB)
390                       << '\n');
391     OldVarInfo.AliveBlocks.reset(MBB->getNumber());
392   }
393 
394   // Get the blocks the Reg should be alive through
395   for (auto I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E;
396        ++I) {
397     auto *UseMI = I->getParent();
398     if (UseMI->isPHI() && I->readsReg()) {
399       if (Visited.contains(UseMI->getParent()))
400         PHIIncoming.insert(UseMI->getOperand(I.getOperandNo() + 1).getMBB());
401     }
402   }
403 
404   Visited.clear();
405 
406   for (MachineBasicBlock *MBB : depth_first_ext(ThenEntry, Visited)) {
407     if (MBB == Flow)
408       break;
409 
410     SmallVector<MachineInstr *> Uses;
411     // PHI instructions has been processed before.
412     findNonPHIUsesInBlock(Reg, MBB, Uses);
413 
414     if (Uses.size() == 1) {
415       LLVM_DEBUG(dbgs() << "Found one Non-PHI use in "
416                         << printMBBReference(*MBB) << '\n');
417       LV->HandleVirtRegUse(Reg, MBB, *(*Uses.begin()));
418     } else if (Uses.size() > 1) {
419       // Process the instructions in-order
420       LLVM_DEBUG(dbgs() << "Found " << Uses.size() << " Non-PHI uses in "
421                         << printMBBReference(*MBB) << '\n');
422       for (MachineInstr &MI : *MBB) {
423         if (llvm::is_contained(Uses, &MI))
424           LV->HandleVirtRegUse(Reg, MBB, MI);
425       }
426     }
427 
428     // Mark Reg alive through the block if this is a PHI incoming block
429     if (PHIIncoming.contains(MBB))
430       LV->MarkVirtRegAliveInBlock(OldVarInfo, MRI->getVRegDef(Reg)->getParent(),
431                                   MBB);
432   }
433 
434   // Set the isKilled flag if we get new Kills in the THEN region.
435   for (auto *MI : OldVarInfo.Kills) {
436     if (Visited.contains(MI->getParent()))
437       MI->addRegisterKilled(Reg, TRI);
438   }
439 }
440 
441 void SIOptimizeVGPRLiveRange::updateLiveRangeInElseRegion(
442     Register Reg, Register NewReg, MachineBasicBlock *Flow,
443     MachineBasicBlock *Endif,
444     SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const {
445   LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(NewReg);
446   LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
447 
448   // Transfer aliveBlocks from Reg to NewReg
449   for (auto *MBB : ElseBlocks) {
450     unsigned BBNum = MBB->getNumber();
451     if (OldVarInfo.AliveBlocks.test(BBNum)) {
452       NewVarInfo.AliveBlocks.set(BBNum);
453       LLVM_DEBUG(dbgs() << "Removing AliveBlock " << printMBBReference(*MBB)
454                         << '\n');
455       OldVarInfo.AliveBlocks.reset(BBNum);
456     }
457   }
458 
459   // Transfer the possible Kills in ElseBlocks from Reg to NewReg
460   auto I = OldVarInfo.Kills.begin();
461   while (I != OldVarInfo.Kills.end()) {
462     if (ElseBlocks.contains((*I)->getParent())) {
463       NewVarInfo.Kills.push_back(*I);
464       I = OldVarInfo.Kills.erase(I);
465     } else {
466       ++I;
467     }
468   }
469 }
470 
471 void SIOptimizeVGPRLiveRange::optimizeLiveRange(
472     Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow,
473     MachineBasicBlock *Endif,
474     SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const {
475   // Insert a new PHI, marking the value from the THEN region being
476   // undef.
477   LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n');
478   const auto *RC = MRI->getRegClass(Reg);
479   Register NewReg = MRI->createVirtualRegister(RC);
480   Register UndefReg = MRI->createVirtualRegister(RC);
481   MachineInstrBuilder PHI = BuildMI(*Flow, Flow->getFirstNonPHI(), DebugLoc(),
482                                     TII->get(TargetOpcode::PHI), NewReg);
483   for (auto *Pred : Flow->predecessors()) {
484     if (Pred == If)
485       PHI.addReg(Reg).addMBB(Pred);
486     else
487       PHI.addReg(UndefReg, RegState::Undef).addMBB(Pred);
488   }
489 
490   // Replace all uses in the ELSE region or the PHIs in ENDIF block
491   // Use early increment range because setReg() will update the linked list.
492   for (auto &O : make_early_inc_range(MRI->use_operands(Reg))) {
493     auto *UseMI = O.getParent();
494     auto *UseBlock = UseMI->getParent();
495     // Replace uses in Endif block
496     if (UseBlock == Endif) {
497       assert(UseMI->isPHI() && "Uses should be PHI in Endif block");
498       O.setReg(NewReg);
499       continue;
500     }
501 
502     // Replace uses in Else region
503     if (ElseBlocks.contains(UseBlock))
504       O.setReg(NewReg);
505   }
506 
507   // The optimized Reg is not alive through Flow blocks anymore.
508   LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
509   OldVarInfo.AliveBlocks.reset(Flow->getNumber());
510 
511   updateLiveRangeInElseRegion(Reg, NewReg, Flow, Endif, ElseBlocks);
512   updateLiveRangeInThenRegion(Reg, If, Flow);
513 }
514 
515 void SIOptimizeVGPRLiveRange::optimizeWaterfallLiveRange(
516     Register Reg, MachineBasicBlock *Loop) const {
517   // Insert a new PHI, marking the value from the last loop iteration undef.
518   LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n');
519   const auto *RC = MRI->getRegClass(Reg);
520   Register NewReg = MRI->createVirtualRegister(RC);
521   Register UndefReg = MRI->createVirtualRegister(RC);
522 
523   // Replace all uses in the LOOP region
524   // Use early increment range because setReg() will update the linked list.
525   for (auto &O : make_early_inc_range(MRI->use_operands(Reg))) {
526     auto *UseMI = O.getParent();
527     auto *UseBlock = UseMI->getParent();
528     // Replace uses in Loop block
529     if (UseBlock == Loop)
530       O.setReg(NewReg);
531   }
532 
533   MachineInstrBuilder PHI = BuildMI(*Loop, Loop->getFirstNonPHI(), DebugLoc(),
534                                     TII->get(TargetOpcode::PHI), NewReg);
535   for (auto *Pred : Loop->predecessors()) {
536     if (Pred == Loop)
537       PHI.addReg(UndefReg, RegState::Undef).addMBB(Pred);
538     else
539       PHI.addReg(Reg).addMBB(Pred);
540   }
541 
542   LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(NewReg);
543   LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
544 
545   // collectWaterfallCandidateRegisters only collects registers that are dead
546   // after the loop. So we know that the old reg is not live throughout the
547   // whole block anymore.
548   OldVarInfo.AliveBlocks.reset(Loop->getNumber());
549 
550   // Mark the last use as kill
551   for (auto &MI : reverse(Loop->instrs())) {
552     if (MI.readsRegister(NewReg, TRI)) {
553       MI.addRegisterKilled(NewReg, TRI);
554       NewVarInfo.Kills.push_back(&MI);
555       break;
556     }
557   }
558   assert(!NewVarInfo.Kills.empty() &&
559          "Failed to find last usage of register in loop");
560 }
561 
562 char SIOptimizeVGPRLiveRange::ID = 0;
563 
564 INITIALIZE_PASS_BEGIN(SIOptimizeVGPRLiveRange, DEBUG_TYPE,
565                       "SI Optimize VGPR LiveRange", false, false)
566 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
567 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
568 INITIALIZE_PASS_DEPENDENCY(LiveVariables)
569 INITIALIZE_PASS_END(SIOptimizeVGPRLiveRange, DEBUG_TYPE,
570                     "SI Optimize VGPR LiveRange", false, false)
571 
572 char &llvm::SIOptimizeVGPRLiveRangeID = SIOptimizeVGPRLiveRange::ID;
573 
574 FunctionPass *llvm::createSIOptimizeVGPRLiveRangePass() {
575   return new SIOptimizeVGPRLiveRange();
576 }
577 
578 bool SIOptimizeVGPRLiveRange::runOnMachineFunction(MachineFunction &MF) {
579 
580   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
581   TII = ST.getInstrInfo();
582   TRI = &TII->getRegisterInfo();
583   MDT = &getAnalysis<MachineDominatorTree>();
584   Loops = &getAnalysis<MachineLoopInfo>();
585   LV = &getAnalysis<LiveVariables>();
586   MRI = &MF.getRegInfo();
587 
588   if (skipFunction(MF.getFunction()))
589     return false;
590 
591   bool MadeChange = false;
592 
593   // TODO: we need to think about the order of visiting the blocks to get
594   // optimal result for nesting if-else cases.
595   for (MachineBasicBlock &MBB : MF) {
596     for (auto &MI : MBB.terminators()) {
597       // Detect the if-else blocks
598       if (MI.getOpcode() == AMDGPU::SI_IF) {
599         MachineBasicBlock *IfTarget = MI.getOperand(2).getMBB();
600         auto *Endif = getElseTarget(IfTarget);
601         if (!Endif)
602           continue;
603 
604         SmallSetVector<MachineBasicBlock *, 16> ElseBlocks;
605         SmallVector<Register> CandidateRegs;
606 
607         LLVM_DEBUG(dbgs() << "Checking IF-ELSE-ENDIF: "
608                           << printMBBReference(MBB) << ' '
609                           << printMBBReference(*IfTarget) << ' '
610                           << printMBBReference(*Endif) << '\n');
611 
612         // Collect all the blocks in the ELSE region
613         collectElseRegionBlocks(IfTarget, Endif, ElseBlocks);
614 
615         // Collect the registers can be optimized
616         collectCandidateRegisters(&MBB, IfTarget, Endif, ElseBlocks,
617                                   CandidateRegs);
618         MadeChange |= !CandidateRegs.empty();
619         // Now we are safe to optimize.
620         for (auto Reg : CandidateRegs)
621           optimizeLiveRange(Reg, &MBB, IfTarget, Endif, ElseBlocks);
622       } else if (MI.getOpcode() == AMDGPU::SI_WATERFALL_LOOP) {
623         LLVM_DEBUG(dbgs() << "Checking Waterfall loop: "
624                           << printMBBReference(MBB) << '\n');
625 
626         SmallSetVector<Register, 16> CandidateRegs;
627         collectWaterfallCandidateRegisters(&MBB, CandidateRegs);
628         MadeChange |= !CandidateRegs.empty();
629         // Now we are safe to optimize.
630         for (auto Reg : CandidateRegs)
631           optimizeWaterfallLiveRange(Reg, &MBB);
632       }
633     }
634   }
635 
636   return MadeChange;
637 }
638