1 //===--------------------- SIOptimizeVGPRLiveRange.cpp -------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This pass tries to remove unnecessary VGPR live ranges in divergent if-else
11 /// structures and waterfall loops.
12 ///
13 /// When we do structurization, we usually transform an if-else into two
14 /// successive if-then (with a flow block to do predicate inversion). Consider a
15 /// simple case after structurization: A divergent value %a was defined before
16 /// if-else and used in both THEN (use in THEN is optional) and ELSE part:
17 /// bb.if:
18 /// %a = ...
19 /// ...
20 /// bb.then:
21 /// ... = op %a
22 /// ... // %a can be dead here
23 /// bb.flow:
24 /// ...
25 /// bb.else:
26 /// ... = %a
27 /// ...
28 /// bb.endif
29 ///
30 /// As register allocator has no idea of the thread-control-flow, it will just
31 /// assume %a would be alive in the whole range of bb.then because of a later
32 /// use in bb.else. On AMDGPU architecture, the VGPR is accessed with respect
33 /// to exec mask. For this if-else case, the lanes active in bb.then will be
34 /// inactive in bb.else, and vice-versa. So we are safe to say that %a was dead
35 /// after the last use in bb.then until the end of the block. The reason is
36 /// the instructions in bb.then will only overwrite lanes that will never be
37 /// accessed in bb.else.
38 ///
39 /// This pass aims to tell register allocator that %a is in-fact dead,
40 /// through inserting a phi-node in bb.flow saying that %a is undef when coming
41 /// from bb.then, and then replace the uses in the bb.else with the result of
42 /// newly inserted phi.
43 ///
44 /// Two key conditions must be met to ensure correctness:
45 /// 1.) The def-point should be in the same loop-level as if-else-endif to make
46 /// sure the second loop iteration still get correct data.
47 /// 2.) There should be no further uses after the IF-ELSE region.
48 ///
49 ///
50 /// Waterfall loops get inserted around instructions that use divergent values
51 /// but can only be executed with a uniform value. For example an indirect call
52 /// to a divergent address:
53 /// bb.start:
54 /// %a = ...
55 /// %fun = ...
56 /// ...
57 /// bb.loop:
58 /// call %fun (%a)
59 /// ... // %a can be dead here
60 /// loop %bb.loop
61 ///
62 /// The loop block is executed multiple times, but it is run exactly once for
63 /// each active lane. Similar to the if-else case, the register allocator
64 /// assumes that %a is live throughout the loop as it is used again in the next
65 /// iteration. If %a is a VGPR that is unused after the loop, it does not need
66 /// to be live after its last use in the loop block. By inserting a phi-node at
67 /// the start of bb.loop that is undef when coming from bb.loop, the register
68 /// allocation knows that the value of %a does not need to be preserved through
69 /// iterations of the loop.
70 ///
71 //
72 //===----------------------------------------------------------------------===//
73
74 #include "AMDGPU.h"
75 #include "GCNSubtarget.h"
76 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
77 #include "SIMachineFunctionInfo.h"
78 #include "llvm/CodeGen/LiveVariables.h"
79 #include "llvm/CodeGen/MachineDominators.h"
80 #include "llvm/CodeGen/MachineLoopInfo.h"
81 #include "llvm/CodeGen/TargetRegisterInfo.h"
82 #include "llvm/InitializePasses.h"
83
84 using namespace llvm;
85
86 #define DEBUG_TYPE "si-opt-vgpr-liverange"
87
88 namespace {
89
90 class SIOptimizeVGPRLiveRange : public MachineFunctionPass {
91 private:
92 const SIRegisterInfo *TRI = nullptr;
93 const SIInstrInfo *TII = nullptr;
94 LiveVariables *LV = nullptr;
95 MachineDominatorTree *MDT = nullptr;
96 const MachineLoopInfo *Loops = nullptr;
97 MachineRegisterInfo *MRI = nullptr;
98
99 public:
100 static char ID;
101
102 MachineBasicBlock *getElseTarget(MachineBasicBlock *MBB) const;
103
104 void collectElseRegionBlocks(MachineBasicBlock *Flow,
105 MachineBasicBlock *Endif,
106 SmallSetVector<MachineBasicBlock *, 16> &) const;
107
108 void
109 collectCandidateRegisters(MachineBasicBlock *If, MachineBasicBlock *Flow,
110 MachineBasicBlock *Endif,
111 SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks,
112 SmallVectorImpl<Register> &CandidateRegs) const;
113
114 void collectWaterfallCandidateRegisters(
115 MachineBasicBlock *LoopHeader, MachineBasicBlock *LoopEnd,
116 SmallSetVector<Register, 16> &CandidateRegs,
117 SmallSetVector<MachineBasicBlock *, 2> &Blocks,
118 SmallVectorImpl<MachineInstr *> &Instructions) const;
119
120 void findNonPHIUsesInBlock(Register Reg, MachineBasicBlock *MBB,
121 SmallVectorImpl<MachineInstr *> &Uses) const;
122
123 void updateLiveRangeInThenRegion(Register Reg, MachineBasicBlock *If,
124 MachineBasicBlock *Flow) const;
125
126 void updateLiveRangeInElseRegion(
127 Register Reg, Register NewReg, MachineBasicBlock *Flow,
128 MachineBasicBlock *Endif,
129 SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const;
130
131 void
132 optimizeLiveRange(Register Reg, MachineBasicBlock *If,
133 MachineBasicBlock *Flow, MachineBasicBlock *Endif,
134 SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const;
135
136 void optimizeWaterfallLiveRange(
137 Register Reg, MachineBasicBlock *LoopHeader,
138 SmallSetVector<MachineBasicBlock *, 2> &LoopBlocks,
139 SmallVectorImpl<MachineInstr *> &Instructions) const;
140
SIOptimizeVGPRLiveRange()141 SIOptimizeVGPRLiveRange() : MachineFunctionPass(ID) {}
142
143 bool runOnMachineFunction(MachineFunction &MF) override;
144
getPassName() const145 StringRef getPassName() const override {
146 return "SI Optimize VGPR LiveRange";
147 }
148
getAnalysisUsage(AnalysisUsage & AU) const149 void getAnalysisUsage(AnalysisUsage &AU) const override {
150 AU.addRequired<LiveVariablesWrapperPass>();
151 AU.addRequired<MachineDominatorTreeWrapperPass>();
152 AU.addRequired<MachineLoopInfoWrapperPass>();
153 AU.addPreserved<LiveVariablesWrapperPass>();
154 AU.addPreserved<MachineDominatorTreeWrapperPass>();
155 AU.addPreserved<MachineLoopInfoWrapperPass>();
156 MachineFunctionPass::getAnalysisUsage(AU);
157 }
158
getRequiredProperties() const159 MachineFunctionProperties getRequiredProperties() const override {
160 return MachineFunctionProperties().set(
161 MachineFunctionProperties::Property::IsSSA);
162 }
163
getClearedProperties() const164 MachineFunctionProperties getClearedProperties() const override {
165 return MachineFunctionProperties().set(
166 MachineFunctionProperties::Property::NoPHIs);
167 }
168 };
169
170 } // end anonymous namespace
171
172 // Check whether the MBB is a else flow block and get the branching target which
173 // is the Endif block
174 MachineBasicBlock *
getElseTarget(MachineBasicBlock * MBB) const175 SIOptimizeVGPRLiveRange::getElseTarget(MachineBasicBlock *MBB) const {
176 for (auto &BR : MBB->terminators()) {
177 if (BR.getOpcode() == AMDGPU::SI_ELSE)
178 return BR.getOperand(2).getMBB();
179 }
180 return nullptr;
181 }
182
collectElseRegionBlocks(MachineBasicBlock * Flow,MachineBasicBlock * Endif,SmallSetVector<MachineBasicBlock *,16> & Blocks) const183 void SIOptimizeVGPRLiveRange::collectElseRegionBlocks(
184 MachineBasicBlock *Flow, MachineBasicBlock *Endif,
185 SmallSetVector<MachineBasicBlock *, 16> &Blocks) const {
186 assert(Flow != Endif);
187
188 MachineBasicBlock *MBB = Endif;
189 unsigned Cur = 0;
190 while (MBB) {
191 for (auto *Pred : MBB->predecessors()) {
192 if (Pred != Flow && !Blocks.contains(Pred))
193 Blocks.insert(Pred);
194 }
195
196 if (Cur < Blocks.size())
197 MBB = Blocks[Cur++];
198 else
199 MBB = nullptr;
200 }
201
202 LLVM_DEBUG({
203 dbgs() << "Found Else blocks: ";
204 for (auto *MBB : Blocks)
205 dbgs() << printMBBReference(*MBB) << ' ';
206 dbgs() << '\n';
207 });
208 }
209
210 /// Find the instructions(excluding phi) in \p MBB that uses the \p Reg.
findNonPHIUsesInBlock(Register Reg,MachineBasicBlock * MBB,SmallVectorImpl<MachineInstr * > & Uses) const211 void SIOptimizeVGPRLiveRange::findNonPHIUsesInBlock(
212 Register Reg, MachineBasicBlock *MBB,
213 SmallVectorImpl<MachineInstr *> &Uses) const {
214 for (auto &UseMI : MRI->use_nodbg_instructions(Reg)) {
215 if (UseMI.getParent() == MBB && !UseMI.isPHI())
216 Uses.push_back(&UseMI);
217 }
218 }
219
220 /// Collect the killed registers in the ELSE region which are not alive through
221 /// the whole THEN region.
collectCandidateRegisters(MachineBasicBlock * If,MachineBasicBlock * Flow,MachineBasicBlock * Endif,SmallSetVector<MachineBasicBlock *,16> & ElseBlocks,SmallVectorImpl<Register> & CandidateRegs) const222 void SIOptimizeVGPRLiveRange::collectCandidateRegisters(
223 MachineBasicBlock *If, MachineBasicBlock *Flow, MachineBasicBlock *Endif,
224 SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks,
225 SmallVectorImpl<Register> &CandidateRegs) const {
226
227 SmallSet<Register, 8> KillsInElse;
228
229 for (auto *Else : ElseBlocks) {
230 for (auto &MI : Else->instrs()) {
231 if (MI.isDebugInstr())
232 continue;
233
234 for (auto &MO : MI.operands()) {
235 if (!MO.isReg() || !MO.getReg() || MO.isDef())
236 continue;
237
238 Register MOReg = MO.getReg();
239 // We can only optimize AGPR/VGPR virtual register
240 if (MOReg.isPhysical() || !TRI->isVectorRegister(*MRI, MOReg))
241 continue;
242
243 if (MO.readsReg()) {
244 LiveVariables::VarInfo &VI = LV->getVarInfo(MOReg);
245 const MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent();
246 // Make sure two conditions are met:
247 // a.) the value is defined before/in the IF block
248 // b.) should be defined in the same loop-level.
249 if ((VI.AliveBlocks.test(If->getNumber()) || DefMBB == If) &&
250 Loops->getLoopFor(DefMBB) == Loops->getLoopFor(If)) {
251 // Check if the register is live into the endif block. If not,
252 // consider it killed in the else region.
253 LiveVariables::VarInfo &VI = LV->getVarInfo(MOReg);
254 if (!VI.isLiveIn(*Endif, MOReg, *MRI)) {
255 KillsInElse.insert(MOReg);
256 } else {
257 LLVM_DEBUG(dbgs() << "Excluding " << printReg(MOReg, TRI)
258 << " as Live in Endif\n");
259 }
260 }
261 }
262 }
263 }
264 }
265
266 // Check the phis in the Endif, looking for value coming from the ELSE
267 // region. Make sure the phi-use is the last use.
268 for (auto &MI : Endif->phis()) {
269 for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
270 auto &MO = MI.getOperand(Idx);
271 auto *Pred = MI.getOperand(Idx + 1).getMBB();
272 if (Pred == Flow)
273 continue;
274 assert(ElseBlocks.contains(Pred) && "Should be from Else region\n");
275
276 if (!MO.isReg() || !MO.getReg() || MO.isUndef())
277 continue;
278
279 Register Reg = MO.getReg();
280 if (Reg.isPhysical() || !TRI->isVectorRegister(*MRI, Reg))
281 continue;
282
283 LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
284
285 if (VI.isLiveIn(*Endif, Reg, *MRI)) {
286 LLVM_DEBUG(dbgs() << "Excluding " << printReg(Reg, TRI)
287 << " as Live in Endif\n");
288 continue;
289 }
290 // Make sure two conditions are met:
291 // a.) the value is defined before/in the IF block
292 // b.) should be defined in the same loop-level.
293 const MachineBasicBlock *DefMBB = MRI->getVRegDef(Reg)->getParent();
294 if ((VI.AliveBlocks.test(If->getNumber()) || DefMBB == If) &&
295 Loops->getLoopFor(DefMBB) == Loops->getLoopFor(If))
296 KillsInElse.insert(Reg);
297 }
298 }
299
300 auto IsLiveThroughThen = [&](Register Reg) {
301 for (auto I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E;
302 ++I) {
303 if (!I->readsReg())
304 continue;
305 auto *UseMI = I->getParent();
306 auto *UseMBB = UseMI->getParent();
307 if (UseMBB == Flow || UseMBB == Endif) {
308 if (!UseMI->isPHI())
309 return true;
310
311 auto *IncomingMBB = UseMI->getOperand(I.getOperandNo() + 1).getMBB();
312 // The register is live through the path If->Flow or Flow->Endif.
313 // we should not optimize for such cases.
314 if ((UseMBB == Flow && IncomingMBB != If) ||
315 (UseMBB == Endif && IncomingMBB == Flow))
316 return true;
317 }
318 }
319 return false;
320 };
321
322 for (auto Reg : KillsInElse) {
323 if (!IsLiveThroughThen(Reg))
324 CandidateRegs.push_back(Reg);
325 }
326 }
327
328 /// Collect the registers used in the waterfall loop block that are defined
329 /// before.
collectWaterfallCandidateRegisters(MachineBasicBlock * LoopHeader,MachineBasicBlock * LoopEnd,SmallSetVector<Register,16> & CandidateRegs,SmallSetVector<MachineBasicBlock *,2> & Blocks,SmallVectorImpl<MachineInstr * > & Instructions) const330 void SIOptimizeVGPRLiveRange::collectWaterfallCandidateRegisters(
331 MachineBasicBlock *LoopHeader, MachineBasicBlock *LoopEnd,
332 SmallSetVector<Register, 16> &CandidateRegs,
333 SmallSetVector<MachineBasicBlock *, 2> &Blocks,
334 SmallVectorImpl<MachineInstr *> &Instructions) const {
335
336 // Collect loop instructions, potentially spanning multiple blocks
337 auto *MBB = LoopHeader;
338 for (;;) {
339 Blocks.insert(MBB);
340 for (auto &MI : *MBB) {
341 if (MI.isDebugInstr())
342 continue;
343 Instructions.push_back(&MI);
344 }
345 if (MBB == LoopEnd)
346 break;
347
348 if ((MBB != LoopHeader && MBB->pred_size() != 1) ||
349 (MBB == LoopHeader && MBB->pred_size() != 2) || MBB->succ_size() != 1) {
350 LLVM_DEBUG(dbgs() << "Unexpected edges in CFG, ignoring loop\n");
351 return;
352 }
353
354 MBB = *MBB->succ_begin();
355 }
356
357 for (auto *I : Instructions) {
358 auto &MI = *I;
359
360 for (auto &MO : MI.all_uses()) {
361 if (!MO.getReg())
362 continue;
363
364 Register MOReg = MO.getReg();
365 // We can only optimize AGPR/VGPR virtual register
366 if (MOReg.isPhysical() || !TRI->isVectorRegister(*MRI, MOReg))
367 continue;
368
369 if (MO.readsReg()) {
370 MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent();
371 // Make sure the value is defined before the LOOP block
372 if (!Blocks.contains(DefMBB) && !CandidateRegs.contains(MOReg)) {
373 // If the variable is used after the loop, the register coalescer will
374 // merge the newly created register and remove the phi node again.
375 // Just do nothing in that case.
376 LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(MOReg);
377 bool IsUsed = false;
378 for (auto *Succ : LoopEnd->successors()) {
379 if (!Blocks.contains(Succ) &&
380 OldVarInfo.isLiveIn(*Succ, MOReg, *MRI)) {
381 IsUsed = true;
382 break;
383 }
384 }
385 if (!IsUsed) {
386 LLVM_DEBUG(dbgs() << "Found candidate reg: "
387 << printReg(MOReg, TRI, 0, MRI) << '\n');
388 CandidateRegs.insert(MOReg);
389 } else {
390 LLVM_DEBUG(dbgs() << "Reg is used after loop, ignoring: "
391 << printReg(MOReg, TRI, 0, MRI) << '\n');
392 }
393 }
394 }
395 }
396 }
397 }
398
399 // Re-calculate the liveness of \p Reg in the THEN-region
updateLiveRangeInThenRegion(Register Reg,MachineBasicBlock * If,MachineBasicBlock * Flow) const400 void SIOptimizeVGPRLiveRange::updateLiveRangeInThenRegion(
401 Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow) const {
402 SetVector<MachineBasicBlock *> Blocks;
403 SmallVector<MachineBasicBlock *> WorkList({If});
404
405 // Collect all successors until we see the flow block, where we should
406 // reconverge.
407 while (!WorkList.empty()) {
408 auto *MBB = WorkList.pop_back_val();
409 for (auto *Succ : MBB->successors()) {
410 if (Succ != Flow && !Blocks.contains(Succ)) {
411 WorkList.push_back(Succ);
412 Blocks.insert(Succ);
413 }
414 }
415 }
416
417 LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
418 for (MachineBasicBlock *MBB : Blocks) {
419 // Clear Live bit, as we will recalculate afterwards
420 LLVM_DEBUG(dbgs() << "Clear AliveBlock " << printMBBReference(*MBB)
421 << '\n');
422 OldVarInfo.AliveBlocks.reset(MBB->getNumber());
423 }
424
425 SmallPtrSet<MachineBasicBlock *, 4> PHIIncoming;
426
427 // Get the blocks the Reg should be alive through
428 for (auto I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E;
429 ++I) {
430 auto *UseMI = I->getParent();
431 if (UseMI->isPHI() && I->readsReg()) {
432 if (Blocks.contains(UseMI->getParent()))
433 PHIIncoming.insert(UseMI->getOperand(I.getOperandNo() + 1).getMBB());
434 }
435 }
436
437 for (MachineBasicBlock *MBB : Blocks) {
438 SmallVector<MachineInstr *> Uses;
439 // PHI instructions has been processed before.
440 findNonPHIUsesInBlock(Reg, MBB, Uses);
441
442 if (Uses.size() == 1) {
443 LLVM_DEBUG(dbgs() << "Found one Non-PHI use in "
444 << printMBBReference(*MBB) << '\n');
445 LV->HandleVirtRegUse(Reg, MBB, *(*Uses.begin()));
446 } else if (Uses.size() > 1) {
447 // Process the instructions in-order
448 LLVM_DEBUG(dbgs() << "Found " << Uses.size() << " Non-PHI uses in "
449 << printMBBReference(*MBB) << '\n');
450 for (MachineInstr &MI : *MBB) {
451 if (llvm::is_contained(Uses, &MI))
452 LV->HandleVirtRegUse(Reg, MBB, MI);
453 }
454 }
455
456 // Mark Reg alive through the block if this is a PHI incoming block
457 if (PHIIncoming.contains(MBB))
458 LV->MarkVirtRegAliveInBlock(OldVarInfo, MRI->getVRegDef(Reg)->getParent(),
459 MBB);
460 }
461
462 // Set the isKilled flag if we get new Kills in the THEN region.
463 for (auto *MI : OldVarInfo.Kills) {
464 if (Blocks.contains(MI->getParent()))
465 MI->addRegisterKilled(Reg, TRI);
466 }
467 }
468
updateLiveRangeInElseRegion(Register Reg,Register NewReg,MachineBasicBlock * Flow,MachineBasicBlock * Endif,SmallSetVector<MachineBasicBlock *,16> & ElseBlocks) const469 void SIOptimizeVGPRLiveRange::updateLiveRangeInElseRegion(
470 Register Reg, Register NewReg, MachineBasicBlock *Flow,
471 MachineBasicBlock *Endif,
472 SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const {
473 LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(NewReg);
474 LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
475
476 // Transfer aliveBlocks from Reg to NewReg
477 for (auto *MBB : ElseBlocks) {
478 unsigned BBNum = MBB->getNumber();
479 if (OldVarInfo.AliveBlocks.test(BBNum)) {
480 NewVarInfo.AliveBlocks.set(BBNum);
481 LLVM_DEBUG(dbgs() << "Removing AliveBlock " << printMBBReference(*MBB)
482 << '\n');
483 OldVarInfo.AliveBlocks.reset(BBNum);
484 }
485 }
486
487 // Transfer the possible Kills in ElseBlocks from Reg to NewReg
488 auto I = OldVarInfo.Kills.begin();
489 while (I != OldVarInfo.Kills.end()) {
490 if (ElseBlocks.contains((*I)->getParent())) {
491 NewVarInfo.Kills.push_back(*I);
492 I = OldVarInfo.Kills.erase(I);
493 } else {
494 ++I;
495 }
496 }
497 }
498
optimizeLiveRange(Register Reg,MachineBasicBlock * If,MachineBasicBlock * Flow,MachineBasicBlock * Endif,SmallSetVector<MachineBasicBlock *,16> & ElseBlocks) const499 void SIOptimizeVGPRLiveRange::optimizeLiveRange(
500 Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow,
501 MachineBasicBlock *Endif,
502 SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const {
503 // Insert a new PHI, marking the value from the THEN region being
504 // undef.
505 LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n');
506 const auto *RC = MRI->getRegClass(Reg);
507 Register NewReg = MRI->createVirtualRegister(RC);
508 Register UndefReg = MRI->createVirtualRegister(RC);
509 MachineInstrBuilder PHI = BuildMI(*Flow, Flow->getFirstNonPHI(), DebugLoc(),
510 TII->get(TargetOpcode::PHI), NewReg);
511 for (auto *Pred : Flow->predecessors()) {
512 if (Pred == If)
513 PHI.addReg(Reg).addMBB(Pred);
514 else
515 PHI.addReg(UndefReg, RegState::Undef).addMBB(Pred);
516 }
517
518 // Replace all uses in the ELSE region or the PHIs in ENDIF block
519 // Use early increment range because setReg() will update the linked list.
520 for (auto &O : make_early_inc_range(MRI->use_operands(Reg))) {
521 auto *UseMI = O.getParent();
522 auto *UseBlock = UseMI->getParent();
523 // Replace uses in Endif block
524 if (UseBlock == Endif) {
525 if (UseMI->isPHI())
526 O.setReg(NewReg);
527 else if (UseMI->isDebugInstr())
528 continue;
529 else {
530 // DetectDeadLanes may mark register uses as undef without removing
531 // them, in which case a non-phi instruction using the original register
532 // may exist in the Endif block even though the register is not live
533 // into it.
534 assert(!O.readsReg());
535 }
536 continue;
537 }
538
539 // Replace uses in Else region
540 if (ElseBlocks.contains(UseBlock))
541 O.setReg(NewReg);
542 }
543
544 // The optimized Reg is not alive through Flow blocks anymore.
545 LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
546 OldVarInfo.AliveBlocks.reset(Flow->getNumber());
547
548 updateLiveRangeInElseRegion(Reg, NewReg, Flow, Endif, ElseBlocks);
549 updateLiveRangeInThenRegion(Reg, If, Flow);
550 }
551
optimizeWaterfallLiveRange(Register Reg,MachineBasicBlock * LoopHeader,SmallSetVector<MachineBasicBlock *,2> & Blocks,SmallVectorImpl<MachineInstr * > & Instructions) const552 void SIOptimizeVGPRLiveRange::optimizeWaterfallLiveRange(
553 Register Reg, MachineBasicBlock *LoopHeader,
554 SmallSetVector<MachineBasicBlock *, 2> &Blocks,
555 SmallVectorImpl<MachineInstr *> &Instructions) const {
556 // Insert a new PHI, marking the value from the last loop iteration undef.
557 LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n');
558 const auto *RC = MRI->getRegClass(Reg);
559 Register NewReg = MRI->createVirtualRegister(RC);
560 Register UndefReg = MRI->createVirtualRegister(RC);
561
562 // Replace all uses in the LOOP region
563 // Use early increment range because setReg() will update the linked list.
564 for (auto &O : make_early_inc_range(MRI->use_operands(Reg))) {
565 auto *UseMI = O.getParent();
566 auto *UseBlock = UseMI->getParent();
567 // Replace uses in Loop blocks
568 if (Blocks.contains(UseBlock))
569 O.setReg(NewReg);
570 }
571
572 MachineInstrBuilder PHI =
573 BuildMI(*LoopHeader, LoopHeader->getFirstNonPHI(), DebugLoc(),
574 TII->get(TargetOpcode::PHI), NewReg);
575 for (auto *Pred : LoopHeader->predecessors()) {
576 if (Blocks.contains(Pred))
577 PHI.addReg(UndefReg, RegState::Undef).addMBB(Pred);
578 else
579 PHI.addReg(Reg).addMBB(Pred);
580 }
581
582 LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(NewReg);
583 LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
584
585 // Find last use and mark as kill
586 MachineInstr *Kill = nullptr;
587 for (auto *MI : reverse(Instructions)) {
588 if (MI->readsRegister(NewReg, TRI)) {
589 MI->addRegisterKilled(NewReg, TRI);
590 NewVarInfo.Kills.push_back(MI);
591 Kill = MI;
592 break;
593 }
594 }
595 assert(Kill && "Failed to find last usage of register in loop");
596
597 MachineBasicBlock *KillBlock = Kill->getParent();
598 bool PostKillBlock = false;
599 for (auto *Block : Blocks) {
600 auto BBNum = Block->getNumber();
601
602 // collectWaterfallCandidateRegisters only collects registers that are dead
603 // after the loop. So we know that the old reg is no longer live throughout
604 // the waterfall loop.
605 OldVarInfo.AliveBlocks.reset(BBNum);
606
607 // The new register is live up to (and including) the block that kills it.
608 PostKillBlock |= (Block == KillBlock);
609 if (PostKillBlock) {
610 NewVarInfo.AliveBlocks.reset(BBNum);
611 } else if (Block != LoopHeader) {
612 NewVarInfo.AliveBlocks.set(BBNum);
613 }
614 }
615 }
616
617 char SIOptimizeVGPRLiveRange::ID = 0;
618
619 INITIALIZE_PASS_BEGIN(SIOptimizeVGPRLiveRange, DEBUG_TYPE,
620 "SI Optimize VGPR LiveRange", false, false)
621 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
622 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
623 INITIALIZE_PASS_DEPENDENCY(LiveVariablesWrapperPass)
624 INITIALIZE_PASS_END(SIOptimizeVGPRLiveRange, DEBUG_TYPE,
625 "SI Optimize VGPR LiveRange", false, false)
626
627 char &llvm::SIOptimizeVGPRLiveRangeID = SIOptimizeVGPRLiveRange::ID;
628
createSIOptimizeVGPRLiveRangePass()629 FunctionPass *llvm::createSIOptimizeVGPRLiveRangePass() {
630 return new SIOptimizeVGPRLiveRange();
631 }
632
runOnMachineFunction(MachineFunction & MF)633 bool SIOptimizeVGPRLiveRange::runOnMachineFunction(MachineFunction &MF) {
634
635 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
636 TII = ST.getInstrInfo();
637 TRI = &TII->getRegisterInfo();
638 MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
639 Loops = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
640 LV = &getAnalysis<LiveVariablesWrapperPass>().getLV();
641 MRI = &MF.getRegInfo();
642
643 if (skipFunction(MF.getFunction()))
644 return false;
645
646 bool MadeChange = false;
647
648 // TODO: we need to think about the order of visiting the blocks to get
649 // optimal result for nesting if-else cases.
650 for (MachineBasicBlock &MBB : MF) {
651 for (auto &MI : MBB.terminators()) {
652 // Detect the if-else blocks
653 if (MI.getOpcode() == AMDGPU::SI_IF) {
654 MachineBasicBlock *IfTarget = MI.getOperand(2).getMBB();
655 auto *Endif = getElseTarget(IfTarget);
656 if (!Endif)
657 continue;
658
659 // Skip unexpected control flow.
660 if (!MDT->dominates(&MBB, IfTarget) || !MDT->dominates(IfTarget, Endif))
661 continue;
662
663 SmallSetVector<MachineBasicBlock *, 16> ElseBlocks;
664 SmallVector<Register> CandidateRegs;
665
666 LLVM_DEBUG(dbgs() << "Checking IF-ELSE-ENDIF: "
667 << printMBBReference(MBB) << ' '
668 << printMBBReference(*IfTarget) << ' '
669 << printMBBReference(*Endif) << '\n');
670
671 // Collect all the blocks in the ELSE region
672 collectElseRegionBlocks(IfTarget, Endif, ElseBlocks);
673
674 // Collect the registers can be optimized
675 collectCandidateRegisters(&MBB, IfTarget, Endif, ElseBlocks,
676 CandidateRegs);
677 MadeChange |= !CandidateRegs.empty();
678 // Now we are safe to optimize.
679 for (auto Reg : CandidateRegs)
680 optimizeLiveRange(Reg, &MBB, IfTarget, Endif, ElseBlocks);
681 } else if (MI.getOpcode() == AMDGPU::SI_WATERFALL_LOOP) {
682 auto *LoopHeader = MI.getOperand(0).getMBB();
683 auto *LoopEnd = &MBB;
684
685 LLVM_DEBUG(dbgs() << "Checking Waterfall loop: "
686 << printMBBReference(*LoopHeader) << '\n');
687
688 SmallSetVector<Register, 16> CandidateRegs;
689 SmallVector<MachineInstr *, 16> Instructions;
690 SmallSetVector<MachineBasicBlock *, 2> Blocks;
691
692 collectWaterfallCandidateRegisters(LoopHeader, LoopEnd, CandidateRegs,
693 Blocks, Instructions);
694 MadeChange |= !CandidateRegs.empty();
695 // Now we are safe to optimize.
696 for (auto Reg : CandidateRegs)
697 optimizeWaterfallLiveRange(Reg, LoopHeader, Blocks, Instructions);
698 }
699 }
700 }
701
702 return MadeChange;
703 }
704