1fe6060f1SDimitry Andric //===--------------------- SIOptimizeVGPRLiveRange.cpp -------------------===// 2fe6060f1SDimitry Andric // 3fe6060f1SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4fe6060f1SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5fe6060f1SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6fe6060f1SDimitry Andric // 7fe6060f1SDimitry Andric //===----------------------------------------------------------------------===// 8fe6060f1SDimitry Andric // 9fe6060f1SDimitry Andric /// \file 10fe6060f1SDimitry Andric /// This pass tries to remove unnecessary VGPR live ranges in divergent if-else 11fe6060f1SDimitry Andric /// structures and waterfall loops. 12fe6060f1SDimitry Andric /// 13fe6060f1SDimitry Andric /// When we do structurization, we usually transform an if-else into two 14349cc55cSDimitry Andric /// successive if-then (with a flow block to do predicate inversion). Consider a 15fe6060f1SDimitry Andric /// simple case after structurization: A divergent value %a was defined before 16fe6060f1SDimitry Andric /// if-else and used in both THEN (use in THEN is optional) and ELSE part: 17fe6060f1SDimitry Andric /// bb.if: 18fe6060f1SDimitry Andric /// %a = ... 19fe6060f1SDimitry Andric /// ... 20fe6060f1SDimitry Andric /// bb.then: 21fe6060f1SDimitry Andric /// ... = op %a 22fe6060f1SDimitry Andric /// ... // %a can be dead here 23fe6060f1SDimitry Andric /// bb.flow: 24fe6060f1SDimitry Andric /// ... 25fe6060f1SDimitry Andric /// bb.else: 26fe6060f1SDimitry Andric /// ... = %a 27fe6060f1SDimitry Andric /// ... 28fe6060f1SDimitry Andric /// bb.endif 29fe6060f1SDimitry Andric /// 30fe6060f1SDimitry Andric /// As register allocator has no idea of the thread-control-flow, it will just 31fe6060f1SDimitry Andric /// assume %a would be alive in the whole range of bb.then because of a later 32fe6060f1SDimitry Andric /// use in bb.else. On AMDGPU architecture, the VGPR is accessed with respect 33fe6060f1SDimitry Andric /// to exec mask. For this if-else case, the lanes active in bb.then will be 34fe6060f1SDimitry Andric /// inactive in bb.else, and vice-versa. So we are safe to say that %a was dead 35fe6060f1SDimitry Andric /// after the last use in bb.then until the end of the block. The reason is 36fe6060f1SDimitry Andric /// the instructions in bb.then will only overwrite lanes that will never be 37fe6060f1SDimitry Andric /// accessed in bb.else. 38fe6060f1SDimitry Andric /// 39fe6060f1SDimitry Andric /// This pass aims to to tell register allocator that %a is in-fact dead, 40fe6060f1SDimitry Andric /// through inserting a phi-node in bb.flow saying that %a is undef when coming 41fe6060f1SDimitry Andric /// from bb.then, and then replace the uses in the bb.else with the result of 42fe6060f1SDimitry Andric /// newly inserted phi. 43fe6060f1SDimitry Andric /// 44fe6060f1SDimitry Andric /// Two key conditions must be met to ensure correctness: 45fe6060f1SDimitry Andric /// 1.) The def-point should be in the same loop-level as if-else-endif to make 46fe6060f1SDimitry Andric /// sure the second loop iteration still get correct data. 47fe6060f1SDimitry Andric /// 2.) There should be no further uses after the IF-ELSE region. 48fe6060f1SDimitry Andric /// 49fe6060f1SDimitry Andric /// 50fe6060f1SDimitry Andric /// Waterfall loops get inserted around instructions that use divergent values 51fe6060f1SDimitry Andric /// but can only be executed with a uniform value. For example an indirect call 52fe6060f1SDimitry Andric /// to a divergent address: 53fe6060f1SDimitry Andric /// bb.start: 54fe6060f1SDimitry Andric /// %a = ... 55fe6060f1SDimitry Andric /// %fun = ... 56fe6060f1SDimitry Andric /// ... 57fe6060f1SDimitry Andric /// bb.loop: 58fe6060f1SDimitry Andric /// call %fun (%a) 59fe6060f1SDimitry Andric /// ... // %a can be dead here 60fe6060f1SDimitry Andric /// loop %bb.loop 61fe6060f1SDimitry Andric /// 62fe6060f1SDimitry Andric /// The loop block is executed multiple times, but it is run exactly once for 63fe6060f1SDimitry Andric /// each active lane. Similar to the if-else case, the register allocator 64fe6060f1SDimitry Andric /// assumes that %a is live throughout the loop as it is used again in the next 65fe6060f1SDimitry Andric /// iteration. If %a is a VGPR that is unused after the loop, it does not need 66fe6060f1SDimitry Andric /// to be live after its last use in the loop block. By inserting a phi-node at 67fe6060f1SDimitry Andric /// the start of bb.loop that is undef when coming from bb.loop, the register 68fe6060f1SDimitry Andric /// allocation knows that the value of %a does not need to be preserved through 69fe6060f1SDimitry Andric /// iterations of the loop. 70fe6060f1SDimitry Andric /// 71fe6060f1SDimitry Andric // 72fe6060f1SDimitry Andric //===----------------------------------------------------------------------===// 73fe6060f1SDimitry Andric 74fe6060f1SDimitry Andric #include "AMDGPU.h" 75fe6060f1SDimitry Andric #include "GCNSubtarget.h" 76fe6060f1SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 77fe6060f1SDimitry Andric #include "SIMachineFunctionInfo.h" 78fe6060f1SDimitry Andric #include "llvm/CodeGen/LiveVariables.h" 79fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineDominators.h" 80fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineLoopInfo.h" 81fe6060f1SDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h" 82fe6060f1SDimitry Andric #include "llvm/InitializePasses.h" 83fe6060f1SDimitry Andric 84fe6060f1SDimitry Andric using namespace llvm; 85fe6060f1SDimitry Andric 86fe6060f1SDimitry Andric #define DEBUG_TYPE "si-opt-vgpr-liverange" 87fe6060f1SDimitry Andric 88fe6060f1SDimitry Andric namespace { 89fe6060f1SDimitry Andric 90fe6060f1SDimitry Andric class SIOptimizeVGPRLiveRange : public MachineFunctionPass { 91fe6060f1SDimitry Andric private: 92fe6060f1SDimitry Andric const SIRegisterInfo *TRI = nullptr; 93fe6060f1SDimitry Andric const SIInstrInfo *TII = nullptr; 94fe6060f1SDimitry Andric LiveVariables *LV = nullptr; 95fe6060f1SDimitry Andric MachineDominatorTree *MDT = nullptr; 96fe6060f1SDimitry Andric const MachineLoopInfo *Loops = nullptr; 97fe6060f1SDimitry Andric MachineRegisterInfo *MRI = nullptr; 98fe6060f1SDimitry Andric 99fe6060f1SDimitry Andric public: 100fe6060f1SDimitry Andric static char ID; 101fe6060f1SDimitry Andric 102fe6060f1SDimitry Andric MachineBasicBlock *getElseTarget(MachineBasicBlock *MBB) const; 103fe6060f1SDimitry Andric 104fe6060f1SDimitry Andric void collectElseRegionBlocks(MachineBasicBlock *Flow, 105fe6060f1SDimitry Andric MachineBasicBlock *Endif, 106fe6060f1SDimitry Andric SmallSetVector<MachineBasicBlock *, 16> &) const; 107fe6060f1SDimitry Andric 108fe6060f1SDimitry Andric void 109fe6060f1SDimitry Andric collectCandidateRegisters(MachineBasicBlock *If, MachineBasicBlock *Flow, 110fe6060f1SDimitry Andric MachineBasicBlock *Endif, 111fe6060f1SDimitry Andric SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks, 112fe6060f1SDimitry Andric SmallVectorImpl<Register> &CandidateRegs) const; 113fe6060f1SDimitry Andric 114fe6060f1SDimitry Andric void collectWaterfallCandidateRegisters( 115fe6060f1SDimitry Andric MachineBasicBlock *Loop, 116fe6060f1SDimitry Andric SmallSetVector<Register, 16> &CandidateRegs) const; 117fe6060f1SDimitry Andric 118fe6060f1SDimitry Andric void findNonPHIUsesInBlock(Register Reg, MachineBasicBlock *MBB, 119fe6060f1SDimitry Andric SmallVectorImpl<MachineInstr *> &Uses) const; 120fe6060f1SDimitry Andric 121fe6060f1SDimitry Andric void updateLiveRangeInThenRegion(Register Reg, MachineBasicBlock *If, 122fe6060f1SDimitry Andric MachineBasicBlock *Flow) const; 123fe6060f1SDimitry Andric 124fe6060f1SDimitry Andric void updateLiveRangeInElseRegion( 125fe6060f1SDimitry Andric Register Reg, Register NewReg, MachineBasicBlock *Flow, 126fe6060f1SDimitry Andric MachineBasicBlock *Endif, 127fe6060f1SDimitry Andric SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const; 128fe6060f1SDimitry Andric 129fe6060f1SDimitry Andric void 130fe6060f1SDimitry Andric optimizeLiveRange(Register Reg, MachineBasicBlock *If, 131fe6060f1SDimitry Andric MachineBasicBlock *Flow, MachineBasicBlock *Endif, 132fe6060f1SDimitry Andric SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const; 133fe6060f1SDimitry Andric 134fe6060f1SDimitry Andric void optimizeWaterfallLiveRange(Register Reg, MachineBasicBlock *If) const; 135fe6060f1SDimitry Andric 136fe6060f1SDimitry Andric SIOptimizeVGPRLiveRange() : MachineFunctionPass(ID) {} 137fe6060f1SDimitry Andric 138fe6060f1SDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override; 139fe6060f1SDimitry Andric 140fe6060f1SDimitry Andric StringRef getPassName() const override { 141fe6060f1SDimitry Andric return "SI Optimize VGPR LiveRange"; 142fe6060f1SDimitry Andric } 143fe6060f1SDimitry Andric 144fe6060f1SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 145fe6060f1SDimitry Andric AU.addRequired<LiveVariables>(); 146fe6060f1SDimitry Andric AU.addRequired<MachineDominatorTree>(); 147fe6060f1SDimitry Andric AU.addRequired<MachineLoopInfo>(); 148fe6060f1SDimitry Andric AU.addPreserved<LiveVariables>(); 149fe6060f1SDimitry Andric AU.addPreserved<MachineDominatorTree>(); 150fe6060f1SDimitry Andric AU.addPreserved<MachineLoopInfo>(); 151fe6060f1SDimitry Andric MachineFunctionPass::getAnalysisUsage(AU); 152fe6060f1SDimitry Andric } 153fe6060f1SDimitry Andric 154fe6060f1SDimitry Andric MachineFunctionProperties getRequiredProperties() const override { 155fe6060f1SDimitry Andric return MachineFunctionProperties().set( 156fe6060f1SDimitry Andric MachineFunctionProperties::Property::IsSSA); 157fe6060f1SDimitry Andric } 158*04eeddc0SDimitry Andric 159*04eeddc0SDimitry Andric MachineFunctionProperties getClearedProperties() const override { 160*04eeddc0SDimitry Andric return MachineFunctionProperties().set( 161*04eeddc0SDimitry Andric MachineFunctionProperties::Property::NoPHIs); 162*04eeddc0SDimitry Andric } 163fe6060f1SDimitry Andric }; 164fe6060f1SDimitry Andric 165fe6060f1SDimitry Andric } // end anonymous namespace 166fe6060f1SDimitry Andric 167fe6060f1SDimitry Andric // Check whether the MBB is a else flow block and get the branching target which 168fe6060f1SDimitry Andric // is the Endif block 169fe6060f1SDimitry Andric MachineBasicBlock * 170fe6060f1SDimitry Andric SIOptimizeVGPRLiveRange::getElseTarget(MachineBasicBlock *MBB) const { 171fe6060f1SDimitry Andric for (auto &BR : MBB->terminators()) { 172fe6060f1SDimitry Andric if (BR.getOpcode() == AMDGPU::SI_ELSE) 173fe6060f1SDimitry Andric return BR.getOperand(2).getMBB(); 174fe6060f1SDimitry Andric } 175fe6060f1SDimitry Andric return nullptr; 176fe6060f1SDimitry Andric } 177fe6060f1SDimitry Andric 178fe6060f1SDimitry Andric void SIOptimizeVGPRLiveRange::collectElseRegionBlocks( 179fe6060f1SDimitry Andric MachineBasicBlock *Flow, MachineBasicBlock *Endif, 180fe6060f1SDimitry Andric SmallSetVector<MachineBasicBlock *, 16> &Blocks) const { 181fe6060f1SDimitry Andric assert(Flow != Endif); 182fe6060f1SDimitry Andric 183fe6060f1SDimitry Andric MachineBasicBlock *MBB = Endif; 184fe6060f1SDimitry Andric unsigned Cur = 0; 185fe6060f1SDimitry Andric while (MBB) { 186fe6060f1SDimitry Andric for (auto *Pred : MBB->predecessors()) { 187fe6060f1SDimitry Andric if (Pred != Flow && !Blocks.contains(Pred)) 188fe6060f1SDimitry Andric Blocks.insert(Pred); 189fe6060f1SDimitry Andric } 190fe6060f1SDimitry Andric 191fe6060f1SDimitry Andric if (Cur < Blocks.size()) 192fe6060f1SDimitry Andric MBB = Blocks[Cur++]; 193fe6060f1SDimitry Andric else 194fe6060f1SDimitry Andric MBB = nullptr; 195fe6060f1SDimitry Andric } 196fe6060f1SDimitry Andric 197fe6060f1SDimitry Andric LLVM_DEBUG({ 198fe6060f1SDimitry Andric dbgs() << "Found Else blocks: "; 199fe6060f1SDimitry Andric for (auto *MBB : Blocks) 200fe6060f1SDimitry Andric dbgs() << printMBBReference(*MBB) << ' '; 201fe6060f1SDimitry Andric dbgs() << '\n'; 202fe6060f1SDimitry Andric }); 203fe6060f1SDimitry Andric } 204fe6060f1SDimitry Andric 205fe6060f1SDimitry Andric /// Find the instructions(excluding phi) in \p MBB that uses the \p Reg. 206fe6060f1SDimitry Andric void SIOptimizeVGPRLiveRange::findNonPHIUsesInBlock( 207fe6060f1SDimitry Andric Register Reg, MachineBasicBlock *MBB, 208fe6060f1SDimitry Andric SmallVectorImpl<MachineInstr *> &Uses) const { 209fe6060f1SDimitry Andric for (auto &UseMI : MRI->use_nodbg_instructions(Reg)) { 210fe6060f1SDimitry Andric if (UseMI.getParent() == MBB && !UseMI.isPHI()) 211fe6060f1SDimitry Andric Uses.push_back(&UseMI); 212fe6060f1SDimitry Andric } 213fe6060f1SDimitry Andric } 214fe6060f1SDimitry Andric 215fe6060f1SDimitry Andric /// Collect the killed registers in the ELSE region which are not alive through 216fe6060f1SDimitry Andric /// the whole THEN region. 217fe6060f1SDimitry Andric void SIOptimizeVGPRLiveRange::collectCandidateRegisters( 218fe6060f1SDimitry Andric MachineBasicBlock *If, MachineBasicBlock *Flow, MachineBasicBlock *Endif, 219fe6060f1SDimitry Andric SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks, 220fe6060f1SDimitry Andric SmallVectorImpl<Register> &CandidateRegs) const { 221fe6060f1SDimitry Andric 222fe6060f1SDimitry Andric SmallSet<Register, 8> KillsInElse; 223fe6060f1SDimitry Andric 224fe6060f1SDimitry Andric for (auto *Else : ElseBlocks) { 225fe6060f1SDimitry Andric for (auto &MI : Else->instrs()) { 226fe6060f1SDimitry Andric if (MI.isDebugInstr()) 227fe6060f1SDimitry Andric continue; 228fe6060f1SDimitry Andric 229fe6060f1SDimitry Andric for (auto &MO : MI.operands()) { 230fe6060f1SDimitry Andric if (!MO.isReg() || !MO.getReg() || MO.isDef()) 231fe6060f1SDimitry Andric continue; 232fe6060f1SDimitry Andric 233fe6060f1SDimitry Andric Register MOReg = MO.getReg(); 234fe6060f1SDimitry Andric // We can only optimize AGPR/VGPR virtual register 235fe6060f1SDimitry Andric if (MOReg.isPhysical() || !TRI->isVectorRegister(*MRI, MOReg)) 236fe6060f1SDimitry Andric continue; 237fe6060f1SDimitry Andric 238fe6060f1SDimitry Andric if (MO.readsReg()) { 239fe6060f1SDimitry Andric LiveVariables::VarInfo &VI = LV->getVarInfo(MOReg); 240fe6060f1SDimitry Andric const MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent(); 241fe6060f1SDimitry Andric // Make sure two conditions are met: 242fe6060f1SDimitry Andric // a.) the value is defined before/in the IF block 243fe6060f1SDimitry Andric // b.) should be defined in the same loop-level. 244fe6060f1SDimitry Andric if ((VI.AliveBlocks.test(If->getNumber()) || DefMBB == If) && 245fe6060f1SDimitry Andric Loops->getLoopFor(DefMBB) == Loops->getLoopFor(If)) { 246fe6060f1SDimitry Andric // Check if the register is live into the endif block. If not, 247fe6060f1SDimitry Andric // consider it killed in the else region. 248fe6060f1SDimitry Andric LiveVariables::VarInfo &VI = LV->getVarInfo(MOReg); 249fe6060f1SDimitry Andric if (!VI.isLiveIn(*Endif, MOReg, *MRI)) { 250fe6060f1SDimitry Andric KillsInElse.insert(MOReg); 251fe6060f1SDimitry Andric } else { 252fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Excluding " << printReg(MOReg, TRI) 253fe6060f1SDimitry Andric << " as Live in Endif\n"); 254fe6060f1SDimitry Andric } 255fe6060f1SDimitry Andric } 256fe6060f1SDimitry Andric } 257fe6060f1SDimitry Andric } 258fe6060f1SDimitry Andric } 259fe6060f1SDimitry Andric } 260fe6060f1SDimitry Andric 261fe6060f1SDimitry Andric // Check the phis in the Endif, looking for value coming from the ELSE 262fe6060f1SDimitry Andric // region. Make sure the phi-use is the last use. 263fe6060f1SDimitry Andric for (auto &MI : Endif->phis()) { 264fe6060f1SDimitry Andric for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) { 265fe6060f1SDimitry Andric auto &MO = MI.getOperand(Idx); 266fe6060f1SDimitry Andric auto *Pred = MI.getOperand(Idx + 1).getMBB(); 267fe6060f1SDimitry Andric if (Pred == Flow) 268fe6060f1SDimitry Andric continue; 269fe6060f1SDimitry Andric assert(ElseBlocks.contains(Pred) && "Should be from Else region\n"); 270fe6060f1SDimitry Andric 271fe6060f1SDimitry Andric if (!MO.isReg() || !MO.getReg() || MO.isUndef()) 272fe6060f1SDimitry Andric continue; 273fe6060f1SDimitry Andric 274fe6060f1SDimitry Andric Register Reg = MO.getReg(); 275fe6060f1SDimitry Andric if (Reg.isPhysical() || !TRI->isVectorRegister(*MRI, Reg)) 276fe6060f1SDimitry Andric continue; 277fe6060f1SDimitry Andric 278fe6060f1SDimitry Andric LiveVariables::VarInfo &VI = LV->getVarInfo(Reg); 279fe6060f1SDimitry Andric 280fe6060f1SDimitry Andric if (VI.isLiveIn(*Endif, Reg, *MRI)) { 281fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Excluding " << printReg(Reg, TRI) 282fe6060f1SDimitry Andric << " as Live in Endif\n"); 283fe6060f1SDimitry Andric continue; 284fe6060f1SDimitry Andric } 285fe6060f1SDimitry Andric // Make sure two conditions are met: 286fe6060f1SDimitry Andric // a.) the value is defined before/in the IF block 287fe6060f1SDimitry Andric // b.) should be defined in the same loop-level. 288fe6060f1SDimitry Andric const MachineBasicBlock *DefMBB = MRI->getVRegDef(Reg)->getParent(); 289fe6060f1SDimitry Andric if ((VI.AliveBlocks.test(If->getNumber()) || DefMBB == If) && 290fe6060f1SDimitry Andric Loops->getLoopFor(DefMBB) == Loops->getLoopFor(If)) 291fe6060f1SDimitry Andric KillsInElse.insert(Reg); 292fe6060f1SDimitry Andric } 293fe6060f1SDimitry Andric } 294fe6060f1SDimitry Andric 295fe6060f1SDimitry Andric auto IsLiveThroughThen = [&](Register Reg) { 296fe6060f1SDimitry Andric for (auto I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E; 297fe6060f1SDimitry Andric ++I) { 298fe6060f1SDimitry Andric if (!I->readsReg()) 299fe6060f1SDimitry Andric continue; 300fe6060f1SDimitry Andric auto *UseMI = I->getParent(); 301fe6060f1SDimitry Andric auto *UseMBB = UseMI->getParent(); 302fe6060f1SDimitry Andric if (UseMBB == Flow || UseMBB == Endif) { 303fe6060f1SDimitry Andric if (!UseMI->isPHI()) 304fe6060f1SDimitry Andric return true; 305fe6060f1SDimitry Andric 306fe6060f1SDimitry Andric auto *IncomingMBB = UseMI->getOperand(I.getOperandNo() + 1).getMBB(); 307fe6060f1SDimitry Andric // The register is live through the path If->Flow or Flow->Endif. 308fe6060f1SDimitry Andric // we should not optimize for such cases. 309fe6060f1SDimitry Andric if ((UseMBB == Flow && IncomingMBB != If) || 310fe6060f1SDimitry Andric (UseMBB == Endif && IncomingMBB == Flow)) 311fe6060f1SDimitry Andric return true; 312fe6060f1SDimitry Andric } 313fe6060f1SDimitry Andric } 314fe6060f1SDimitry Andric return false; 315fe6060f1SDimitry Andric }; 316fe6060f1SDimitry Andric 317fe6060f1SDimitry Andric for (auto Reg : KillsInElse) { 318fe6060f1SDimitry Andric if (!IsLiveThroughThen(Reg)) 319fe6060f1SDimitry Andric CandidateRegs.push_back(Reg); 320fe6060f1SDimitry Andric } 321fe6060f1SDimitry Andric } 322fe6060f1SDimitry Andric 323fe6060f1SDimitry Andric /// Collect the registers used in the waterfall loop block that are defined 324fe6060f1SDimitry Andric /// before. 325fe6060f1SDimitry Andric void SIOptimizeVGPRLiveRange::collectWaterfallCandidateRegisters( 326fe6060f1SDimitry Andric MachineBasicBlock *Loop, 327fe6060f1SDimitry Andric SmallSetVector<Register, 16> &CandidateRegs) const { 328fe6060f1SDimitry Andric 329fe6060f1SDimitry Andric for (auto &MI : Loop->instrs()) { 330fe6060f1SDimitry Andric if (MI.isDebugInstr()) 331fe6060f1SDimitry Andric continue; 332fe6060f1SDimitry Andric 333fe6060f1SDimitry Andric for (auto &MO : MI.operands()) { 334fe6060f1SDimitry Andric if (!MO.isReg() || !MO.getReg() || MO.isDef()) 335fe6060f1SDimitry Andric continue; 336fe6060f1SDimitry Andric 337fe6060f1SDimitry Andric Register MOReg = MO.getReg(); 338fe6060f1SDimitry Andric // We can only optimize AGPR/VGPR virtual register 339fe6060f1SDimitry Andric if (MOReg.isPhysical() || !TRI->isVectorRegister(*MRI, MOReg)) 340fe6060f1SDimitry Andric continue; 341fe6060f1SDimitry Andric 342fe6060f1SDimitry Andric if (MO.readsReg()) { 343fe6060f1SDimitry Andric const MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent(); 344fe6060f1SDimitry Andric // Make sure the value is defined before the LOOP block 345fe6060f1SDimitry Andric if (DefMBB != Loop && !CandidateRegs.contains(MOReg)) { 346fe6060f1SDimitry Andric // If the variable is used after the loop, the register coalescer will 347fe6060f1SDimitry Andric // merge the newly created register and remove the phi node again. 348fe6060f1SDimitry Andric // Just do nothing in that case. 349fe6060f1SDimitry Andric LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(MOReg); 350fe6060f1SDimitry Andric bool IsUsed = false; 351fe6060f1SDimitry Andric for (auto *Succ : Loop->successors()) { 352fe6060f1SDimitry Andric if (Succ != Loop && OldVarInfo.isLiveIn(*Succ, MOReg, *MRI)) { 353fe6060f1SDimitry Andric IsUsed = true; 354fe6060f1SDimitry Andric break; 355fe6060f1SDimitry Andric } 356fe6060f1SDimitry Andric } 357fe6060f1SDimitry Andric if (!IsUsed) { 358fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Found candidate reg: " 359fe6060f1SDimitry Andric << printReg(MOReg, TRI, 0, MRI) << '\n'); 360fe6060f1SDimitry Andric CandidateRegs.insert(MOReg); 361fe6060f1SDimitry Andric } else { 362fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Reg is used after loop, ignoring: " 363fe6060f1SDimitry Andric << printReg(MOReg, TRI, 0, MRI) << '\n'); 364fe6060f1SDimitry Andric } 365fe6060f1SDimitry Andric } 366fe6060f1SDimitry Andric } 367fe6060f1SDimitry Andric } 368fe6060f1SDimitry Andric } 369fe6060f1SDimitry Andric } 370fe6060f1SDimitry Andric 371fe6060f1SDimitry Andric // Re-calculate the liveness of \p Reg in the THEN-region 372fe6060f1SDimitry Andric void SIOptimizeVGPRLiveRange::updateLiveRangeInThenRegion( 373fe6060f1SDimitry Andric Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow) const { 374*04eeddc0SDimitry Andric SetVector<MachineBasicBlock *> Blocks; 375*04eeddc0SDimitry Andric SmallVector<MachineBasicBlock *> WorkList({If}); 376fe6060f1SDimitry Andric 377*04eeddc0SDimitry Andric // Collect all successors until we see the flow block, where we should 378*04eeddc0SDimitry Andric // reconverge. 379*04eeddc0SDimitry Andric while (!WorkList.empty()) { 380*04eeddc0SDimitry Andric auto *MBB = WorkList.pop_back_val(); 381*04eeddc0SDimitry Andric for (auto *Succ : MBB->successors()) { 382*04eeddc0SDimitry Andric if (Succ != Flow && !Blocks.contains(Succ)) { 383*04eeddc0SDimitry Andric WorkList.push_back(Succ); 384*04eeddc0SDimitry Andric Blocks.insert(Succ); 385fe6060f1SDimitry Andric } 386fe6060f1SDimitry Andric } 387*04eeddc0SDimitry Andric } 388fe6060f1SDimitry Andric 389fe6060f1SDimitry Andric LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg); 390*04eeddc0SDimitry Andric for (MachineBasicBlock *MBB : Blocks) { 391fe6060f1SDimitry Andric // Clear Live bit, as we will recalculate afterwards 392fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Clear AliveBlock " << printMBBReference(*MBB) 393fe6060f1SDimitry Andric << '\n'); 394fe6060f1SDimitry Andric OldVarInfo.AliveBlocks.reset(MBB->getNumber()); 395fe6060f1SDimitry Andric } 396fe6060f1SDimitry Andric 397*04eeddc0SDimitry Andric SmallPtrSet<MachineBasicBlock *, 4> PHIIncoming; 398*04eeddc0SDimitry Andric 399fe6060f1SDimitry Andric // Get the blocks the Reg should be alive through 400fe6060f1SDimitry Andric for (auto I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E; 401fe6060f1SDimitry Andric ++I) { 402fe6060f1SDimitry Andric auto *UseMI = I->getParent(); 403fe6060f1SDimitry Andric if (UseMI->isPHI() && I->readsReg()) { 404*04eeddc0SDimitry Andric if (Blocks.contains(UseMI->getParent())) 405fe6060f1SDimitry Andric PHIIncoming.insert(UseMI->getOperand(I.getOperandNo() + 1).getMBB()); 406fe6060f1SDimitry Andric } 407fe6060f1SDimitry Andric } 408fe6060f1SDimitry Andric 409*04eeddc0SDimitry Andric for (MachineBasicBlock *MBB : Blocks) { 410fe6060f1SDimitry Andric SmallVector<MachineInstr *> Uses; 411fe6060f1SDimitry Andric // PHI instructions has been processed before. 412fe6060f1SDimitry Andric findNonPHIUsesInBlock(Reg, MBB, Uses); 413fe6060f1SDimitry Andric 414fe6060f1SDimitry Andric if (Uses.size() == 1) { 415fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Found one Non-PHI use in " 416fe6060f1SDimitry Andric << printMBBReference(*MBB) << '\n'); 417fe6060f1SDimitry Andric LV->HandleVirtRegUse(Reg, MBB, *(*Uses.begin())); 418fe6060f1SDimitry Andric } else if (Uses.size() > 1) { 419fe6060f1SDimitry Andric // Process the instructions in-order 420fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Found " << Uses.size() << " Non-PHI uses in " 421fe6060f1SDimitry Andric << printMBBReference(*MBB) << '\n'); 422fe6060f1SDimitry Andric for (MachineInstr &MI : *MBB) { 423fe6060f1SDimitry Andric if (llvm::is_contained(Uses, &MI)) 424fe6060f1SDimitry Andric LV->HandleVirtRegUse(Reg, MBB, MI); 425fe6060f1SDimitry Andric } 426fe6060f1SDimitry Andric } 427fe6060f1SDimitry Andric 428fe6060f1SDimitry Andric // Mark Reg alive through the block if this is a PHI incoming block 429fe6060f1SDimitry Andric if (PHIIncoming.contains(MBB)) 430fe6060f1SDimitry Andric LV->MarkVirtRegAliveInBlock(OldVarInfo, MRI->getVRegDef(Reg)->getParent(), 431fe6060f1SDimitry Andric MBB); 432fe6060f1SDimitry Andric } 433fe6060f1SDimitry Andric 434fe6060f1SDimitry Andric // Set the isKilled flag if we get new Kills in the THEN region. 435fe6060f1SDimitry Andric for (auto *MI : OldVarInfo.Kills) { 436*04eeddc0SDimitry Andric if (Blocks.contains(MI->getParent())) 437fe6060f1SDimitry Andric MI->addRegisterKilled(Reg, TRI); 438fe6060f1SDimitry Andric } 439fe6060f1SDimitry Andric } 440fe6060f1SDimitry Andric 441fe6060f1SDimitry Andric void SIOptimizeVGPRLiveRange::updateLiveRangeInElseRegion( 442fe6060f1SDimitry Andric Register Reg, Register NewReg, MachineBasicBlock *Flow, 443fe6060f1SDimitry Andric MachineBasicBlock *Endif, 444fe6060f1SDimitry Andric SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const { 445fe6060f1SDimitry Andric LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(NewReg); 446fe6060f1SDimitry Andric LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg); 447fe6060f1SDimitry Andric 448fe6060f1SDimitry Andric // Transfer aliveBlocks from Reg to NewReg 449fe6060f1SDimitry Andric for (auto *MBB : ElseBlocks) { 450fe6060f1SDimitry Andric unsigned BBNum = MBB->getNumber(); 451fe6060f1SDimitry Andric if (OldVarInfo.AliveBlocks.test(BBNum)) { 452fe6060f1SDimitry Andric NewVarInfo.AliveBlocks.set(BBNum); 453fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Removing AliveBlock " << printMBBReference(*MBB) 454fe6060f1SDimitry Andric << '\n'); 455fe6060f1SDimitry Andric OldVarInfo.AliveBlocks.reset(BBNum); 456fe6060f1SDimitry Andric } 457fe6060f1SDimitry Andric } 458fe6060f1SDimitry Andric 459fe6060f1SDimitry Andric // Transfer the possible Kills in ElseBlocks from Reg to NewReg 460fe6060f1SDimitry Andric auto I = OldVarInfo.Kills.begin(); 461fe6060f1SDimitry Andric while (I != OldVarInfo.Kills.end()) { 462fe6060f1SDimitry Andric if (ElseBlocks.contains((*I)->getParent())) { 463fe6060f1SDimitry Andric NewVarInfo.Kills.push_back(*I); 464fe6060f1SDimitry Andric I = OldVarInfo.Kills.erase(I); 465fe6060f1SDimitry Andric } else { 466fe6060f1SDimitry Andric ++I; 467fe6060f1SDimitry Andric } 468fe6060f1SDimitry Andric } 469fe6060f1SDimitry Andric } 470fe6060f1SDimitry Andric 471fe6060f1SDimitry Andric void SIOptimizeVGPRLiveRange::optimizeLiveRange( 472fe6060f1SDimitry Andric Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow, 473fe6060f1SDimitry Andric MachineBasicBlock *Endif, 474fe6060f1SDimitry Andric SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const { 475fe6060f1SDimitry Andric // Insert a new PHI, marking the value from the THEN region being 476fe6060f1SDimitry Andric // undef. 477fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n'); 478fe6060f1SDimitry Andric const auto *RC = MRI->getRegClass(Reg); 479fe6060f1SDimitry Andric Register NewReg = MRI->createVirtualRegister(RC); 480fe6060f1SDimitry Andric Register UndefReg = MRI->createVirtualRegister(RC); 481fe6060f1SDimitry Andric MachineInstrBuilder PHI = BuildMI(*Flow, Flow->getFirstNonPHI(), DebugLoc(), 482fe6060f1SDimitry Andric TII->get(TargetOpcode::PHI), NewReg); 483fe6060f1SDimitry Andric for (auto *Pred : Flow->predecessors()) { 484fe6060f1SDimitry Andric if (Pred == If) 485fe6060f1SDimitry Andric PHI.addReg(Reg).addMBB(Pred); 486fe6060f1SDimitry Andric else 487fe6060f1SDimitry Andric PHI.addReg(UndefReg, RegState::Undef).addMBB(Pred); 488fe6060f1SDimitry Andric } 489fe6060f1SDimitry Andric 490fe6060f1SDimitry Andric // Replace all uses in the ELSE region or the PHIs in ENDIF block 491fe6060f1SDimitry Andric // Use early increment range because setReg() will update the linked list. 492fe6060f1SDimitry Andric for (auto &O : make_early_inc_range(MRI->use_operands(Reg))) { 493fe6060f1SDimitry Andric auto *UseMI = O.getParent(); 494fe6060f1SDimitry Andric auto *UseBlock = UseMI->getParent(); 495fe6060f1SDimitry Andric // Replace uses in Endif block 496fe6060f1SDimitry Andric if (UseBlock == Endif) { 497fe6060f1SDimitry Andric assert(UseMI->isPHI() && "Uses should be PHI in Endif block"); 498fe6060f1SDimitry Andric O.setReg(NewReg); 499fe6060f1SDimitry Andric continue; 500fe6060f1SDimitry Andric } 501fe6060f1SDimitry Andric 502fe6060f1SDimitry Andric // Replace uses in Else region 503fe6060f1SDimitry Andric if (ElseBlocks.contains(UseBlock)) 504fe6060f1SDimitry Andric O.setReg(NewReg); 505fe6060f1SDimitry Andric } 506fe6060f1SDimitry Andric 507fe6060f1SDimitry Andric // The optimized Reg is not alive through Flow blocks anymore. 508fe6060f1SDimitry Andric LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg); 509fe6060f1SDimitry Andric OldVarInfo.AliveBlocks.reset(Flow->getNumber()); 510fe6060f1SDimitry Andric 511fe6060f1SDimitry Andric updateLiveRangeInElseRegion(Reg, NewReg, Flow, Endif, ElseBlocks); 512fe6060f1SDimitry Andric updateLiveRangeInThenRegion(Reg, If, Flow); 513fe6060f1SDimitry Andric } 514fe6060f1SDimitry Andric 515fe6060f1SDimitry Andric void SIOptimizeVGPRLiveRange::optimizeWaterfallLiveRange( 516fe6060f1SDimitry Andric Register Reg, MachineBasicBlock *Loop) const { 517fe6060f1SDimitry Andric // Insert a new PHI, marking the value from the last loop iteration undef. 518fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n'); 519fe6060f1SDimitry Andric const auto *RC = MRI->getRegClass(Reg); 520fe6060f1SDimitry Andric Register NewReg = MRI->createVirtualRegister(RC); 521fe6060f1SDimitry Andric Register UndefReg = MRI->createVirtualRegister(RC); 522fe6060f1SDimitry Andric 523fe6060f1SDimitry Andric // Replace all uses in the LOOP region 524fe6060f1SDimitry Andric // Use early increment range because setReg() will update the linked list. 525fe6060f1SDimitry Andric for (auto &O : make_early_inc_range(MRI->use_operands(Reg))) { 526fe6060f1SDimitry Andric auto *UseMI = O.getParent(); 527fe6060f1SDimitry Andric auto *UseBlock = UseMI->getParent(); 528fe6060f1SDimitry Andric // Replace uses in Loop block 529fe6060f1SDimitry Andric if (UseBlock == Loop) 530fe6060f1SDimitry Andric O.setReg(NewReg); 531fe6060f1SDimitry Andric } 532fe6060f1SDimitry Andric 533fe6060f1SDimitry Andric MachineInstrBuilder PHI = BuildMI(*Loop, Loop->getFirstNonPHI(), DebugLoc(), 534fe6060f1SDimitry Andric TII->get(TargetOpcode::PHI), NewReg); 535fe6060f1SDimitry Andric for (auto *Pred : Loop->predecessors()) { 536fe6060f1SDimitry Andric if (Pred == Loop) 537fe6060f1SDimitry Andric PHI.addReg(UndefReg, RegState::Undef).addMBB(Pred); 538fe6060f1SDimitry Andric else 539fe6060f1SDimitry Andric PHI.addReg(Reg).addMBB(Pred); 540fe6060f1SDimitry Andric } 541fe6060f1SDimitry Andric 542fe6060f1SDimitry Andric LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(NewReg); 543fe6060f1SDimitry Andric LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg); 544fe6060f1SDimitry Andric 545fe6060f1SDimitry Andric // collectWaterfallCandidateRegisters only collects registers that are dead 546fe6060f1SDimitry Andric // after the loop. So we know that the old reg is not live throughout the 547fe6060f1SDimitry Andric // whole block anymore. 548fe6060f1SDimitry Andric OldVarInfo.AliveBlocks.reset(Loop->getNumber()); 549fe6060f1SDimitry Andric 550fe6060f1SDimitry Andric // Mark the last use as kill 551fe6060f1SDimitry Andric for (auto &MI : reverse(Loop->instrs())) { 552fe6060f1SDimitry Andric if (MI.readsRegister(NewReg, TRI)) { 553fe6060f1SDimitry Andric MI.addRegisterKilled(NewReg, TRI); 554fe6060f1SDimitry Andric NewVarInfo.Kills.push_back(&MI); 555fe6060f1SDimitry Andric break; 556fe6060f1SDimitry Andric } 557fe6060f1SDimitry Andric } 558fe6060f1SDimitry Andric assert(!NewVarInfo.Kills.empty() && 559fe6060f1SDimitry Andric "Failed to find last usage of register in loop"); 560fe6060f1SDimitry Andric } 561fe6060f1SDimitry Andric 562fe6060f1SDimitry Andric char SIOptimizeVGPRLiveRange::ID = 0; 563fe6060f1SDimitry Andric 564fe6060f1SDimitry Andric INITIALIZE_PASS_BEGIN(SIOptimizeVGPRLiveRange, DEBUG_TYPE, 565fe6060f1SDimitry Andric "SI Optimize VGPR LiveRange", false, false) 566fe6060f1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 567fe6060f1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) 568fe6060f1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LiveVariables) 569fe6060f1SDimitry Andric INITIALIZE_PASS_END(SIOptimizeVGPRLiveRange, DEBUG_TYPE, 570fe6060f1SDimitry Andric "SI Optimize VGPR LiveRange", false, false) 571fe6060f1SDimitry Andric 572fe6060f1SDimitry Andric char &llvm::SIOptimizeVGPRLiveRangeID = SIOptimizeVGPRLiveRange::ID; 573fe6060f1SDimitry Andric 574fe6060f1SDimitry Andric FunctionPass *llvm::createSIOptimizeVGPRLiveRangePass() { 575fe6060f1SDimitry Andric return new SIOptimizeVGPRLiveRange(); 576fe6060f1SDimitry Andric } 577fe6060f1SDimitry Andric 578fe6060f1SDimitry Andric bool SIOptimizeVGPRLiveRange::runOnMachineFunction(MachineFunction &MF) { 579fe6060f1SDimitry Andric 580fe6060f1SDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 581fe6060f1SDimitry Andric TII = ST.getInstrInfo(); 582fe6060f1SDimitry Andric TRI = &TII->getRegisterInfo(); 583fe6060f1SDimitry Andric MDT = &getAnalysis<MachineDominatorTree>(); 584fe6060f1SDimitry Andric Loops = &getAnalysis<MachineLoopInfo>(); 585fe6060f1SDimitry Andric LV = &getAnalysis<LiveVariables>(); 586fe6060f1SDimitry Andric MRI = &MF.getRegInfo(); 587fe6060f1SDimitry Andric 588fe6060f1SDimitry Andric if (skipFunction(MF.getFunction())) 589fe6060f1SDimitry Andric return false; 590fe6060f1SDimitry Andric 591fe6060f1SDimitry Andric bool MadeChange = false; 592fe6060f1SDimitry Andric 593fe6060f1SDimitry Andric // TODO: we need to think about the order of visiting the blocks to get 594fe6060f1SDimitry Andric // optimal result for nesting if-else cases. 595fe6060f1SDimitry Andric for (MachineBasicBlock &MBB : MF) { 596fe6060f1SDimitry Andric for (auto &MI : MBB.terminators()) { 597fe6060f1SDimitry Andric // Detect the if-else blocks 598fe6060f1SDimitry Andric if (MI.getOpcode() == AMDGPU::SI_IF) { 599fe6060f1SDimitry Andric MachineBasicBlock *IfTarget = MI.getOperand(2).getMBB(); 600fe6060f1SDimitry Andric auto *Endif = getElseTarget(IfTarget); 601fe6060f1SDimitry Andric if (!Endif) 602fe6060f1SDimitry Andric continue; 603fe6060f1SDimitry Andric 604fe6060f1SDimitry Andric SmallSetVector<MachineBasicBlock *, 16> ElseBlocks; 605fe6060f1SDimitry Andric SmallVector<Register> CandidateRegs; 606fe6060f1SDimitry Andric 607fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Checking IF-ELSE-ENDIF: " 608fe6060f1SDimitry Andric << printMBBReference(MBB) << ' ' 609fe6060f1SDimitry Andric << printMBBReference(*IfTarget) << ' ' 610fe6060f1SDimitry Andric << printMBBReference(*Endif) << '\n'); 611fe6060f1SDimitry Andric 612fe6060f1SDimitry Andric // Collect all the blocks in the ELSE region 613fe6060f1SDimitry Andric collectElseRegionBlocks(IfTarget, Endif, ElseBlocks); 614fe6060f1SDimitry Andric 615fe6060f1SDimitry Andric // Collect the registers can be optimized 616fe6060f1SDimitry Andric collectCandidateRegisters(&MBB, IfTarget, Endif, ElseBlocks, 617fe6060f1SDimitry Andric CandidateRegs); 618fe6060f1SDimitry Andric MadeChange |= !CandidateRegs.empty(); 619fe6060f1SDimitry Andric // Now we are safe to optimize. 620fe6060f1SDimitry Andric for (auto Reg : CandidateRegs) 621fe6060f1SDimitry Andric optimizeLiveRange(Reg, &MBB, IfTarget, Endif, ElseBlocks); 622fe6060f1SDimitry Andric } else if (MI.getOpcode() == AMDGPU::SI_WATERFALL_LOOP) { 623fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Checking Waterfall loop: " 624fe6060f1SDimitry Andric << printMBBReference(MBB) << '\n'); 625fe6060f1SDimitry Andric 626fe6060f1SDimitry Andric SmallSetVector<Register, 16> CandidateRegs; 627fe6060f1SDimitry Andric collectWaterfallCandidateRegisters(&MBB, CandidateRegs); 628fe6060f1SDimitry Andric MadeChange |= !CandidateRegs.empty(); 629fe6060f1SDimitry Andric // Now we are safe to optimize. 630fe6060f1SDimitry Andric for (auto Reg : CandidateRegs) 631fe6060f1SDimitry Andric optimizeWaterfallLiveRange(Reg, &MBB); 632fe6060f1SDimitry Andric } 633fe6060f1SDimitry Andric } 634fe6060f1SDimitry Andric } 635fe6060f1SDimitry Andric 636fe6060f1SDimitry Andric return MadeChange; 637fe6060f1SDimitry Andric } 638