1 //===--------------------- SIOptimizeVGPRLiveRange.cpp -------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// This pass tries to remove unnecessary VGPR live ranges in divergent if-else 11 /// structures and waterfall loops. 12 /// 13 /// When we do structurization, we usually transform an if-else into two 14 /// successive if-then (with a flow block to do predicate inversion). Consider a 15 /// simple case after structurization: A divergent value %a was defined before 16 /// if-else and used in both THEN (use in THEN is optional) and ELSE part: 17 /// bb.if: 18 /// %a = ... 19 /// ... 20 /// bb.then: 21 /// ... = op %a 22 /// ... // %a can be dead here 23 /// bb.flow: 24 /// ... 25 /// bb.else: 26 /// ... = %a 27 /// ... 28 /// bb.endif 29 /// 30 /// As register allocator has no idea of the thread-control-flow, it will just 31 /// assume %a would be alive in the whole range of bb.then because of a later 32 /// use in bb.else. On AMDGPU architecture, the VGPR is accessed with respect 33 /// to exec mask. For this if-else case, the lanes active in bb.then will be 34 /// inactive in bb.else, and vice-versa. So we are safe to say that %a was dead 35 /// after the last use in bb.then until the end of the block. The reason is 36 /// the instructions in bb.then will only overwrite lanes that will never be 37 /// accessed in bb.else. 38 /// 39 /// This pass aims to tell register allocator that %a is in-fact dead, 40 /// through inserting a phi-node in bb.flow saying that %a is undef when coming 41 /// from bb.then, and then replace the uses in the bb.else with the result of 42 /// newly inserted phi. 43 /// 44 /// Two key conditions must be met to ensure correctness: 45 /// 1.) The def-point should be in the same loop-level as if-else-endif to make 46 /// sure the second loop iteration still get correct data. 47 /// 2.) There should be no further uses after the IF-ELSE region. 48 /// 49 /// 50 /// Waterfall loops get inserted around instructions that use divergent values 51 /// but can only be executed with a uniform value. For example an indirect call 52 /// to a divergent address: 53 /// bb.start: 54 /// %a = ... 55 /// %fun = ... 56 /// ... 57 /// bb.loop: 58 /// call %fun (%a) 59 /// ... // %a can be dead here 60 /// loop %bb.loop 61 /// 62 /// The loop block is executed multiple times, but it is run exactly once for 63 /// each active lane. Similar to the if-else case, the register allocator 64 /// assumes that %a is live throughout the loop as it is used again in the next 65 /// iteration. If %a is a VGPR that is unused after the loop, it does not need 66 /// to be live after its last use in the loop block. By inserting a phi-node at 67 /// the start of bb.loop that is undef when coming from bb.loop, the register 68 /// allocation knows that the value of %a does not need to be preserved through 69 /// iterations of the loop. 70 /// 71 // 72 //===----------------------------------------------------------------------===// 73 74 #include "AMDGPU.h" 75 #include "GCNSubtarget.h" 76 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 77 #include "SIMachineFunctionInfo.h" 78 #include "llvm/CodeGen/LiveVariables.h" 79 #include "llvm/CodeGen/MachineDominators.h" 80 #include "llvm/CodeGen/MachineLoopInfo.h" 81 #include "llvm/CodeGen/TargetRegisterInfo.h" 82 #include "llvm/InitializePasses.h" 83 84 using namespace llvm; 85 86 #define DEBUG_TYPE "si-opt-vgpr-liverange" 87 88 namespace { 89 90 class SIOptimizeVGPRLiveRange : public MachineFunctionPass { 91 private: 92 const SIRegisterInfo *TRI = nullptr; 93 const SIInstrInfo *TII = nullptr; 94 LiveVariables *LV = nullptr; 95 MachineDominatorTree *MDT = nullptr; 96 const MachineLoopInfo *Loops = nullptr; 97 MachineRegisterInfo *MRI = nullptr; 98 99 public: 100 static char ID; 101 102 MachineBasicBlock *getElseTarget(MachineBasicBlock *MBB) const; 103 104 void collectElseRegionBlocks(MachineBasicBlock *Flow, 105 MachineBasicBlock *Endif, 106 SmallSetVector<MachineBasicBlock *, 16> &) const; 107 108 void 109 collectCandidateRegisters(MachineBasicBlock *If, MachineBasicBlock *Flow, 110 MachineBasicBlock *Endif, 111 SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks, 112 SmallVectorImpl<Register> &CandidateRegs) const; 113 114 void collectWaterfallCandidateRegisters( 115 MachineBasicBlock *LoopHeader, MachineBasicBlock *LoopEnd, 116 SmallSetVector<Register, 16> &CandidateRegs, 117 SmallSetVector<MachineBasicBlock *, 2> &Blocks, 118 SmallVectorImpl<MachineInstr *> &Instructions) const; 119 120 void findNonPHIUsesInBlock(Register Reg, MachineBasicBlock *MBB, 121 SmallVectorImpl<MachineInstr *> &Uses) const; 122 123 void updateLiveRangeInThenRegion(Register Reg, MachineBasicBlock *If, 124 MachineBasicBlock *Flow) const; 125 126 void updateLiveRangeInElseRegion( 127 Register Reg, Register NewReg, MachineBasicBlock *Flow, 128 MachineBasicBlock *Endif, 129 SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const; 130 131 void 132 optimizeLiveRange(Register Reg, MachineBasicBlock *If, 133 MachineBasicBlock *Flow, MachineBasicBlock *Endif, 134 SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const; 135 136 void optimizeWaterfallLiveRange( 137 Register Reg, MachineBasicBlock *LoopHeader, 138 SmallSetVector<MachineBasicBlock *, 2> &LoopBlocks, 139 SmallVectorImpl<MachineInstr *> &Instructions) const; 140 141 SIOptimizeVGPRLiveRange() : MachineFunctionPass(ID) {} 142 143 bool runOnMachineFunction(MachineFunction &MF) override; 144 145 StringRef getPassName() const override { 146 return "SI Optimize VGPR LiveRange"; 147 } 148 149 void getAnalysisUsage(AnalysisUsage &AU) const override { 150 AU.addRequired<LiveVariablesWrapperPass>(); 151 AU.addRequired<MachineDominatorTreeWrapperPass>(); 152 AU.addRequired<MachineLoopInfoWrapperPass>(); 153 AU.addPreserved<LiveVariablesWrapperPass>(); 154 AU.addPreserved<MachineDominatorTreeWrapperPass>(); 155 AU.addPreserved<MachineLoopInfoWrapperPass>(); 156 MachineFunctionPass::getAnalysisUsage(AU); 157 } 158 159 MachineFunctionProperties getRequiredProperties() const override { 160 return MachineFunctionProperties().set( 161 MachineFunctionProperties::Property::IsSSA); 162 } 163 164 MachineFunctionProperties getClearedProperties() const override { 165 return MachineFunctionProperties().set( 166 MachineFunctionProperties::Property::NoPHIs); 167 } 168 }; 169 170 } // end anonymous namespace 171 172 // Check whether the MBB is a else flow block and get the branching target which 173 // is the Endif block 174 MachineBasicBlock * 175 SIOptimizeVGPRLiveRange::getElseTarget(MachineBasicBlock *MBB) const { 176 for (auto &BR : MBB->terminators()) { 177 if (BR.getOpcode() == AMDGPU::SI_ELSE) 178 return BR.getOperand(2).getMBB(); 179 } 180 return nullptr; 181 } 182 183 void SIOptimizeVGPRLiveRange::collectElseRegionBlocks( 184 MachineBasicBlock *Flow, MachineBasicBlock *Endif, 185 SmallSetVector<MachineBasicBlock *, 16> &Blocks) const { 186 assert(Flow != Endif); 187 188 MachineBasicBlock *MBB = Endif; 189 unsigned Cur = 0; 190 while (MBB) { 191 for (auto *Pred : MBB->predecessors()) { 192 if (Pred != Flow && !Blocks.contains(Pred)) 193 Blocks.insert(Pred); 194 } 195 196 if (Cur < Blocks.size()) 197 MBB = Blocks[Cur++]; 198 else 199 MBB = nullptr; 200 } 201 202 LLVM_DEBUG({ 203 dbgs() << "Found Else blocks: "; 204 for (auto *MBB : Blocks) 205 dbgs() << printMBBReference(*MBB) << ' '; 206 dbgs() << '\n'; 207 }); 208 } 209 210 /// Find the instructions(excluding phi) in \p MBB that uses the \p Reg. 211 void SIOptimizeVGPRLiveRange::findNonPHIUsesInBlock( 212 Register Reg, MachineBasicBlock *MBB, 213 SmallVectorImpl<MachineInstr *> &Uses) const { 214 for (auto &UseMI : MRI->use_nodbg_instructions(Reg)) { 215 if (UseMI.getParent() == MBB && !UseMI.isPHI()) 216 Uses.push_back(&UseMI); 217 } 218 } 219 220 /// Collect the killed registers in the ELSE region which are not alive through 221 /// the whole THEN region. 222 void SIOptimizeVGPRLiveRange::collectCandidateRegisters( 223 MachineBasicBlock *If, MachineBasicBlock *Flow, MachineBasicBlock *Endif, 224 SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks, 225 SmallVectorImpl<Register> &CandidateRegs) const { 226 227 SmallSet<Register, 8> KillsInElse; 228 229 for (auto *Else : ElseBlocks) { 230 for (auto &MI : Else->instrs()) { 231 if (MI.isDebugInstr()) 232 continue; 233 234 for (auto &MO : MI.operands()) { 235 if (!MO.isReg() || !MO.getReg() || MO.isDef()) 236 continue; 237 238 Register MOReg = MO.getReg(); 239 // We can only optimize AGPR/VGPR virtual register 240 if (MOReg.isPhysical() || !TRI->isVectorRegister(*MRI, MOReg)) 241 continue; 242 243 if (MO.readsReg()) { 244 LiveVariables::VarInfo &VI = LV->getVarInfo(MOReg); 245 const MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent(); 246 // Make sure two conditions are met: 247 // a.) the value is defined before/in the IF block 248 // b.) should be defined in the same loop-level. 249 if ((VI.AliveBlocks.test(If->getNumber()) || DefMBB == If) && 250 Loops->getLoopFor(DefMBB) == Loops->getLoopFor(If)) { 251 // Check if the register is live into the endif block. If not, 252 // consider it killed in the else region. 253 LiveVariables::VarInfo &VI = LV->getVarInfo(MOReg); 254 if (!VI.isLiveIn(*Endif, MOReg, *MRI)) { 255 KillsInElse.insert(MOReg); 256 } else { 257 LLVM_DEBUG(dbgs() << "Excluding " << printReg(MOReg, TRI) 258 << " as Live in Endif\n"); 259 } 260 } 261 } 262 } 263 } 264 } 265 266 // Check the phis in the Endif, looking for value coming from the ELSE 267 // region. Make sure the phi-use is the last use. 268 for (auto &MI : Endif->phis()) { 269 for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) { 270 auto &MO = MI.getOperand(Idx); 271 auto *Pred = MI.getOperand(Idx + 1).getMBB(); 272 if (Pred == Flow) 273 continue; 274 assert(ElseBlocks.contains(Pred) && "Should be from Else region\n"); 275 276 if (!MO.isReg() || !MO.getReg() || MO.isUndef()) 277 continue; 278 279 Register Reg = MO.getReg(); 280 if (Reg.isPhysical() || !TRI->isVectorRegister(*MRI, Reg)) 281 continue; 282 283 LiveVariables::VarInfo &VI = LV->getVarInfo(Reg); 284 285 if (VI.isLiveIn(*Endif, Reg, *MRI)) { 286 LLVM_DEBUG(dbgs() << "Excluding " << printReg(Reg, TRI) 287 << " as Live in Endif\n"); 288 continue; 289 } 290 // Make sure two conditions are met: 291 // a.) the value is defined before/in the IF block 292 // b.) should be defined in the same loop-level. 293 const MachineBasicBlock *DefMBB = MRI->getVRegDef(Reg)->getParent(); 294 if ((VI.AliveBlocks.test(If->getNumber()) || DefMBB == If) && 295 Loops->getLoopFor(DefMBB) == Loops->getLoopFor(If)) 296 KillsInElse.insert(Reg); 297 } 298 } 299 300 auto IsLiveThroughThen = [&](Register Reg) { 301 for (auto I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E; 302 ++I) { 303 if (!I->readsReg()) 304 continue; 305 auto *UseMI = I->getParent(); 306 auto *UseMBB = UseMI->getParent(); 307 if (UseMBB == Flow || UseMBB == Endif) { 308 if (!UseMI->isPHI()) 309 return true; 310 311 auto *IncomingMBB = UseMI->getOperand(I.getOperandNo() + 1).getMBB(); 312 // The register is live through the path If->Flow or Flow->Endif. 313 // we should not optimize for such cases. 314 if ((UseMBB == Flow && IncomingMBB != If) || 315 (UseMBB == Endif && IncomingMBB == Flow)) 316 return true; 317 } 318 } 319 return false; 320 }; 321 322 for (auto Reg : KillsInElse) { 323 if (!IsLiveThroughThen(Reg)) 324 CandidateRegs.push_back(Reg); 325 } 326 } 327 328 /// Collect the registers used in the waterfall loop block that are defined 329 /// before. 330 void SIOptimizeVGPRLiveRange::collectWaterfallCandidateRegisters( 331 MachineBasicBlock *LoopHeader, MachineBasicBlock *LoopEnd, 332 SmallSetVector<Register, 16> &CandidateRegs, 333 SmallSetVector<MachineBasicBlock *, 2> &Blocks, 334 SmallVectorImpl<MachineInstr *> &Instructions) const { 335 336 // Collect loop instructions, potentially spanning multiple blocks 337 auto *MBB = LoopHeader; 338 for (;;) { 339 Blocks.insert(MBB); 340 for (auto &MI : *MBB) { 341 if (MI.isDebugInstr()) 342 continue; 343 Instructions.push_back(&MI); 344 } 345 if (MBB == LoopEnd) 346 break; 347 348 if ((MBB != LoopHeader && MBB->pred_size() != 1) || 349 (MBB == LoopHeader && MBB->pred_size() != 2) || MBB->succ_size() != 1) { 350 LLVM_DEBUG(dbgs() << "Unexpected edges in CFG, ignoring loop\n"); 351 return; 352 } 353 354 MBB = *MBB->succ_begin(); 355 } 356 357 for (auto *I : Instructions) { 358 auto &MI = *I; 359 360 for (auto &MO : MI.all_uses()) { 361 if (!MO.getReg()) 362 continue; 363 364 Register MOReg = MO.getReg(); 365 // We can only optimize AGPR/VGPR virtual register 366 if (MOReg.isPhysical() || !TRI->isVectorRegister(*MRI, MOReg)) 367 continue; 368 369 if (MO.readsReg()) { 370 MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent(); 371 // Make sure the value is defined before the LOOP block 372 if (!Blocks.contains(DefMBB) && !CandidateRegs.contains(MOReg)) { 373 // If the variable is used after the loop, the register coalescer will 374 // merge the newly created register and remove the phi node again. 375 // Just do nothing in that case. 376 LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(MOReg); 377 bool IsUsed = false; 378 for (auto *Succ : LoopEnd->successors()) { 379 if (!Blocks.contains(Succ) && 380 OldVarInfo.isLiveIn(*Succ, MOReg, *MRI)) { 381 IsUsed = true; 382 break; 383 } 384 } 385 if (!IsUsed) { 386 LLVM_DEBUG(dbgs() << "Found candidate reg: " 387 << printReg(MOReg, TRI, 0, MRI) << '\n'); 388 CandidateRegs.insert(MOReg); 389 } else { 390 LLVM_DEBUG(dbgs() << "Reg is used after loop, ignoring: " 391 << printReg(MOReg, TRI, 0, MRI) << '\n'); 392 } 393 } 394 } 395 } 396 } 397 } 398 399 // Re-calculate the liveness of \p Reg in the THEN-region 400 void SIOptimizeVGPRLiveRange::updateLiveRangeInThenRegion( 401 Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow) const { 402 SetVector<MachineBasicBlock *> Blocks; 403 SmallVector<MachineBasicBlock *> WorkList({If}); 404 405 // Collect all successors until we see the flow block, where we should 406 // reconverge. 407 while (!WorkList.empty()) { 408 auto *MBB = WorkList.pop_back_val(); 409 for (auto *Succ : MBB->successors()) { 410 if (Succ != Flow && !Blocks.contains(Succ)) { 411 WorkList.push_back(Succ); 412 Blocks.insert(Succ); 413 } 414 } 415 } 416 417 LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg); 418 for (MachineBasicBlock *MBB : Blocks) { 419 // Clear Live bit, as we will recalculate afterwards 420 LLVM_DEBUG(dbgs() << "Clear AliveBlock " << printMBBReference(*MBB) 421 << '\n'); 422 OldVarInfo.AliveBlocks.reset(MBB->getNumber()); 423 } 424 425 SmallPtrSet<MachineBasicBlock *, 4> PHIIncoming; 426 427 // Get the blocks the Reg should be alive through 428 for (auto I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E; 429 ++I) { 430 auto *UseMI = I->getParent(); 431 if (UseMI->isPHI() && I->readsReg()) { 432 if (Blocks.contains(UseMI->getParent())) 433 PHIIncoming.insert(UseMI->getOperand(I.getOperandNo() + 1).getMBB()); 434 } 435 } 436 437 for (MachineBasicBlock *MBB : Blocks) { 438 SmallVector<MachineInstr *> Uses; 439 // PHI instructions has been processed before. 440 findNonPHIUsesInBlock(Reg, MBB, Uses); 441 442 if (Uses.size() == 1) { 443 LLVM_DEBUG(dbgs() << "Found one Non-PHI use in " 444 << printMBBReference(*MBB) << '\n'); 445 LV->HandleVirtRegUse(Reg, MBB, *(*Uses.begin())); 446 } else if (Uses.size() > 1) { 447 // Process the instructions in-order 448 LLVM_DEBUG(dbgs() << "Found " << Uses.size() << " Non-PHI uses in " 449 << printMBBReference(*MBB) << '\n'); 450 for (MachineInstr &MI : *MBB) { 451 if (llvm::is_contained(Uses, &MI)) 452 LV->HandleVirtRegUse(Reg, MBB, MI); 453 } 454 } 455 456 // Mark Reg alive through the block if this is a PHI incoming block 457 if (PHIIncoming.contains(MBB)) 458 LV->MarkVirtRegAliveInBlock(OldVarInfo, MRI->getVRegDef(Reg)->getParent(), 459 MBB); 460 } 461 462 // Set the isKilled flag if we get new Kills in the THEN region. 463 for (auto *MI : OldVarInfo.Kills) { 464 if (Blocks.contains(MI->getParent())) 465 MI->addRegisterKilled(Reg, TRI); 466 } 467 } 468 469 void SIOptimizeVGPRLiveRange::updateLiveRangeInElseRegion( 470 Register Reg, Register NewReg, MachineBasicBlock *Flow, 471 MachineBasicBlock *Endif, 472 SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const { 473 LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(NewReg); 474 LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg); 475 476 // Transfer aliveBlocks from Reg to NewReg 477 for (auto *MBB : ElseBlocks) { 478 unsigned BBNum = MBB->getNumber(); 479 if (OldVarInfo.AliveBlocks.test(BBNum)) { 480 NewVarInfo.AliveBlocks.set(BBNum); 481 LLVM_DEBUG(dbgs() << "Removing AliveBlock " << printMBBReference(*MBB) 482 << '\n'); 483 OldVarInfo.AliveBlocks.reset(BBNum); 484 } 485 } 486 487 // Transfer the possible Kills in ElseBlocks from Reg to NewReg 488 auto I = OldVarInfo.Kills.begin(); 489 while (I != OldVarInfo.Kills.end()) { 490 if (ElseBlocks.contains((*I)->getParent())) { 491 NewVarInfo.Kills.push_back(*I); 492 I = OldVarInfo.Kills.erase(I); 493 } else { 494 ++I; 495 } 496 } 497 } 498 499 void SIOptimizeVGPRLiveRange::optimizeLiveRange( 500 Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow, 501 MachineBasicBlock *Endif, 502 SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const { 503 // Insert a new PHI, marking the value from the THEN region being 504 // undef. 505 LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n'); 506 const auto *RC = MRI->getRegClass(Reg); 507 Register NewReg = MRI->createVirtualRegister(RC); 508 Register UndefReg = MRI->createVirtualRegister(RC); 509 MachineInstrBuilder PHI = BuildMI(*Flow, Flow->getFirstNonPHI(), DebugLoc(), 510 TII->get(TargetOpcode::PHI), NewReg); 511 for (auto *Pred : Flow->predecessors()) { 512 if (Pred == If) 513 PHI.addReg(Reg).addMBB(Pred); 514 else 515 PHI.addReg(UndefReg, RegState::Undef).addMBB(Pred); 516 } 517 518 // Replace all uses in the ELSE region or the PHIs in ENDIF block 519 // Use early increment range because setReg() will update the linked list. 520 for (auto &O : make_early_inc_range(MRI->use_operands(Reg))) { 521 auto *UseMI = O.getParent(); 522 auto *UseBlock = UseMI->getParent(); 523 // Replace uses in Endif block 524 if (UseBlock == Endif) { 525 if (UseMI->isPHI()) 526 O.setReg(NewReg); 527 else if (UseMI->isDebugInstr()) 528 continue; 529 else { 530 // DetectDeadLanes may mark register uses as undef without removing 531 // them, in which case a non-phi instruction using the original register 532 // may exist in the Endif block even though the register is not live 533 // into it. 534 assert(!O.readsReg()); 535 } 536 continue; 537 } 538 539 // Replace uses in Else region 540 if (ElseBlocks.contains(UseBlock)) 541 O.setReg(NewReg); 542 } 543 544 // The optimized Reg is not alive through Flow blocks anymore. 545 LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg); 546 OldVarInfo.AliveBlocks.reset(Flow->getNumber()); 547 548 updateLiveRangeInElseRegion(Reg, NewReg, Flow, Endif, ElseBlocks); 549 updateLiveRangeInThenRegion(Reg, If, Flow); 550 } 551 552 void SIOptimizeVGPRLiveRange::optimizeWaterfallLiveRange( 553 Register Reg, MachineBasicBlock *LoopHeader, 554 SmallSetVector<MachineBasicBlock *, 2> &Blocks, 555 SmallVectorImpl<MachineInstr *> &Instructions) const { 556 // Insert a new PHI, marking the value from the last loop iteration undef. 557 LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n'); 558 const auto *RC = MRI->getRegClass(Reg); 559 Register NewReg = MRI->createVirtualRegister(RC); 560 Register UndefReg = MRI->createVirtualRegister(RC); 561 562 // Replace all uses in the LOOP region 563 // Use early increment range because setReg() will update the linked list. 564 for (auto &O : make_early_inc_range(MRI->use_operands(Reg))) { 565 auto *UseMI = O.getParent(); 566 auto *UseBlock = UseMI->getParent(); 567 // Replace uses in Loop blocks 568 if (Blocks.contains(UseBlock)) 569 O.setReg(NewReg); 570 } 571 572 MachineInstrBuilder PHI = 573 BuildMI(*LoopHeader, LoopHeader->getFirstNonPHI(), DebugLoc(), 574 TII->get(TargetOpcode::PHI), NewReg); 575 for (auto *Pred : LoopHeader->predecessors()) { 576 if (Blocks.contains(Pred)) 577 PHI.addReg(UndefReg, RegState::Undef).addMBB(Pred); 578 else 579 PHI.addReg(Reg).addMBB(Pred); 580 } 581 582 LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(NewReg); 583 LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg); 584 585 // Find last use and mark as kill 586 MachineInstr *Kill = nullptr; 587 for (auto *MI : reverse(Instructions)) { 588 if (MI->readsRegister(NewReg, TRI)) { 589 MI->addRegisterKilled(NewReg, TRI); 590 NewVarInfo.Kills.push_back(MI); 591 Kill = MI; 592 break; 593 } 594 } 595 assert(Kill && "Failed to find last usage of register in loop"); 596 597 MachineBasicBlock *KillBlock = Kill->getParent(); 598 bool PostKillBlock = false; 599 for (auto *Block : Blocks) { 600 auto BBNum = Block->getNumber(); 601 602 // collectWaterfallCandidateRegisters only collects registers that are dead 603 // after the loop. So we know that the old reg is no longer live throughout 604 // the waterfall loop. 605 OldVarInfo.AliveBlocks.reset(BBNum); 606 607 // The new register is live up to (and including) the block that kills it. 608 PostKillBlock |= (Block == KillBlock); 609 if (PostKillBlock) { 610 NewVarInfo.AliveBlocks.reset(BBNum); 611 } else if (Block != LoopHeader) { 612 NewVarInfo.AliveBlocks.set(BBNum); 613 } 614 } 615 } 616 617 char SIOptimizeVGPRLiveRange::ID = 0; 618 619 INITIALIZE_PASS_BEGIN(SIOptimizeVGPRLiveRange, DEBUG_TYPE, 620 "SI Optimize VGPR LiveRange", false, false) 621 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) 622 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) 623 INITIALIZE_PASS_DEPENDENCY(LiveVariablesWrapperPass) 624 INITIALIZE_PASS_END(SIOptimizeVGPRLiveRange, DEBUG_TYPE, 625 "SI Optimize VGPR LiveRange", false, false) 626 627 char &llvm::SIOptimizeVGPRLiveRangeID = SIOptimizeVGPRLiveRange::ID; 628 629 FunctionPass *llvm::createSIOptimizeVGPRLiveRangePass() { 630 return new SIOptimizeVGPRLiveRange(); 631 } 632 633 bool SIOptimizeVGPRLiveRange::runOnMachineFunction(MachineFunction &MF) { 634 635 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 636 TII = ST.getInstrInfo(); 637 TRI = &TII->getRegisterInfo(); 638 MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree(); 639 Loops = &getAnalysis<MachineLoopInfoWrapperPass>().getLI(); 640 LV = &getAnalysis<LiveVariablesWrapperPass>().getLV(); 641 MRI = &MF.getRegInfo(); 642 643 if (skipFunction(MF.getFunction())) 644 return false; 645 646 bool MadeChange = false; 647 648 // TODO: we need to think about the order of visiting the blocks to get 649 // optimal result for nesting if-else cases. 650 for (MachineBasicBlock &MBB : MF) { 651 for (auto &MI : MBB.terminators()) { 652 // Detect the if-else blocks 653 if (MI.getOpcode() == AMDGPU::SI_IF) { 654 MachineBasicBlock *IfTarget = MI.getOperand(2).getMBB(); 655 auto *Endif = getElseTarget(IfTarget); 656 if (!Endif) 657 continue; 658 659 // Skip unexpected control flow. 660 if (!MDT->dominates(&MBB, IfTarget) || !MDT->dominates(IfTarget, Endif)) 661 continue; 662 663 SmallSetVector<MachineBasicBlock *, 16> ElseBlocks; 664 SmallVector<Register> CandidateRegs; 665 666 LLVM_DEBUG(dbgs() << "Checking IF-ELSE-ENDIF: " 667 << printMBBReference(MBB) << ' ' 668 << printMBBReference(*IfTarget) << ' ' 669 << printMBBReference(*Endif) << '\n'); 670 671 // Collect all the blocks in the ELSE region 672 collectElseRegionBlocks(IfTarget, Endif, ElseBlocks); 673 674 // Collect the registers can be optimized 675 collectCandidateRegisters(&MBB, IfTarget, Endif, ElseBlocks, 676 CandidateRegs); 677 MadeChange |= !CandidateRegs.empty(); 678 // Now we are safe to optimize. 679 for (auto Reg : CandidateRegs) 680 optimizeLiveRange(Reg, &MBB, IfTarget, Endif, ElseBlocks); 681 } else if (MI.getOpcode() == AMDGPU::SI_WATERFALL_LOOP) { 682 auto *LoopHeader = MI.getOperand(0).getMBB(); 683 auto *LoopEnd = &MBB; 684 685 LLVM_DEBUG(dbgs() << "Checking Waterfall loop: " 686 << printMBBReference(*LoopHeader) << '\n'); 687 688 SmallSetVector<Register, 16> CandidateRegs; 689 SmallVector<MachineInstr *, 16> Instructions; 690 SmallSetVector<MachineBasicBlock *, 2> Blocks; 691 692 collectWaterfallCandidateRegisters(LoopHeader, LoopEnd, CandidateRegs, 693 Blocks, Instructions); 694 MadeChange |= !CandidateRegs.empty(); 695 // Now we are safe to optimize. 696 for (auto Reg : CandidateRegs) 697 optimizeWaterfallLiveRange(Reg, LoopHeader, Blocks, Instructions); 698 } 699 } 700 } 701 702 return MadeChange; 703 } 704