1 //===-- SILowerSGPRSPills.cpp ---------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Handle SGPR spills. This pass takes the place of PrologEpilogInserter for all 10 // SGPR spills, so must insert CSR SGPR spills as well as expand them. 11 // 12 // This pass must never create new SGPR virtual registers. 13 // 14 // FIXME: Must stop RegScavenger spills in later passes. 15 // 16 //===----------------------------------------------------------------------===// 17 18 #include "SILowerSGPRSpills.h" 19 #include "AMDGPU.h" 20 #include "GCNSubtarget.h" 21 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 22 #include "SIMachineFunctionInfo.h" 23 #include "llvm/CodeGen/LiveIntervals.h" 24 #include "llvm/CodeGen/MachineDominators.h" 25 #include "llvm/CodeGen/MachineFrameInfo.h" 26 #include "llvm/CodeGen/RegisterScavenging.h" 27 28 using namespace llvm; 29 30 #define DEBUG_TYPE "si-lower-sgpr-spills" 31 32 using MBBVector = SmallVector<MachineBasicBlock *, 4>; 33 34 namespace { 35 36 static cl::opt<unsigned> MaxNumVGPRsForWwmAllocation( 37 "amdgpu-num-vgprs-for-wwm-alloc", 38 cl::desc("Max num VGPRs for whole-wave register allocation."), 39 cl::ReallyHidden, cl::init(10)); 40 41 class SILowerSGPRSpills { 42 private: 43 const SIRegisterInfo *TRI = nullptr; 44 const SIInstrInfo *TII = nullptr; 45 LiveIntervals *LIS = nullptr; 46 SlotIndexes *Indexes = nullptr; 47 MachineDominatorTree *MDT = nullptr; 48 49 // Save and Restore blocks of the current function. Typically there is a 50 // single save block, unless Windows EH funclets are involved. 
51 MBBVector SaveBlocks; 52 MBBVector RestoreBlocks; 53 54 public: 55 SILowerSGPRSpills(LiveIntervals *LIS, SlotIndexes *Indexes, 56 MachineDominatorTree *MDT) 57 : LIS(LIS), Indexes(Indexes), MDT(MDT) {} 58 bool run(MachineFunction &MF); 59 void calculateSaveRestoreBlocks(MachineFunction &MF); 60 bool spillCalleeSavedRegs(MachineFunction &MF, 61 SmallVectorImpl<int> &CalleeSavedFIs); 62 void updateLaneVGPRDomInstr( 63 int FI, MachineBasicBlock *MBB, MachineBasicBlock::iterator InsertPt, 64 DenseMap<Register, MachineBasicBlock::iterator> &LaneVGPRDomInstr); 65 void determineRegsForWWMAllocation(MachineFunction &MF, BitVector &RegMask); 66 }; 67 68 class SILowerSGPRSpillsLegacy : public MachineFunctionPass { 69 public: 70 static char ID; 71 72 SILowerSGPRSpillsLegacy() : MachineFunctionPass(ID) {} 73 74 bool runOnMachineFunction(MachineFunction &MF) override; 75 76 void getAnalysisUsage(AnalysisUsage &AU) const override { 77 AU.addRequired<MachineDominatorTreeWrapperPass>(); 78 AU.setPreservesAll(); 79 MachineFunctionPass::getAnalysisUsage(AU); 80 } 81 82 MachineFunctionProperties getClearedProperties() const override { 83 // SILowerSGPRSpills introduces new Virtual VGPRs for spilling SGPRs. 
    return MachineFunctionProperties().setIsSSA().setNoVRegs();
  }
};

} // end anonymous namespace

char SILowerSGPRSpillsLegacy::ID = 0;

INITIALIZE_PASS_BEGIN(SILowerSGPRSpillsLegacy, DEBUG_TYPE,
                      "SI lower SGPR spill instructions", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass)
INITIALIZE_PASS_END(SILowerSGPRSpillsLegacy, DEBUG_TYPE,
                    "SI lower SGPR spill instructions", false, false)

char &llvm::SILowerSGPRSpillsLegacyID = SILowerSGPRSpillsLegacy::ID;

/// Returns true if \p Reg or any register aliasing it is a live-in of \p MBB.
static bool isLiveIntoMBB(MCRegister Reg, MachineBasicBlock &MBB,
                          const TargetRegisterInfo *TRI) {
  for (MCRegAliasIterator R(Reg, TRI, true); R.isValid(); ++R) {
    if (MBB.isLiveIn(*R)) {
      return true;
    }
  }
  return false;
}

/// Insert spill code for the callee-saved registers used in the function.
/// Spills go at the top of \p SaveBlock. Keeps \p Indexes and \p LIS (both
/// optional) up to date for the inserted instructions.
static void insertCSRSaves(MachineBasicBlock &SaveBlock,
                           ArrayRef<CalleeSavedInfo> CSI, SlotIndexes *Indexes,
                           LiveIntervals *LIS) {
  MachineFunction &MF = *SaveBlock.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *RI = ST.getRegisterInfo();

  MachineBasicBlock::iterator I = SaveBlock.begin();
  // spillCalleeSavedRegisters returning false means the target hook declined,
  // so emit the generic per-register spills ourselves.
  if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) {
    for (const CalleeSavedInfo &CS : CSI) {
      // Insert the spill to the stack frame.
      MCRegister Reg = CS.getReg();

      // MIS is only consulted by the assert below to check that exactly one
      // instruction was inserted.
      MachineInstrSpan MIS(I, &SaveBlock);
      // The return address register is 64-bit; all other CSR SGPRs here are
      // spilled as 32-bit.
      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(
          Reg, Reg == RI->getReturnAddressReg(MF) ? MVT::i64 : MVT::i32);

      // If this value was already livein, we probably have a direct use of the
      // incoming register value, so don't kill at the spill point. This happens
      // since we pass some special inputs (workgroup IDs) in the callee saved
      // range.
      const bool IsLiveIn = isLiveIntoMBB(Reg, SaveBlock, TRI);
      TII.storeRegToStackSlot(SaveBlock, I, Reg, !IsLiveIn, CS.getFrameIdx(),
                              RC, TRI, Register());

      if (Indexes) {
        assert(std::distance(MIS.begin(), I) == 1);
        MachineInstr &Inst = *std::prev(I);
        Indexes->insertMachineInstrInMaps(Inst);
      }

      if (LIS)
        LIS->removeAllRegUnitsForPhysReg(Reg);
    }
  } else {
    // TFI doesn't update Indexes and LIS, so we have to do it separately.
    if (Indexes)
      Indexes->repairIndexesInRange(&SaveBlock, SaveBlock.begin(), I);

    if (LIS)
      for (const CalleeSavedInfo &CS : CSI)
        LIS->removeAllRegUnitsForPhysReg(CS.getReg());
  }
}

/// Insert restore code for the callee-saved registers used in the function.
/// Restores are placed immediately before the first terminator of
/// \p RestoreBlock. Keeps \p Indexes and \p LIS (both optional) up to date.
static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
                              MutableArrayRef<CalleeSavedInfo> CSI,
                              SlotIndexes *Indexes, LiveIntervals *LIS) {
  MachineFunction &MF = *RestoreBlock.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  // Restore all registers immediately before the return and any
  // terminators that precede it.
  MachineBasicBlock::iterator I = RestoreBlock.getFirstTerminator();
  // Remember the point just before the restores for index repair below.
  const MachineBasicBlock::iterator BeforeRestoresI =
      I == RestoreBlock.begin() ? I : std::prev(I);

  // FIXME: Just emit the readlane/writelane directly
  if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) {
    for (const CalleeSavedInfo &CI : reverse(CSI)) {
      // Insert in reverse order.
      // loadRegFromStackSlot can insert multiple instructions.
      TFI->restoreCalleeSavedRegister(RestoreBlock, I, CI, &TII, TRI);

      if (Indexes) {
        // NOTE(review): only the last inserted instruction is mapped here,
        // despite the comment above — presumably sufficient for this target;
        // confirm if restores ever expand to multiple instructions.
        MachineInstr &Inst = *std::prev(I);
        Indexes->insertMachineInstrInMaps(Inst);
      }

      if (LIS)
        LIS->removeAllRegUnitsForPhysReg(CI.getReg());
    }
  } else {
    // TFI doesn't update Indexes and LIS, so we have to do it separately.
    if (Indexes)
      Indexes->repairIndexesInRange(&RestoreBlock, BeforeRestoresI,
                                    RestoreBlock.getFirstTerminator());

    if (LIS)
      for (const CalleeSavedInfo &CS : CSI)
        LIS->removeAllRegUnitsForPhysReg(CS.getReg());
  }
}

/// Compute the sets of entry and return blocks for saving and restoring
/// callee-saved registers, and placing prolog and epilog code.
void SILowerSGPRSpills::calculateSaveRestoreBlocks(MachineFunction &MF) {
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // Even when we do not change any CSR, we still want to insert the
  // prologue and epilogue of the function.
  // So set the save points for those.

  // Use the points found by shrink-wrapping, if any.
  if (MFI.getSavePoint()) {
    SaveBlocks.push_back(MFI.getSavePoint());
    assert(MFI.getRestorePoint() && "Both restore and save must be set");
    MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
    // If RestoreBlock does not have any successor and is not a return block
    // then the end point is unreachable and we do not need to insert any
    // epilogue.
    if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock())
      RestoreBlocks.push_back(RestoreBlock);
    return;
  }

  // Save refs to entry and return blocks. The entry block always gets saves;
  // EH funclet entries also act as save points.
  SaveBlocks.push_back(&MF.front());
  for (MachineBasicBlock &MBB : MF) {
    if (MBB.isEHFuncletEntry())
      SaveBlocks.push_back(&MBB);
    if (MBB.isReturnBlock())
      RestoreBlocks.push_back(&MBB);
  }
}

// Mark the CSRs as live-in of the entry block so the verifier accepts their
// uses by the inserted save code.
// TODO: To support shrink wrapping, this would need to copy
// PrologEpilogInserter's updateLiveness.
static void updateLiveness(MachineFunction &MF, ArrayRef<CalleeSavedInfo> CSI) {
  MachineBasicBlock &EntryBB = MF.front();

  for (const CalleeSavedInfo &CSIReg : CSI)
    EntryBB.addLiveIn(CSIReg.getReg());
  EntryBB.sortUniqueLiveIns();
}

bool SILowerSGPRSpills::spillCalleeSavedRegs(
    MachineFunction &MF, SmallVectorImpl<int> &CalleeSavedFIs) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIFrameLowering *TFI = ST.getFrameLowering();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  // No scavenger is needed for the SGPR-only CSR determination.
  RegScavenger *RS = nullptr;

  // Determine which of the registers in the callee save list should be saved.
  BitVector SavedRegs;
  TFI->determineCalleeSavesSGPR(MF, SavedRegs, RS);

  // Add the code to save and restore the callee saved registers.
  // Naked functions get no prologue/epilogue, hence no CSR handling.
  if (!F.hasFnAttribute(Attribute::Naked)) {
    // FIXME: This is a lie. The CalleeSavedInfo is incomplete, but this is
    // necessary for verifier liveness checks.
    MFI.setCalleeSavedInfoValid(true);

    std::vector<CalleeSavedInfo> CSI;
    const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();

    // CSRegs is a null-terminated list of the callee-saved registers.
    for (unsigned I = 0; CSRegs[I]; ++I) {
      MCRegister Reg = CSRegs[I];

      if (SavedRegs.test(Reg)) {
        // Create a dedicated stack slot for each saved CSR. "Junk" because the
        // slot only exists to carry the spill; it may later be eliminated when
        // the SGPR is spilled to VGPR lanes instead.
        const TargetRegisterClass *RC =
            TRI->getMinimalPhysRegClass(Reg, MVT::i32);
        int JunkFI = MFI.CreateStackObject(TRI->getSpillSize(*RC),
                                           TRI->getSpillAlign(*RC), true);

        CSI.emplace_back(Reg, JunkFI);
        CalleeSavedFIs.push_back(JunkFI);
      }
    }

    if (!CSI.empty()) {
      for (MachineBasicBlock *SaveBlock : SaveBlocks)
        insertCSRSaves(*SaveBlock, CSI, Indexes, LIS);

      // Add live ins to save blocks.
      assert(SaveBlocks.size() == 1 && "shrink wrapping not fully implemented");
      updateLiveness(MF, CSI);

      for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
        insertCSRRestores(*RestoreBlock, CSI, Indexes, LIS);
      return true;
    }
  }

  return false;
}

void SILowerSGPRSpills::updateLaneVGPRDomInstr(
    int FI, MachineBasicBlock *MBB, MachineBasicBlock::iterator InsertPt,
    DenseMap<Register, MachineBasicBlock::iterator> &LaneVGPRDomInstr) {
  // For the Def of a virtual LaneVGPR to dominate all its uses, we should
  // insert an IMPLICIT_DEF before the dominating spill. Switching to a
  // depth first order doesn't really help since the machine function can be in
  // the unstructured control flow post-SSA. For each virtual register, hence
  // finding the common dominator to get either the dominating spill or a block
  // dominating all spills.
  SIMachineFunctionInfo *FuncInfo =
      MBB->getParent()->getInfo<SIMachineFunctionInfo>();
  ArrayRef<SIRegisterInfo::SpilledReg> VGPRSpills =
      FuncInfo->getSGPRSpillToVirtualVGPRLanes(FI);
  Register PrevLaneVGPR;
  for (auto &Spill : VGPRSpills) {
    // Skip repeated entries for the same lane VGPR; only the first occurrence
    // per VGPR matters for choosing an insertion point.
    if (PrevLaneVGPR == Spill.VGPR)
      continue;

    PrevLaneVGPR = Spill.VGPR;
    auto I = LaneVGPRDomInstr.find(Spill.VGPR);
    if (Spill.Lane == 0 && I == LaneVGPRDomInstr.end()) {
      // Initially add the spill instruction itself for Insertion point.
      LaneVGPRDomInstr[Spill.VGPR] = InsertPt;
    } else {
      assert(I != LaneVGPRDomInstr.end());
      auto PrevInsertPt = I->second;
      MachineBasicBlock *DomMBB = PrevInsertPt->getParent();
      if (DomMBB == MBB) {
        // The insertion point earlier selected in a predecessor block whose
        // spills are currently being lowered. The earlier InsertPt would be
        // the one just before the block terminator and it should be changed
        // if we insert any new spill in it.
        if (MDT->dominates(&*InsertPt, &*PrevInsertPt))
          I->second = InsertPt;

        continue;
      }

      // Find the common dominator block between PrevInsertPt and the
      // current spill.
      DomMBB = MDT->findNearestCommonDominator(DomMBB, MBB);
      if (DomMBB == MBB)
        I->second = InsertPt;
      else if (DomMBB != PrevInsertPt->getParent())
        // Neither point dominates the other: hoist the insertion point to the
        // end of the nearest common dominator block.
        I->second = &(*DomMBB->getFirstTerminator());
    }
  }
}

void SILowerSGPRSpills::determineRegsForWWMAllocation(MachineFunction &MF,
                                                      BitVector &RegMask) {
  // Determine an optimal number of VGPRs for WWM allocation. The complement
  // list will be available for allocating other VGPR virtual registers.
350 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 351 MachineRegisterInfo &MRI = MF.getRegInfo(); 352 BitVector ReservedRegs = TRI->getReservedRegs(MF); 353 BitVector NonWwmAllocMask(TRI->getNumRegs()); 354 355 // FIXME: MaxNumVGPRsForWwmAllocation might need to be adjusted in the future 356 // to have a balanced allocation between WWM values and per-thread vector 357 // register operands. 358 unsigned NumRegs = MaxNumVGPRsForWwmAllocation; 359 NumRegs = 360 std::min(static_cast<unsigned>(MFI->getSGPRSpillVGPRs().size()), NumRegs); 361 362 auto [MaxNumVGPRs, MaxNumAGPRs] = TRI->getMaxNumVectorRegs(MF); 363 // Try to use the highest available registers for now. Later after 364 // vgpr-regalloc, they can be shifted to the lowest range. 365 unsigned I = 0; 366 for (unsigned Reg = AMDGPU::VGPR0 + MaxNumVGPRs - 1; 367 (I < NumRegs) && (Reg >= AMDGPU::VGPR0); --Reg) { 368 if (!ReservedRegs.test(Reg) && 369 !MRI.isPhysRegUsed(Reg, /*SkipRegMaskTest=*/true)) { 370 TRI->markSuperRegs(RegMask, Reg); 371 ++I; 372 } 373 } 374 375 if (I != NumRegs) { 376 // Reserve an arbitrary register and report the error. 377 TRI->markSuperRegs(RegMask, AMDGPU::VGPR0); 378 MF.getFunction().getContext().emitError( 379 "can't find enough VGPRs for wwm-regalloc"); 380 } 381 } 382 383 bool SILowerSGPRSpillsLegacy::runOnMachineFunction(MachineFunction &MF) { 384 auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>(); 385 LiveIntervals *LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr; 386 auto *SIWrapper = getAnalysisIfAvailable<SlotIndexesWrapperPass>(); 387 SlotIndexes *Indexes = SIWrapper ? 
&SIWrapper->getSI() : nullptr; 388 MachineDominatorTree *MDT = 389 &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree(); 390 return SILowerSGPRSpills(LIS, Indexes, MDT).run(MF); 391 } 392 393 bool SILowerSGPRSpills::run(MachineFunction &MF) { 394 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 395 TII = ST.getInstrInfo(); 396 TRI = &TII->getRegisterInfo(); 397 398 assert(SaveBlocks.empty() && RestoreBlocks.empty()); 399 400 // First, expose any CSR SGPR spills. This is mostly the same as what PEI 401 // does, but somewhat simpler. 402 calculateSaveRestoreBlocks(MF); 403 SmallVector<int> CalleeSavedFIs; 404 bool HasCSRs = spillCalleeSavedRegs(MF, CalleeSavedFIs); 405 406 MachineFrameInfo &MFI = MF.getFrameInfo(); 407 MachineRegisterInfo &MRI = MF.getRegInfo(); 408 SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); 409 410 if (!MFI.hasStackObjects() && !HasCSRs) { 411 SaveBlocks.clear(); 412 RestoreBlocks.clear(); 413 return false; 414 } 415 416 bool MadeChange = false; 417 bool SpilledToVirtVGPRLanes = false; 418 419 // TODO: CSR VGPRs will never be spilled to AGPRs. These can probably be 420 // handled as SpilledToReg in regular PrologEpilogInserter. 421 const bool HasSGPRSpillToVGPR = TRI->spillSGPRToVGPR() && 422 (HasCSRs || FuncInfo->hasSpilledSGPRs()); 423 if (HasSGPRSpillToVGPR) { 424 // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs 425 // are spilled to VGPRs, in which case we can eliminate the stack usage. 426 // 427 // This operates under the assumption that only other SGPR spills are users 428 // of the frame index. 429 430 // To track the spill frame indices handled in this pass. 431 BitVector SpillFIs(MFI.getObjectIndexEnd(), false); 432 433 // To track the IMPLICIT_DEF insertion point for the lane vgprs. 
    DenseMap<Register, MachineBasicBlock::iterator> LaneVGPRDomInstr;

    for (MachineBasicBlock &MBB : MF) {
      for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
        if (!TII->isSGPRSpill(MI))
          continue;

        // A spill of an undef value is dead; just delete it.
        if (MI.getOperand(0).isUndef()) {
          if (Indexes)
            Indexes->removeMachineInstrFromMaps(MI);
          MI.eraseFromParent();
          continue;
        }

        int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
        assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);

        bool IsCalleeSaveSGPRSpill = llvm::is_contained(CalleeSavedFIs, FI);
        if (IsCalleeSaveSGPRSpill) {
          // Spill callee-saved SGPRs into physical VGPR lanes.

          // TODO: This is to ensure the CFIs are static for efficient frame
          // unwinding in the debugger. Spilling them into virtual VGPR lanes
          // involves regalloc to allocate the physical VGPRs and that might
          // cause intermediate spill/split of such liveranges for successful
          // allocation. This would result in broken CFI encoding unless
          // regalloc-aware CFI generation that inserts new CFIs along with the
          // intermediate spills is implemented. No such support currently
          // exists in the LLVM compiler.
          if (FuncInfo->allocateSGPRSpillToVGPRLane(
                  MF, FI, /*SpillToPhysVGPRLane=*/true)) {
            bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(
                MI, FI, nullptr, Indexes, LIS, true);
            if (!Spilled)
              llvm_unreachable(
                  "failed to spill SGPR to physical VGPR lane when allocated");
          }
        } else {
          // Non-CSR spills go to virtual VGPR lanes; vgpr-regalloc assigns
          // physical registers later.
          MachineInstrSpan MIS(&MI, &MBB);
          if (FuncInfo->allocateSGPRSpillToVGPRLane(MF, FI)) {
            bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(
                MI, FI, nullptr, Indexes, LIS);
            if (!Spilled)
              llvm_unreachable(
                  "failed to spill SGPR to virtual VGPR lane when allocated");
            SpillFIs.set(FI);
            updateLaneVGPRDomInstr(FI, &MBB, MIS.begin(), LaneVGPRDomInstr);
            SpilledToVirtVGPRLanes = true;
          }
        }
      }
    }

    for (auto Reg : FuncInfo->getSGPRSpillVGPRs()) {
      auto InsertPt = LaneVGPRDomInstr[Reg];
      // Insert the IMPLICIT_DEF at the identified points.
      MachineBasicBlock &Block = *InsertPt->getParent();
      DebugLoc DL = Block.findDebugLoc(InsertPt);
      auto MIB =
          BuildMI(Block, *InsertPt, DL, TII->get(AMDGPU::IMPLICIT_DEF), Reg);

      // Add WWM flag to the virtual register.
      FuncInfo->setFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG);

      // Set SGPR_SPILL asm printer flag
      MIB->setAsmPrinterFlag(AMDGPU::SGPR_SPILL);
      if (LIS) {
        LIS->InsertMachineInstrInMaps(*MIB);
        LIS->createAndComputeVirtRegInterval(Reg);
      }
    }

    // Determine the registers for WWM allocation and also compute the register
    // mask for non-wwm VGPR allocation.
    if (FuncInfo->getSGPRSpillVGPRs().size()) {
      BitVector WwmRegMask(TRI->getNumRegs());

      determineRegsForWWMAllocation(MF, WwmRegMask);

      BitVector NonWwmRegMask(WwmRegMask);
      NonWwmRegMask.flip().clearBitsNotInMask(TRI->getAllVGPRRegMask());

      // The complement set will be the registers for non-wwm (per-thread) vgpr
      // allocation.
      FuncInfo->updateNonWWMRegMask(NonWwmRegMask);
    }

    for (MachineBasicBlock &MBB : MF) {
      // FIXME: The dead frame indices are replaced with a null register from
      // the debug value instructions. We should instead, update it with the
      // correct register value. But not sure the register value alone is
      // adequate to lower the DIExpression. It should be worked out later.
      for (MachineInstr &MI : MBB) {
        if (MI.isDebugValue()) {
          // DBG_VALUE_LIST carries the frame index at operand 2; plain
          // DBG_VALUE carries it at operand 0.
          uint32_t StackOperandIdx = MI.isDebugValueList() ? 2 : 0;
          if (MI.getOperand(StackOperandIdx).isFI() &&
              !MFI.isFixedObjectIndex(
                  MI.getOperand(StackOperandIdx).getIndex()) &&
              SpillFIs[MI.getOperand(StackOperandIdx).getIndex()]) {
            MI.getOperand(StackOperandIdx)
                .ChangeToRegister(Register(), false /*isDef*/);
          }
        }
      }
    }

    // All those frame indices which are dead by now should be removed from the
    // function frame. Otherwise, there is a side effect such as re-mapping of
    // free frame index ids by the later pass(es) like "stack slot coloring"
    // which in turn could mess-up with the book keeping of "frame index to VGPR
    // lane".
    FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ false);

    MadeChange = true;
  }

  if (SpilledToVirtVGPRLanes) {
    const TargetRegisterClass *RC = TRI->getWaveMaskRegClass();
    // Shift back the reserved SGPR for EXEC copy into the lowest range.
    // This SGPR is reserved to handle the whole-wave spill/copy operations
    // that might get inserted during vgpr regalloc.
    Register UnusedLowSGPR = TRI->findUnusedRegister(MRI, RC, MF);
    if (UnusedLowSGPR && TRI->getHWRegIndex(UnusedLowSGPR) <
                             TRI->getHWRegIndex(FuncInfo->getSGPRForEXECCopy()))
      FuncInfo->setSGPRForEXECCopy(UnusedLowSGPR);
  } else {
    // No SGPR spills to virtual VGPR lanes and hence there won't be any WWM
    // spills/copies. Reset the SGPR reserved for EXEC copy.
    FuncInfo->setSGPRForEXECCopy(AMDGPU::NoRegister);
  }

  SaveBlocks.clear();
  RestoreBlocks.clear();

  return MadeChange;
}

/// New pass manager entry point; wraps SILowerSGPRSpills::run.
PreservedAnalyses
SILowerSGPRSpillsPass::run(MachineFunction &MF,
                           MachineFunctionAnalysisManager &MFAM) {
  MFPropsModifier _(*this, MF);
  // LiveIntervals and SlotIndexes are used only if already cached.
  auto *LIS = MFAM.getCachedResult<LiveIntervalsAnalysis>(MF);
  auto *Indexes = MFAM.getCachedResult<SlotIndexesAnalysis>(MF);
  MachineDominatorTree *MDT = &MFAM.getResult<MachineDominatorTreeAnalysis>(MF);
  SILowerSGPRSpills(LIS, Indexes, MDT).run(MF);
  return PreservedAnalyses::all();
}