xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp (revision 9c77fb6aaa366cbabc80ee1b834bcfe4df135491)
1 //===-- SILowerSGPRSPills.cpp ---------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Handle SGPR spills. This pass takes the place of PrologEpilogInserter for all
10 // SGPR spills, so must insert CSR SGPR spills as well as expand them.
11 //
12 // This pass must never create new SGPR virtual registers.
13 //
14 // FIXME: Must stop RegScavenger spills in later passes.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "SILowerSGPRSpills.h"
19 #include "AMDGPU.h"
20 #include "GCNSubtarget.h"
21 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
22 #include "SIMachineFunctionInfo.h"
23 #include "llvm/CodeGen/LiveIntervals.h"
24 #include "llvm/CodeGen/MachineDominators.h"
25 #include "llvm/CodeGen/MachineFrameInfo.h"
26 #include "llvm/CodeGen/RegisterScavenging.h"
27 
28 using namespace llvm;
29 
30 #define DEBUG_TYPE "si-lower-sgpr-spills"
31 
32 using MBBVector = SmallVector<MachineBasicBlock *, 4>;
33 
34 namespace {
35 
// Command-line cap on how many physical VGPRs are set aside for whole-wave
// (WWM) register allocation; consumed by determineRegsForWWMAllocation().
static cl::opt<unsigned> MaxNumVGPRsForWwmAllocation(
    "amdgpu-num-vgprs-for-wwm-alloc",
    cl::desc("Max num VGPRs for whole-wave register allocation."),
    cl::ReallyHidden, cl::init(10));
40 
// Shared implementation of SGPR spill lowering, driven by either the legacy
// pass below or the new-pass-manager wrapper at the end of the file.
class SILowerSGPRSpills {
private:
  const SIRegisterInfo *TRI = nullptr;
  const SIInstrInfo *TII = nullptr;
  // LIS and Indexes are optional: they may be null, in which case the
  // corresponding liveness/slot-index bookkeeping is skipped.
  LiveIntervals *LIS = nullptr;
  SlotIndexes *Indexes = nullptr;
  MachineDominatorTree *MDT = nullptr;

  // Save and Restore blocks of the current function. Typically there is a
  // single save block, unless Windows EH funclets are involved.
  MBBVector SaveBlocks;
  MBBVector RestoreBlocks;

public:
  SILowerSGPRSpills(LiveIntervals *LIS, SlotIndexes *Indexes,
                    MachineDominatorTree *MDT)
      : LIS(LIS), Indexes(Indexes), MDT(MDT) {}
  // Entry point; returns true if MF was modified.
  bool run(MachineFunction &MF);
  // Populate SaveBlocks/RestoreBlocks for MF (prolog/epilog placement).
  void calculateSaveRestoreBlocks(MachineFunction &MF);
  // Insert CSR SGPR saves/restores; the frame indices created for them are
  // appended to CalleeSavedFIs. Returns true if any CSR spill was inserted.
  bool spillCalleeSavedRegs(MachineFunction &MF,
                            SmallVectorImpl<int> &CalleeSavedFIs);
  // Track, per lane VGPR, the instruction before which its IMPLICIT_DEF must
  // be inserted so the def dominates all of that VGPR's spill writes.
  void updateLaneVGPRDomInstr(
      int FI, MachineBasicBlock *MBB, MachineBasicBlock::iterator InsertPt,
      DenseMap<Register, MachineBasicBlock::iterator> &LaneVGPRDomInstr);
  // Select the physical VGPRs reserved for WWM allocation into RegMask.
  void determineRegsForWWMAllocation(MachineFunction &MF, BitVector &RegMask);
};
67 
// Legacy pass-manager wrapper; delegates all work to SILowerSGPRSpills.
class SILowerSGPRSpillsLegacy : public MachineFunctionPass {
public:
  static char ID;

  SILowerSGPRSpillsLegacy() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // Only the dominator tree is required; LiveIntervals and SlotIndexes are
    // picked up opportunistically in runOnMachineFunction if available.
    AU.addRequired<MachineDominatorTreeWrapperPass>();
    AU.setPreservesAll();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  MachineFunctionProperties getClearedProperties() const override {
    // SILowerSGPRSpills introduces new Virtual VGPRs for spilling SGPRs.
    return MachineFunctionProperties().setIsSSA().setNoVRegs();
  }
};
87 
88 } // end anonymous namespace
89 
char SILowerSGPRSpillsLegacy::ID = 0;

// Register the legacy pass and declare the analyses it may consume.
INITIALIZE_PASS_BEGIN(SILowerSGPRSpillsLegacy, DEBUG_TYPE,
                      "SI lower SGPR spill instructions", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass)
INITIALIZE_PASS_END(SILowerSGPRSpillsLegacy, DEBUG_TYPE,
                    "SI lower SGPR spill instructions", false, false)

// Externally visible handle other code uses to refer to this pass by ID.
char &llvm::SILowerSGPRSpillsLegacyID = SILowerSGPRSpillsLegacy::ID;
101 
102 static bool isLiveIntoMBB(MCRegister Reg, MachineBasicBlock &MBB,
103                           const TargetRegisterInfo *TRI) {
104   for (MCRegAliasIterator R(Reg, TRI, true); R.isValid(); ++R) {
105     if (MBB.isLiveIn(*R)) {
106       return true;
107     }
108   }
109   return false;
110 }
111 
/// Insert spill code for the callee-saved registers used in the function.
/// Updates \p Indexes and \p LIS (when non-null) for every inserted spill.
static void insertCSRSaves(MachineBasicBlock &SaveBlock,
                           ArrayRef<CalleeSavedInfo> CSI, SlotIndexes *Indexes,
                           LiveIntervals *LIS) {
  MachineFunction &MF = *SaveBlock.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *RI = ST.getRegisterInfo();

  MachineBasicBlock::iterator I = SaveBlock.begin();
  // Give target frame lowering the first chance to emit the spills; fall
  // back to generic storeRegToStackSlot spills if it declines.
  if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) {
    for (const CalleeSavedInfo &CS : CSI) {
      // Insert the spill to the stack frame.
      MCRegister Reg = CS.getReg();

      MachineInstrSpan MIS(I, &SaveBlock);
      // The return-address register is spilled as i64; all other CSRs here
      // are spilled as i32.
      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(
          Reg, Reg == RI->getReturnAddressReg(MF) ? MVT::i64 : MVT::i32);

      // If this value was already livein, we probably have a direct use of the
      // incoming register value, so don't kill at the spill point. This happens
      // since we pass some special inputs (workgroup IDs) in the callee saved
      // range.
      const bool IsLiveIn = isLiveIntoMBB(Reg, SaveBlock, TRI);
      TII.storeRegToStackSlot(SaveBlock, I, Reg, !IsLiveIn, CS.getFrameIdx(),
                              RC, TRI, Register());

      if (Indexes) {
        // Each spill is expected to expand to exactly one instruction.
        assert(std::distance(MIS.begin(), I) == 1);
        MachineInstr &Inst = *std::prev(I);
        Indexes->insertMachineInstrInMaps(Inst);
      }

      if (LIS)
        LIS->removeAllRegUnitsForPhysReg(Reg);
    }
  } else {
    // TFI doesn't update Indexes and LIS, so we have to do it separately.
    if (Indexes)
      Indexes->repairIndexesInRange(&SaveBlock, SaveBlock.begin(), I);

    if (LIS)
      for (const CalleeSavedInfo &CS : CSI)
        LIS->removeAllRegUnitsForPhysReg(CS.getReg());
  }
}
160 
/// Insert restore code for the callee-saved registers used in the function.
/// Updates \p Indexes and \p LIS (when non-null) for every inserted restore.
static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
                              MutableArrayRef<CalleeSavedInfo> CSI,
                              SlotIndexes *Indexes, LiveIntervals *LIS) {
  MachineFunction &MF = *RestoreBlock.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  // Restore all registers immediately before the return and any
  // terminators that precede it.
  MachineBasicBlock::iterator I = RestoreBlock.getFirstTerminator();
  // Captured before any insertion so the SlotIndexes repair below spans
  // exactly the newly inserted range.
  const MachineBasicBlock::iterator BeforeRestoresI =
      I == RestoreBlock.begin() ? I : std::prev(I);

  // FIXME: Just emit the readlane/writelane directly
  if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) {
    for (const CalleeSavedInfo &CI : reverse(CSI)) {
      // Insert in reverse order.  loadRegFromStackSlot can insert
      // multiple instructions.
      TFI->restoreCalleeSavedRegister(RestoreBlock, I, CI, &TII, TRI);

      if (Indexes) {
        MachineInstr &Inst = *std::prev(I);
        Indexes->insertMachineInstrInMaps(Inst);
      }

      if (LIS)
        LIS->removeAllRegUnitsForPhysReg(CI.getReg());
    }
  } else {
    // TFI doesn't update Indexes and LIS, so we have to do it separately.
    if (Indexes)
      Indexes->repairIndexesInRange(&RestoreBlock, BeforeRestoresI,
                                    RestoreBlock.getFirstTerminator());

    if (LIS)
      for (const CalleeSavedInfo &CS : CSI)
        LIS->removeAllRegUnitsForPhysReg(CS.getReg());
  }
}
201 
202 /// Compute the sets of entry and return blocks for saving and restoring
203 /// callee-saved registers, and placing prolog and epilog code.
204 void SILowerSGPRSpills::calculateSaveRestoreBlocks(MachineFunction &MF) {
205   const MachineFrameInfo &MFI = MF.getFrameInfo();
206 
207   // Even when we do not change any CSR, we still want to insert the
208   // prologue and epilogue of the function.
209   // So set the save points for those.
210 
211   // Use the points found by shrink-wrapping, if any.
212   if (MFI.getSavePoint()) {
213     SaveBlocks.push_back(MFI.getSavePoint());
214     assert(MFI.getRestorePoint() && "Both restore and save must be set");
215     MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
216     // If RestoreBlock does not have any successor and is not a return block
217     // then the end point is unreachable and we do not need to insert any
218     // epilogue.
219     if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock())
220       RestoreBlocks.push_back(RestoreBlock);
221     return;
222   }
223 
224   // Save refs to entry and return blocks.
225   SaveBlocks.push_back(&MF.front());
226   for (MachineBasicBlock &MBB : MF) {
227     if (MBB.isEHFuncletEntry())
228       SaveBlocks.push_back(&MBB);
229     if (MBB.isReturnBlock())
230       RestoreBlocks.push_back(&MBB);
231   }
232 }
233 
234 // TODO: To support shrink wrapping, this would need to copy
235 // PrologEpilogInserter's updateLiveness.
236 static void updateLiveness(MachineFunction &MF, ArrayRef<CalleeSavedInfo> CSI) {
237   MachineBasicBlock &EntryBB = MF.front();
238 
239   for (const CalleeSavedInfo &CSIReg : CSI)
240     EntryBB.addLiveIn(CSIReg.getReg());
241   EntryBB.sortUniqueLiveIns();
242 }
243 
// Insert save/restore code for all callee-saved SGPRs of \p MF, appending the
// stack slots created for them to \p CalleeSavedFIs. Returns true if any CSR
// spill code was inserted.
bool SILowerSGPRSpills::spillCalleeSavedRegs(
    MachineFunction &MF, SmallVectorImpl<int> &CalleeSavedFIs) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIFrameLowering *TFI = ST.getFrameLowering();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  // No RegScavenger instance is created; null is passed through below.
  RegScavenger *RS = nullptr;

  // Determine which of the registers in the callee save list should be saved.
  BitVector SavedRegs;
  TFI->determineCalleeSavesSGPR(MF, SavedRegs, RS);

  // Add the code to save and restore the callee saved registers.
  if (!F.hasFnAttribute(Attribute::Naked)) {
    // FIXME: This is a lie. The CalleeSavedInfo is incomplete, but this is
    // necessary for verifier liveness checks.
    MFI.setCalleeSavedInfoValid(true);

    std::vector<CalleeSavedInfo> CSI;
    const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();

    // Create one stack object per CSR that must be saved (the CSRegs array
    // is null-terminated).
    for (unsigned I = 0; CSRegs[I]; ++I) {
      MCRegister Reg = CSRegs[I];

      if (SavedRegs.test(Reg)) {
        const TargetRegisterClass *RC =
          TRI->getMinimalPhysRegClass(Reg, MVT::i32);
        int JunkFI = MFI.CreateStackObject(TRI->getSpillSize(*RC),
                                           TRI->getSpillAlign(*RC), true);

        CSI.emplace_back(Reg, JunkFI);
        CalleeSavedFIs.push_back(JunkFI);
      }
    }

    if (!CSI.empty()) {
      for (MachineBasicBlock *SaveBlock : SaveBlocks)
        insertCSRSaves(*SaveBlock, CSI, Indexes, LIS);

      // Add live ins to save blocks.
      assert(SaveBlocks.size() == 1 && "shrink wrapping not fully implemented");
      updateLiveness(MF, CSI);

      for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
        insertCSRRestores(*RestoreBlock, CSI, Indexes, LIS);
      return true;
    }
  }

  return false;
}
296 
/// Maintain, for each virtual lane VGPR used by the spills of frame index
/// \p FI, the insertion point (in \p LaneVGPRDomInstr) before which the
/// VGPR's IMPLICIT_DEF must eventually be placed so the def dominates all
/// of its spill writes. \p MBB/\p InsertPt identify the spill just lowered.
void SILowerSGPRSpills::updateLaneVGPRDomInstr(
    int FI, MachineBasicBlock *MBB, MachineBasicBlock::iterator InsertPt,
    DenseMap<Register, MachineBasicBlock::iterator> &LaneVGPRDomInstr) {
  // For the Def of a virtual LaneVPGR to dominate all its uses, we should
  // insert an IMPLICIT_DEF before the dominating spill. Switching to a
  // depth first order doesn't really help since the machine function can be in
  // the unstructured control flow post-SSA. For each virtual register, hence
  // finding the common dominator to get either the dominating spill or a block
  // dominating all spills.
  SIMachineFunctionInfo *FuncInfo =
      MBB->getParent()->getInfo<SIMachineFunctionInfo>();
  ArrayRef<SIRegisterInfo::SpilledReg> VGPRSpills =
      FuncInfo->getSGPRSpillToVirtualVGPRLanes(FI);
  Register PrevLaneVGPR;
  for (auto &Spill : VGPRSpills) {
    // Skip repeated entries for the VGPR just handled; each VGPR only needs
    // to be processed once per call.
    if (PrevLaneVGPR == Spill.VGPR)
      continue;

    PrevLaneVGPR = Spill.VGPR;
    auto I = LaneVGPRDomInstr.find(Spill.VGPR);
    if (Spill.Lane == 0 && I == LaneVGPRDomInstr.end()) {
      // Initially add the spill instruction itself for Insertion point.
      LaneVGPRDomInstr[Spill.VGPR] = InsertPt;
    } else {
      assert(I != LaneVGPRDomInstr.end());
      auto PrevInsertPt = I->second;
      MachineBasicBlock *DomMBB = PrevInsertPt->getParent();
      if (DomMBB == MBB) {
        // The insertion point earlier selected in a predecessor block whose
        // spills are currently being lowered. The earlier InsertPt would be
        // the one just before the block terminator and it should be changed
        // if we insert any new spill in it.
        if (MDT->dominates(&*InsertPt, &*PrevInsertPt))
          I->second = InsertPt;

        continue;
      }

      // Find the common dominator block between PrevInsertPt and the
      // current spill.
      DomMBB = MDT->findNearestCommonDominator(DomMBB, MBB);
      if (DomMBB == MBB)
        I->second = InsertPt;
      else if (DomMBB != PrevInsertPt->getParent())
        // The common dominator is a third block: insert before its
        // terminator so the def still dominates both spill points.
        I->second = &(*DomMBB->getFirstTerminator());
    }
  }
}
345 
346 void SILowerSGPRSpills::determineRegsForWWMAllocation(MachineFunction &MF,
347                                                       BitVector &RegMask) {
348   // Determine an optimal number of VGPRs for WWM allocation. The complement
349   // list will be available for allocating other VGPR virtual registers.
350   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
351   MachineRegisterInfo &MRI = MF.getRegInfo();
352   BitVector ReservedRegs = TRI->getReservedRegs(MF);
353   BitVector NonWwmAllocMask(TRI->getNumRegs());
354 
355   // FIXME: MaxNumVGPRsForWwmAllocation might need to be adjusted in the future
356   // to have a balanced allocation between WWM values and per-thread vector
357   // register operands.
358   unsigned NumRegs = MaxNumVGPRsForWwmAllocation;
359   NumRegs =
360       std::min(static_cast<unsigned>(MFI->getSGPRSpillVGPRs().size()), NumRegs);
361 
362   auto [MaxNumVGPRs, MaxNumAGPRs] = TRI->getMaxNumVectorRegs(MF);
363   // Try to use the highest available registers for now. Later after
364   // vgpr-regalloc, they can be shifted to the lowest range.
365   unsigned I = 0;
366   for (unsigned Reg = AMDGPU::VGPR0 + MaxNumVGPRs - 1;
367        (I < NumRegs) && (Reg >= AMDGPU::VGPR0); --Reg) {
368     if (!ReservedRegs.test(Reg) &&
369         !MRI.isPhysRegUsed(Reg, /*SkipRegMaskTest=*/true)) {
370       TRI->markSuperRegs(RegMask, Reg);
371       ++I;
372     }
373   }
374 
375   if (I != NumRegs) {
376     // Reserve an arbitrary register and report the error.
377     TRI->markSuperRegs(RegMask, AMDGPU::VGPR0);
378     MF.getFunction().getContext().emitError(
379         "can't find enough VGPRs for wwm-regalloc");
380   }
381 }
382 
383 bool SILowerSGPRSpillsLegacy::runOnMachineFunction(MachineFunction &MF) {
384   auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
385   LiveIntervals *LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
386   auto *SIWrapper = getAnalysisIfAvailable<SlotIndexesWrapperPass>();
387   SlotIndexes *Indexes = SIWrapper ? &SIWrapper->getSI() : nullptr;
388   MachineDominatorTree *MDT =
389       &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
390   return SILowerSGPRSpills(LIS, Indexes, MDT).run(MF);
391 }
392 
// Main driver: insert CSR SGPR spill/restore code, lower all SGPR spill
// pseudo-instructions into VGPR lanes, and configure the register set for
// WWM allocation. Returns true if the function was modified.
bool SILowerSGPRSpills::run(MachineFunction &MF) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  TII = ST.getInstrInfo();
  TRI = &TII->getRegisterInfo();

  assert(SaveBlocks.empty() && RestoreBlocks.empty());

  // First, expose any CSR SGPR spills. This is mostly the same as what PEI
  // does, but somewhat simpler.
  calculateSaveRestoreBlocks(MF);
  SmallVector<int> CalleeSavedFIs;
  bool HasCSRs = spillCalleeSavedRegs(MF, CalleeSavedFIs);

  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  // Nothing to lower: no stack objects and no CSR spills were inserted.
  if (!MFI.hasStackObjects() && !HasCSRs) {
    SaveBlocks.clear();
    RestoreBlocks.clear();
    return false;
  }

  bool MadeChange = false;
  bool SpilledToVirtVGPRLanes = false;

  // TODO: CSR VGPRs will never be spilled to AGPRs. These can probably be
  // handled as SpilledToReg in regular PrologEpilogInserter.
  const bool HasSGPRSpillToVGPR = TRI->spillSGPRToVGPR() &&
                                  (HasCSRs || FuncInfo->hasSpilledSGPRs());
  if (HasSGPRSpillToVGPR) {
    // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
    // are spilled to VGPRs, in which case we can eliminate the stack usage.
    //
    // This operates under the assumption that only other SGPR spills are users
    // of the frame index.

    // To track the spill frame indices handled in this pass.
    BitVector SpillFIs(MFI.getObjectIndexEnd(), false);

    // To track the IMPLICIT_DEF insertion point for the lane vgprs.
    DenseMap<Register, MachineBasicBlock::iterator> LaneVGPRDomInstr;

    for (MachineBasicBlock &MBB : MF) {
      for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
        if (!TII->isSGPRSpill(MI))
          continue;

        // A spill of an undef value has no effect; just delete it.
        if (MI.getOperand(0).isUndef()) {
          if (Indexes)
            Indexes->removeMachineInstrFromMaps(MI);
          MI.eraseFromParent();
          continue;
        }

        int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
        assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);

        bool IsCalleeSaveSGPRSpill = llvm::is_contained(CalleeSavedFIs, FI);
        if (IsCalleeSaveSGPRSpill) {
          // Spill callee-saved SGPRs into physical VGPR lanes.

          // TODO: This is to ensure the CFIs are static for efficient frame
          // unwinding in the debugger. Spilling them into virtual VGPR lanes
          // involve regalloc to allocate the physical VGPRs and that might
          // cause intermediate spill/split of such liveranges for successful
          // allocation. This would result in broken CFI encoding unless the
          // regalloc aware CFI generation to insert new CFIs along with the
          // intermediate spills is implemented. There is no such support
          // currently exist in the LLVM compiler.
          if (FuncInfo->allocateSGPRSpillToVGPRLane(
                  MF, FI, /*SpillToPhysVGPRLane=*/true)) {
            bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(
                MI, FI, nullptr, Indexes, LIS, true);
            if (!Spilled)
              llvm_unreachable(
                  "failed to spill SGPR to physical VGPR lane when allocated");
          }
        } else {
          // Non-CSR spills are lowered to virtual VGPR lanes; the lane VGPRs
          // are flagged WWM_REG below.
          MachineInstrSpan MIS(&MI, &MBB);
          if (FuncInfo->allocateSGPRSpillToVGPRLane(MF, FI)) {
            bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(
                MI, FI, nullptr, Indexes, LIS);
            if (!Spilled)
              llvm_unreachable(
                  "failed to spill SGPR to virtual VGPR lane when allocated");
            SpillFIs.set(FI);
            updateLaneVGPRDomInstr(FI, &MBB, MIS.begin(), LaneVGPRDomInstr);
            SpilledToVirtVGPRLanes = true;
          }
        }
      }
    }

    for (auto Reg : FuncInfo->getSGPRSpillVGPRs()) {
      auto InsertPt = LaneVGPRDomInstr[Reg];
      // Insert the IMPLICIT_DEF at the identified points.
      MachineBasicBlock &Block = *InsertPt->getParent();
      DebugLoc DL = Block.findDebugLoc(InsertPt);
      auto MIB =
          BuildMI(Block, *InsertPt, DL, TII->get(AMDGPU::IMPLICIT_DEF), Reg);

      // Add WWM flag to the virtual register.
      FuncInfo->setFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG);

      // Set SGPR_SPILL asm printer flag
      MIB->setAsmPrinterFlag(AMDGPU::SGPR_SPILL);
      if (LIS) {
        LIS->InsertMachineInstrInMaps(*MIB);
        LIS->createAndComputeVirtRegInterval(Reg);
      }
    }

    // Determine the registers for WWM allocation and also compute the register
    // mask for non-wwm VGPR allocation.
    if (FuncInfo->getSGPRSpillVGPRs().size()) {
      BitVector WwmRegMask(TRI->getNumRegs());

      determineRegsForWWMAllocation(MF, WwmRegMask);

      BitVector NonWwmRegMask(WwmRegMask);
      NonWwmRegMask.flip().clearBitsNotInMask(TRI->getAllVGPRRegMask());

      // The complement set will be the registers for non-wwm (per-thread) vgpr
      // allocation.
      FuncInfo->updateNonWWMRegMask(NonWwmRegMask);
    }

    for (MachineBasicBlock &MBB : MF) {
      // FIXME: The dead frame indices are replaced with a null register from
      // the debug value instructions. We should instead, update it with the
      // correct register value. But not sure the register value alone is
      // adequate to lower the DIExpression. It should be worked out later.
      for (MachineInstr &MI : MBB) {
        if (MI.isDebugValue()) {
          // DBG_VALUE_LIST carries its location operands starting at index 2.
          uint32_t StackOperandIdx = MI.isDebugValueList() ? 2 : 0;
          if (MI.getOperand(StackOperandIdx).isFI() &&
              !MFI.isFixedObjectIndex(
                  MI.getOperand(StackOperandIdx).getIndex()) &&
              SpillFIs[MI.getOperand(StackOperandIdx).getIndex()]) {
            MI.getOperand(StackOperandIdx)
                .ChangeToRegister(Register(), false /*isDef*/);
          }
        }
      }
    }

    // All those frame indices which are dead by now should be removed from the
    // function frame. Otherwise, there is a side effect such as re-mapping of
    // free frame index ids by the later pass(es) like "stack slot coloring"
    // which in turn could mess-up with the book keeping of "frame index to VGPR
    // lane".
    FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ false);

    MadeChange = true;
  }

  if (SpilledToVirtVGPRLanes) {
    const TargetRegisterClass *RC = TRI->getWaveMaskRegClass();
    // Shift back the reserved SGPR for EXEC copy into the lowest range.
    // This SGPR is reserved to handle the whole-wave spill/copy operations
    // that might get inserted during vgpr regalloc.
    Register UnusedLowSGPR = TRI->findUnusedRegister(MRI, RC, MF);
    if (UnusedLowSGPR && TRI->getHWRegIndex(UnusedLowSGPR) <
                             TRI->getHWRegIndex(FuncInfo->getSGPRForEXECCopy()))
      FuncInfo->setSGPRForEXECCopy(UnusedLowSGPR);
  } else {
    // No SGPR spills to virtual VGPR lanes and hence there won't be any WWM
    // spills/copies. Reset the SGPR reserved for EXEC copy.
    FuncInfo->setSGPRForEXECCopy(AMDGPU::NoRegister);
  }

  SaveBlocks.clear();
  RestoreBlocks.clear();

  return MadeChange;
}
570 
571 PreservedAnalyses
572 SILowerSGPRSpillsPass::run(MachineFunction &MF,
573                            MachineFunctionAnalysisManager &MFAM) {
574   MFPropsModifier _(*this, MF);
575   auto *LIS = MFAM.getCachedResult<LiveIntervalsAnalysis>(MF);
576   auto *Indexes = MFAM.getCachedResult<SlotIndexesAnalysis>(MF);
577   MachineDominatorTree *MDT = &MFAM.getResult<MachineDominatorTreeAnalysis>(MF);
578   SILowerSGPRSpills(LIS, Indexes, MDT).run(MF);
579   return PreservedAnalyses::all();
580 }
581