xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp (revision a7dea1671b87c07d2d266f836bfa8b58efc7c134)
1 //===-- SILowerSGPRSpills.cpp ---------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Handle SGPR spills. This pass takes the place of PrologEpilogInserter for all
10 // SGPR spills, so must insert CSR SGPR spills as well as expand them.
11 //
12 // This pass must never create new SGPR virtual registers.
13 //
14 // FIXME: Must stop RegScavenger spills in later passes.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "AMDGPU.h"
19 #include "AMDGPUSubtarget.h"
20 #include "SIInstrInfo.h"
21 #include "SIMachineFunctionInfo.h"
22 #include "llvm/CodeGen/LiveIntervals.h"
23 #include "llvm/CodeGen/MachineBasicBlock.h"
24 #include "llvm/CodeGen/MachineFunction.h"
25 #include "llvm/CodeGen/MachineFunctionPass.h"
26 #include "llvm/CodeGen/MachineInstr.h"
27 #include "llvm/CodeGen/MachineInstrBuilder.h"
28 #include "llvm/CodeGen/MachineOperand.h"
29 #include "llvm/CodeGen/VirtRegMap.h"
30 #include "llvm/Target/TargetMachine.h"
31 
32 using namespace llvm;
33 
34 #define DEBUG_TYPE "si-lower-sgpr-spills"
35 
36 using MBBVector = SmallVector<MachineBasicBlock *, 4>;
37 
38 namespace {
39 
40 static cl::opt<bool> EnableSpillVGPRToAGPR(
41   "amdgpu-spill-vgpr-to-agpr",
42   cl::desc("Enable spilling VGPRs to AGPRs"),
43   cl::ReallyHidden,
44   cl::init(true));
45 
46 class SILowerSGPRSpills : public MachineFunctionPass {
47 private:
48   const SIRegisterInfo *TRI = nullptr;
49   const SIInstrInfo *TII = nullptr;
50   VirtRegMap *VRM = nullptr;
51   LiveIntervals *LIS = nullptr;
52 
53   // Save and Restore blocks of the current function. Typically there is a
54   // single save block, unless Windows EH funclets are involved.
55   MBBVector SaveBlocks;
56   MBBVector RestoreBlocks;
57 
58 public:
59   static char ID;
60 
61   SILowerSGPRSpills() : MachineFunctionPass(ID) {}
62 
63   void calculateSaveRestoreBlocks(MachineFunction &MF);
64   bool spillCalleeSavedRegs(MachineFunction &MF);
65 
66   bool runOnMachineFunction(MachineFunction &MF) override;
67 
68   void getAnalysisUsage(AnalysisUsage &AU) const override {
69     AU.setPreservesAll();
70     MachineFunctionPass::getAnalysisUsage(AU);
71   }
72 };
73 
74 } // end anonymous namespace
75 
76 char SILowerSGPRSpills::ID = 0;
77 
78 INITIALIZE_PASS_BEGIN(SILowerSGPRSpills, DEBUG_TYPE,
79                       "SI lower SGPR spill instructions", false, false)
80 INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
81 INITIALIZE_PASS_END(SILowerSGPRSpills, DEBUG_TYPE,
82                     "SI lower SGPR spill instructions", false, false)
83 
84 char &llvm::SILowerSGPRSpillsID = SILowerSGPRSpills::ID;
85 
86 /// Insert restore code for the callee-saved registers used in the function.
87 static void insertCSRSaves(MachineBasicBlock &SaveBlock,
88                            ArrayRef<CalleeSavedInfo> CSI,
89                            LiveIntervals *LIS) {
90   MachineFunction &MF = *SaveBlock.getParent();
91   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
92   const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
93   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
94 
95   MachineBasicBlock::iterator I = SaveBlock.begin();
96   if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) {
97     for (const CalleeSavedInfo &CS : CSI) {
98       // Insert the spill to the stack frame.
99       unsigned Reg = CS.getReg();
100 
101       MachineInstrSpan MIS(I, &SaveBlock);
102       const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
103 
104       TII.storeRegToStackSlot(SaveBlock, I, Reg, true, CS.getFrameIdx(), RC,
105                               TRI);
106 
107       if (LIS) {
108         assert(std::distance(MIS.begin(), I) == 1);
109         MachineInstr &Inst = *std::prev(I);
110 
111         LIS->InsertMachineInstrInMaps(Inst);
112         LIS->removeAllRegUnitsForPhysReg(Reg);
113       }
114     }
115   }
116 }
117 
118 /// Insert restore code for the callee-saved registers used in the function.
119 static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
120                               std::vector<CalleeSavedInfo> &CSI,
121                               LiveIntervals *LIS) {
122   MachineFunction &MF = *RestoreBlock.getParent();
123   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
124   const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
125   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
126 
127   // Restore all registers immediately before the return and any
128   // terminators that precede it.
129   MachineBasicBlock::iterator I = RestoreBlock.getFirstTerminator();
130 
131   // FIXME: Just emit the readlane/writelane directly
132   if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) {
133     for (const CalleeSavedInfo &CI : reverse(CSI)) {
134       unsigned Reg = CI.getReg();
135       const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
136 
137       TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI);
138       assert(I != RestoreBlock.begin() &&
139              "loadRegFromStackSlot didn't insert any code!");
140       // Insert in reverse order.  loadRegFromStackSlot can insert
141       // multiple instructions.
142 
143       if (LIS) {
144         MachineInstr &Inst = *std::prev(I);
145         LIS->InsertMachineInstrInMaps(Inst);
146         LIS->removeAllRegUnitsForPhysReg(Reg);
147       }
148     }
149   }
150 }
151 
152 /// Compute the sets of entry and return blocks for saving and restoring
153 /// callee-saved registers, and placing prolog and epilog code.
154 void SILowerSGPRSpills::calculateSaveRestoreBlocks(MachineFunction &MF) {
155   const MachineFrameInfo &MFI = MF.getFrameInfo();
156 
157   // Even when we do not change any CSR, we still want to insert the
158   // prologue and epilogue of the function.
159   // So set the save points for those.
160 
161   // Use the points found by shrink-wrapping, if any.
162   if (MFI.getSavePoint()) {
163     SaveBlocks.push_back(MFI.getSavePoint());
164     assert(MFI.getRestorePoint() && "Both restore and save must be set");
165     MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
166     // If RestoreBlock does not have any successor and is not a return block
167     // then the end point is unreachable and we do not need to insert any
168     // epilogue.
169     if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock())
170       RestoreBlocks.push_back(RestoreBlock);
171     return;
172   }
173 
174   // Save refs to entry and return blocks.
175   SaveBlocks.push_back(&MF.front());
176   for (MachineBasicBlock &MBB : MF) {
177     if (MBB.isEHFuncletEntry())
178       SaveBlocks.push_back(&MBB);
179     if (MBB.isReturnBlock())
180       RestoreBlocks.push_back(&MBB);
181   }
182 }
183 
184 bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) {
185   MachineRegisterInfo &MRI = MF.getRegInfo();
186   const Function &F = MF.getFunction();
187   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
188   const SIFrameLowering *TFI = ST.getFrameLowering();
189   MachineFrameInfo &MFI = MF.getFrameInfo();
190   RegScavenger *RS = nullptr;
191 
192   // Determine which of the registers in the callee save list should be saved.
193   BitVector SavedRegs;
194   TFI->determineCalleeSavesSGPR(MF, SavedRegs, RS);
195 
196   // Add the code to save and restore the callee saved registers.
197   if (!F.hasFnAttribute(Attribute::Naked)) {
198     // FIXME: This is a lie. The CalleeSavedInfo is incomplete, but this is
199     // necessary for verifier liveness checks.
200     MFI.setCalleeSavedInfoValid(true);
201 
202     std::vector<CalleeSavedInfo> CSI;
203     const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
204 
205     for (unsigned I = 0; CSRegs[I]; ++I) {
206       unsigned Reg = CSRegs[I];
207       if (SavedRegs.test(Reg)) {
208         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
209         int JunkFI = MFI.CreateStackObject(TRI->getSpillSize(*RC),
210                                            TRI->getSpillAlignment(*RC),
211                                            true);
212 
213         CSI.push_back(CalleeSavedInfo(Reg, JunkFI));
214       }
215     }
216 
217     if (!CSI.empty()) {
218       for (MachineBasicBlock *SaveBlock : SaveBlocks)
219         insertCSRSaves(*SaveBlock, CSI, LIS);
220 
221       for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
222         insertCSRRestores(*RestoreBlock, CSI, LIS);
223       return true;
224     }
225   }
226 
227   return false;
228 }
229 
230 bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
231   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
232   TII = ST.getInstrInfo();
233   TRI = &TII->getRegisterInfo();
234 
235   VRM = getAnalysisIfAvailable<VirtRegMap>();
236 
237   assert(SaveBlocks.empty() && RestoreBlocks.empty());
238 
239   // First, expose any CSR SGPR spills. This is mostly the same as what PEI
240   // does, but somewhat simpler.
241   calculateSaveRestoreBlocks(MF);
242   bool HasCSRs = spillCalleeSavedRegs(MF);
243 
244   MachineFrameInfo &MFI = MF.getFrameInfo();
245   if (!MFI.hasStackObjects() && !HasCSRs) {
246     SaveBlocks.clear();
247     RestoreBlocks.clear();
248     return false;
249   }
250 
251   MachineRegisterInfo &MRI = MF.getRegInfo();
252   SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
253   const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs()
254     && EnableSpillVGPRToAGPR;
255 
256   bool MadeChange = false;
257 
258   const bool SpillToAGPR = EnableSpillVGPRToAGPR && ST.hasMAIInsts();
259 
260   // TODO: CSR VGPRs will never be spilled to AGPRs. These can probably be
261   // handled as SpilledToReg in regular PrologEpilogInserter.
262   if ((TRI->spillSGPRToVGPR() && (HasCSRs || FuncInfo->hasSpilledSGPRs())) ||
263       SpillVGPRToAGPR) {
264     // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
265     // are spilled to VGPRs, in which case we can eliminate the stack usage.
266     //
267     // This operates under the assumption that only other SGPR spills are users
268     // of the frame index.
269     for (MachineBasicBlock &MBB : MF) {
270       MachineBasicBlock::iterator Next;
271       for (auto I = MBB.begin(), E = MBB.end(); I != E; I = Next) {
272         MachineInstr &MI = *I;
273         Next = std::next(I);
274 
275         if (SpillToAGPR && TII->isVGPRSpill(MI)) {
276           // Try to eliminate stack used by VGPR spills before frame
277           // finalization.
278           unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
279                                                      AMDGPU::OpName::vaddr);
280           int FI = MI.getOperand(FIOp).getIndex();
281           Register VReg =
282               TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
283           if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI,
284                                                 TRI->isAGPR(MRI, VReg))) {
285             TRI->eliminateFrameIndex(MI, 0, FIOp, nullptr);
286             continue;
287           }
288         }
289 
290         if (!TII->isSGPRSpill(MI))
291           continue;
292 
293         int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
294         assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
295         if (FuncInfo->allocateSGPRSpillToVGPR(MF, FI)) {
296           bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(MI, FI, nullptr);
297           (void)Spilled;
298           assert(Spilled && "failed to spill SGPR to VGPR when allocated");
299         }
300       }
301     }
302 
303     for (MachineBasicBlock &MBB : MF) {
304       for (auto SSpill : FuncInfo->getSGPRSpillVGPRs())
305         MBB.addLiveIn(SSpill.VGPR);
306 
307       for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs())
308         MBB.addLiveIn(Reg);
309 
310       for (MCPhysReg Reg : FuncInfo->getAGPRSpillVGPRs())
311         MBB.addLiveIn(Reg);
312 
313       MBB.sortUniqueLiveIns();
314     }
315 
316     MadeChange = true;
317   }
318 
319   SaveBlocks.clear();
320   RestoreBlocks.clear();
321 
322   return MadeChange;
323 }
324