xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp (revision b4e38a41f584ad4391c04b8cfec81f46176b18b0)
//===-- SILowerSGPRSpills.cpp ---------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Handle SGPR spills. This pass takes the place of PrologEpilogInserter for all
10 // SGPR spills, so must insert CSR SGPR spills as well as expand them.
11 //
12 // This pass must never create new SGPR virtual registers.
13 //
14 // FIXME: Must stop RegScavenger spills in later passes.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "AMDGPU.h"
19 #include "AMDGPUSubtarget.h"
20 #include "SIInstrInfo.h"
21 #include "SIMachineFunctionInfo.h"
22 #include "llvm/CodeGen/LiveIntervals.h"
23 #include "llvm/CodeGen/MachineBasicBlock.h"
24 #include "llvm/CodeGen/MachineFunction.h"
25 #include "llvm/CodeGen/MachineFunctionPass.h"
26 #include "llvm/CodeGen/MachineInstr.h"
27 #include "llvm/CodeGen/MachineInstrBuilder.h"
28 #include "llvm/CodeGen/MachineOperand.h"
29 #include "llvm/CodeGen/VirtRegMap.h"
30 #include "llvm/InitializePasses.h"
31 #include "llvm/Target/TargetMachine.h"
32 
33 using namespace llvm;
34 
35 #define DEBUG_TYPE "si-lower-sgpr-spills"
36 
37 using MBBVector = SmallVector<MachineBasicBlock *, 4>;
38 
39 namespace {
40 
41 static cl::opt<bool> EnableSpillVGPRToAGPR(
42   "amdgpu-spill-vgpr-to-agpr",
43   cl::desc("Enable spilling VGPRs to AGPRs"),
44   cl::ReallyHidden,
45   cl::init(true));
46 
47 class SILowerSGPRSpills : public MachineFunctionPass {
48 private:
49   const SIRegisterInfo *TRI = nullptr;
50   const SIInstrInfo *TII = nullptr;
51   VirtRegMap *VRM = nullptr;
52   LiveIntervals *LIS = nullptr;
53 
54   // Save and Restore blocks of the current function. Typically there is a
55   // single save block, unless Windows EH funclets are involved.
56   MBBVector SaveBlocks;
57   MBBVector RestoreBlocks;
58 
59 public:
60   static char ID;
61 
62   SILowerSGPRSpills() : MachineFunctionPass(ID) {}
63 
64   void calculateSaveRestoreBlocks(MachineFunction &MF);
65   bool spillCalleeSavedRegs(MachineFunction &MF);
66 
67   bool runOnMachineFunction(MachineFunction &MF) override;
68 
69   void getAnalysisUsage(AnalysisUsage &AU) const override {
70     AU.setPreservesAll();
71     MachineFunctionPass::getAnalysisUsage(AU);
72   }
73 };
74 
75 } // end anonymous namespace
76 
// Pass identification token; it is handed to the MachineFunctionPass
// constructor and exported below as llvm::SILowerSGPRSpillsID.
char SILowerSGPRSpills::ID = 0;

// Register the pass under the DEBUG_TYPE name ("si-lower-sgpr-spills") and
// declare VirtRegMap as a pass dependency.
INITIALIZE_PASS_BEGIN(SILowerSGPRSpills, DEBUG_TYPE,
                      "SI lower SGPR spill instructions", false, false)
INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
INITIALIZE_PASS_END(SILowerSGPRSpills, DEBUG_TYPE,
                    "SI lower SGPR spill instructions", false, false)

// Externally visible reference to the pass ID.
char &llvm::SILowerSGPRSpillsID = SILowerSGPRSpills::ID;
86 
87 /// Insert restore code for the callee-saved registers used in the function.
88 static void insertCSRSaves(MachineBasicBlock &SaveBlock,
89                            ArrayRef<CalleeSavedInfo> CSI,
90                            LiveIntervals *LIS) {
91   MachineFunction &MF = *SaveBlock.getParent();
92   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
93   const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
94   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
95 
96   MachineBasicBlock::iterator I = SaveBlock.begin();
97   if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) {
98     for (const CalleeSavedInfo &CS : CSI) {
99       // Insert the spill to the stack frame.
100       unsigned Reg = CS.getReg();
101 
102       MachineInstrSpan MIS(I, &SaveBlock);
103       const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
104 
105       TII.storeRegToStackSlot(SaveBlock, I, Reg, true, CS.getFrameIdx(), RC,
106                               TRI);
107 
108       if (LIS) {
109         assert(std::distance(MIS.begin(), I) == 1);
110         MachineInstr &Inst = *std::prev(I);
111 
112         LIS->InsertMachineInstrInMaps(Inst);
113         LIS->removeAllRegUnitsForPhysReg(Reg);
114       }
115     }
116   }
117 }
118 
119 /// Insert restore code for the callee-saved registers used in the function.
120 static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
121                               std::vector<CalleeSavedInfo> &CSI,
122                               LiveIntervals *LIS) {
123   MachineFunction &MF = *RestoreBlock.getParent();
124   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
125   const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
126   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
127 
128   // Restore all registers immediately before the return and any
129   // terminators that precede it.
130   MachineBasicBlock::iterator I = RestoreBlock.getFirstTerminator();
131 
132   // FIXME: Just emit the readlane/writelane directly
133   if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) {
134     for (const CalleeSavedInfo &CI : reverse(CSI)) {
135       unsigned Reg = CI.getReg();
136       const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
137 
138       TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI);
139       assert(I != RestoreBlock.begin() &&
140              "loadRegFromStackSlot didn't insert any code!");
141       // Insert in reverse order.  loadRegFromStackSlot can insert
142       // multiple instructions.
143 
144       if (LIS) {
145         MachineInstr &Inst = *std::prev(I);
146         LIS->InsertMachineInstrInMaps(Inst);
147         LIS->removeAllRegUnitsForPhysReg(Reg);
148       }
149     }
150   }
151 }
152 
153 /// Compute the sets of entry and return blocks for saving and restoring
154 /// callee-saved registers, and placing prolog and epilog code.
155 void SILowerSGPRSpills::calculateSaveRestoreBlocks(MachineFunction &MF) {
156   const MachineFrameInfo &MFI = MF.getFrameInfo();
157 
158   // Even when we do not change any CSR, we still want to insert the
159   // prologue and epilogue of the function.
160   // So set the save points for those.
161 
162   // Use the points found by shrink-wrapping, if any.
163   if (MFI.getSavePoint()) {
164     SaveBlocks.push_back(MFI.getSavePoint());
165     assert(MFI.getRestorePoint() && "Both restore and save must be set");
166     MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
167     // If RestoreBlock does not have any successor and is not a return block
168     // then the end point is unreachable and we do not need to insert any
169     // epilogue.
170     if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock())
171       RestoreBlocks.push_back(RestoreBlock);
172     return;
173   }
174 
175   // Save refs to entry and return blocks.
176   SaveBlocks.push_back(&MF.front());
177   for (MachineBasicBlock &MBB : MF) {
178     if (MBB.isEHFuncletEntry())
179       SaveBlocks.push_back(&MBB);
180     if (MBB.isReturnBlock())
181       RestoreBlocks.push_back(&MBB);
182   }
183 }
184 
185 bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) {
186   MachineRegisterInfo &MRI = MF.getRegInfo();
187   const Function &F = MF.getFunction();
188   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
189   const SIFrameLowering *TFI = ST.getFrameLowering();
190   MachineFrameInfo &MFI = MF.getFrameInfo();
191   RegScavenger *RS = nullptr;
192 
193   // Determine which of the registers in the callee save list should be saved.
194   BitVector SavedRegs;
195   TFI->determineCalleeSavesSGPR(MF, SavedRegs, RS);
196 
197   // Add the code to save and restore the callee saved registers.
198   if (!F.hasFnAttribute(Attribute::Naked)) {
199     // FIXME: This is a lie. The CalleeSavedInfo is incomplete, but this is
200     // necessary for verifier liveness checks.
201     MFI.setCalleeSavedInfoValid(true);
202 
203     std::vector<CalleeSavedInfo> CSI;
204     const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
205 
206     for (unsigned I = 0; CSRegs[I]; ++I) {
207       unsigned Reg = CSRegs[I];
208       if (SavedRegs.test(Reg)) {
209         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
210         int JunkFI = MFI.CreateStackObject(TRI->getSpillSize(*RC),
211                                            TRI->getSpillAlignment(*RC),
212                                            true);
213 
214         CSI.push_back(CalleeSavedInfo(Reg, JunkFI));
215       }
216     }
217 
218     if (!CSI.empty()) {
219       for (MachineBasicBlock *SaveBlock : SaveBlocks)
220         insertCSRSaves(*SaveBlock, CSI, LIS);
221 
222       for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
223         insertCSRRestores(*RestoreBlock, CSI, LIS);
224       return true;
225     }
226   }
227 
228   return false;
229 }
230 
231 bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
232   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
233   TII = ST.getInstrInfo();
234   TRI = &TII->getRegisterInfo();
235 
236   VRM = getAnalysisIfAvailable<VirtRegMap>();
237 
238   assert(SaveBlocks.empty() && RestoreBlocks.empty());
239 
240   // First, expose any CSR SGPR spills. This is mostly the same as what PEI
241   // does, but somewhat simpler.
242   calculateSaveRestoreBlocks(MF);
243   bool HasCSRs = spillCalleeSavedRegs(MF);
244 
245   MachineFrameInfo &MFI = MF.getFrameInfo();
246   if (!MFI.hasStackObjects() && !HasCSRs) {
247     SaveBlocks.clear();
248     RestoreBlocks.clear();
249     return false;
250   }
251 
252   MachineRegisterInfo &MRI = MF.getRegInfo();
253   SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
254   const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs()
255     && EnableSpillVGPRToAGPR;
256 
257   bool MadeChange = false;
258 
259   const bool SpillToAGPR = EnableSpillVGPRToAGPR && ST.hasMAIInsts();
260 
261   // TODO: CSR VGPRs will never be spilled to AGPRs. These can probably be
262   // handled as SpilledToReg in regular PrologEpilogInserter.
263   if ((TRI->spillSGPRToVGPR() && (HasCSRs || FuncInfo->hasSpilledSGPRs())) ||
264       SpillVGPRToAGPR) {
265     // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
266     // are spilled to VGPRs, in which case we can eliminate the stack usage.
267     //
268     // This operates under the assumption that only other SGPR spills are users
269     // of the frame index.
270     for (MachineBasicBlock &MBB : MF) {
271       MachineBasicBlock::iterator Next;
272       for (auto I = MBB.begin(), E = MBB.end(); I != E; I = Next) {
273         MachineInstr &MI = *I;
274         Next = std::next(I);
275 
276         if (SpillToAGPR && TII->isVGPRSpill(MI)) {
277           // Try to eliminate stack used by VGPR spills before frame
278           // finalization.
279           unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
280                                                      AMDGPU::OpName::vaddr);
281           int FI = MI.getOperand(FIOp).getIndex();
282           Register VReg =
283               TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
284           if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI,
285                                                 TRI->isAGPR(MRI, VReg))) {
286             TRI->eliminateFrameIndex(MI, 0, FIOp, nullptr);
287             continue;
288           }
289         }
290 
291         if (!TII->isSGPRSpill(MI))
292           continue;
293 
294         int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
295         assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
296         if (FuncInfo->allocateSGPRSpillToVGPR(MF, FI)) {
297           bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(MI, FI, nullptr);
298           (void)Spilled;
299           assert(Spilled && "failed to spill SGPR to VGPR when allocated");
300         }
301       }
302     }
303 
304     for (MachineBasicBlock &MBB : MF) {
305       for (auto SSpill : FuncInfo->getSGPRSpillVGPRs())
306         MBB.addLiveIn(SSpill.VGPR);
307 
308       for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs())
309         MBB.addLiveIn(Reg);
310 
311       for (MCPhysReg Reg : FuncInfo->getAGPRSpillVGPRs())
312         MBB.addLiveIn(Reg);
313 
314       MBB.sortUniqueLiveIns();
315     }
316 
317     MadeChange = true;
318   }
319 
320   SaveBlocks.clear();
321   RestoreBlocks.clear();
322 
323   return MadeChange;
324 }
325