1 //===-- SILowerSGPRSPills.cpp ---------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Handle SGPR spills. This pass takes the place of PrologEpilogInserter for all 10 // SGPR spills, so must insert CSR SGPR spills as well as expand them. 11 // 12 // This pass must never create new SGPR virtual registers. 13 // 14 // FIXME: Must stop RegScavenger spills in later passes. 15 // 16 //===----------------------------------------------------------------------===// 17 18 #include "AMDGPU.h" 19 #include "AMDGPUSubtarget.h" 20 #include "SIInstrInfo.h" 21 #include "SIMachineFunctionInfo.h" 22 #include "llvm/CodeGen/LiveIntervals.h" 23 #include "llvm/CodeGen/MachineBasicBlock.h" 24 #include "llvm/CodeGen/MachineFunction.h" 25 #include "llvm/CodeGen/MachineFunctionPass.h" 26 #include "llvm/CodeGen/MachineInstr.h" 27 #include "llvm/CodeGen/MachineInstrBuilder.h" 28 #include "llvm/CodeGen/MachineOperand.h" 29 #include "llvm/CodeGen/VirtRegMap.h" 30 #include "llvm/Target/TargetMachine.h" 31 32 using namespace llvm; 33 34 #define DEBUG_TYPE "si-lower-sgpr-spills" 35 36 using MBBVector = SmallVector<MachineBasicBlock *, 4>; 37 38 namespace { 39 40 static cl::opt<bool> EnableSpillVGPRToAGPR( 41 "amdgpu-spill-vgpr-to-agpr", 42 cl::desc("Enable spilling VGPRs to AGPRs"), 43 cl::ReallyHidden, 44 cl::init(true)); 45 46 class SILowerSGPRSpills : public MachineFunctionPass { 47 private: 48 const SIRegisterInfo *TRI = nullptr; 49 const SIInstrInfo *TII = nullptr; 50 VirtRegMap *VRM = nullptr; 51 LiveIntervals *LIS = nullptr; 52 53 // Save and Restore blocks of the current function. Typically there is a 54 // single save block, unless Windows EH funclets are involved. 55 MBBVector SaveBlocks; 56 MBBVector RestoreBlocks; 57 58 public: 59 static char ID; 60 61 SILowerSGPRSpills() : MachineFunctionPass(ID) {} 62 63 void calculateSaveRestoreBlocks(MachineFunction &MF); 64 bool spillCalleeSavedRegs(MachineFunction &MF); 65 66 bool runOnMachineFunction(MachineFunction &MF) override; 67 68 void getAnalysisUsage(AnalysisUsage &AU) const override { 69 AU.setPreservesAll(); 70 MachineFunctionPass::getAnalysisUsage(AU); 71 } 72 }; 73 74 } // end anonymous namespace 75 76 char SILowerSGPRSpills::ID = 0; 77 78 INITIALIZE_PASS_BEGIN(SILowerSGPRSpills, DEBUG_TYPE, 79 "SI lower SGPR spill instructions", false, false) 80 INITIALIZE_PASS_DEPENDENCY(VirtRegMap) 81 INITIALIZE_PASS_END(SILowerSGPRSpills, DEBUG_TYPE, 82 "SI lower SGPR spill instructions", false, false) 83 84 char &llvm::SILowerSGPRSpillsID = SILowerSGPRSpills::ID; 85 86 /// Insert restore code for the callee-saved registers used in the function. 87 static void insertCSRSaves(MachineBasicBlock &SaveBlock, 88 ArrayRef<CalleeSavedInfo> CSI, 89 LiveIntervals *LIS) { 90 MachineFunction &MF = *SaveBlock.getParent(); 91 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 92 const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); 93 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 94 95 MachineBasicBlock::iterator I = SaveBlock.begin(); 96 if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) { 97 for (const CalleeSavedInfo &CS : CSI) { 98 // Insert the spill to the stack frame. 99 unsigned Reg = CS.getReg(); 100 101 MachineInstrSpan MIS(I, &SaveBlock); 102 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 103 104 TII.storeRegToStackSlot(SaveBlock, I, Reg, true, CS.getFrameIdx(), RC, 105 TRI); 106 107 if (LIS) { 108 assert(std::distance(MIS.begin(), I) == 1); 109 MachineInstr &Inst = *std::prev(I); 110 111 LIS->InsertMachineInstrInMaps(Inst); 112 LIS->removeAllRegUnitsForPhysReg(Reg); 113 } 114 } 115 } 116 } 117 118 /// Insert restore code for the callee-saved registers used in the function. 119 static void insertCSRRestores(MachineBasicBlock &RestoreBlock, 120 std::vector<CalleeSavedInfo> &CSI, 121 LiveIntervals *LIS) { 122 MachineFunction &MF = *RestoreBlock.getParent(); 123 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 124 const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); 125 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 126 127 // Restore all registers immediately before the return and any 128 // terminators that precede it. 129 MachineBasicBlock::iterator I = RestoreBlock.getFirstTerminator(); 130 131 // FIXME: Just emit the readlane/writelane directly 132 if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) { 133 for (const CalleeSavedInfo &CI : reverse(CSI)) { 134 unsigned Reg = CI.getReg(); 135 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 136 137 TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI); 138 assert(I != RestoreBlock.begin() && 139 "loadRegFromStackSlot didn't insert any code!"); 140 // Insert in reverse order. loadRegFromStackSlot can insert 141 // multiple instructions. 142 143 if (LIS) { 144 MachineInstr &Inst = *std::prev(I); 145 LIS->InsertMachineInstrInMaps(Inst); 146 LIS->removeAllRegUnitsForPhysReg(Reg); 147 } 148 } 149 } 150 } 151 152 /// Compute the sets of entry and return blocks for saving and restoring 153 /// callee-saved registers, and placing prolog and epilog code. 154 void SILowerSGPRSpills::calculateSaveRestoreBlocks(MachineFunction &MF) { 155 const MachineFrameInfo &MFI = MF.getFrameInfo(); 156 157 // Even when we do not change any CSR, we still want to insert the 158 // prologue and epilogue of the function. 159 // So set the save points for those. 160 161 // Use the points found by shrink-wrapping, if any. 162 if (MFI.getSavePoint()) { 163 SaveBlocks.push_back(MFI.getSavePoint()); 164 assert(MFI.getRestorePoint() && "Both restore and save must be set"); 165 MachineBasicBlock *RestoreBlock = MFI.getRestorePoint(); 166 // If RestoreBlock does not have any successor and is not a return block 167 // then the end point is unreachable and we do not need to insert any 168 // epilogue. 169 if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock()) 170 RestoreBlocks.push_back(RestoreBlock); 171 return; 172 } 173 174 // Save refs to entry and return blocks. 175 SaveBlocks.push_back(&MF.front()); 176 for (MachineBasicBlock &MBB : MF) { 177 if (MBB.isEHFuncletEntry()) 178 SaveBlocks.push_back(&MBB); 179 if (MBB.isReturnBlock()) 180 RestoreBlocks.push_back(&MBB); 181 } 182 } 183 184 bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) { 185 MachineRegisterInfo &MRI = MF.getRegInfo(); 186 const Function &F = MF.getFunction(); 187 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 188 const SIFrameLowering *TFI = ST.getFrameLowering(); 189 MachineFrameInfo &MFI = MF.getFrameInfo(); 190 RegScavenger *RS = nullptr; 191 192 // Determine which of the registers in the callee save list should be saved. 193 BitVector SavedRegs; 194 TFI->determineCalleeSavesSGPR(MF, SavedRegs, RS); 195 196 // Add the code to save and restore the callee saved registers. 197 if (!F.hasFnAttribute(Attribute::Naked)) { 198 // FIXME: This is a lie. The CalleeSavedInfo is incomplete, but this is 199 // necessary for verifier liveness checks. 200 MFI.setCalleeSavedInfoValid(true); 201 202 std::vector<CalleeSavedInfo> CSI; 203 const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs(); 204 205 for (unsigned I = 0; CSRegs[I]; ++I) { 206 unsigned Reg = CSRegs[I]; 207 if (SavedRegs.test(Reg)) { 208 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 209 int JunkFI = MFI.CreateStackObject(TRI->getSpillSize(*RC), 210 TRI->getSpillAlignment(*RC), 211 true); 212 213 CSI.push_back(CalleeSavedInfo(Reg, JunkFI)); 214 } 215 } 216 217 if (!CSI.empty()) { 218 for (MachineBasicBlock *SaveBlock : SaveBlocks) 219 insertCSRSaves(*SaveBlock, CSI, LIS); 220 221 for (MachineBasicBlock *RestoreBlock : RestoreBlocks) 222 insertCSRRestores(*RestoreBlock, CSI, LIS); 223 return true; 224 } 225 } 226 227 return false; 228 } 229 230 bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) { 231 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 232 TII = ST.getInstrInfo(); 233 TRI = &TII->getRegisterInfo(); 234 235 VRM = getAnalysisIfAvailable<VirtRegMap>(); 236 237 assert(SaveBlocks.empty() && RestoreBlocks.empty()); 238 239 // First, expose any CSR SGPR spills. This is mostly the same as what PEI 240 // does, but somewhat simpler. 241 calculateSaveRestoreBlocks(MF); 242 bool HasCSRs = spillCalleeSavedRegs(MF); 243 244 MachineFrameInfo &MFI = MF.getFrameInfo(); 245 if (!MFI.hasStackObjects() && !HasCSRs) { 246 SaveBlocks.clear(); 247 RestoreBlocks.clear(); 248 return false; 249 } 250 251 MachineRegisterInfo &MRI = MF.getRegInfo(); 252 SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); 253 const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs() 254 && EnableSpillVGPRToAGPR; 255 256 bool MadeChange = false; 257 258 const bool SpillToAGPR = EnableSpillVGPRToAGPR && ST.hasMAIInsts(); 259 260 // TODO: CSR VGPRs will never be spilled to AGPRs. These can probably be 261 // handled as SpilledToReg in regular PrologEpilogInserter. 262 if ((TRI->spillSGPRToVGPR() && (HasCSRs || FuncInfo->hasSpilledSGPRs())) || 263 SpillVGPRToAGPR) { 264 // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs 265 // are spilled to VGPRs, in which case we can eliminate the stack usage. 266 // 267 // This operates under the assumption that only other SGPR spills are users 268 // of the frame index. 269 for (MachineBasicBlock &MBB : MF) { 270 MachineBasicBlock::iterator Next; 271 for (auto I = MBB.begin(), E = MBB.end(); I != E; I = Next) { 272 MachineInstr &MI = *I; 273 Next = std::next(I); 274 275 if (SpillToAGPR && TII->isVGPRSpill(MI)) { 276 // Try to eliminate stack used by VGPR spills before frame 277 // finalization. 278 unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(), 279 AMDGPU::OpName::vaddr); 280 int FI = MI.getOperand(FIOp).getIndex(); 281 Register VReg = 282 TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg(); 283 if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI, 284 TRI->isAGPR(MRI, VReg))) { 285 TRI->eliminateFrameIndex(MI, 0, FIOp, nullptr); 286 continue; 287 } 288 } 289 290 if (!TII->isSGPRSpill(MI)) 291 continue; 292 293 int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex(); 294 assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill); 295 if (FuncInfo->allocateSGPRSpillToVGPR(MF, FI)) { 296 bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(MI, FI, nullptr); 297 (void)Spilled; 298 assert(Spilled && "failed to spill SGPR to VGPR when allocated"); 299 } 300 } 301 } 302 303 for (MachineBasicBlock &MBB : MF) { 304 for (auto SSpill : FuncInfo->getSGPRSpillVGPRs()) 305 MBB.addLiveIn(SSpill.VGPR); 306 307 for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs()) 308 MBB.addLiveIn(Reg); 309 310 for (MCPhysReg Reg : FuncInfo->getAGPRSpillVGPRs()) 311 MBB.addLiveIn(Reg); 312 313 MBB.sortUniqueLiveIns(); 314 } 315 316 MadeChange = true; 317 } 318 319 SaveBlocks.clear(); 320 RestoreBlocks.clear(); 321 322 return MadeChange; 323 } 324