1 //===-- SILowerSGPRSPills.cpp ---------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Handle SGPR spills. This pass takes the place of PrologEpilogInserter for all 10 // SGPR spills, so must insert CSR SGPR spills as well as expand them. 11 // 12 // This pass must never create new SGPR virtual registers. 13 // 14 // FIXME: Must stop RegScavenger spills in later passes. 15 // 16 //===----------------------------------------------------------------------===// 17 18 #include "AMDGPU.h" 19 #include "AMDGPUSubtarget.h" 20 #include "SIInstrInfo.h" 21 #include "SIMachineFunctionInfo.h" 22 #include "llvm/CodeGen/LiveIntervals.h" 23 #include "llvm/CodeGen/MachineBasicBlock.h" 24 #include "llvm/CodeGen/MachineFunction.h" 25 #include "llvm/CodeGen/MachineFunctionPass.h" 26 #include "llvm/CodeGen/MachineInstr.h" 27 #include "llvm/CodeGen/MachineInstrBuilder.h" 28 #include "llvm/CodeGen/MachineOperand.h" 29 #include "llvm/CodeGen/VirtRegMap.h" 30 #include "llvm/InitializePasses.h" 31 #include "llvm/Target/TargetMachine.h" 32 33 using namespace llvm; 34 35 #define DEBUG_TYPE "si-lower-sgpr-spills" 36 37 using MBBVector = SmallVector<MachineBasicBlock *, 4>; 38 39 namespace { 40 41 static cl::opt<bool> EnableSpillVGPRToAGPR( 42 "amdgpu-spill-vgpr-to-agpr", 43 cl::desc("Enable spilling VGPRs to AGPRs"), 44 cl::ReallyHidden, 45 cl::init(true)); 46 47 class SILowerSGPRSpills : public MachineFunctionPass { 48 private: 49 const SIRegisterInfo *TRI = nullptr; 50 const SIInstrInfo *TII = nullptr; 51 VirtRegMap *VRM = nullptr; 52 LiveIntervals *LIS = nullptr; 53 54 // Save and Restore blocks of the current function. Typically there is a 55 // single save block, unless Windows EH funclets are involved. 56 MBBVector SaveBlocks; 57 MBBVector RestoreBlocks; 58 59 public: 60 static char ID; 61 62 SILowerSGPRSpills() : MachineFunctionPass(ID) {} 63 64 void calculateSaveRestoreBlocks(MachineFunction &MF); 65 bool spillCalleeSavedRegs(MachineFunction &MF); 66 67 bool runOnMachineFunction(MachineFunction &MF) override; 68 69 void getAnalysisUsage(AnalysisUsage &AU) const override { 70 AU.setPreservesAll(); 71 MachineFunctionPass::getAnalysisUsage(AU); 72 } 73 }; 74 75 } // end anonymous namespace 76 77 char SILowerSGPRSpills::ID = 0; 78 79 INITIALIZE_PASS_BEGIN(SILowerSGPRSpills, DEBUG_TYPE, 80 "SI lower SGPR spill instructions", false, false) 81 INITIALIZE_PASS_DEPENDENCY(VirtRegMap) 82 INITIALIZE_PASS_END(SILowerSGPRSpills, DEBUG_TYPE, 83 "SI lower SGPR spill instructions", false, false) 84 85 char &llvm::SILowerSGPRSpillsID = SILowerSGPRSpills::ID; 86 87 /// Insert restore code for the callee-saved registers used in the function. 88 static void insertCSRSaves(MachineBasicBlock &SaveBlock, 89 ArrayRef<CalleeSavedInfo> CSI, 90 LiveIntervals *LIS) { 91 MachineFunction &MF = *SaveBlock.getParent(); 92 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 93 const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); 94 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 95 96 MachineBasicBlock::iterator I = SaveBlock.begin(); 97 if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) { 98 for (const CalleeSavedInfo &CS : CSI) { 99 // Insert the spill to the stack frame. 100 unsigned Reg = CS.getReg(); 101 102 MachineInstrSpan MIS(I, &SaveBlock); 103 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 104 105 TII.storeRegToStackSlot(SaveBlock, I, Reg, true, CS.getFrameIdx(), RC, 106 TRI); 107 108 if (LIS) { 109 assert(std::distance(MIS.begin(), I) == 1); 110 MachineInstr &Inst = *std::prev(I); 111 112 LIS->InsertMachineInstrInMaps(Inst); 113 LIS->removeAllRegUnitsForPhysReg(Reg); 114 } 115 } 116 } 117 } 118 119 /// Insert restore code for the callee-saved registers used in the function. 120 static void insertCSRRestores(MachineBasicBlock &RestoreBlock, 121 std::vector<CalleeSavedInfo> &CSI, 122 LiveIntervals *LIS) { 123 MachineFunction &MF = *RestoreBlock.getParent(); 124 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 125 const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); 126 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 127 128 // Restore all registers immediately before the return and any 129 // terminators that precede it. 130 MachineBasicBlock::iterator I = RestoreBlock.getFirstTerminator(); 131 132 // FIXME: Just emit the readlane/writelane directly 133 if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) { 134 for (const CalleeSavedInfo &CI : reverse(CSI)) { 135 unsigned Reg = CI.getReg(); 136 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 137 138 TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI); 139 assert(I != RestoreBlock.begin() && 140 "loadRegFromStackSlot didn't insert any code!"); 141 // Insert in reverse order. loadRegFromStackSlot can insert 142 // multiple instructions. 143 144 if (LIS) { 145 MachineInstr &Inst = *std::prev(I); 146 LIS->InsertMachineInstrInMaps(Inst); 147 LIS->removeAllRegUnitsForPhysReg(Reg); 148 } 149 } 150 } 151 } 152 153 /// Compute the sets of entry and return blocks for saving and restoring 154 /// callee-saved registers, and placing prolog and epilog code. 155 void SILowerSGPRSpills::calculateSaveRestoreBlocks(MachineFunction &MF) { 156 const MachineFrameInfo &MFI = MF.getFrameInfo(); 157 158 // Even when we do not change any CSR, we still want to insert the 159 // prologue and epilogue of the function. 160 // So set the save points for those. 161 162 // Use the points found by shrink-wrapping, if any. 163 if (MFI.getSavePoint()) { 164 SaveBlocks.push_back(MFI.getSavePoint()); 165 assert(MFI.getRestorePoint() && "Both restore and save must be set"); 166 MachineBasicBlock *RestoreBlock = MFI.getRestorePoint(); 167 // If RestoreBlock does not have any successor and is not a return block 168 // then the end point is unreachable and we do not need to insert any 169 // epilogue. 170 if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock()) 171 RestoreBlocks.push_back(RestoreBlock); 172 return; 173 } 174 175 // Save refs to entry and return blocks. 176 SaveBlocks.push_back(&MF.front()); 177 for (MachineBasicBlock &MBB : MF) { 178 if (MBB.isEHFuncletEntry()) 179 SaveBlocks.push_back(&MBB); 180 if (MBB.isReturnBlock()) 181 RestoreBlocks.push_back(&MBB); 182 } 183 } 184 185 bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) { 186 MachineRegisterInfo &MRI = MF.getRegInfo(); 187 const Function &F = MF.getFunction(); 188 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 189 const SIFrameLowering *TFI = ST.getFrameLowering(); 190 MachineFrameInfo &MFI = MF.getFrameInfo(); 191 RegScavenger *RS = nullptr; 192 193 // Determine which of the registers in the callee save list should be saved. 194 BitVector SavedRegs; 195 TFI->determineCalleeSavesSGPR(MF, SavedRegs, RS); 196 197 // Add the code to save and restore the callee saved registers. 198 if (!F.hasFnAttribute(Attribute::Naked)) { 199 // FIXME: This is a lie. The CalleeSavedInfo is incomplete, but this is 200 // necessary for verifier liveness checks. 201 MFI.setCalleeSavedInfoValid(true); 202 203 std::vector<CalleeSavedInfo> CSI; 204 const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs(); 205 206 for (unsigned I = 0; CSRegs[I]; ++I) { 207 unsigned Reg = CSRegs[I]; 208 if (SavedRegs.test(Reg)) { 209 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 210 int JunkFI = MFI.CreateStackObject(TRI->getSpillSize(*RC), 211 TRI->getSpillAlignment(*RC), 212 true); 213 214 CSI.push_back(CalleeSavedInfo(Reg, JunkFI)); 215 } 216 } 217 218 if (!CSI.empty()) { 219 for (MachineBasicBlock *SaveBlock : SaveBlocks) 220 insertCSRSaves(*SaveBlock, CSI, LIS); 221 222 for (MachineBasicBlock *RestoreBlock : RestoreBlocks) 223 insertCSRRestores(*RestoreBlock, CSI, LIS); 224 return true; 225 } 226 } 227 228 return false; 229 } 230 231 bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) { 232 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 233 TII = ST.getInstrInfo(); 234 TRI = &TII->getRegisterInfo(); 235 236 VRM = getAnalysisIfAvailable<VirtRegMap>(); 237 238 assert(SaveBlocks.empty() && RestoreBlocks.empty()); 239 240 // First, expose any CSR SGPR spills. This is mostly the same as what PEI 241 // does, but somewhat simpler. 242 calculateSaveRestoreBlocks(MF); 243 bool HasCSRs = spillCalleeSavedRegs(MF); 244 245 MachineFrameInfo &MFI = MF.getFrameInfo(); 246 if (!MFI.hasStackObjects() && !HasCSRs) { 247 SaveBlocks.clear(); 248 RestoreBlocks.clear(); 249 return false; 250 } 251 252 MachineRegisterInfo &MRI = MF.getRegInfo(); 253 SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); 254 const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs() 255 && EnableSpillVGPRToAGPR; 256 257 bool MadeChange = false; 258 259 const bool SpillToAGPR = EnableSpillVGPRToAGPR && ST.hasMAIInsts(); 260 261 // TODO: CSR VGPRs will never be spilled to AGPRs. These can probably be 262 // handled as SpilledToReg in regular PrologEpilogInserter. 263 if ((TRI->spillSGPRToVGPR() && (HasCSRs || FuncInfo->hasSpilledSGPRs())) || 264 SpillVGPRToAGPR) { 265 // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs 266 // are spilled to VGPRs, in which case we can eliminate the stack usage. 267 // 268 // This operates under the assumption that only other SGPR spills are users 269 // of the frame index. 270 for (MachineBasicBlock &MBB : MF) { 271 MachineBasicBlock::iterator Next; 272 for (auto I = MBB.begin(), E = MBB.end(); I != E; I = Next) { 273 MachineInstr &MI = *I; 274 Next = std::next(I); 275 276 if (SpillToAGPR && TII->isVGPRSpill(MI)) { 277 // Try to eliminate stack used by VGPR spills before frame 278 // finalization. 279 unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(), 280 AMDGPU::OpName::vaddr); 281 int FI = MI.getOperand(FIOp).getIndex(); 282 Register VReg = 283 TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg(); 284 if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI, 285 TRI->isAGPR(MRI, VReg))) { 286 TRI->eliminateFrameIndex(MI, 0, FIOp, nullptr); 287 continue; 288 } 289 } 290 291 if (!TII->isSGPRSpill(MI)) 292 continue; 293 294 int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex(); 295 assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill); 296 if (FuncInfo->allocateSGPRSpillToVGPR(MF, FI)) { 297 bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(MI, FI, nullptr); 298 (void)Spilled; 299 assert(Spilled && "failed to spill SGPR to VGPR when allocated"); 300 } 301 } 302 } 303 304 for (MachineBasicBlock &MBB : MF) { 305 for (auto SSpill : FuncInfo->getSGPRSpillVGPRs()) 306 MBB.addLiveIn(SSpill.VGPR); 307 308 for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs()) 309 MBB.addLiveIn(Reg); 310 311 for (MCPhysReg Reg : FuncInfo->getAGPRSpillVGPRs()) 312 MBB.addLiveIn(Reg); 313 314 MBB.sortUniqueLiveIns(); 315 } 316 317 MadeChange = true; 318 } 319 320 SaveBlocks.clear(); 321 RestoreBlocks.clear(); 322 323 return MadeChange; 324 } 325