1 //===-- SILowerSGPRSPills.cpp ---------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Handle SGPR spills. This pass takes the place of PrologEpilogInserter for all 10 // SGPR spills, so must insert CSR SGPR spills as well as expand them. 11 // 12 // This pass must never create new SGPR virtual registers. 13 // 14 // FIXME: Must stop RegScavenger spills in later passes. 15 // 16 //===----------------------------------------------------------------------===// 17 18 #include "AMDGPU.h" 19 #include "AMDGPUSubtarget.h" 20 #include "SIInstrInfo.h" 21 #include "SIMachineFunctionInfo.h" 22 #include "llvm/CodeGen/LiveIntervals.h" 23 #include "llvm/CodeGen/MachineBasicBlock.h" 24 #include "llvm/CodeGen/MachineFunction.h" 25 #include "llvm/CodeGen/MachineFunctionPass.h" 26 #include "llvm/CodeGen/MachineInstr.h" 27 #include "llvm/CodeGen/MachineInstrBuilder.h" 28 #include "llvm/CodeGen/MachineOperand.h" 29 #include "llvm/CodeGen/VirtRegMap.h" 30 #include "llvm/InitializePasses.h" 31 #include "llvm/Target/TargetMachine.h" 32 33 using namespace llvm; 34 35 #define DEBUG_TYPE "si-lower-sgpr-spills" 36 37 using MBBVector = SmallVector<MachineBasicBlock *, 4>; 38 39 namespace { 40 41 static cl::opt<bool> EnableSpillVGPRToAGPR( 42 "amdgpu-spill-vgpr-to-agpr", 43 cl::desc("Enable spilling VGPRs to AGPRs"), 44 cl::ReallyHidden, 45 cl::init(true)); 46 47 class SILowerSGPRSpills : public MachineFunctionPass { 48 private: 49 const SIRegisterInfo *TRI = nullptr; 50 const SIInstrInfo *TII = nullptr; 51 VirtRegMap *VRM = nullptr; 52 LiveIntervals *LIS = nullptr; 53 54 // Save and Restore blocks of the current function. Typically there is a 55 // single save block, unless Windows EH funclets are involved. 56 MBBVector SaveBlocks; 57 MBBVector RestoreBlocks; 58 59 public: 60 static char ID; 61 62 SILowerSGPRSpills() : MachineFunctionPass(ID) {} 63 64 void calculateSaveRestoreBlocks(MachineFunction &MF); 65 bool spillCalleeSavedRegs(MachineFunction &MF); 66 67 bool runOnMachineFunction(MachineFunction &MF) override; 68 69 void getAnalysisUsage(AnalysisUsage &AU) const override { 70 AU.setPreservesAll(); 71 MachineFunctionPass::getAnalysisUsage(AU); 72 } 73 }; 74 75 } // end anonymous namespace 76 77 char SILowerSGPRSpills::ID = 0; 78 79 INITIALIZE_PASS_BEGIN(SILowerSGPRSpills, DEBUG_TYPE, 80 "SI lower SGPR spill instructions", false, false) 81 INITIALIZE_PASS_DEPENDENCY(VirtRegMap) 82 INITIALIZE_PASS_END(SILowerSGPRSpills, DEBUG_TYPE, 83 "SI lower SGPR spill instructions", false, false) 84 85 char &llvm::SILowerSGPRSpillsID = SILowerSGPRSpills::ID; 86 87 /// Insert restore code for the callee-saved registers used in the function. 88 static void insertCSRSaves(MachineBasicBlock &SaveBlock, 89 ArrayRef<CalleeSavedInfo> CSI, 90 LiveIntervals *LIS) { 91 MachineFunction &MF = *SaveBlock.getParent(); 92 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 93 const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); 94 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 95 96 MachineBasicBlock::iterator I = SaveBlock.begin(); 97 if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) { 98 for (const CalleeSavedInfo &CS : CSI) { 99 // Insert the spill to the stack frame. 100 unsigned Reg = CS.getReg(); 101 102 MachineInstrSpan MIS(I, &SaveBlock); 103 const TargetRegisterClass *RC = 104 TRI->getMinimalPhysRegClass(Reg, MVT::i32); 105 106 TII.storeRegToStackSlot(SaveBlock, I, Reg, true, CS.getFrameIdx(), RC, 107 TRI); 108 109 if (LIS) { 110 assert(std::distance(MIS.begin(), I) == 1); 111 MachineInstr &Inst = *std::prev(I); 112 113 LIS->InsertMachineInstrInMaps(Inst); 114 LIS->removeAllRegUnitsForPhysReg(Reg); 115 } 116 } 117 } 118 } 119 120 /// Insert restore code for the callee-saved registers used in the function. 121 static void insertCSRRestores(MachineBasicBlock &RestoreBlock, 122 MutableArrayRef<CalleeSavedInfo> CSI, 123 LiveIntervals *LIS) { 124 MachineFunction &MF = *RestoreBlock.getParent(); 125 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 126 const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); 127 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 128 129 // Restore all registers immediately before the return and any 130 // terminators that precede it. 131 MachineBasicBlock::iterator I = RestoreBlock.getFirstTerminator(); 132 133 // FIXME: Just emit the readlane/writelane directly 134 if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) { 135 for (const CalleeSavedInfo &CI : reverse(CSI)) { 136 unsigned Reg = CI.getReg(); 137 const TargetRegisterClass *RC = 138 TRI->getMinimalPhysRegClass(Reg, MVT::i32); 139 140 TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI); 141 assert(I != RestoreBlock.begin() && 142 "loadRegFromStackSlot didn't insert any code!"); 143 // Insert in reverse order. loadRegFromStackSlot can insert 144 // multiple instructions. 145 146 if (LIS) { 147 MachineInstr &Inst = *std::prev(I); 148 LIS->InsertMachineInstrInMaps(Inst); 149 LIS->removeAllRegUnitsForPhysReg(Reg); 150 } 151 } 152 } 153 } 154 155 /// Compute the sets of entry and return blocks for saving and restoring 156 /// callee-saved registers, and placing prolog and epilog code. 157 void SILowerSGPRSpills::calculateSaveRestoreBlocks(MachineFunction &MF) { 158 const MachineFrameInfo &MFI = MF.getFrameInfo(); 159 160 // Even when we do not change any CSR, we still want to insert the 161 // prologue and epilogue of the function. 162 // So set the save points for those. 163 164 // Use the points found by shrink-wrapping, if any. 165 if (MFI.getSavePoint()) { 166 SaveBlocks.push_back(MFI.getSavePoint()); 167 assert(MFI.getRestorePoint() && "Both restore and save must be set"); 168 MachineBasicBlock *RestoreBlock = MFI.getRestorePoint(); 169 // If RestoreBlock does not have any successor and is not a return block 170 // then the end point is unreachable and we do not need to insert any 171 // epilogue. 172 if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock()) 173 RestoreBlocks.push_back(RestoreBlock); 174 return; 175 } 176 177 // Save refs to entry and return blocks. 178 SaveBlocks.push_back(&MF.front()); 179 for (MachineBasicBlock &MBB : MF) { 180 if (MBB.isEHFuncletEntry()) 181 SaveBlocks.push_back(&MBB); 182 if (MBB.isReturnBlock()) 183 RestoreBlocks.push_back(&MBB); 184 } 185 } 186 187 bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) { 188 MachineRegisterInfo &MRI = MF.getRegInfo(); 189 const Function &F = MF.getFunction(); 190 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 191 const SIFrameLowering *TFI = ST.getFrameLowering(); 192 MachineFrameInfo &MFI = MF.getFrameInfo(); 193 RegScavenger *RS = nullptr; 194 195 // Determine which of the registers in the callee save list should be saved. 196 BitVector SavedRegs; 197 TFI->determineCalleeSavesSGPR(MF, SavedRegs, RS); 198 199 // Add the code to save and restore the callee saved registers. 200 if (!F.hasFnAttribute(Attribute::Naked)) { 201 // FIXME: This is a lie. The CalleeSavedInfo is incomplete, but this is 202 // necessary for verifier liveness checks. 203 MFI.setCalleeSavedInfoValid(true); 204 205 std::vector<CalleeSavedInfo> CSI; 206 const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs(); 207 208 for (unsigned I = 0; CSRegs[I]; ++I) { 209 unsigned Reg = CSRegs[I]; 210 if (SavedRegs.test(Reg)) { 211 const TargetRegisterClass *RC = 212 TRI->getMinimalPhysRegClass(Reg, MVT::i32); 213 int JunkFI = MFI.CreateStackObject(TRI->getSpillSize(*RC), 214 TRI->getSpillAlign(*RC), true); 215 216 CSI.push_back(CalleeSavedInfo(Reg, JunkFI)); 217 } 218 } 219 220 if (!CSI.empty()) { 221 for (MachineBasicBlock *SaveBlock : SaveBlocks) 222 insertCSRSaves(*SaveBlock, CSI, LIS); 223 224 for (MachineBasicBlock *RestoreBlock : RestoreBlocks) 225 insertCSRRestores(*RestoreBlock, CSI, LIS); 226 return true; 227 } 228 } 229 230 return false; 231 } 232 233 // Find lowest available VGPR and use it as VGPR reserved for SGPR spills. 234 static bool lowerShiftReservedVGPR(MachineFunction &MF, 235 const GCNSubtarget &ST) { 236 MachineRegisterInfo &MRI = MF.getRegInfo(); 237 MachineFrameInfo &FrameInfo = MF.getFrameInfo(); 238 SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); 239 Register LowestAvailableVGPR, ReservedVGPR; 240 ArrayRef<MCPhysReg> AllVGPR32s = ST.getRegisterInfo()->getAllVGPR32(MF); 241 for (MCPhysReg Reg : AllVGPR32s) { 242 if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg)) { 243 LowestAvailableVGPR = Reg; 244 break; 245 } 246 } 247 248 if (!LowestAvailableVGPR) 249 return false; 250 251 ReservedVGPR = FuncInfo->VGPRReservedForSGPRSpill; 252 const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs(); 253 int i = 0; 254 255 for (MachineBasicBlock &MBB : MF) { 256 for (auto Reg : FuncInfo->getSGPRSpillVGPRs()) { 257 if (Reg.VGPR == ReservedVGPR) { 258 MBB.removeLiveIn(ReservedVGPR); 259 MBB.addLiveIn(LowestAvailableVGPR); 260 Optional<int> FI; 261 if (FuncInfo->isCalleeSavedReg(CSRegs, LowestAvailableVGPR)) 262 FI = FrameInfo.CreateSpillStackObject(4, Align(4)); 263 264 FuncInfo->setSGPRSpillVGPRs(LowestAvailableVGPR, FI, i); 265 } 266 ++i; 267 } 268 MBB.sortUniqueLiveIns(); 269 } 270 271 return true; 272 } 273 274 bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) { 275 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 276 TII = ST.getInstrInfo(); 277 TRI = &TII->getRegisterInfo(); 278 279 VRM = getAnalysisIfAvailable<VirtRegMap>(); 280 281 assert(SaveBlocks.empty() && RestoreBlocks.empty()); 282 283 // First, expose any CSR SGPR spills. This is mostly the same as what PEI 284 // does, but somewhat simpler. 285 calculateSaveRestoreBlocks(MF); 286 bool HasCSRs = spillCalleeSavedRegs(MF); 287 288 MachineFrameInfo &MFI = MF.getFrameInfo(); 289 if (!MFI.hasStackObjects() && !HasCSRs) { 290 SaveBlocks.clear(); 291 RestoreBlocks.clear(); 292 return false; 293 } 294 295 MachineRegisterInfo &MRI = MF.getRegInfo(); 296 SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); 297 const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs() 298 && EnableSpillVGPRToAGPR; 299 300 bool MadeChange = false; 301 302 const bool SpillToAGPR = EnableSpillVGPRToAGPR && ST.hasMAIInsts(); 303 304 // TODO: CSR VGPRs will never be spilled to AGPRs. These can probably be 305 // handled as SpilledToReg in regular PrologEpilogInserter. 306 if ((TRI->spillSGPRToVGPR() && (HasCSRs || FuncInfo->hasSpilledSGPRs())) || 307 SpillVGPRToAGPR) { 308 // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs 309 // are spilled to VGPRs, in which case we can eliminate the stack usage. 310 // 311 // This operates under the assumption that only other SGPR spills are users 312 // of the frame index. 313 314 lowerShiftReservedVGPR(MF, ST); 315 316 for (MachineBasicBlock &MBB : MF) { 317 MachineBasicBlock::iterator Next; 318 for (auto I = MBB.begin(), E = MBB.end(); I != E; I = Next) { 319 MachineInstr &MI = *I; 320 Next = std::next(I); 321 322 if (SpillToAGPR && TII->isVGPRSpill(MI)) { 323 // Try to eliminate stack used by VGPR spills before frame 324 // finalization. 325 unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(), 326 AMDGPU::OpName::vaddr); 327 int FI = MI.getOperand(FIOp).getIndex(); 328 Register VReg = 329 TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg(); 330 if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI, 331 TRI->isAGPR(MRI, VReg))) { 332 TRI->eliminateFrameIndex(MI, 0, FIOp, nullptr); 333 continue; 334 } 335 } 336 337 if (!TII->isSGPRSpill(MI)) 338 continue; 339 340 int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex(); 341 assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill); 342 if (FuncInfo->allocateSGPRSpillToVGPR(MF, FI)) { 343 bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(MI, FI, nullptr); 344 (void)Spilled; 345 assert(Spilled && "failed to spill SGPR to VGPR when allocated"); 346 } 347 } 348 } 349 350 for (MachineBasicBlock &MBB : MF) { 351 for (auto SSpill : FuncInfo->getSGPRSpillVGPRs()) 352 MBB.addLiveIn(SSpill.VGPR); 353 354 for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs()) 355 MBB.addLiveIn(Reg); 356 357 for (MCPhysReg Reg : FuncInfo->getAGPRSpillVGPRs()) 358 MBB.addLiveIn(Reg); 359 360 MBB.sortUniqueLiveIns(); 361 } 362 363 MadeChange = true; 364 } else if (FuncInfo->VGPRReservedForSGPRSpill) { 365 FuncInfo->removeVGPRForSGPRSpill(FuncInfo->VGPRReservedForSGPRSpill, MF); 366 } 367 368 SaveBlocks.clear(); 369 RestoreBlocks.clear(); 370 371 return MadeChange; 372 } 373