1 //===-- SILowerSGPRSPills.cpp ---------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Handle SGPR spills. This pass takes the place of PrologEpilogInserter for all 10 // SGPR spills, so must insert CSR SGPR spills as well as expand them. 11 // 12 // This pass must never create new SGPR virtual registers. 13 // 14 // FIXME: Must stop RegScavenger spills in later passes. 15 // 16 //===----------------------------------------------------------------------===// 17 18 #include "AMDGPU.h" 19 #include "GCNSubtarget.h" 20 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 21 #include "SIMachineFunctionInfo.h" 22 #include "llvm/CodeGen/LiveIntervals.h" 23 #include "llvm/CodeGen/RegisterScavenging.h" 24 #include "llvm/InitializePasses.h" 25 26 using namespace llvm; 27 28 #define DEBUG_TYPE "si-lower-sgpr-spills" 29 30 using MBBVector = SmallVector<MachineBasicBlock *, 4>; 31 32 namespace { 33 34 class SILowerSGPRSpills : public MachineFunctionPass { 35 private: 36 const SIRegisterInfo *TRI = nullptr; 37 const SIInstrInfo *TII = nullptr; 38 VirtRegMap *VRM = nullptr; 39 LiveIntervals *LIS = nullptr; 40 41 // Save and Restore blocks of the current function. Typically there is a 42 // single save block, unless Windows EH funclets are involved. 43 MBBVector SaveBlocks; 44 MBBVector RestoreBlocks; 45 46 public: 47 static char ID; 48 49 SILowerSGPRSpills() : MachineFunctionPass(ID) {} 50 51 void calculateSaveRestoreBlocks(MachineFunction &MF); 52 bool spillCalleeSavedRegs(MachineFunction &MF); 53 54 bool runOnMachineFunction(MachineFunction &MF) override; 55 56 void getAnalysisUsage(AnalysisUsage &AU) const override { 57 AU.setPreservesAll(); 58 MachineFunctionPass::getAnalysisUsage(AU); 59 } 60 }; 61 62 } // end anonymous namespace 63 64 char SILowerSGPRSpills::ID = 0; 65 66 INITIALIZE_PASS_BEGIN(SILowerSGPRSpills, DEBUG_TYPE, 67 "SI lower SGPR spill instructions", false, false) 68 INITIALIZE_PASS_DEPENDENCY(LiveIntervals) 69 INITIALIZE_PASS_DEPENDENCY(VirtRegMap) 70 INITIALIZE_PASS_END(SILowerSGPRSpills, DEBUG_TYPE, 71 "SI lower SGPR spill instructions", false, false) 72 73 char &llvm::SILowerSGPRSpillsID = SILowerSGPRSpills::ID; 74 75 /// Insert restore code for the callee-saved registers used in the function. 76 static void insertCSRSaves(MachineBasicBlock &SaveBlock, 77 ArrayRef<CalleeSavedInfo> CSI, 78 LiveIntervals *LIS) { 79 MachineFunction &MF = *SaveBlock.getParent(); 80 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 81 const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); 82 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 83 84 MachineBasicBlock::iterator I = SaveBlock.begin(); 85 if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) { 86 const MachineRegisterInfo &MRI = MF.getRegInfo(); 87 88 for (const CalleeSavedInfo &CS : CSI) { 89 // Insert the spill to the stack frame. 90 MCRegister Reg = CS.getReg(); 91 92 MachineInstrSpan MIS(I, &SaveBlock); 93 const TargetRegisterClass *RC = 94 TRI->getMinimalPhysRegClass(Reg, MVT::i32); 95 96 // If this value was already livein, we probably have a direct use of the 97 // incoming register value, so don't kill at the spill point. This happens 98 // since we pass some special inputs (workgroup IDs) in the callee saved 99 // range. 100 const bool IsLiveIn = MRI.isLiveIn(Reg); 101 TII.storeRegToStackSlot(SaveBlock, I, Reg, !IsLiveIn, CS.getFrameIdx(), 102 RC, TRI); 103 104 if (LIS) { 105 assert(std::distance(MIS.begin(), I) == 1); 106 MachineInstr &Inst = *std::prev(I); 107 108 LIS->InsertMachineInstrInMaps(Inst); 109 LIS->removeAllRegUnitsForPhysReg(Reg); 110 } 111 } 112 } 113 } 114 115 /// Insert restore code for the callee-saved registers used in the function. 116 static void insertCSRRestores(MachineBasicBlock &RestoreBlock, 117 MutableArrayRef<CalleeSavedInfo> CSI, 118 LiveIntervals *LIS) { 119 MachineFunction &MF = *RestoreBlock.getParent(); 120 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 121 const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); 122 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 123 124 // Restore all registers immediately before the return and any 125 // terminators that precede it. 126 MachineBasicBlock::iterator I = RestoreBlock.getFirstTerminator(); 127 128 // FIXME: Just emit the readlane/writelane directly 129 if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) { 130 for (const CalleeSavedInfo &CI : reverse(CSI)) { 131 unsigned Reg = CI.getReg(); 132 const TargetRegisterClass *RC = 133 TRI->getMinimalPhysRegClass(Reg, MVT::i32); 134 135 TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI); 136 assert(I != RestoreBlock.begin() && 137 "loadRegFromStackSlot didn't insert any code!"); 138 // Insert in reverse order. loadRegFromStackSlot can insert 139 // multiple instructions. 140 141 if (LIS) { 142 MachineInstr &Inst = *std::prev(I); 143 LIS->InsertMachineInstrInMaps(Inst); 144 LIS->removeAllRegUnitsForPhysReg(Reg); 145 } 146 } 147 } 148 } 149 150 /// Compute the sets of entry and return blocks for saving and restoring 151 /// callee-saved registers, and placing prolog and epilog code. 152 void SILowerSGPRSpills::calculateSaveRestoreBlocks(MachineFunction &MF) { 153 const MachineFrameInfo &MFI = MF.getFrameInfo(); 154 155 // Even when we do not change any CSR, we still want to insert the 156 // prologue and epilogue of the function. 157 // So set the save points for those. 158 159 // Use the points found by shrink-wrapping, if any. 160 if (MFI.getSavePoint()) { 161 SaveBlocks.push_back(MFI.getSavePoint()); 162 assert(MFI.getRestorePoint() && "Both restore and save must be set"); 163 MachineBasicBlock *RestoreBlock = MFI.getRestorePoint(); 164 // If RestoreBlock does not have any successor and is not a return block 165 // then the end point is unreachable and we do not need to insert any 166 // epilogue. 167 if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock()) 168 RestoreBlocks.push_back(RestoreBlock); 169 return; 170 } 171 172 // Save refs to entry and return blocks. 173 SaveBlocks.push_back(&MF.front()); 174 for (MachineBasicBlock &MBB : MF) { 175 if (MBB.isEHFuncletEntry()) 176 SaveBlocks.push_back(&MBB); 177 if (MBB.isReturnBlock()) 178 RestoreBlocks.push_back(&MBB); 179 } 180 } 181 182 // TODO: To support shrink wrapping, this would need to copy 183 // PrologEpilogInserter's updateLiveness. 184 static void updateLiveness(MachineFunction &MF, ArrayRef<CalleeSavedInfo> CSI) { 185 MachineBasicBlock &EntryBB = MF.front(); 186 187 for (const CalleeSavedInfo &CSIReg : CSI) 188 EntryBB.addLiveIn(CSIReg.getReg()); 189 EntryBB.sortUniqueLiveIns(); 190 } 191 192 bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) { 193 MachineRegisterInfo &MRI = MF.getRegInfo(); 194 const Function &F = MF.getFunction(); 195 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 196 const SIFrameLowering *TFI = ST.getFrameLowering(); 197 MachineFrameInfo &MFI = MF.getFrameInfo(); 198 RegScavenger *RS = nullptr; 199 200 // Determine which of the registers in the callee save list should be saved. 201 BitVector SavedRegs; 202 TFI->determineCalleeSavesSGPR(MF, SavedRegs, RS); 203 204 // Add the code to save and restore the callee saved registers. 205 if (!F.hasFnAttribute(Attribute::Naked)) { 206 // FIXME: This is a lie. The CalleeSavedInfo is incomplete, but this is 207 // necessary for verifier liveness checks. 208 MFI.setCalleeSavedInfoValid(true); 209 210 std::vector<CalleeSavedInfo> CSI; 211 const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs(); 212 213 for (unsigned I = 0; CSRegs[I]; ++I) { 214 MCRegister Reg = CSRegs[I]; 215 216 if (SavedRegs.test(Reg)) { 217 const TargetRegisterClass *RC = 218 TRI->getMinimalPhysRegClass(Reg, MVT::i32); 219 int JunkFI = MFI.CreateStackObject(TRI->getSpillSize(*RC), 220 TRI->getSpillAlign(*RC), true); 221 222 CSI.push_back(CalleeSavedInfo(Reg, JunkFI)); 223 } 224 } 225 226 if (!CSI.empty()) { 227 for (MachineBasicBlock *SaveBlock : SaveBlocks) 228 insertCSRSaves(*SaveBlock, CSI, LIS); 229 230 // Add live ins to save blocks. 231 assert(SaveBlocks.size() == 1 && "shrink wrapping not fully implemented"); 232 updateLiveness(MF, CSI); 233 234 for (MachineBasicBlock *RestoreBlock : RestoreBlocks) 235 insertCSRRestores(*RestoreBlock, CSI, LIS); 236 return true; 237 } 238 } 239 240 return false; 241 } 242 243 // Find lowest available VGPR and use it as VGPR reserved for SGPR spills. 244 static bool lowerShiftReservedVGPR(MachineFunction &MF, 245 const GCNSubtarget &ST) { 246 SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); 247 const Register PreReservedVGPR = FuncInfo->VGPRReservedForSGPRSpill; 248 // Early out if pre-reservation of a VGPR for SGPR spilling is disabled. 249 if (!PreReservedVGPR) 250 return false; 251 252 // If there are no free lower VGPRs available, default to using the 253 // pre-reserved register instead. 254 const SIRegisterInfo *TRI = ST.getRegisterInfo(); 255 Register LowestAvailableVGPR = 256 TRI->findUnusedRegister(MF.getRegInfo(), &AMDGPU::VGPR_32RegClass, MF); 257 if (!LowestAvailableVGPR) 258 LowestAvailableVGPR = PreReservedVGPR; 259 260 MachineFrameInfo &FrameInfo = MF.getFrameInfo(); 261 // Create a stack object for a possible spill in the function prologue. 262 // Note Non-CSR VGPR also need this as we may overwrite inactive lanes. 263 Optional<int> FI = FrameInfo.CreateSpillStackObject(4, Align(4)); 264 265 // Find saved info about the pre-reserved register. 266 const auto *ReservedVGPRInfoItr = 267 llvm::find_if(FuncInfo->getSGPRSpillVGPRs(), 268 [PreReservedVGPR](const auto &SpillRegInfo) { 269 return SpillRegInfo.VGPR == PreReservedVGPR; 270 }); 271 272 assert(ReservedVGPRInfoItr != FuncInfo->getSGPRSpillVGPRs().end()); 273 auto Index = 274 std::distance(FuncInfo->getSGPRSpillVGPRs().begin(), ReservedVGPRInfoItr); 275 276 FuncInfo->setSGPRSpillVGPRs(LowestAvailableVGPR, FI, Index); 277 278 for (MachineBasicBlock &MBB : MF) { 279 assert(LowestAvailableVGPR.isValid() && "Did not find an available VGPR"); 280 MBB.addLiveIn(LowestAvailableVGPR); 281 MBB.sortUniqueLiveIns(); 282 } 283 284 return true; 285 } 286 287 bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) { 288 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 289 TII = ST.getInstrInfo(); 290 TRI = &TII->getRegisterInfo(); 291 292 VRM = getAnalysisIfAvailable<VirtRegMap>(); 293 LIS = getAnalysisIfAvailable<LiveIntervals>(); 294 295 assert(SaveBlocks.empty() && RestoreBlocks.empty()); 296 297 // First, expose any CSR SGPR spills. This is mostly the same as what PEI 298 // does, but somewhat simpler. 299 calculateSaveRestoreBlocks(MF); 300 bool HasCSRs = spillCalleeSavedRegs(MF); 301 302 MachineFrameInfo &MFI = MF.getFrameInfo(); 303 MachineRegisterInfo &MRI = MF.getRegInfo(); 304 SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); 305 306 if (!MFI.hasStackObjects() && !HasCSRs) { 307 SaveBlocks.clear(); 308 RestoreBlocks.clear(); 309 if (FuncInfo->VGPRReservedForSGPRSpill) { 310 // Free the reserved VGPR for later possible use by frame lowering. 311 FuncInfo->removeVGPRForSGPRSpill(FuncInfo->VGPRReservedForSGPRSpill, MF); 312 MRI.freezeReservedRegs(MF); 313 } 314 return false; 315 } 316 317 bool MadeChange = false; 318 bool NewReservedRegs = false; 319 320 // TODO: CSR VGPRs will never be spilled to AGPRs. These can probably be 321 // handled as SpilledToReg in regular PrologEpilogInserter. 322 const bool HasSGPRSpillToVGPR = TRI->spillSGPRToVGPR() && 323 (HasCSRs || FuncInfo->hasSpilledSGPRs()); 324 if (HasSGPRSpillToVGPR) { 325 // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs 326 // are spilled to VGPRs, in which case we can eliminate the stack usage. 327 // 328 // This operates under the assumption that only other SGPR spills are users 329 // of the frame index. 330 331 lowerShiftReservedVGPR(MF, ST); 332 333 // To track the spill frame indices handled in this pass. 334 BitVector SpillFIs(MFI.getObjectIndexEnd(), false); 335 336 for (MachineBasicBlock &MBB : MF) { 337 MachineBasicBlock::iterator Next; 338 for (auto I = MBB.begin(), E = MBB.end(); I != E; I = Next) { 339 MachineInstr &MI = *I; 340 Next = std::next(I); 341 342 if (!TII->isSGPRSpill(MI)) 343 continue; 344 345 int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex(); 346 assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill); 347 if (FuncInfo->allocateSGPRSpillToVGPR(MF, FI)) { 348 NewReservedRegs = true; 349 bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(MI, FI, 350 nullptr, LIS); 351 (void)Spilled; 352 assert(Spilled && "failed to spill SGPR to VGPR when allocated"); 353 SpillFIs.set(FI); 354 } 355 } 356 } 357 358 // FIXME: Adding to live-ins redundant with reserving registers. 359 for (MachineBasicBlock &MBB : MF) { 360 for (auto SSpill : FuncInfo->getSGPRSpillVGPRs()) 361 MBB.addLiveIn(SSpill.VGPR); 362 MBB.sortUniqueLiveIns(); 363 364 // FIXME: The dead frame indices are replaced with a null register from 365 // the debug value instructions. We should instead, update it with the 366 // correct register value. But not sure the register value alone is 367 // adequate to lower the DIExpression. It should be worked out later. 368 for (MachineInstr &MI : MBB) { 369 if (MI.isDebugValue() && MI.getOperand(0).isFI() && 370 SpillFIs[MI.getOperand(0).getIndex()]) { 371 MI.getOperand(0).ChangeToRegister(Register(), false /*isDef*/); 372 MI.getOperand(0).setIsDebug(); 373 } 374 } 375 } 376 377 MadeChange = true; 378 } else if (FuncInfo->VGPRReservedForSGPRSpill) { 379 FuncInfo->removeVGPRForSGPRSpill(FuncInfo->VGPRReservedForSGPRSpill, MF); 380 } 381 382 SaveBlocks.clear(); 383 RestoreBlocks.clear(); 384 385 // Updated the reserved registers with any VGPRs added for SGPR spills. 386 if (NewReservedRegs) 387 MRI.freezeReservedRegs(MF); 388 389 return MadeChange; 390 } 391