1 //===-- SILowerSGPRSPills.cpp ---------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Handle SGPR spills. This pass takes the place of PrologEpilogInserter for all 10 // SGPR spills, so must insert CSR SGPR spills as well as expand them. 11 // 12 // This pass must never create new SGPR virtual registers. 13 // 14 // FIXME: Must stop RegScavenger spills in later passes. 15 // 16 //===----------------------------------------------------------------------===// 17 18 #include "AMDGPU.h" 19 #include "GCNSubtarget.h" 20 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 21 #include "SIMachineFunctionInfo.h" 22 #include "llvm/CodeGen/LiveIntervals.h" 23 #include "llvm/CodeGen/MachineFrameInfo.h" 24 #include "llvm/CodeGen/RegisterScavenging.h" 25 #include "llvm/InitializePasses.h" 26 27 using namespace llvm; 28 29 #define DEBUG_TYPE "si-lower-sgpr-spills" 30 31 using MBBVector = SmallVector<MachineBasicBlock *, 4>; 32 33 namespace { 34 35 class SILowerSGPRSpills : public MachineFunctionPass { 36 private: 37 const SIRegisterInfo *TRI = nullptr; 38 const SIInstrInfo *TII = nullptr; 39 LiveIntervals *LIS = nullptr; 40 SlotIndexes *Indexes = nullptr; 41 42 // Save and Restore blocks of the current function. Typically there is a 43 // single save block, unless Windows EH funclets are involved. 44 MBBVector SaveBlocks; 45 MBBVector RestoreBlocks; 46 47 public: 48 static char ID; 49 50 SILowerSGPRSpills() : MachineFunctionPass(ID) {} 51 52 void calculateSaveRestoreBlocks(MachineFunction &MF); 53 bool spillCalleeSavedRegs(MachineFunction &MF); 54 55 bool runOnMachineFunction(MachineFunction &MF) override; 56 57 void getAnalysisUsage(AnalysisUsage &AU) const override { 58 AU.setPreservesAll(); 59 MachineFunctionPass::getAnalysisUsage(AU); 60 } 61 }; 62 63 } // end anonymous namespace 64 65 char SILowerSGPRSpills::ID = 0; 66 67 INITIALIZE_PASS_BEGIN(SILowerSGPRSpills, DEBUG_TYPE, 68 "SI lower SGPR spill instructions", false, false) 69 INITIALIZE_PASS_DEPENDENCY(LiveIntervals) 70 INITIALIZE_PASS_DEPENDENCY(VirtRegMap) 71 INITIALIZE_PASS_END(SILowerSGPRSpills, DEBUG_TYPE, 72 "SI lower SGPR spill instructions", false, false) 73 74 char &llvm::SILowerSGPRSpillsID = SILowerSGPRSpills::ID; 75 76 /// Insert spill code for the callee-saved registers used in the function. 77 static void insertCSRSaves(MachineBasicBlock &SaveBlock, 78 ArrayRef<CalleeSavedInfo> CSI, SlotIndexes *Indexes, 79 LiveIntervals *LIS) { 80 MachineFunction &MF = *SaveBlock.getParent(); 81 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 82 const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); 83 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 84 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 85 const SIRegisterInfo *RI = ST.getRegisterInfo(); 86 87 MachineBasicBlock::iterator I = SaveBlock.begin(); 88 if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) { 89 const MachineRegisterInfo &MRI = MF.getRegInfo(); 90 91 for (const CalleeSavedInfo &CS : CSI) { 92 // Insert the spill to the stack frame. 93 MCRegister Reg = CS.getReg(); 94 95 MachineInstrSpan MIS(I, &SaveBlock); 96 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass( 97 Reg, Reg == RI->getReturnAddressReg(MF) ? MVT::i64 : MVT::i32); 98 99 // If this value was already livein, we probably have a direct use of the 100 // incoming register value, so don't kill at the spill point. This happens 101 // since we pass some special inputs (workgroup IDs) in the callee saved 102 // range. 103 const bool IsLiveIn = MRI.isLiveIn(Reg); 104 TII.storeRegToStackSlot(SaveBlock, I, Reg, !IsLiveIn, CS.getFrameIdx(), 105 RC, TRI, Register()); 106 107 if (Indexes) { 108 assert(std::distance(MIS.begin(), I) == 1); 109 MachineInstr &Inst = *std::prev(I); 110 Indexes->insertMachineInstrInMaps(Inst); 111 } 112 113 if (LIS) 114 LIS->removeAllRegUnitsForPhysReg(Reg); 115 } 116 } 117 } 118 119 /// Insert restore code for the callee-saved registers used in the function. 120 static void insertCSRRestores(MachineBasicBlock &RestoreBlock, 121 MutableArrayRef<CalleeSavedInfo> CSI, 122 SlotIndexes *Indexes, LiveIntervals *LIS) { 123 MachineFunction &MF = *RestoreBlock.getParent(); 124 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 125 const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); 126 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 127 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 128 const SIRegisterInfo *RI = ST.getRegisterInfo(); 129 // Restore all registers immediately before the return and any 130 // terminators that precede it. 131 MachineBasicBlock::iterator I = RestoreBlock.getFirstTerminator(); 132 133 // FIXME: Just emit the readlane/writelane directly 134 if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) { 135 for (const CalleeSavedInfo &CI : reverse(CSI)) { 136 Register Reg = CI.getReg(); 137 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass( 138 Reg, Reg == RI->getReturnAddressReg(MF) ? MVT::i64 : MVT::i32); 139 140 TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI, 141 Register()); 142 assert(I != RestoreBlock.begin() && 143 "loadRegFromStackSlot didn't insert any code!"); 144 // Insert in reverse order. loadRegFromStackSlot can insert 145 // multiple instructions. 146 147 if (Indexes) { 148 MachineInstr &Inst = *std::prev(I); 149 Indexes->insertMachineInstrInMaps(Inst); 150 } 151 152 if (LIS) 153 LIS->removeAllRegUnitsForPhysReg(Reg); 154 } 155 } 156 } 157 158 /// Compute the sets of entry and return blocks for saving and restoring 159 /// callee-saved registers, and placing prolog and epilog code. 160 void SILowerSGPRSpills::calculateSaveRestoreBlocks(MachineFunction &MF) { 161 const MachineFrameInfo &MFI = MF.getFrameInfo(); 162 163 // Even when we do not change any CSR, we still want to insert the 164 // prologue and epilogue of the function. 165 // So set the save points for those. 166 167 // Use the points found by shrink-wrapping, if any. 168 if (MFI.getSavePoint()) { 169 SaveBlocks.push_back(MFI.getSavePoint()); 170 assert(MFI.getRestorePoint() && "Both restore and save must be set"); 171 MachineBasicBlock *RestoreBlock = MFI.getRestorePoint(); 172 // If RestoreBlock does not have any successor and is not a return block 173 // then the end point is unreachable and we do not need to insert any 174 // epilogue. 175 if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock()) 176 RestoreBlocks.push_back(RestoreBlock); 177 return; 178 } 179 180 // Save refs to entry and return blocks. 181 SaveBlocks.push_back(&MF.front()); 182 for (MachineBasicBlock &MBB : MF) { 183 if (MBB.isEHFuncletEntry()) 184 SaveBlocks.push_back(&MBB); 185 if (MBB.isReturnBlock()) 186 RestoreBlocks.push_back(&MBB); 187 } 188 } 189 190 // TODO: To support shrink wrapping, this would need to copy 191 // PrologEpilogInserter's updateLiveness. 192 static void updateLiveness(MachineFunction &MF, ArrayRef<CalleeSavedInfo> CSI) { 193 MachineBasicBlock &EntryBB = MF.front(); 194 195 for (const CalleeSavedInfo &CSIReg : CSI) 196 EntryBB.addLiveIn(CSIReg.getReg()); 197 EntryBB.sortUniqueLiveIns(); 198 } 199 200 bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) { 201 MachineRegisterInfo &MRI = MF.getRegInfo(); 202 const Function &F = MF.getFunction(); 203 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 204 const SIFrameLowering *TFI = ST.getFrameLowering(); 205 MachineFrameInfo &MFI = MF.getFrameInfo(); 206 RegScavenger *RS = nullptr; 207 208 // Determine which of the registers in the callee save list should be saved. 209 BitVector SavedRegs; 210 TFI->determineCalleeSavesSGPR(MF, SavedRegs, RS); 211 212 // Add the code to save and restore the callee saved registers. 213 if (!F.hasFnAttribute(Attribute::Naked)) { 214 // FIXME: This is a lie. The CalleeSavedInfo is incomplete, but this is 215 // necessary for verifier liveness checks. 216 MFI.setCalleeSavedInfoValid(true); 217 218 std::vector<CalleeSavedInfo> CSI; 219 const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs(); 220 221 for (unsigned I = 0; CSRegs[I]; ++I) { 222 MCRegister Reg = CSRegs[I]; 223 224 if (SavedRegs.test(Reg)) { 225 const TargetRegisterClass *RC = 226 TRI->getMinimalPhysRegClass(Reg, MVT::i32); 227 int JunkFI = MFI.CreateStackObject(TRI->getSpillSize(*RC), 228 TRI->getSpillAlign(*RC), true); 229 230 CSI.push_back(CalleeSavedInfo(Reg, JunkFI)); 231 } 232 } 233 234 if (!CSI.empty()) { 235 for (MachineBasicBlock *SaveBlock : SaveBlocks) 236 insertCSRSaves(*SaveBlock, CSI, Indexes, LIS); 237 238 // Add live ins to save blocks. 239 assert(SaveBlocks.size() == 1 && "shrink wrapping not fully implemented"); 240 updateLiveness(MF, CSI); 241 242 for (MachineBasicBlock *RestoreBlock : RestoreBlocks) 243 insertCSRRestores(*RestoreBlock, CSI, Indexes, LIS); 244 return true; 245 } 246 } 247 248 return false; 249 } 250 251 bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) { 252 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 253 TII = ST.getInstrInfo(); 254 TRI = &TII->getRegisterInfo(); 255 256 LIS = getAnalysisIfAvailable<LiveIntervals>(); 257 Indexes = getAnalysisIfAvailable<SlotIndexes>(); 258 259 assert(SaveBlocks.empty() && RestoreBlocks.empty()); 260 261 // First, expose any CSR SGPR spills. This is mostly the same as what PEI 262 // does, but somewhat simpler. 263 calculateSaveRestoreBlocks(MF); 264 bool HasCSRs = spillCalleeSavedRegs(MF); 265 266 MachineFrameInfo &MFI = MF.getFrameInfo(); 267 MachineRegisterInfo &MRI = MF.getRegInfo(); 268 SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); 269 270 if (!MFI.hasStackObjects() && !HasCSRs) { 271 SaveBlocks.clear(); 272 RestoreBlocks.clear(); 273 return false; 274 } 275 276 bool MadeChange = false; 277 bool NewReservedRegs = false; 278 279 // TODO: CSR VGPRs will never be spilled to AGPRs. These can probably be 280 // handled as SpilledToReg in regular PrologEpilogInserter. 281 const bool HasSGPRSpillToVGPR = TRI->spillSGPRToVGPR() && 282 (HasCSRs || FuncInfo->hasSpilledSGPRs()); 283 if (HasSGPRSpillToVGPR) { 284 // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs 285 // are spilled to VGPRs, in which case we can eliminate the stack usage. 286 // 287 // This operates under the assumption that only other SGPR spills are users 288 // of the frame index. 289 290 // To track the spill frame indices handled in this pass. 291 BitVector SpillFIs(MFI.getObjectIndexEnd(), false); 292 293 for (MachineBasicBlock &MBB : MF) { 294 for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) { 295 if (!TII->isSGPRSpill(MI)) 296 continue; 297 298 int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex(); 299 assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill); 300 if (FuncInfo->allocateSGPRSpillToVGPRLane(MF, FI)) { 301 NewReservedRegs = true; 302 bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex( 303 MI, FI, nullptr, Indexes, LIS); 304 (void)Spilled; 305 assert(Spilled && "failed to spill SGPR to VGPR when allocated"); 306 SpillFIs.set(FI); 307 } 308 } 309 } 310 311 // FIXME: Adding to live-ins redundant with reserving registers. 312 for (MachineBasicBlock &MBB : MF) { 313 for (auto Reg : FuncInfo->getSGPRSpillVGPRs()) 314 MBB.addLiveIn(Reg); 315 MBB.sortUniqueLiveIns(); 316 317 // FIXME: The dead frame indices are replaced with a null register from 318 // the debug value instructions. We should instead, update it with the 319 // correct register value. But not sure the register value alone is 320 // adequate to lower the DIExpression. It should be worked out later. 321 for (MachineInstr &MI : MBB) { 322 if (MI.isDebugValue() && MI.getOperand(0).isFI() && 323 !MFI.isFixedObjectIndex(MI.getOperand(0).getIndex()) && 324 SpillFIs[MI.getOperand(0).getIndex()]) { 325 MI.getOperand(0).ChangeToRegister(Register(), false /*isDef*/); 326 } 327 } 328 } 329 330 // All those frame indices which are dead by now should be removed from the 331 // function frame. Otherwise, there is a side effect such as re-mapping of 332 // free frame index ids by the later pass(es) like "stack slot coloring" 333 // which in turn could mess-up with the book keeping of "frame index to VGPR 334 // lane". 335 FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ false); 336 337 const TargetRegisterClass *RC = TRI->getWaveMaskRegClass(); 338 // Shift back the reserved SGPR for EXEC copy into the lowest range. 339 // This SGPR is reserved to handle the whole-wave spill/copy operations 340 // that might get inserted during vgpr regalloc. 341 Register UnusedLowSGPR = TRI->findUnusedRegister(MRI, RC, MF); 342 if (UnusedLowSGPR && TRI->getHWRegIndex(UnusedLowSGPR) < 343 TRI->getHWRegIndex(FuncInfo->getSGPRForEXECCopy())) 344 FuncInfo->setSGPRForEXECCopy(UnusedLowSGPR); 345 346 MadeChange = true; 347 } else { 348 // No SGPR spills and hence there won't be any WWM spills/copies. Reset the 349 // SGPR reserved for EXEC copy. 350 FuncInfo->setSGPRForEXECCopy(AMDGPU::NoRegister); 351 } 352 353 SaveBlocks.clear(); 354 RestoreBlocks.clear(); 355 356 // Updated the reserved registers with any VGPRs added for SGPR spills. 357 if (NewReservedRegs) 358 MRI.freezeReservedRegs(MF); 359 360 return MadeChange; 361 } 362