1 //===-- SILowerSGPRSPills.cpp ---------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Handle SGPR spills. This pass takes the place of PrologEpilogInserter for all 10 // SGPR spills, so must insert CSR SGPR spills as well as expand them. 11 // 12 // This pass must never create new SGPR virtual registers. 13 // 14 // FIXME: Must stop RegScavenger spills in later passes. 15 // 16 //===----------------------------------------------------------------------===// 17 18 #include "AMDGPU.h" 19 #include "GCNSubtarget.h" 20 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 21 #include "SIMachineFunctionInfo.h" 22 #include "llvm/CodeGen/LiveIntervals.h" 23 #include "llvm/CodeGen/RegisterScavenging.h" 24 #include "llvm/InitializePasses.h" 25 26 using namespace llvm; 27 28 #define DEBUG_TYPE "si-lower-sgpr-spills" 29 30 using MBBVector = SmallVector<MachineBasicBlock *, 4>; 31 32 namespace { 33 34 class SILowerSGPRSpills : public MachineFunctionPass { 35 private: 36 const SIRegisterInfo *TRI = nullptr; 37 const SIInstrInfo *TII = nullptr; 38 LiveIntervals *LIS = nullptr; 39 40 // Save and Restore blocks of the current function. Typically there is a 41 // single save block, unless Windows EH funclets are involved. 42 MBBVector SaveBlocks; 43 MBBVector RestoreBlocks; 44 45 public: 46 static char ID; 47 48 SILowerSGPRSpills() : MachineFunctionPass(ID) {} 49 50 void calculateSaveRestoreBlocks(MachineFunction &MF); 51 bool spillCalleeSavedRegs(MachineFunction &MF); 52 53 bool runOnMachineFunction(MachineFunction &MF) override; 54 55 void getAnalysisUsage(AnalysisUsage &AU) const override { 56 AU.setPreservesAll(); 57 MachineFunctionPass::getAnalysisUsage(AU); 58 } 59 }; 60 61 } // end anonymous namespace 62 63 char SILowerSGPRSpills::ID = 0; 64 65 INITIALIZE_PASS_BEGIN(SILowerSGPRSpills, DEBUG_TYPE, 66 "SI lower SGPR spill instructions", false, false) 67 INITIALIZE_PASS_DEPENDENCY(LiveIntervals) 68 INITIALIZE_PASS_DEPENDENCY(VirtRegMap) 69 INITIALIZE_PASS_END(SILowerSGPRSpills, DEBUG_TYPE, 70 "SI lower SGPR spill instructions", false, false) 71 72 char &llvm::SILowerSGPRSpillsID = SILowerSGPRSpills::ID; 73 74 /// Insert restore code for the callee-saved registers used in the function. 75 static void insertCSRSaves(MachineBasicBlock &SaveBlock, 76 ArrayRef<CalleeSavedInfo> CSI, 77 LiveIntervals *LIS) { 78 MachineFunction &MF = *SaveBlock.getParent(); 79 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 80 const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); 81 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 82 83 MachineBasicBlock::iterator I = SaveBlock.begin(); 84 if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) { 85 const MachineRegisterInfo &MRI = MF.getRegInfo(); 86 87 for (const CalleeSavedInfo &CS : CSI) { 88 // Insert the spill to the stack frame. 89 MCRegister Reg = CS.getReg(); 90 91 MachineInstrSpan MIS(I, &SaveBlock); 92 const TargetRegisterClass *RC = 93 TRI->getMinimalPhysRegClass(Reg, MVT::i32); 94 95 // If this value was already livein, we probably have a direct use of the 96 // incoming register value, so don't kill at the spill point. This happens 97 // since we pass some special inputs (workgroup IDs) in the callee saved 98 // range. 99 const bool IsLiveIn = MRI.isLiveIn(Reg); 100 TII.storeRegToStackSlot(SaveBlock, I, Reg, !IsLiveIn, CS.getFrameIdx(), 101 RC, TRI); 102 103 if (LIS) { 104 assert(std::distance(MIS.begin(), I) == 1); 105 MachineInstr &Inst = *std::prev(I); 106 107 LIS->InsertMachineInstrInMaps(Inst); 108 LIS->removeAllRegUnitsForPhysReg(Reg); 109 } 110 } 111 } 112 } 113 114 /// Insert restore code for the callee-saved registers used in the function. 115 static void insertCSRRestores(MachineBasicBlock &RestoreBlock, 116 MutableArrayRef<CalleeSavedInfo> CSI, 117 LiveIntervals *LIS) { 118 MachineFunction &MF = *RestoreBlock.getParent(); 119 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 120 const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); 121 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 122 123 // Restore all registers immediately before the return and any 124 // terminators that precede it. 125 MachineBasicBlock::iterator I = RestoreBlock.getFirstTerminator(); 126 127 // FIXME: Just emit the readlane/writelane directly 128 if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) { 129 for (const CalleeSavedInfo &CI : reverse(CSI)) { 130 Register Reg = CI.getReg(); 131 const TargetRegisterClass *RC = 132 TRI->getMinimalPhysRegClass(Reg, MVT::i32); 133 134 TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI); 135 assert(I != RestoreBlock.begin() && 136 "loadRegFromStackSlot didn't insert any code!"); 137 // Insert in reverse order. loadRegFromStackSlot can insert 138 // multiple instructions. 139 140 if (LIS) { 141 MachineInstr &Inst = *std::prev(I); 142 LIS->InsertMachineInstrInMaps(Inst); 143 LIS->removeAllRegUnitsForPhysReg(Reg); 144 } 145 } 146 } 147 } 148 149 /// Compute the sets of entry and return blocks for saving and restoring 150 /// callee-saved registers, and placing prolog and epilog code. 151 void SILowerSGPRSpills::calculateSaveRestoreBlocks(MachineFunction &MF) { 152 const MachineFrameInfo &MFI = MF.getFrameInfo(); 153 154 // Even when we do not change any CSR, we still want to insert the 155 // prologue and epilogue of the function. 156 // So set the save points for those. 157 158 // Use the points found by shrink-wrapping, if any. 159 if (MFI.getSavePoint()) { 160 SaveBlocks.push_back(MFI.getSavePoint()); 161 assert(MFI.getRestorePoint() && "Both restore and save must be set"); 162 MachineBasicBlock *RestoreBlock = MFI.getRestorePoint(); 163 // If RestoreBlock does not have any successor and is not a return block 164 // then the end point is unreachable and we do not need to insert any 165 // epilogue. 166 if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock()) 167 RestoreBlocks.push_back(RestoreBlock); 168 return; 169 } 170 171 // Save refs to entry and return blocks. 172 SaveBlocks.push_back(&MF.front()); 173 for (MachineBasicBlock &MBB : MF) { 174 if (MBB.isEHFuncletEntry()) 175 SaveBlocks.push_back(&MBB); 176 if (MBB.isReturnBlock()) 177 RestoreBlocks.push_back(&MBB); 178 } 179 } 180 181 // TODO: To support shrink wrapping, this would need to copy 182 // PrologEpilogInserter's updateLiveness. 183 static void updateLiveness(MachineFunction &MF, ArrayRef<CalleeSavedInfo> CSI) { 184 MachineBasicBlock &EntryBB = MF.front(); 185 186 for (const CalleeSavedInfo &CSIReg : CSI) 187 EntryBB.addLiveIn(CSIReg.getReg()); 188 EntryBB.sortUniqueLiveIns(); 189 } 190 191 bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) { 192 MachineRegisterInfo &MRI = MF.getRegInfo(); 193 const Function &F = MF.getFunction(); 194 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 195 const SIFrameLowering *TFI = ST.getFrameLowering(); 196 MachineFrameInfo &MFI = MF.getFrameInfo(); 197 RegScavenger *RS = nullptr; 198 199 // Determine which of the registers in the callee save list should be saved. 200 BitVector SavedRegs; 201 TFI->determineCalleeSavesSGPR(MF, SavedRegs, RS); 202 203 // Add the code to save and restore the callee saved registers. 204 if (!F.hasFnAttribute(Attribute::Naked)) { 205 // FIXME: This is a lie. The CalleeSavedInfo is incomplete, but this is 206 // necessary for verifier liveness checks. 207 MFI.setCalleeSavedInfoValid(true); 208 209 std::vector<CalleeSavedInfo> CSI; 210 const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs(); 211 212 for (unsigned I = 0; CSRegs[I]; ++I) { 213 MCRegister Reg = CSRegs[I]; 214 215 if (SavedRegs.test(Reg)) { 216 const TargetRegisterClass *RC = 217 TRI->getMinimalPhysRegClass(Reg, MVT::i32); 218 int JunkFI = MFI.CreateStackObject(TRI->getSpillSize(*RC), 219 TRI->getSpillAlign(*RC), true); 220 221 CSI.push_back(CalleeSavedInfo(Reg, JunkFI)); 222 } 223 } 224 225 if (!CSI.empty()) { 226 for (MachineBasicBlock *SaveBlock : SaveBlocks) 227 insertCSRSaves(*SaveBlock, CSI, LIS); 228 229 // Add live ins to save blocks. 230 assert(SaveBlocks.size() == 1 && "shrink wrapping not fully implemented"); 231 updateLiveness(MF, CSI); 232 233 for (MachineBasicBlock *RestoreBlock : RestoreBlocks) 234 insertCSRRestores(*RestoreBlock, CSI, LIS); 235 return true; 236 } 237 } 238 239 return false; 240 } 241 242 bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) { 243 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 244 TII = ST.getInstrInfo(); 245 TRI = &TII->getRegisterInfo(); 246 247 LIS = getAnalysisIfAvailable<LiveIntervals>(); 248 249 assert(SaveBlocks.empty() && RestoreBlocks.empty()); 250 251 // First, expose any CSR SGPR spills. This is mostly the same as what PEI 252 // does, but somewhat simpler. 253 calculateSaveRestoreBlocks(MF); 254 bool HasCSRs = spillCalleeSavedRegs(MF); 255 256 MachineFrameInfo &MFI = MF.getFrameInfo(); 257 MachineRegisterInfo &MRI = MF.getRegInfo(); 258 SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); 259 260 if (!MFI.hasStackObjects() && !HasCSRs) { 261 SaveBlocks.clear(); 262 RestoreBlocks.clear(); 263 return false; 264 } 265 266 bool MadeChange = false; 267 bool NewReservedRegs = false; 268 269 // TODO: CSR VGPRs will never be spilled to AGPRs. These can probably be 270 // handled as SpilledToReg in regular PrologEpilogInserter. 271 const bool HasSGPRSpillToVGPR = TRI->spillSGPRToVGPR() && 272 (HasCSRs || FuncInfo->hasSpilledSGPRs()); 273 if (HasSGPRSpillToVGPR) { 274 // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs 275 // are spilled to VGPRs, in which case we can eliminate the stack usage. 276 // 277 // This operates under the assumption that only other SGPR spills are users 278 // of the frame index. 279 280 // To track the spill frame indices handled in this pass. 281 BitVector SpillFIs(MFI.getObjectIndexEnd(), false); 282 283 for (MachineBasicBlock &MBB : MF) { 284 for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) { 285 if (!TII->isSGPRSpill(MI)) 286 continue; 287 288 int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex(); 289 assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill); 290 if (FuncInfo->allocateSGPRSpillToVGPR(MF, FI)) { 291 NewReservedRegs = true; 292 bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(MI, FI, 293 nullptr, LIS); 294 (void)Spilled; 295 assert(Spilled && "failed to spill SGPR to VGPR when allocated"); 296 SpillFIs.set(FI); 297 } 298 } 299 } 300 301 // FIXME: Adding to live-ins redundant with reserving registers. 302 for (MachineBasicBlock &MBB : MF) { 303 for (auto SSpill : FuncInfo->getSGPRSpillVGPRs()) 304 MBB.addLiveIn(SSpill.VGPR); 305 MBB.sortUniqueLiveIns(); 306 307 // FIXME: The dead frame indices are replaced with a null register from 308 // the debug value instructions. We should instead, update it with the 309 // correct register value. But not sure the register value alone is 310 // adequate to lower the DIExpression. It should be worked out later. 311 for (MachineInstr &MI : MBB) { 312 if (MI.isDebugValue() && MI.getOperand(0).isFI() && 313 SpillFIs[MI.getOperand(0).getIndex()]) { 314 MI.getOperand(0).ChangeToRegister(Register(), false /*isDef*/); 315 } 316 } 317 } 318 319 // All those frame indices which are dead by now should be removed from the 320 // function frame. Otherwise, there is a side effect such as re-mapping of 321 // free frame index ids by the later pass(es) like "stack slot coloring" 322 // which in turn could mess-up with the book keeping of "frame index to VGPR 323 // lane". 324 FuncInfo->removeDeadFrameIndices(MFI); 325 326 MadeChange = true; 327 } 328 329 SaveBlocks.clear(); 330 RestoreBlocks.clear(); 331 332 // Updated the reserved registers with any VGPRs added for SGPR spills. 333 if (NewReservedRegs) 334 MRI.freezeReservedRegs(MF); 335 336 return MadeChange; 337 } 338