1 //===-- SILowerSGPRSPills.cpp ---------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Handle SGPR spills. This pass takes the place of PrologEpilogInserter for all 10 // SGPR spills, so must insert CSR SGPR spills as well as expand them. 11 // 12 // This pass must never create new SGPR virtual registers. 13 // 14 // FIXME: Must stop RegScavenger spills in later passes. 15 // 16 //===----------------------------------------------------------------------===// 17 18 #include "AMDGPU.h" 19 #include "GCNSubtarget.h" 20 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 21 #include "SIMachineFunctionInfo.h" 22 #include "llvm/CodeGen/LiveIntervals.h" 23 #include "llvm/CodeGen/MachineFrameInfo.h" 24 #include "llvm/CodeGen/RegisterScavenging.h" 25 #include "llvm/InitializePasses.h" 26 27 using namespace llvm; 28 29 #define DEBUG_TYPE "si-lower-sgpr-spills" 30 31 using MBBVector = SmallVector<MachineBasicBlock *, 4>; 32 33 namespace { 34 35 class SILowerSGPRSpills : public MachineFunctionPass { 36 private: 37 const SIRegisterInfo *TRI = nullptr; 38 const SIInstrInfo *TII = nullptr; 39 LiveIntervals *LIS = nullptr; 40 SlotIndexes *Indexes = nullptr; 41 42 // Save and Restore blocks of the current function. Typically there is a 43 // single save block, unless Windows EH funclets are involved. 44 MBBVector SaveBlocks; 45 MBBVector RestoreBlocks; 46 47 public: 48 static char ID; 49 50 SILowerSGPRSpills() : MachineFunctionPass(ID) {} 51 52 void calculateSaveRestoreBlocks(MachineFunction &MF); 53 bool spillCalleeSavedRegs(MachineFunction &MF, 54 SmallVectorImpl<int> &CalleeSavedFIs); 55 void extendWWMVirtRegLiveness(MachineFunction &MF, LiveIntervals *LIS); 56 57 bool runOnMachineFunction(MachineFunction &MF) override; 58 59 void getAnalysisUsage(AnalysisUsage &AU) const override { 60 AU.setPreservesAll(); 61 MachineFunctionPass::getAnalysisUsage(AU); 62 } 63 64 MachineFunctionProperties getClearedProperties() const override { 65 // SILowerSGPRSpills introduces new Virtual VGPRs for spilling SGPRs. 66 return MachineFunctionProperties() 67 .set(MachineFunctionProperties::Property::IsSSA) 68 .set(MachineFunctionProperties::Property::NoVRegs); 69 } 70 }; 71 72 } // end anonymous namespace 73 74 char SILowerSGPRSpills::ID = 0; 75 76 INITIALIZE_PASS_BEGIN(SILowerSGPRSpills, DEBUG_TYPE, 77 "SI lower SGPR spill instructions", false, false) 78 INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) 79 INITIALIZE_PASS_DEPENDENCY(VirtRegMap) 80 INITIALIZE_PASS_END(SILowerSGPRSpills, DEBUG_TYPE, 81 "SI lower SGPR spill instructions", false, false) 82 83 char &llvm::SILowerSGPRSpillsID = SILowerSGPRSpills::ID; 84 85 /// Insert spill code for the callee-saved registers used in the function. 86 static void insertCSRSaves(MachineBasicBlock &SaveBlock, 87 ArrayRef<CalleeSavedInfo> CSI, SlotIndexes *Indexes, 88 LiveIntervals *LIS) { 89 MachineFunction &MF = *SaveBlock.getParent(); 90 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 91 const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); 92 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 93 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 94 const SIRegisterInfo *RI = ST.getRegisterInfo(); 95 96 MachineBasicBlock::iterator I = SaveBlock.begin(); 97 if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) { 98 const MachineRegisterInfo &MRI = MF.getRegInfo(); 99 100 for (const CalleeSavedInfo &CS : CSI) { 101 // Insert the spill to the stack frame. 102 MCRegister Reg = CS.getReg(); 103 104 MachineInstrSpan MIS(I, &SaveBlock); 105 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass( 106 Reg, Reg == RI->getReturnAddressReg(MF) ? MVT::i64 : MVT::i32); 107 108 // If this value was already livein, we probably have a direct use of the 109 // incoming register value, so don't kill at the spill point. This happens 110 // since we pass some special inputs (workgroup IDs) in the callee saved 111 // range. 112 const bool IsLiveIn = MRI.isLiveIn(Reg); 113 TII.storeRegToStackSlot(SaveBlock, I, Reg, !IsLiveIn, CS.getFrameIdx(), 114 RC, TRI, Register()); 115 116 if (Indexes) { 117 assert(std::distance(MIS.begin(), I) == 1); 118 MachineInstr &Inst = *std::prev(I); 119 Indexes->insertMachineInstrInMaps(Inst); 120 } 121 122 if (LIS) 123 LIS->removeAllRegUnitsForPhysReg(Reg); 124 } 125 } 126 } 127 128 /// Insert restore code for the callee-saved registers used in the function. 129 static void insertCSRRestores(MachineBasicBlock &RestoreBlock, 130 MutableArrayRef<CalleeSavedInfo> CSI, 131 SlotIndexes *Indexes, LiveIntervals *LIS) { 132 MachineFunction &MF = *RestoreBlock.getParent(); 133 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 134 const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); 135 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 136 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 137 const SIRegisterInfo *RI = ST.getRegisterInfo(); 138 // Restore all registers immediately before the return and any 139 // terminators that precede it. 140 MachineBasicBlock::iterator I = RestoreBlock.getFirstTerminator(); 141 142 // FIXME: Just emit the readlane/writelane directly 143 if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) { 144 for (const CalleeSavedInfo &CI : reverse(CSI)) { 145 Register Reg = CI.getReg(); 146 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass( 147 Reg, Reg == RI->getReturnAddressReg(MF) ? MVT::i64 : MVT::i32); 148 149 TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI, 150 Register()); 151 assert(I != RestoreBlock.begin() && 152 "loadRegFromStackSlot didn't insert any code!"); 153 // Insert in reverse order. loadRegFromStackSlot can insert 154 // multiple instructions. 155 156 if (Indexes) { 157 MachineInstr &Inst = *std::prev(I); 158 Indexes->insertMachineInstrInMaps(Inst); 159 } 160 161 if (LIS) 162 LIS->removeAllRegUnitsForPhysReg(Reg); 163 } 164 } 165 } 166 167 /// Compute the sets of entry and return blocks for saving and restoring 168 /// callee-saved registers, and placing prolog and epilog code. 169 void SILowerSGPRSpills::calculateSaveRestoreBlocks(MachineFunction &MF) { 170 const MachineFrameInfo &MFI = MF.getFrameInfo(); 171 172 // Even when we do not change any CSR, we still want to insert the 173 // prologue and epilogue of the function. 174 // So set the save points for those. 175 176 // Use the points found by shrink-wrapping, if any. 177 if (MFI.getSavePoint()) { 178 SaveBlocks.push_back(MFI.getSavePoint()); 179 assert(MFI.getRestorePoint() && "Both restore and save must be set"); 180 MachineBasicBlock *RestoreBlock = MFI.getRestorePoint(); 181 // If RestoreBlock does not have any successor and is not a return block 182 // then the end point is unreachable and we do not need to insert any 183 // epilogue. 184 if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock()) 185 RestoreBlocks.push_back(RestoreBlock); 186 return; 187 } 188 189 // Save refs to entry and return blocks. 190 SaveBlocks.push_back(&MF.front()); 191 for (MachineBasicBlock &MBB : MF) { 192 if (MBB.isEHFuncletEntry()) 193 SaveBlocks.push_back(&MBB); 194 if (MBB.isReturnBlock()) 195 RestoreBlocks.push_back(&MBB); 196 } 197 } 198 199 // TODO: To support shrink wrapping, this would need to copy 200 // PrologEpilogInserter's updateLiveness. 201 static void updateLiveness(MachineFunction &MF, ArrayRef<CalleeSavedInfo> CSI) { 202 MachineBasicBlock &EntryBB = MF.front(); 203 204 for (const CalleeSavedInfo &CSIReg : CSI) 205 EntryBB.addLiveIn(CSIReg.getReg()); 206 EntryBB.sortUniqueLiveIns(); 207 } 208 209 bool SILowerSGPRSpills::spillCalleeSavedRegs( 210 MachineFunction &MF, SmallVectorImpl<int> &CalleeSavedFIs) { 211 MachineRegisterInfo &MRI = MF.getRegInfo(); 212 const Function &F = MF.getFunction(); 213 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 214 const SIFrameLowering *TFI = ST.getFrameLowering(); 215 MachineFrameInfo &MFI = MF.getFrameInfo(); 216 RegScavenger *RS = nullptr; 217 218 // Determine which of the registers in the callee save list should be saved. 219 BitVector SavedRegs; 220 TFI->determineCalleeSavesSGPR(MF, SavedRegs, RS); 221 222 // Add the code to save and restore the callee saved registers. 223 if (!F.hasFnAttribute(Attribute::Naked)) { 224 // FIXME: This is a lie. The CalleeSavedInfo is incomplete, but this is 225 // necessary for verifier liveness checks. 226 MFI.setCalleeSavedInfoValid(true); 227 228 std::vector<CalleeSavedInfo> CSI; 229 const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs(); 230 231 for (unsigned I = 0; CSRegs[I]; ++I) { 232 MCRegister Reg = CSRegs[I]; 233 234 if (SavedRegs.test(Reg)) { 235 const TargetRegisterClass *RC = 236 TRI->getMinimalPhysRegClass(Reg, MVT::i32); 237 int JunkFI = MFI.CreateStackObject(TRI->getSpillSize(*RC), 238 TRI->getSpillAlign(*RC), true); 239 240 CSI.emplace_back(Reg, JunkFI); 241 CalleeSavedFIs.push_back(JunkFI); 242 } 243 } 244 245 if (!CSI.empty()) { 246 for (MachineBasicBlock *SaveBlock : SaveBlocks) 247 insertCSRSaves(*SaveBlock, CSI, Indexes, LIS); 248 249 // Add live ins to save blocks. 250 assert(SaveBlocks.size() == 1 && "shrink wrapping not fully implemented"); 251 updateLiveness(MF, CSI); 252 253 for (MachineBasicBlock *RestoreBlock : RestoreBlocks) 254 insertCSRRestores(*RestoreBlock, CSI, Indexes, LIS); 255 return true; 256 } 257 } 258 259 return false; 260 } 261 262 void SILowerSGPRSpills::extendWWMVirtRegLiveness(MachineFunction &MF, 263 LiveIntervals *LIS) { 264 // TODO: This is a workaround to avoid the unmodelled liveness computed with 265 // whole-wave virtual registers when allocated together with the regular VGPR 266 // virtual registers. Presently, the liveness computed during the regalloc is 267 // only uniform (or single lane aware) and it doesn't take account of the 268 // divergent control flow that exists for our GPUs. Since the WWM registers 269 // can modify inactive lanes, the wave-aware liveness should be computed for 270 // the virtual registers to accurately plot their interferences. Without 271 // having the divergent CFG for the function, it is difficult to implement the 272 // wave-aware liveness info. Until then, we conservatively extend the liveness 273 // of the wwm registers into the entire function so that they won't be reused 274 // without first spilling/splitting their liveranges. 275 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 276 277 // Insert the IMPLICIT_DEF for the wwm-registers in the entry blocks. 278 for (auto Reg : MFI->getSGPRSpillVGPRs()) { 279 for (MachineBasicBlock *SaveBlock : SaveBlocks) { 280 MachineBasicBlock::iterator InsertBefore = SaveBlock->begin(); 281 DebugLoc DL = SaveBlock->findDebugLoc(InsertBefore); 282 auto MIB = BuildMI(*SaveBlock, InsertBefore, DL, 283 TII->get(AMDGPU::IMPLICIT_DEF), Reg); 284 MFI->setFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG); 285 // Set SGPR_SPILL asm printer flag 286 MIB->setAsmPrinterFlag(AMDGPU::SGPR_SPILL); 287 if (LIS) { 288 LIS->InsertMachineInstrInMaps(*MIB); 289 } 290 } 291 } 292 293 // Insert the KILL in the return blocks to extend their liveness untill the 294 // end of function. Insert a separate KILL for each VGPR. 295 for (MachineBasicBlock *RestoreBlock : RestoreBlocks) { 296 MachineBasicBlock::iterator InsertBefore = 297 RestoreBlock->getFirstTerminator(); 298 DebugLoc DL = RestoreBlock->findDebugLoc(InsertBefore); 299 for (auto Reg : MFI->getSGPRSpillVGPRs()) { 300 auto MIB = BuildMI(*RestoreBlock, InsertBefore, DL, 301 TII->get(TargetOpcode::KILL)); 302 MIB.addReg(Reg); 303 if (LIS) 304 LIS->InsertMachineInstrInMaps(*MIB); 305 } 306 } 307 } 308 309 bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) { 310 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 311 TII = ST.getInstrInfo(); 312 TRI = &TII->getRegisterInfo(); 313 314 auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>(); 315 LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr; 316 auto *SIWrapper = getAnalysisIfAvailable<SlotIndexesWrapperPass>(); 317 Indexes = SIWrapper ? &SIWrapper->getSI() : nullptr; 318 319 assert(SaveBlocks.empty() && RestoreBlocks.empty()); 320 321 // First, expose any CSR SGPR spills. This is mostly the same as what PEI 322 // does, but somewhat simpler. 323 calculateSaveRestoreBlocks(MF); 324 SmallVector<int> CalleeSavedFIs; 325 bool HasCSRs = spillCalleeSavedRegs(MF, CalleeSavedFIs); 326 327 MachineFrameInfo &MFI = MF.getFrameInfo(); 328 MachineRegisterInfo &MRI = MF.getRegInfo(); 329 SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); 330 331 if (!MFI.hasStackObjects() && !HasCSRs) { 332 SaveBlocks.clear(); 333 RestoreBlocks.clear(); 334 return false; 335 } 336 337 bool MadeChange = false; 338 bool SpilledToVirtVGPRLanes = false; 339 340 // TODO: CSR VGPRs will never be spilled to AGPRs. These can probably be 341 // handled as SpilledToReg in regular PrologEpilogInserter. 342 const bool HasSGPRSpillToVGPR = TRI->spillSGPRToVGPR() && 343 (HasCSRs || FuncInfo->hasSpilledSGPRs()); 344 if (HasSGPRSpillToVGPR) { 345 // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs 346 // are spilled to VGPRs, in which case we can eliminate the stack usage. 347 // 348 // This operates under the assumption that only other SGPR spills are users 349 // of the frame index. 350 351 // To track the spill frame indices handled in this pass. 352 BitVector SpillFIs(MFI.getObjectIndexEnd(), false); 353 354 for (MachineBasicBlock &MBB : MF) { 355 for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) { 356 if (!TII->isSGPRSpill(MI)) 357 continue; 358 359 int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex(); 360 assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill); 361 362 bool IsCalleeSaveSGPRSpill = llvm::is_contained(CalleeSavedFIs, FI); 363 if (IsCalleeSaveSGPRSpill) { 364 // Spill callee-saved SGPRs into physical VGPR lanes. 365 366 // TODO: This is to ensure the CFIs are static for efficient frame 367 // unwinding in the debugger. Spilling them into virtual VGPR lanes 368 // involve regalloc to allocate the physical VGPRs and that might 369 // cause intermediate spill/split of such liveranges for successful 370 // allocation. This would result in broken CFI encoding unless the 371 // regalloc aware CFI generation to insert new CFIs along with the 372 // intermediate spills is implemented. There is no such support 373 // currently exist in the LLVM compiler. 374 if (FuncInfo->allocateSGPRSpillToVGPRLane( 375 MF, FI, /*SpillToPhysVGPRLane=*/true)) { 376 bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex( 377 MI, FI, nullptr, Indexes, LIS, true); 378 if (!Spilled) 379 llvm_unreachable( 380 "failed to spill SGPR to physical VGPR lane when allocated"); 381 } 382 } else { 383 if (FuncInfo->allocateSGPRSpillToVGPRLane(MF, FI)) { 384 bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex( 385 MI, FI, nullptr, Indexes, LIS); 386 if (!Spilled) 387 llvm_unreachable( 388 "failed to spill SGPR to virtual VGPR lane when allocated"); 389 SpillFIs.set(FI); 390 SpilledToVirtVGPRLanes = true; 391 } 392 } 393 } 394 } 395 396 if (SpilledToVirtVGPRLanes) { 397 extendWWMVirtRegLiveness(MF, LIS); 398 if (LIS) { 399 // Compute the LiveInterval for the newly created virtual registers. 400 for (auto Reg : FuncInfo->getSGPRSpillVGPRs()) 401 LIS->createAndComputeVirtRegInterval(Reg); 402 } 403 } 404 405 for (MachineBasicBlock &MBB : MF) { 406 // FIXME: The dead frame indices are replaced with a null register from 407 // the debug value instructions. We should instead, update it with the 408 // correct register value. But not sure the register value alone is 409 // adequate to lower the DIExpression. It should be worked out later. 410 for (MachineInstr &MI : MBB) { 411 if (MI.isDebugValue() && MI.getOperand(0).isFI() && 412 !MFI.isFixedObjectIndex(MI.getOperand(0).getIndex()) && 413 SpillFIs[MI.getOperand(0).getIndex()]) { 414 MI.getOperand(0).ChangeToRegister(Register(), false /*isDef*/); 415 } 416 } 417 } 418 419 // All those frame indices which are dead by now should be removed from the 420 // function frame. Otherwise, there is a side effect such as re-mapping of 421 // free frame index ids by the later pass(es) like "stack slot coloring" 422 // which in turn could mess-up with the book keeping of "frame index to VGPR 423 // lane". 424 FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ false); 425 426 MadeChange = true; 427 } 428 429 if (SpilledToVirtVGPRLanes) { 430 const TargetRegisterClass *RC = TRI->getWaveMaskRegClass(); 431 // Shift back the reserved SGPR for EXEC copy into the lowest range. 432 // This SGPR is reserved to handle the whole-wave spill/copy operations 433 // that might get inserted during vgpr regalloc. 434 Register UnusedLowSGPR = TRI->findUnusedRegister(MRI, RC, MF); 435 if (UnusedLowSGPR && TRI->getHWRegIndex(UnusedLowSGPR) < 436 TRI->getHWRegIndex(FuncInfo->getSGPRForEXECCopy())) 437 FuncInfo->setSGPRForEXECCopy(UnusedLowSGPR); 438 } else { 439 // No SGPR spills to virtual VGPR lanes and hence there won't be any WWM 440 // spills/copies. Reset the SGPR reserved for EXEC copy. 441 FuncInfo->setSGPRForEXECCopy(AMDGPU::NoRegister); 442 } 443 444 SaveBlocks.clear(); 445 RestoreBlocks.clear(); 446 447 return MadeChange; 448 } 449