//===----------------------- SIFrameLowering.cpp --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//==-----------------------------------------------------------------------===//

#include "SIFrameLowering.h"
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

#define DEBUG_TYPE "frame-info"

// Find a scratch register that we can use at the start of the prologue to
// re-align the stack pointer. We avoid using callee-save registers since they
// may appear to be free when this is called from canUseAsPrologue (during
// shrink wrapping), but then no longer be free when this is called from
// emitPrologue.
//
// FIXME: This is a bit conservative, since in the above case we could use one
// of the callee-save registers as a scratch temp to re-align the stack pointer,
// but we would then have to make sure that we were in fact saving at least one
// callee-save register in the prologue, which is additional complexity that
// doesn't seem worth the benefit.
static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI,
                                                   LivePhysRegs &LiveRegs,
                                                   const TargetRegisterClass &RC,
                                                   bool Unused = false) {
  // Mark callee saved registers as used so we will not choose them.
  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
  for (unsigned i = 0; CSRegs[i]; ++i)
    LiveRegs.addReg(CSRegs[i]);

  if (Unused) {
    // We are looking for a register that can be used throughout the entire
    // function, so any use is unacceptable.
    for (MCRegister Reg : RC) {
      if (!MRI.isPhysRegUsed(Reg) && LiveRegs.available(MRI, Reg))
        return Reg;
    }
  } else {
    for (MCRegister Reg : RC) {
      if (LiveRegs.available(MRI, Reg))
        return Reg;
    }
  }

  // When an unused register is requested, the caller can tolerate failure and
  // has an alternative plan. In other contexts, this must succeed.
  if (!Unused)
    report_fatal_error("failed to find free scratch register");

  return MCRegister();
}

static void getVGPRSpillLaneOrTempRegister(MachineFunction &MF,
                                           LivePhysRegs &LiveRegs,
                                           Register &TempSGPR,
                                           Optional<int> &FrameIndex,
                                           bool IsFP) {
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();

#ifndef NDEBUG
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
#endif

  // We need to save and restore the current FP/BP.

  // 1: If there is already a VGPR with free lanes, use it. We
  // may already have to pay the penalty for spilling a CSR VGPR.
  if (MFI->haveFreeLanesForSGPRSpill(MF, 1)) {
    int NewFI = FrameInfo.CreateStackObject(4, Align(4), true, nullptr,
                                            TargetStackID::SGPRSpill);

    if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
      llvm_unreachable("allocate SGPR spill should have worked");

    FrameIndex = NewFI;

    LLVM_DEBUG(auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
               dbgs() << "Spilling " << (IsFP ? "FP" : "BP") << " to "
                      << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane
                      << '\n');
    return;
  }

  // 2: Next, try to save the FP/BP in an unused SGPR.
  TempSGPR = findScratchNonCalleeSaveRegister(
      MF.getRegInfo(), LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass, true);

  if (!TempSGPR) {
    int NewFI = FrameInfo.CreateStackObject(4, Align(4), true, nullptr,
                                            TargetStackID::SGPRSpill);

    if (MFI->allocateSGPRSpillToVGPR(MF, NewFI)) {
      // 3: There's no free lane to spill, and no free register to save FP/BP,
      // so we're forced to spill another VGPR to use for the spill.
      FrameIndex = NewFI;

      LLVM_DEBUG(
          auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
          dbgs() << (IsFP ? "FP" : "BP") << " requires fallback spill to "
                 << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane << '\n';);
    } else {
      // Remove dead <NewFI> index
      MF.getFrameInfo().RemoveStackObject(NewFI);
      // 4: If all else fails, spill the FP/BP to memory.
      FrameIndex = FrameInfo.CreateSpillStackObject(4, Align(4));
      LLVM_DEBUG(dbgs() << "Reserved FI " << FrameIndex << " for spilling "
                        << (IsFP ? "FP" : "BP") << '\n');
    }
  } else {
    LLVM_DEBUG(dbgs() << "Saving " << (IsFP ? "FP" : "BP") << " with copy to "
                      << printReg(TempSGPR, TRI) << '\n');
  }
}

// We need to emit the stack operations specially here because a different
// frame register is used than the one getFrameRegister would return for the
// rest of the function.
static void buildPrologSpill(const GCNSubtarget &ST, LivePhysRegs &LiveRegs,
                             MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator I,
                             const SIInstrInfo *TII, Register SpillReg,
                             Register ScratchRsrcReg, Register SPReg, int FI) {
  MachineFunction *MF = MBB.getParent();
  MachineFrameInfo &MFI = MF->getFrameInfo();

  int64_t Offset = MFI.getObjectOffset(FI);

  MachineMemOperand *MMO = MF->getMachineMemOperand(
      MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore, 4,
      MFI.getObjectAlign(FI));

  if (ST.enableFlatScratch()) {
    if (TII->isLegalFLATOffset(Offset, AMDGPUAS::PRIVATE_ADDRESS, true)) {
      BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::SCRATCH_STORE_DWORD_SADDR))
          .addReg(SpillReg, RegState::Kill)
          .addReg(SPReg)
          .addImm(Offset)
          .addImm(0) // glc
          .addImm(0) // slc
          .addImm(0) // dlc
          .addMemOperand(MMO);
      return;
    }
  } else if (SIInstrInfo::isLegalMUBUFImmOffset(Offset)) {
    BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::BUFFER_STORE_DWORD_OFFSET))
        .addReg(SpillReg, RegState::Kill)
        .addReg(ScratchRsrcReg)
        .addReg(SPReg)
        .addImm(Offset)
        .addImm(0) // glc
        .addImm(0) // slc
        .addImm(0) // tfe
        .addImm(0) // dlc
        .addImm(0) // swz
        .addMemOperand(MMO);
    return;
  }

  // Don't clobber the TmpVGPR if we also need a scratch reg for the stack
  // offset in the spill.
177 LiveRegs.addReg(SpillReg); 178 179 if (ST.enableFlatScratch()) { 180 MCPhysReg OffsetReg = findScratchNonCalleeSaveRegister( 181 MF->getRegInfo(), LiveRegs, AMDGPU::SReg_32_XM0RegClass); 182 183 BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_ADD_U32), OffsetReg) 184 .addReg(SPReg) 185 .addImm(Offset); 186 187 BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::SCRATCH_STORE_DWORD_SADDR)) 188 .addReg(SpillReg, RegState::Kill) 189 .addReg(OffsetReg, RegState::Kill) 190 .addImm(0) 191 .addImm(0) // glc 192 .addImm(0) // slc 193 .addImm(0) // dlc 194 .addMemOperand(MMO); 195 } else { 196 MCPhysReg OffsetReg = findScratchNonCalleeSaveRegister( 197 MF->getRegInfo(), LiveRegs, AMDGPU::VGPR_32RegClass); 198 199 BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), OffsetReg) 200 .addImm(Offset); 201 202 BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::BUFFER_STORE_DWORD_OFFEN)) 203 .addReg(SpillReg, RegState::Kill) 204 .addReg(OffsetReg, RegState::Kill) 205 .addReg(ScratchRsrcReg) 206 .addReg(SPReg) 207 .addImm(0) 208 .addImm(0) // glc 209 .addImm(0) // slc 210 .addImm(0) // tfe 211 .addImm(0) // dlc 212 .addImm(0) // swz 213 .addMemOperand(MMO); 214 } 215 216 LiveRegs.removeReg(SpillReg); 217 } 218 219 static void buildEpilogReload(const GCNSubtarget &ST, LivePhysRegs &LiveRegs, 220 MachineBasicBlock &MBB, 221 MachineBasicBlock::iterator I, 222 const SIInstrInfo *TII, Register SpillReg, 223 Register ScratchRsrcReg, Register SPReg, int FI) { 224 MachineFunction *MF = MBB.getParent(); 225 MachineFrameInfo &MFI = MF->getFrameInfo(); 226 int64_t Offset = MFI.getObjectOffset(FI); 227 228 MachineMemOperand *MMO = MF->getMachineMemOperand( 229 MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad, 4, 230 MFI.getObjectAlign(FI)); 231 232 if (ST.enableFlatScratch()) { 233 if (TII->isLegalFLATOffset(Offset, AMDGPUAS::PRIVATE_ADDRESS, true)) { 234 BuildMI(MBB, I, DebugLoc(), 235 TII->get(AMDGPU::SCRATCH_LOAD_DWORD_SADDR), SpillReg) 236 .addReg(SPReg) 237 .addImm(Offset) 238 .addImm(0) // glc 239 .addImm(0) // slc 240 .addImm(0) // dlc 241 .addMemOperand(MMO); 242 return; 243 } 244 MCPhysReg OffsetReg = findScratchNonCalleeSaveRegister( 245 MF->getRegInfo(), LiveRegs, AMDGPU::SReg_32_XM0RegClass); 246 247 BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_ADD_U32), OffsetReg) 248 .addReg(SPReg) 249 .addImm(Offset); 250 BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::SCRATCH_LOAD_DWORD_SADDR), 251 SpillReg) 252 .addReg(OffsetReg, RegState::Kill) 253 .addImm(0) 254 .addImm(0) // glc 255 .addImm(0) // slc 256 .addImm(0) // dlc 257 .addMemOperand(MMO); 258 return; 259 } 260 261 if (SIInstrInfo::isLegalMUBUFImmOffset(Offset)) { 262 BuildMI(MBB, I, DebugLoc(), 263 TII->get(AMDGPU::BUFFER_LOAD_DWORD_OFFSET), SpillReg) 264 .addReg(ScratchRsrcReg) 265 .addReg(SPReg) 266 .addImm(Offset) 267 .addImm(0) // glc 268 .addImm(0) // slc 269 .addImm(0) // tfe 270 .addImm(0) // dlc 271 .addImm(0) // swz 272 .addMemOperand(MMO); 273 return; 274 } 275 276 MCPhysReg OffsetReg = findScratchNonCalleeSaveRegister( 277 MF->getRegInfo(), LiveRegs, AMDGPU::VGPR_32RegClass); 278 279 BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), OffsetReg) 280 .addImm(Offset); 281 282 BuildMI(MBB, I, DebugLoc(), 283 TII->get(AMDGPU::BUFFER_LOAD_DWORD_OFFEN), SpillReg) 284 .addReg(OffsetReg, RegState::Kill) 285 .addReg(ScratchRsrcReg) 286 .addReg(SPReg) 287 .addImm(0) 288 .addImm(0) // glc 289 .addImm(0) // slc 290 .addImm(0) // tfe 291 .addImm(0) // dlc 292 .addImm(0) // swz 293 .addMemOperand(MMO); 294 } 295 296 static void 
buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, 297 const DebugLoc &DL, const SIInstrInfo *TII, 298 Register TargetReg) { 299 MachineFunction *MF = MBB.getParent(); 300 const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); 301 const SIRegisterInfo *TRI = &TII->getRegisterInfo(); 302 const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32); 303 Register TargetLo = TRI->getSubReg(TargetReg, AMDGPU::sub0); 304 Register TargetHi = TRI->getSubReg(TargetReg, AMDGPU::sub1); 305 306 if (MFI->getGITPtrHigh() != 0xffffffff) { 307 BuildMI(MBB, I, DL, SMovB32, TargetHi) 308 .addImm(MFI->getGITPtrHigh()) 309 .addReg(TargetReg, RegState::ImplicitDefine); 310 } else { 311 const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64); 312 BuildMI(MBB, I, DL, GetPC64, TargetReg); 313 } 314 Register GitPtrLo = MFI->getGITPtrLoReg(*MF); 315 MF->getRegInfo().addLiveIn(GitPtrLo); 316 MBB.addLiveIn(GitPtrLo); 317 BuildMI(MBB, I, DL, SMovB32, TargetLo) 318 .addReg(GitPtrLo); 319 } 320 321 // Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()` 322 void SIFrameLowering::emitEntryFunctionFlatScratchInit( 323 MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, 324 const DebugLoc &DL, Register ScratchWaveOffsetReg) const { 325 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 326 const SIInstrInfo *TII = ST.getInstrInfo(); 327 const SIRegisterInfo *TRI = &TII->getRegisterInfo(); 328 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 329 330 // We don't need this if we only have spills since there is no user facing 331 // scratch. 332 333 // TODO: If we know we don't have flat instructions earlier, we can omit 334 // this from the input registers. 335 // 336 // TODO: We only need to know if we access scratch space through a flat 337 // pointer. Because we only detect if flat instructions are used at all, 338 // this will be used more often than necessary on VI. 339 340 Register FlatScrInitLo; 341 Register FlatScrInitHi; 342 343 if (ST.isAmdPalOS()) { 344 // Extract the scratch offset from the descriptor in the GIT 345 LivePhysRegs LiveRegs; 346 LiveRegs.init(*TRI); 347 LiveRegs.addLiveIns(MBB); 348 349 // Find unused reg to load flat scratch init into 350 MachineRegisterInfo &MRI = MF.getRegInfo(); 351 Register FlatScrInit = AMDGPU::NoRegister; 352 ArrayRef<MCPhysReg> AllSGPR64s = TRI->getAllSGPR64(MF); 353 unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 1) / 2; 354 AllSGPR64s = AllSGPR64s.slice( 355 std::min(static_cast<unsigned>(AllSGPR64s.size()), NumPreloaded)); 356 Register GITPtrLoReg = MFI->getGITPtrLoReg(MF); 357 for (MCPhysReg Reg : AllSGPR64s) { 358 if (LiveRegs.available(MRI, Reg) && MRI.isAllocatable(Reg) && 359 !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) { 360 FlatScrInit = Reg; 361 break; 362 } 363 } 364 assert(FlatScrInit && "Failed to find free register for scratch init"); 365 366 FlatScrInitLo = TRI->getSubReg(FlatScrInit, AMDGPU::sub0); 367 FlatScrInitHi = TRI->getSubReg(FlatScrInit, AMDGPU::sub1); 368 369 buildGitPtr(MBB, I, DL, TII, FlatScrInit); 370 371 // We now have the GIT ptr - now get the scratch descriptor from the entry 372 // at offset 0 (or offset 16 for a compute shader). 
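    // The load below is marked invariant and dereferenceable: the GIT entry
    // is set up by the driver before the wave launches and does not change
    // while the shader runs.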
373 MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS); 374 const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM); 375 auto *MMO = MF.getMachineMemOperand( 376 PtrInfo, 377 MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant | 378 MachineMemOperand::MODereferenceable, 379 8, Align(4)); 380 unsigned Offset = 381 MF.getFunction().getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0; 382 const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>(); 383 unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset); 384 BuildMI(MBB, I, DL, LoadDwordX2, FlatScrInit) 385 .addReg(FlatScrInit) 386 .addImm(EncodedOffset) // offset 387 .addImm(0) // glc 388 .addImm(0) // dlc 389 .addMemOperand(MMO); 390 391 // Mask the offset in [47:0] of the descriptor 392 const MCInstrDesc &SAndB32 = TII->get(AMDGPU::S_AND_B32); 393 BuildMI(MBB, I, DL, SAndB32, FlatScrInitHi) 394 .addReg(FlatScrInitHi) 395 .addImm(0xffff); 396 } else { 397 Register FlatScratchInitReg = 398 MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT); 399 assert(FlatScratchInitReg); 400 401 MachineRegisterInfo &MRI = MF.getRegInfo(); 402 MRI.addLiveIn(FlatScratchInitReg); 403 MBB.addLiveIn(FlatScratchInitReg); 404 405 FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0); 406 FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1); 407 } 408 409 // Do a 64-bit pointer add. 410 if (ST.flatScratchIsPointer()) { 411 if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) { 412 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo) 413 .addReg(FlatScrInitLo) 414 .addReg(ScratchWaveOffsetReg); 415 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), FlatScrInitHi) 416 .addReg(FlatScrInitHi) 417 .addImm(0); 418 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)). 419 addReg(FlatScrInitLo). 420 addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_LO | 421 (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_))); 422 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)). 423 addReg(FlatScrInitHi). 424 addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_HI | 425 (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_))); 426 return; 427 } 428 429 // For GFX9. 430 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO) 431 .addReg(FlatScrInitLo) 432 .addReg(ScratchWaveOffsetReg); 433 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), AMDGPU::FLAT_SCR_HI) 434 .addReg(FlatScrInitHi) 435 .addImm(0); 436 437 return; 438 } 439 440 assert(ST.getGeneration() < AMDGPUSubtarget::GFX9); 441 442 // Copy the size in bytes. 443 BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO) 444 .addReg(FlatScrInitHi, RegState::Kill); 445 446 // Add wave offset in bytes to private base offset. 447 // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init. 448 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo) 449 .addReg(FlatScrInitLo) 450 .addReg(ScratchWaveOffsetReg); 451 452 // Convert offset to 256-byte units. 453 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32), AMDGPU::FLAT_SCR_HI) 454 .addReg(FlatScrInitLo, RegState::Kill) 455 .addImm(8); 456 } 457 458 // Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not 459 // memory. They should have been removed by now. 460 static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) { 461 for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd(); 462 I != E; ++I) { 463 if (!MFI.isDeadObjectIndex(I)) 464 return false; 465 } 466 467 return true; 468 } 469 470 // Shift down registers reserved for the scratch RSRC. 
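// Returns the SGPR quad actually used as the scratch RSRC after the shift, or
// an invalid Register() if no scratch resource descriptor is needed at all.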
471 Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg( 472 MachineFunction &MF) const { 473 474 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 475 const SIInstrInfo *TII = ST.getInstrInfo(); 476 const SIRegisterInfo *TRI = &TII->getRegisterInfo(); 477 MachineRegisterInfo &MRI = MF.getRegInfo(); 478 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 479 480 assert(MFI->isEntryFunction()); 481 482 Register ScratchRsrcReg = MFI->getScratchRSrcReg(); 483 484 if (!ScratchRsrcReg || (!MRI.isPhysRegUsed(ScratchRsrcReg) && 485 allStackObjectsAreDead(MF.getFrameInfo()))) 486 return Register(); 487 488 if (ST.hasSGPRInitBug() || 489 ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF)) 490 return ScratchRsrcReg; 491 492 // We reserved the last registers for this. Shift it down to the end of those 493 // which were actually used. 494 // 495 // FIXME: It might be safer to use a pseudoregister before replacement. 496 497 // FIXME: We should be able to eliminate unused input registers. We only 498 // cannot do this for the resources required for scratch access. For now we 499 // skip over user SGPRs and may leave unused holes. 500 501 unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4; 502 ArrayRef<MCPhysReg> AllSGPR128s = TRI->getAllSGPR128(MF); 503 AllSGPR128s = AllSGPR128s.slice(std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded)); 504 505 // Skip the last N reserved elements because they should have already been 506 // reserved for VCC etc. 507 Register GITPtrLoReg = MFI->getGITPtrLoReg(MF); 508 for (MCPhysReg Reg : AllSGPR128s) { 509 // Pick the first unallocated one. Make sure we don't clobber the other 510 // reserved input we needed. Also for PAL, make sure we don't clobber 511 // the GIT pointer passed in SGPR0 or SGPR8. 512 if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) && 513 !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) { 514 MRI.replaceRegWith(ScratchRsrcReg, Reg); 515 MFI->setScratchRSrcReg(Reg); 516 return Reg; 517 } 518 } 519 520 return ScratchRsrcReg; 521 } 522 523 static unsigned getScratchScaleFactor(const GCNSubtarget &ST) { 524 return ST.enableFlatScratch() ? 1 : ST.getWavefrontSize(); 525 } 526 527 void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, 528 MachineBasicBlock &MBB) const { 529 assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported"); 530 531 // FIXME: If we only have SGPR spills, we won't actually be using scratch 532 // memory since these spill to VGPRs. We should be cleaning up these unused 533 // SGPR spill frame indices somewhere. 534 535 // FIXME: We still have implicit uses on SGPR spill instructions in case they 536 // need to spill to vector memory. It's likely that will not happen, but at 537 // this point it appears we need the setup. This part of the prolog should be 538 // emitted after frame indices are eliminated. 539 540 // FIXME: Remove all of the isPhysRegUsed checks 541 542 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 543 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 544 const SIInstrInfo *TII = ST.getInstrInfo(); 545 const SIRegisterInfo *TRI = &TII->getRegisterInfo(); 546 MachineRegisterInfo &MRI = MF.getRegInfo(); 547 const Function &F = MF.getFunction(); 548 549 assert(MFI->isEntryFunction()); 550 551 Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg( 552 AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET); 553 // FIXME: Hack to not crash in situations which emitted an error. 
  if (!PreloadedScratchWaveOffsetReg)
    return;

  // We need to do the replacement of the private segment buffer register even
  // if there are no stack objects. There could be stores to undef or a
  // constant without an associated object.
  //
  // This will return `Register()` in cases where there are no actual
  // uses of the SRSRC.
  Register ScratchRsrcReg;
  if (!ST.enableFlatScratch())
    ScratchRsrcReg = getEntryFunctionReservedScratchRsrcReg(MF);

  // Make the selected register live throughout the function.
  if (ScratchRsrcReg) {
    for (MachineBasicBlock &OtherBB : MF) {
      if (&OtherBB != &MBB) {
        OtherBB.addLiveIn(ScratchRsrcReg);
      }
    }
  }

  // Now that we have fixed the reserved SRSRC we need to locate the
  // (potentially) preloaded SRSRC.
  Register PreloadedScratchRsrcReg;
  if (ST.isAmdHsaOrMesa(F)) {
    PreloadedScratchRsrcReg =
        MFI->getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
    if (ScratchRsrcReg && PreloadedScratchRsrcReg) {
      // We added live-ins during argument lowering, but since they were not
      // used they were deleted. We're adding the uses now, so add them back.
      MRI.addLiveIn(PreloadedScratchRsrcReg);
      MBB.addLiveIn(PreloadedScratchRsrcReg);
    }
  }

  // Debug location must be unknown since the first debug location is used to
  // determine the end of the prologue.
  DebugLoc DL;
  MachineBasicBlock::iterator I = MBB.begin();

  // We found the SRSRC first because it needs four registers and has an
  // alignment requirement. If the SRSRC that we found clobbers the scratch
  // wave offset, which may be in a fixed SGPR or a free SGPR chosen by
  // SITargetLowering::allocateSystemSGPRs, COPY the scratch wave offset to a
  // free SGPR.
600 Register ScratchWaveOffsetReg; 601 if (TRI->isSubRegisterEq(ScratchRsrcReg, PreloadedScratchWaveOffsetReg)) { 602 ArrayRef<MCPhysReg> AllSGPRs = TRI->getAllSGPR32(MF); 603 unsigned NumPreloaded = MFI->getNumPreloadedSGPRs(); 604 AllSGPRs = AllSGPRs.slice( 605 std::min(static_cast<unsigned>(AllSGPRs.size()), NumPreloaded)); 606 Register GITPtrLoReg = MFI->getGITPtrLoReg(MF); 607 for (MCPhysReg Reg : AllSGPRs) { 608 if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) && 609 !TRI->isSubRegisterEq(ScratchRsrcReg, Reg) && GITPtrLoReg != Reg) { 610 ScratchWaveOffsetReg = Reg; 611 BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg) 612 .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill); 613 break; 614 } 615 } 616 } else { 617 ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg; 618 } 619 assert(ScratchWaveOffsetReg); 620 621 if (requiresStackPointerReference(MF)) { 622 Register SPReg = MFI->getStackPtrOffsetReg(); 623 assert(SPReg != AMDGPU::SP_REG); 624 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg) 625 .addImm(MF.getFrameInfo().getStackSize() * getScratchScaleFactor(ST)); 626 } 627 628 if (hasFP(MF)) { 629 Register FPReg = MFI->getFrameOffsetReg(); 630 assert(FPReg != AMDGPU::FP_REG); 631 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0); 632 } 633 634 if (MFI->hasFlatScratchInit() || ScratchRsrcReg) { 635 MRI.addLiveIn(PreloadedScratchWaveOffsetReg); 636 MBB.addLiveIn(PreloadedScratchWaveOffsetReg); 637 } 638 639 if (MFI->hasFlatScratchInit()) { 640 emitEntryFunctionFlatScratchInit(MF, MBB, I, DL, ScratchWaveOffsetReg); 641 } 642 643 if (ScratchRsrcReg) { 644 emitEntryFunctionScratchRsrcRegSetup(MF, MBB, I, DL, 645 PreloadedScratchRsrcReg, 646 ScratchRsrcReg, ScratchWaveOffsetReg); 647 } 648 } 649 650 // Emit scratch RSRC setup code, assuming `ScratchRsrcReg != AMDGPU::NoReg` 651 void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup( 652 MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, 653 const DebugLoc &DL, Register PreloadedScratchRsrcReg, 654 Register ScratchRsrcReg, Register ScratchWaveOffsetReg) const { 655 656 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 657 const SIInstrInfo *TII = ST.getInstrInfo(); 658 const SIRegisterInfo *TRI = &TII->getRegisterInfo(); 659 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 660 const Function &Fn = MF.getFunction(); 661 662 if (ST.isAmdPalOS()) { 663 // The pointer to the GIT is formed from the offset passed in and either 664 // the amdgpu-git-ptr-high function attribute or the top part of the PC 665 Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1); 666 667 buildGitPtr(MBB, I, DL, TII, Rsrc01); 668 669 // We now have the GIT ptr - now get the scratch descriptor from the entry 670 // at offset 0 (or offset 16 for a compute shader). 671 MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS); 672 const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM); 673 auto MMO = MF.getMachineMemOperand(PtrInfo, 674 MachineMemOperand::MOLoad | 675 MachineMemOperand::MOInvariant | 676 MachineMemOperand::MODereferenceable, 677 16, Align(4)); 678 unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 
16 : 0; 679 const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>(); 680 unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset); 681 BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg) 682 .addReg(Rsrc01) 683 .addImm(EncodedOffset) // offset 684 .addImm(0) // glc 685 .addImm(0) // dlc 686 .addReg(ScratchRsrcReg, RegState::ImplicitDefine) 687 .addMemOperand(MMO); 688 } else if (ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) { 689 assert(!ST.isAmdHsaOrMesa(Fn)); 690 const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32); 691 692 Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2); 693 Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3); 694 695 // Use relocations to get the pointer, and setup the other bits manually. 696 uint64_t Rsrc23 = TII->getScratchRsrcWords23(); 697 698 if (MFI->hasImplicitBufferPtr()) { 699 Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1); 700 701 if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) { 702 const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64); 703 704 BuildMI(MBB, I, DL, Mov64, Rsrc01) 705 .addReg(MFI->getImplicitBufferPtrUserSGPR()) 706 .addReg(ScratchRsrcReg, RegState::ImplicitDefine); 707 } else { 708 const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM); 709 710 MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS); 711 auto MMO = MF.getMachineMemOperand( 712 PtrInfo, 713 MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant | 714 MachineMemOperand::MODereferenceable, 715 8, Align(4)); 716 BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01) 717 .addReg(MFI->getImplicitBufferPtrUserSGPR()) 718 .addImm(0) // offset 719 .addImm(0) // glc 720 .addImm(0) // dlc 721 .addMemOperand(MMO) 722 .addReg(ScratchRsrcReg, RegState::ImplicitDefine); 723 724 MF.getRegInfo().addLiveIn(MFI->getImplicitBufferPtrUserSGPR()); 725 MBB.addLiveIn(MFI->getImplicitBufferPtrUserSGPR()); 726 } 727 } else { 728 Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0); 729 Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1); 730 731 BuildMI(MBB, I, DL, SMovB32, Rsrc0) 732 .addExternalSymbol("SCRATCH_RSRC_DWORD0") 733 .addReg(ScratchRsrcReg, RegState::ImplicitDefine); 734 735 BuildMI(MBB, I, DL, SMovB32, Rsrc1) 736 .addExternalSymbol("SCRATCH_RSRC_DWORD1") 737 .addReg(ScratchRsrcReg, RegState::ImplicitDefine); 738 739 } 740 741 BuildMI(MBB, I, DL, SMovB32, Rsrc2) 742 .addImm(Rsrc23 & 0xffffffff) 743 .addReg(ScratchRsrcReg, RegState::ImplicitDefine); 744 745 BuildMI(MBB, I, DL, SMovB32, Rsrc3) 746 .addImm(Rsrc23 >> 32) 747 .addReg(ScratchRsrcReg, RegState::ImplicitDefine); 748 } else if (ST.isAmdHsaOrMesa(Fn)) { 749 assert(PreloadedScratchRsrcReg); 750 751 if (ScratchRsrcReg != PreloadedScratchRsrcReg) { 752 BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg) 753 .addReg(PreloadedScratchRsrcReg, RegState::Kill); 754 } 755 } 756 757 // Add the scratch wave offset into the scratch RSRC. 758 // 759 // We only want to update the first 48 bits, which is the base address 760 // pointer, without touching the adjacent 16 bits of flags. We know this add 761 // cannot carry-out from bit 47, otherwise the scratch allocation would be 762 // impossible to fit in the 48-bit global address space. 763 // 764 // TODO: Evaluate if it is better to just construct an SRD using the flat 765 // scratch init and some constants rather than update the one we are passed. 
766 Register ScratchRsrcSub0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0); 767 Register ScratchRsrcSub1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1); 768 769 // We cannot Kill ScratchWaveOffsetReg here because we allow it to be used in 770 // the kernel body via inreg arguments. 771 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), ScratchRsrcSub0) 772 .addReg(ScratchRsrcSub0) 773 .addReg(ScratchWaveOffsetReg) 774 .addReg(ScratchRsrcReg, RegState::ImplicitDefine); 775 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1) 776 .addReg(ScratchRsrcSub1) 777 .addImm(0) 778 .addReg(ScratchRsrcReg, RegState::ImplicitDefine); 779 } 780 781 bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const { 782 switch (ID) { 783 case TargetStackID::Default: 784 case TargetStackID::NoAlloc: 785 case TargetStackID::SGPRSpill: 786 return true; 787 case TargetStackID::ScalableVector: 788 return false; 789 } 790 llvm_unreachable("Invalid TargetStackID::Value"); 791 } 792 793 // Activate all lanes, returns saved exec. 794 static Register buildScratchExecCopy(LivePhysRegs &LiveRegs, 795 MachineFunction &MF, 796 MachineBasicBlock &MBB, 797 MachineBasicBlock::iterator MBBI, 798 bool IsProlog) { 799 Register ScratchExecCopy; 800 MachineRegisterInfo &MRI = MF.getRegInfo(); 801 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 802 const SIInstrInfo *TII = ST.getInstrInfo(); 803 const SIRegisterInfo &TRI = TII->getRegisterInfo(); 804 SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); 805 DebugLoc DL; 806 807 if (LiveRegs.empty()) { 808 if (IsProlog) { 809 LiveRegs.init(TRI); 810 LiveRegs.addLiveIns(MBB); 811 if (FuncInfo->SGPRForFPSaveRestoreCopy) 812 LiveRegs.removeReg(FuncInfo->SGPRForFPSaveRestoreCopy); 813 814 if (FuncInfo->SGPRForBPSaveRestoreCopy) 815 LiveRegs.removeReg(FuncInfo->SGPRForBPSaveRestoreCopy); 816 } else { 817 // In epilog. 818 LiveRegs.init(*ST.getRegisterInfo()); 819 LiveRegs.addLiveOuts(MBB); 820 LiveRegs.stepBackward(*MBBI); 821 } 822 } 823 824 ScratchExecCopy = findScratchNonCalleeSaveRegister( 825 MRI, LiveRegs, *TRI.getWaveMaskRegClass()); 826 827 if (!IsProlog) 828 LiveRegs.removeReg(ScratchExecCopy); 829 830 const unsigned OrSaveExec = 831 ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64; 832 BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), ScratchExecCopy).addImm(-1); 833 834 return ScratchExecCopy; 835 } 836 837 void SIFrameLowering::emitPrologue(MachineFunction &MF, 838 MachineBasicBlock &MBB) const { 839 SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); 840 if (FuncInfo->isEntryFunction()) { 841 emitEntryFunctionPrologue(MF, MBB); 842 return; 843 } 844 845 const MachineFrameInfo &MFI = MF.getFrameInfo(); 846 MachineRegisterInfo &MRI = MF.getRegInfo(); 847 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 848 const SIInstrInfo *TII = ST.getInstrInfo(); 849 const SIRegisterInfo &TRI = TII->getRegisterInfo(); 850 851 Register StackPtrReg = FuncInfo->getStackPtrOffsetReg(); 852 Register FramePtrReg = FuncInfo->getFrameOffsetReg(); 853 Register BasePtrReg = 854 TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register(); 855 LivePhysRegs LiveRegs; 856 857 MachineBasicBlock::iterator MBBI = MBB.begin(); 858 DebugLoc DL; 859 860 bool HasFP = false; 861 bool HasBP = false; 862 uint32_t NumBytes = MFI.getStackSize(); 863 uint32_t RoundedSize = NumBytes; 864 // To avoid clobbering VGPRs in lanes that weren't active on function entry, 865 // turn on all lanes before doing the spill to memory. 
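  // The original exec mask is kept in ScratchExecCopy and written back once
  // all of the memory spills below have been emitted.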
  Register ScratchExecCopy;

  bool HasFPSaveIndex = FuncInfo->FramePointerSaveIndex.hasValue();
  bool SpillFPToMemory = false;
  // A StackID of SGPRSpill implies that this is a spill from SGPR to VGPR.
  // Otherwise we are spilling the FP to memory.
  if (HasFPSaveIndex) {
    SpillFPToMemory = MFI.getStackID(*FuncInfo->FramePointerSaveIndex) !=
                      TargetStackID::SGPRSpill;
  }

  bool HasBPSaveIndex = FuncInfo->BasePointerSaveIndex.hasValue();
  bool SpillBPToMemory = false;
  // A StackID of SGPRSpill implies that this is a spill from SGPR to VGPR.
  // Otherwise we are spilling the BP to memory.
  if (HasBPSaveIndex) {
    SpillBPToMemory = MFI.getStackID(*FuncInfo->BasePointerSaveIndex) !=
                      TargetStackID::SGPRSpill;
  }

  // Emit the copy if we need an FP, and are using a free SGPR to save it.
  if (FuncInfo->SGPRForFPSaveRestoreCopy) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
            FuncInfo->SGPRForFPSaveRestoreCopy)
        .addReg(FramePtrReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // Emit the copy if we need a BP, and are using a free SGPR to save it.
  if (FuncInfo->SGPRForBPSaveRestoreCopy) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
            FuncInfo->SGPRForBPSaveRestoreCopy)
        .addReg(BasePtrReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // If a copy has been emitted for FP and/or BP, make the SGPRs
  // used in the copy instructions live throughout the function.
  SmallVector<MCPhysReg, 2> TempSGPRs;
  if (FuncInfo->SGPRForFPSaveRestoreCopy)
    TempSGPRs.push_back(FuncInfo->SGPRForFPSaveRestoreCopy);

  if (FuncInfo->SGPRForBPSaveRestoreCopy)
    TempSGPRs.push_back(FuncInfo->SGPRForBPSaveRestoreCopy);

  if (!TempSGPRs.empty()) {
    for (MachineBasicBlock &MBB : MF) {
      for (MCPhysReg Reg : TempSGPRs)
        MBB.addLiveIn(Reg);

      MBB.sortUniqueLiveIns();
    }
  }

  for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
         : FuncInfo->getSGPRSpillVGPRs()) {
    if (!Reg.FI.hasValue())
      continue;

    if (!ScratchExecCopy)
      ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, true);

    buildPrologSpill(ST, LiveRegs, MBB, MBBI, TII, Reg.VGPR,
                     FuncInfo->getScratchRSrcReg(),
                     StackPtrReg,
                     Reg.FI.getValue());
  }

  if (HasFPSaveIndex && SpillFPToMemory) {
    assert(!MFI.isDeadObjectIndex(FuncInfo->FramePointerSaveIndex.getValue()));

    if (!ScratchExecCopy)
      ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, true);

    MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
        MRI, LiveRegs, AMDGPU::VGPR_32RegClass);

    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
        .addReg(FramePtrReg);

    buildPrologSpill(ST, LiveRegs, MBB, MBBI, TII, TmpVGPR,
                     FuncInfo->getScratchRSrcReg(), StackPtrReg,
                     FuncInfo->FramePointerSaveIndex.getValue());
  }

  if (HasBPSaveIndex && SpillBPToMemory) {
    assert(!MFI.isDeadObjectIndex(*FuncInfo->BasePointerSaveIndex));

    if (!ScratchExecCopy)
      ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, true);

    MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
        MRI, LiveRegs, AMDGPU::VGPR_32RegClass);

    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
        .addReg(BasePtrReg);

    buildPrologSpill(ST, LiveRegs, MBB, MBBI, TII, TmpVGPR,
                     FuncInfo->getScratchRSrcReg(), StackPtrReg,
                     *FuncInfo->BasePointerSaveIndex);
  }

  if (ScratchExecCopy) {
    // FIXME:
Split block and make terminator. 969 unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; 970 MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; 971 BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec) 972 .addReg(ScratchExecCopy, RegState::Kill); 973 LiveRegs.addReg(ScratchExecCopy); 974 } 975 976 // In this case, spill the FP to a reserved VGPR. 977 if (HasFPSaveIndex && !SpillFPToMemory) { 978 const int FI = FuncInfo->FramePointerSaveIndex.getValue(); 979 assert(!MFI.isDeadObjectIndex(FI)); 980 981 assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill); 982 ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill = 983 FuncInfo->getSGPRToVGPRSpills(FI); 984 assert(Spill.size() == 1); 985 986 // Save FP before setting it up. 987 // FIXME: This should respect spillSGPRToVGPR; 988 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR) 989 .addReg(FramePtrReg) 990 .addImm(Spill[0].Lane) 991 .addReg(Spill[0].VGPR, RegState::Undef); 992 } 993 994 // In this case, spill the BP to a reserved VGPR. 995 if (HasBPSaveIndex && !SpillBPToMemory) { 996 const int BasePtrFI = *FuncInfo->BasePointerSaveIndex; 997 assert(!MFI.isDeadObjectIndex(BasePtrFI)); 998 999 assert(MFI.getStackID(BasePtrFI) == TargetStackID::SGPRSpill); 1000 ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill = 1001 FuncInfo->getSGPRToVGPRSpills(BasePtrFI); 1002 assert(Spill.size() == 1); 1003 1004 // Save BP before setting it up. 1005 // FIXME: This should respect spillSGPRToVGPR; 1006 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR) 1007 .addReg(BasePtrReg) 1008 .addImm(Spill[0].Lane) 1009 .addReg(Spill[0].VGPR, RegState::Undef); 1010 } 1011 1012 if (TRI.needsStackRealignment(MF)) { 1013 HasFP = true; 1014 const unsigned Alignment = MFI.getMaxAlign().value(); 1015 1016 RoundedSize += Alignment; 1017 if (LiveRegs.empty()) { 1018 LiveRegs.init(TRI); 1019 LiveRegs.addLiveIns(MBB); 1020 LiveRegs.addReg(FuncInfo->SGPRForFPSaveRestoreCopy); 1021 LiveRegs.addReg(FuncInfo->SGPRForBPSaveRestoreCopy); 1022 } 1023 1024 Register ScratchSPReg = findScratchNonCalleeSaveRegister( 1025 MRI, LiveRegs, AMDGPU::SReg_32_XM0RegClass); 1026 assert(ScratchSPReg && ScratchSPReg != FuncInfo->SGPRForFPSaveRestoreCopy && 1027 ScratchSPReg != FuncInfo->SGPRForBPSaveRestoreCopy); 1028 1029 // s_add_u32 tmp_reg, s32, NumBytes 1030 // s_and_b32 s32, tmp_reg, 0b111...0000 1031 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), ScratchSPReg) 1032 .addReg(StackPtrReg) 1033 .addImm((Alignment - 1) * getScratchScaleFactor(ST)) 1034 .setMIFlag(MachineInstr::FrameSetup); 1035 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg) 1036 .addReg(ScratchSPReg, RegState::Kill) 1037 .addImm(-Alignment * getScratchScaleFactor(ST)) 1038 .setMIFlag(MachineInstr::FrameSetup); 1039 FuncInfo->setIsStackRealigned(true); 1040 } else if ((HasFP = hasFP(MF))) { 1041 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg) 1042 .addReg(StackPtrReg) 1043 .setMIFlag(MachineInstr::FrameSetup); 1044 } 1045 1046 // If we need a base pointer, set it up here. It's whatever the value of 1047 // the stack pointer is at this point. Any variable size objects will be 1048 // allocated after this, so we can still use the base pointer to reference 1049 // the incoming arguments. 
1050 if ((HasBP = TRI.hasBasePointer(MF))) { 1051 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg) 1052 .addReg(StackPtrReg) 1053 .setMIFlag(MachineInstr::FrameSetup); 1054 } 1055 1056 if (HasFP && RoundedSize != 0) { 1057 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), StackPtrReg) 1058 .addReg(StackPtrReg) 1059 .addImm(RoundedSize * getScratchScaleFactor(ST)) 1060 .setMIFlag(MachineInstr::FrameSetup); 1061 } 1062 1063 assert((!HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy || 1064 FuncInfo->FramePointerSaveIndex)) && 1065 "Needed to save FP but didn't save it anywhere"); 1066 1067 assert((HasFP || (!FuncInfo->SGPRForFPSaveRestoreCopy && 1068 !FuncInfo->FramePointerSaveIndex)) && 1069 "Saved FP but didn't need it"); 1070 1071 assert((!HasBP || (FuncInfo->SGPRForBPSaveRestoreCopy || 1072 FuncInfo->BasePointerSaveIndex)) && 1073 "Needed to save BP but didn't save it anywhere"); 1074 1075 assert((HasBP || (!FuncInfo->SGPRForBPSaveRestoreCopy && 1076 !FuncInfo->BasePointerSaveIndex)) && 1077 "Saved BP but didn't need it"); 1078 } 1079 1080 void SIFrameLowering::emitEpilogue(MachineFunction &MF, 1081 MachineBasicBlock &MBB) const { 1082 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); 1083 if (FuncInfo->isEntryFunction()) 1084 return; 1085 1086 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 1087 const SIInstrInfo *TII = ST.getInstrInfo(); 1088 MachineRegisterInfo &MRI = MF.getRegInfo(); 1089 const SIRegisterInfo &TRI = TII->getRegisterInfo(); 1090 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); 1091 LivePhysRegs LiveRegs; 1092 DebugLoc DL; 1093 1094 const MachineFrameInfo &MFI = MF.getFrameInfo(); 1095 uint32_t NumBytes = MFI.getStackSize(); 1096 uint32_t RoundedSize = FuncInfo->isStackRealigned() 1097 ? NumBytes + MFI.getMaxAlign().value() 1098 : NumBytes; 1099 const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg(); 1100 const Register FramePtrReg = FuncInfo->getFrameOffsetReg(); 1101 const Register BasePtrReg = 1102 TRI.hasBasePointer(MF) ? 
                                  TRI.getBaseRegister() : Register();

  bool HasFPSaveIndex = FuncInfo->FramePointerSaveIndex.hasValue();
  bool SpillFPToMemory = false;
  if (HasFPSaveIndex) {
    SpillFPToMemory = MFI.getStackID(*FuncInfo->FramePointerSaveIndex) !=
                      TargetStackID::SGPRSpill;
  }

  bool HasBPSaveIndex = FuncInfo->BasePointerSaveIndex.hasValue();
  bool SpillBPToMemory = false;
  if (HasBPSaveIndex) {
    SpillBPToMemory = MFI.getStackID(*FuncInfo->BasePointerSaveIndex) !=
                      TargetStackID::SGPRSpill;
  }

  if (RoundedSize != 0 && hasFP(MF)) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_SUB_U32), StackPtrReg)
        .addReg(StackPtrReg)
        .addImm(RoundedSize * getScratchScaleFactor(ST))
        .setMIFlag(MachineInstr::FrameDestroy);
  }

  if (FuncInfo->SGPRForFPSaveRestoreCopy) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
        .addReg(FuncInfo->SGPRForFPSaveRestoreCopy)
        .setMIFlag(MachineInstr::FrameDestroy);
  }

  if (FuncInfo->SGPRForBPSaveRestoreCopy) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
        .addReg(FuncInfo->SGPRForBPSaveRestoreCopy)
        .setMIFlag(MachineInstr::FrameDestroy);
  }

  Register ScratchExecCopy;
  if (HasFPSaveIndex) {
    const int FI = FuncInfo->FramePointerSaveIndex.getValue();
    assert(!MFI.isDeadObjectIndex(FI));
    if (SpillFPToMemory) {
      if (!ScratchExecCopy)
        ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, false);

      MCPhysReg TempVGPR = findScratchNonCalleeSaveRegister(
          MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
      buildEpilogReload(ST, LiveRegs, MBB, MBBI, TII, TempVGPR,
                        FuncInfo->getScratchRSrcReg(), StackPtrReg, FI);
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), FramePtrReg)
          .addReg(TempVGPR, RegState::Kill);
    } else {
      // Reload from VGPR spill.
      assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
      ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
          FuncInfo->getSGPRToVGPRSpills(FI);
      assert(Spill.size() == 1);
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), FramePtrReg)
          .addReg(Spill[0].VGPR)
          .addImm(Spill[0].Lane);
    }
  }

  if (HasBPSaveIndex) {
    const int BasePtrFI = *FuncInfo->BasePointerSaveIndex;
    assert(!MFI.isDeadObjectIndex(BasePtrFI));
    if (SpillBPToMemory) {
      if (!ScratchExecCopy)
        ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, false);

      MCPhysReg TempVGPR = findScratchNonCalleeSaveRegister(
          MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
      buildEpilogReload(ST, LiveRegs, MBB, MBBI, TII, TempVGPR,
                        FuncInfo->getScratchRSrcReg(), StackPtrReg, BasePtrFI);
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), BasePtrReg)
          .addReg(TempVGPR, RegState::Kill);
    } else {
      // Reload from VGPR spill.
1178 assert(MFI.getStackID(BasePtrFI) == TargetStackID::SGPRSpill); 1179 ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill = 1180 FuncInfo->getSGPRToVGPRSpills(BasePtrFI); 1181 assert(Spill.size() == 1); 1182 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), BasePtrReg) 1183 .addReg(Spill[0].VGPR) 1184 .addImm(Spill[0].Lane); 1185 } 1186 } 1187 1188 for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg : 1189 FuncInfo->getSGPRSpillVGPRs()) { 1190 if (!Reg.FI.hasValue()) 1191 continue; 1192 1193 if (!ScratchExecCopy) 1194 ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, false); 1195 1196 buildEpilogReload(ST, LiveRegs, MBB, MBBI, TII, Reg.VGPR, 1197 FuncInfo->getScratchRSrcReg(), StackPtrReg, 1198 Reg.FI.getValue()); 1199 } 1200 1201 if (ScratchExecCopy) { 1202 // FIXME: Split block and make terminator. 1203 unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; 1204 MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; 1205 BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec) 1206 .addReg(ScratchExecCopy, RegState::Kill); 1207 } 1208 } 1209 1210 #ifndef NDEBUG 1211 static bool allSGPRSpillsAreDead(const MachineFunction &MF) { 1212 const MachineFrameInfo &MFI = MF.getFrameInfo(); 1213 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); 1214 for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd(); 1215 I != E; ++I) { 1216 if (!MFI.isDeadObjectIndex(I) && 1217 MFI.getStackID(I) == TargetStackID::SGPRSpill && 1218 (I != FuncInfo->FramePointerSaveIndex && 1219 I != FuncInfo->BasePointerSaveIndex)) { 1220 return false; 1221 } 1222 } 1223 1224 return true; 1225 } 1226 #endif 1227 1228 StackOffset SIFrameLowering::getFrameIndexReference(const MachineFunction &MF, 1229 int FI, 1230 Register &FrameReg) const { 1231 const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo(); 1232 1233 FrameReg = RI->getFrameRegister(MF); 1234 return StackOffset::getFixed(MF.getFrameInfo().getObjectOffset(FI)); 1235 } 1236 1237 void SIFrameLowering::processFunctionBeforeFrameFinalized( 1238 MachineFunction &MF, 1239 RegScavenger *RS) const { 1240 MachineFrameInfo &MFI = MF.getFrameInfo(); 1241 1242 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 1243 const SIRegisterInfo *TRI = ST.getRegisterInfo(); 1244 SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); 1245 1246 FuncInfo->removeDeadFrameIndices(MFI); 1247 assert(allSGPRSpillsAreDead(MF) && 1248 "SGPR spill should have been removed in SILowerSGPRSpills"); 1249 1250 // FIXME: The other checks should be redundant with allStackObjectsAreDead, 1251 // but currently hasNonSpillStackObjects is set only from source 1252 // allocas. Stack temps produced from legalization are not counted currently. 1253 if (!allStackObjectsAreDead(MFI)) { 1254 assert(RS && "RegScavenger required if spilling"); 1255 1256 if (FuncInfo->isEntryFunction()) { 1257 int ScavengeFI = MFI.CreateFixedObject( 1258 TRI->getSpillSize(AMDGPU::SGPR_32RegClass), 0, false); 1259 RS->addScavengingFrameIndex(ScavengeFI); 1260 } else { 1261 int ScavengeFI = MFI.CreateStackObject( 1262 TRI->getSpillSize(AMDGPU::SGPR_32RegClass), 1263 TRI->getSpillAlign(AMDGPU::SGPR_32RegClass), false); 1264 RS->addScavengingFrameIndex(ScavengeFI); 1265 } 1266 } 1267 } 1268 1269 // Only report VGPRs to generic code. 
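// SGPRs that need saving (the FP and BP) are given either a copy register or
// a VGPR spill lane here and are saved/restored explicitly in emitPrologue
// and emitEpilogue rather than through the generic callee-saved spill code.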
1270 void SIFrameLowering::determineCalleeSaves(MachineFunction &MF, 1271 BitVector &SavedVGPRs, 1272 RegScavenger *RS) const { 1273 TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS); 1274 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 1275 if (MFI->isEntryFunction()) 1276 return; 1277 1278 MachineFrameInfo &FrameInfo = MF.getFrameInfo(); 1279 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 1280 const SIRegisterInfo *TRI = ST.getRegisterInfo(); 1281 1282 // Ignore the SGPRs the default implementation found. 1283 SavedVGPRs.clearBitsNotInMask(TRI->getAllVGPRRegMask()); 1284 1285 // hasFP only knows about stack objects that already exist. We're now 1286 // determining the stack slots that will be created, so we have to predict 1287 // them. Stack objects force FP usage with calls. 1288 // 1289 // Note a new VGPR CSR may be introduced if one is used for the spill, but we 1290 // don't want to report it here. 1291 // 1292 // FIXME: Is this really hasReservedCallFrame? 1293 const bool WillHaveFP = 1294 FrameInfo.hasCalls() && 1295 (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo)); 1296 1297 // VGPRs used for SGPR spilling need to be specially inserted in the prolog, 1298 // so don't allow the default insertion to handle them. 1299 for (auto SSpill : MFI->getSGPRSpillVGPRs()) 1300 SavedVGPRs.reset(SSpill.VGPR); 1301 1302 LivePhysRegs LiveRegs; 1303 LiveRegs.init(*TRI); 1304 1305 if (WillHaveFP || hasFP(MF)) { 1306 assert(!MFI->SGPRForFPSaveRestoreCopy && !MFI->FramePointerSaveIndex && 1307 "Re-reserving spill slot for FP"); 1308 getVGPRSpillLaneOrTempRegister(MF, LiveRegs, MFI->SGPRForFPSaveRestoreCopy, 1309 MFI->FramePointerSaveIndex, true); 1310 } 1311 1312 if (TRI->hasBasePointer(MF)) { 1313 if (MFI->SGPRForFPSaveRestoreCopy) 1314 LiveRegs.addReg(MFI->SGPRForFPSaveRestoreCopy); 1315 1316 assert(!MFI->SGPRForBPSaveRestoreCopy && 1317 !MFI->BasePointerSaveIndex && "Re-reserving spill slot for BP"); 1318 getVGPRSpillLaneOrTempRegister(MF, LiveRegs, MFI->SGPRForBPSaveRestoreCopy, 1319 MFI->BasePointerSaveIndex, false); 1320 } 1321 } 1322 1323 void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF, 1324 BitVector &SavedRegs, 1325 RegScavenger *RS) const { 1326 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); 1327 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 1328 if (MFI->isEntryFunction()) 1329 return; 1330 1331 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 1332 const SIRegisterInfo *TRI = ST.getRegisterInfo(); 1333 1334 // The SP is specifically managed and we don't want extra spills of it. 1335 SavedRegs.reset(MFI->getStackPtrOffsetReg()); 1336 1337 const BitVector AllSavedRegs = SavedRegs; 1338 SavedRegs.clearBitsInMask(TRI->getAllVGPRRegMask()); 1339 1340 // If clearing VGPRs changed the mask, we will have some CSR VGPR spills. 1341 const bool HaveAnyCSRVGPR = SavedRegs != AllSavedRegs; 1342 1343 // We have to anticipate introducing CSR VGPR spills if we don't have any 1344 // stack objects already, since we require an FP if there is a call and stack. 1345 MachineFrameInfo &FrameInfo = MF.getFrameInfo(); 1346 const bool WillHaveFP = FrameInfo.hasCalls() && HaveAnyCSRVGPR; 1347 1348 // FP will be specially managed like SP. 
1349 if (WillHaveFP || hasFP(MF)) 1350 SavedRegs.reset(MFI->getFrameOffsetReg()); 1351 } 1352 1353 bool SIFrameLowering::assignCalleeSavedSpillSlots( 1354 MachineFunction &MF, const TargetRegisterInfo *TRI, 1355 std::vector<CalleeSavedInfo> &CSI) const { 1356 if (CSI.empty()) 1357 return true; // Early exit if no callee saved registers are modified! 1358 1359 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); 1360 if (!FuncInfo->SGPRForFPSaveRestoreCopy && 1361 !FuncInfo->SGPRForBPSaveRestoreCopy) 1362 return false; 1363 1364 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 1365 const SIRegisterInfo *RI = ST.getRegisterInfo(); 1366 Register FramePtrReg = FuncInfo->getFrameOffsetReg(); 1367 Register BasePtrReg = RI->getBaseRegister(); 1368 unsigned NumModifiedRegs = 0; 1369 1370 if (FuncInfo->SGPRForFPSaveRestoreCopy) 1371 NumModifiedRegs++; 1372 if (FuncInfo->SGPRForBPSaveRestoreCopy) 1373 NumModifiedRegs++; 1374 1375 for (auto &CS : CSI) { 1376 if (CS.getReg() == FramePtrReg && FuncInfo->SGPRForFPSaveRestoreCopy) { 1377 CS.setDstReg(FuncInfo->SGPRForFPSaveRestoreCopy); 1378 if (--NumModifiedRegs) 1379 break; 1380 } else if (CS.getReg() == BasePtrReg && 1381 FuncInfo->SGPRForBPSaveRestoreCopy) { 1382 CS.setDstReg(FuncInfo->SGPRForBPSaveRestoreCopy); 1383 if (--NumModifiedRegs) 1384 break; 1385 } 1386 } 1387 1388 return false; 1389 } 1390 1391 MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr( 1392 MachineFunction &MF, 1393 MachineBasicBlock &MBB, 1394 MachineBasicBlock::iterator I) const { 1395 int64_t Amount = I->getOperand(0).getImm(); 1396 if (Amount == 0) 1397 return MBB.erase(I); 1398 1399 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 1400 const SIInstrInfo *TII = ST.getInstrInfo(); 1401 const DebugLoc &DL = I->getDebugLoc(); 1402 unsigned Opc = I->getOpcode(); 1403 bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode(); 1404 uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0; 1405 1406 if (!hasReservedCallFrame(MF)) { 1407 Amount = alignTo(Amount, getStackAlign()); 1408 assert(isUInt<32>(Amount) && "exceeded stack address space size"); 1409 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 1410 Register SPReg = MFI->getStackPtrOffsetReg(); 1411 1412 unsigned Op = IsDestroy ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32; 1413 BuildMI(MBB, I, DL, TII->get(Op), SPReg) 1414 .addReg(SPReg) 1415 .addImm(Amount * getScratchScaleFactor(ST)); 1416 } else if (CalleePopAmount != 0) { 1417 llvm_unreachable("is this used?"); 1418 } 1419 1420 return MBB.erase(I); 1421 } 1422 1423 /// Returns true if the frame will require a reference to the stack pointer. 1424 /// 1425 /// This is the set of conditions common to setting up the stack pointer in a 1426 /// kernel, and for using a frame pointer in a callable function. 1427 /// 1428 /// FIXME: Should also check hasOpaqueSPAdjustment and if any inline asm 1429 /// references SP. 1430 static bool frameTriviallyRequiresSP(const MachineFrameInfo &MFI) { 1431 return MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint(); 1432 } 1433 1434 // The FP for kernels is always known 0, so we never really need to setup an 1435 // explicit register for it. However, DisableFramePointerElim will force us to 1436 // use a register for it. 
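// For callable functions the answer below is conservative: stack realignment,
// a taken frame address, variable sized objects, or a call with a non-zero
// frame size (among others) all force a frame pointer.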
1437 bool SIFrameLowering::hasFP(const MachineFunction &MF) const { 1438 const MachineFrameInfo &MFI = MF.getFrameInfo(); 1439 1440 // For entry functions we can use an immediate offset in most cases, so the 1441 // presence of calls doesn't imply we need a distinct frame pointer. 1442 if (MFI.hasCalls() && 1443 !MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) { 1444 // All offsets are unsigned, so need to be addressed in the same direction 1445 // as stack growth. 1446 1447 // FIXME: This function is pretty broken, since it can be called before the 1448 // frame layout is determined or CSR spills are inserted. 1449 return MFI.getStackSize() != 0; 1450 } 1451 1452 return frameTriviallyRequiresSP(MFI) || MFI.isFrameAddressTaken() || 1453 MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->needsStackRealignment(MF) || 1454 MF.getTarget().Options.DisableFramePointerElim(MF); 1455 } 1456 1457 // This is essentially a reduced version of hasFP for entry functions. Since the 1458 // stack pointer is known 0 on entry to kernels, we never really need an FP 1459 // register. We may need to initialize the stack pointer depending on the frame 1460 // properties, which logically overlaps many of the cases where an ordinary 1461 // function would require an FP. 1462 bool SIFrameLowering::requiresStackPointerReference( 1463 const MachineFunction &MF) const { 1464 // Callable functions always require a stack pointer reference. 1465 assert(MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction() && 1466 "only expected to call this for entry points"); 1467 1468 const MachineFrameInfo &MFI = MF.getFrameInfo(); 1469 1470 // Entry points ordinarily don't need to initialize SP. We have to set it up 1471 // for callees if there are any. Also note tail calls are impossible/don't 1472 // make any sense for kernels. 1473 if (MFI.hasCalls()) 1474 return true; 1475 1476 // We still need to initialize the SP if we're doing anything weird that 1477 // references the SP, like variable sized stack objects. 1478 return frameTriviallyRequiresSP(MFI); 1479 } 1480