Lines Matching +full:csr +full:- +full:offset
1 //===----------------------- SIFrameLowering.cpp --------------------------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //==-----------------------------------------------------------------------===//
21 #define DEBUG_TYPE "frame-info"
24 "amdgpu-spill-vgpr-to-agpr",
46 // callee-save registers since they may appear to be free when this is called
81 unsigned Size = TRI->getSpillSize(RC); in getVGPRSpillLaneOrTempRegister()
82 Align Alignment = TRI->getSpillAlign(RC); in getVGPRSpillLaneOrTempRegister()
97 if (TRI->spillSGPRToVGPR() && in getVGPRSpillLaneOrTempRegister()
98 MFI->allocateSGPRSpillToVGPRLane(MF, FI, /*SpillToPhysVGPRLane=*/true, in getVGPRSpillLaneOrTempRegister()
102 MFI->addToPrologEpilogSGPRSpills( in getVGPRSpillLaneOrTempRegister()
106 LLVM_DEBUG(auto Spill = MFI->getSGPRSpillToPhysicalVGPRLanes(FI).front(); in getVGPRSpillLaneOrTempRegister()
115 MFI->addToPrologEpilogSGPRSpills( in getVGPRSpillLaneOrTempRegister()
122 MFI->addToPrologEpilogSGPRSpills( in getVGPRSpillLaneOrTempRegister()
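getVGPRSpillLaneOrTempRegister() falls back through three save strategies for a prolog/epilog SGPR: a physical VGPR lane, a copy into a free scratch SGPR, and finally a real spill slot in scratch memory. A minimal sketch of that cascade (the types and helpers below are invented for illustration, not the LLVM API):

#include <optional>

// Hypothetical stand-ins for the LLVM types involved; illustration only.
enum class SGPRSaveKind { SpillToVGPRLane, CopyToScratchSGPR, SpillToMemory };

struct SaveContext {
  bool CanSpillToVGPRLane;          // TRI->spillSGPRToVGPR() && lane alloc OK
  std::optional<unsigned> FreeSGPR; // a free scratch SGPR, if one was found
};

// Mirrors the fallback order in getVGPRSpillLaneOrTempRegister(): prefer a
// physical VGPR lane, then a copy to a free SGPR, then real scratch memory.
SGPRSaveKind chooseSaveKind(const SaveContext &C) {
  if (C.CanSpillToVGPRLane)
    return SGPRSaveKind::SpillToVGPRLane;
  if (C.FreeSGPR)
    return SGPRSaveKind::CopyToScratchSGPR;
  return SGPRSaveKind::SpillToMemory;
}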
181 const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); in buildGitPtr()
182 const SIRegisterInfo *TRI = &TII->getRegisterInfo(); in buildGitPtr()
183 const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32); in buildGitPtr()
184 Register TargetLo = TRI->getSubReg(TargetReg, AMDGPU::sub0); in buildGitPtr()
185 Register TargetHi = TRI->getSubReg(TargetReg, AMDGPU::sub1); in buildGitPtr()
187 if (MFI->getGITPtrHigh() != 0xffffffff) { in buildGitPtr()
189 .addImm(MFI->getGITPtrHigh()) in buildGitPtr()
192 const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64_pseudo); in buildGitPtr()
195 Register GitPtrLo = MFI->getGITPtrLoReg(*MF); in buildGitPtr()
196 MF->getRegInfo().addLiveIn(GitPtrLo); in buildGitPtr()
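buildGitPtr() forms the 64-bit GIT (global information table) pointer from a preloaded low half and a high half taken either from the amdgpu-git-ptr-high attribute or, when that attribute is left at the 0xffffffff sentinel, from the upper 32 bits of the PC via S_GETPC_B64. A sketch of the resulting value, with assumed variable names:

#include <cstdint>

// gitPtrLo: preloaded SGPR argument; gitPtrHigh: attribute value, or the
// high 32 bits of the PC when the attribute is the 0xffffffff sentinel.
uint64_t buildGitPointer(uint32_t gitPtrLo, uint32_t gitPtrHigh) {
  return (uint64_t(gitPtrHigh) << 32) | gitPtrLo;
}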
257 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR) in saveToMemory()
271 FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI); in saveToVGPRLane()
278 BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_S32_TO_VGPR), in saveToVGPRLane()
287 BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), DstReg) in copyToScratchSGPR()
308 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg) in restoreFromMemory()
317 FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI); in restoreFromVGPRLane()
324 BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg) in restoreFromVGPRLane()
331 BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), SuperReg) in copyFromScratchSGPR()
381 // Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()`
387 const SIRegisterInfo *TRI = &TII->getRegisterInfo(); in emitEntryFunctionFlatScratchInit()
404 // Extract the scratch offset from the descriptor in the GIT in emitEntryFunctionFlatScratchInit()
412 ArrayRef<MCPhysReg> AllSGPR64s = TRI->getAllSGPR64(MF); in emitEntryFunctionFlatScratchInit()
413 unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 1) / 2; in emitEntryFunctionFlatScratchInit()
416 Register GITPtrLoReg = MFI->getGITPtrLoReg(MF); in emitEntryFunctionFlatScratchInit()
419 MRI.isAllocatable(Reg) && !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) { in emitEntryFunctionFlatScratchInit()
426 FlatScrInitLo = TRI->getSubReg(FlatScrInit, AMDGPU::sub0); in emitEntryFunctionFlatScratchInit()
427 FlatScrInitHi = TRI->getSubReg(FlatScrInit, AMDGPU::sub1); in emitEntryFunctionFlatScratchInit()
431 // We now have the GIT ptr - now get the scratch descriptor from the entry in emitEntryFunctionFlatScratchInit()
432 // at offset 0 (or offset 16 for a compute shader). in emitEntryFunctionFlatScratchInit()
434 const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM); in emitEntryFunctionFlatScratchInit()
440 unsigned Offset = in emitEntryFunctionFlatScratchInit()

443 unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset); in emitEntryFunctionFlatScratchInit()
446 .addImm(EncodedOffset) // offset in emitEntryFunctionFlatScratchInit()
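AMDGPU::convertSMRDOffsetUnits() rescales the byte offset for subtargets whose SMRD immediate is counted in dwords rather than bytes. A hedged sketch of the conversion, with the predicate name assumed:

#include <cassert>
#include <cstdint>

// Older subtargets encode SMRD immediates in dword units; newer ones in
// bytes. The flag name here is an assumption for illustration.
uint32_t encodeSMRDOffset(uint32_t byteOffset, bool hasSMEMByteOffset) {
  if (hasSMEMByteOffset)
    return byteOffset;          // e.g. 16 stays 16
  assert(byteOffset % 4 == 0 && "dword-unit targets need 4-byte alignment");
  return byteOffset / 4;        // e.g. 16 becomes 4
}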
450 // Mask the offset in [47:0] of the descriptor in emitEntryFunctionFlatScratchInit()
451 const MCInstrDesc &SAndB32 = TII->get(AMDGPU::S_AND_B32); in emitEntryFunctionFlatScratchInit()
455 And->getOperand(3).setIsDead(); // Mark SCC as dead. in emitEntryFunctionFlatScratchInit()
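The scratch base held in the descriptor is a 48-bit address split across two SGPRs, so only the high half needs the S_AND_B32: bits 47:32 survive, bits 63:48 are cleared. A standalone equivalent:

#include <cstdint>

// Keep bits [47:0] of a 64-bit base split into two 32-bit SGPRs.
void mask48(uint32_t &lo, uint32_t &hi) {
  (void)lo;        // the low 32 bits already lie inside [47:0]
  hi &= 0xffffu;   // equivalent of the S_AND_B32 on the high half
}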
458 MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT); in emitEntryFunctionFlatScratchInit()
465 FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0); in emitEntryFunctionFlatScratchInit()
466 FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1); in emitEntryFunctionFlatScratchInit()
469 // Do a 64-bit pointer add. in emitEntryFunctionFlatScratchInit()
472 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo) in emitEntryFunctionFlatScratchInit()
475 auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), in emitEntryFunctionFlatScratchInit()
479 Addc->getOperand(3).setIsDead(); // Mark SCC as dead. in emitEntryFunctionFlatScratchInit()
482 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)) in emitEntryFunctionFlatScratchInit()
485 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)) in emitEntryFunctionFlatScratchInit()
492 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO) in emitEntryFunctionFlatScratchInit()
495 auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), in emitEntryFunctionFlatScratchInit()
499 Addc->getOperand(3).setIsDead(); // Mark SCC as dead. in emitEntryFunctionFlatScratchInit()
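Both branches above perform the 64-bit pointer add with the S_ADD_U32 / S_ADDC_U32 pair: the first add sets SCC on unsigned carry-out, which the second consumes. A scalar model of the same arithmetic:

#include <cstdint>

// Model of the S_ADD_U32 + S_ADDC_U32 sequence: SCC carries between halves.
void add64(uint32_t &lo, uint32_t &hi, uint32_t addLo, uint32_t addHi) {
  uint32_t oldLo = lo;
  lo += addLo;                  // S_ADD_U32: SCC = unsigned carry-out
  bool scc = lo < oldLo;
  hi += addHi + (scc ? 1 : 0);  // S_ADDC_U32: folds the carry back in
}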
507 BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO) in emitEntryFunctionFlatScratchInit()
510 // Add wave offset in bytes to private base offset. in emitEntryFunctionFlatScratchInit()
512 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), FlatScrInitLo) in emitEntryFunctionFlatScratchInit()
516 // Convert offset to 256-byte units. in emitEntryFunctionFlatScratchInit()
517 auto LShr = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32), in emitEntryFunctionFlatScratchInit()
521 LShr->getOperand(3).setIsDead(); // Mark SCC as dead. in emitEntryFunctionFlatScratchInit()
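On this legacy flat-scratch path the hardware consumes the scratch base in 256-byte granules, hence the S_LSHR_B32 by 8. Sketch:

#include <cstdint>

// The FLAT_SCR half-register wants the scratch base in 256-byte units here.
uint32_t toFlatScratchUnits(uint32_t byteBase) {
  return byteBase >> 8;  // matches the shift-by-8 above
}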
542 const SIRegisterInfo *TRI = &TII->getRegisterInfo(); in getEntryFunctionReservedScratchRsrcReg()
546 assert(MFI->isEntryFunction()); in getEntryFunctionReservedScratchRsrcReg()
548 Register ScratchRsrcReg = MFI->getScratchRSrcReg(); in getEntryFunctionReservedScratchRsrcReg()
555 ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF)) in getEntryFunctionReservedScratchRsrcReg()
567 unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4; in getEntryFunctionReservedScratchRsrcReg()
568 ArrayRef<MCPhysReg> AllSGPR128s = TRI->getAllSGPR128(MF); in getEntryFunctionReservedScratchRsrcReg()
573 Register GITPtrLoReg = MFI->getGITPtrLoReg(MF); in getEntryFunctionReservedScratchRsrcReg()
579 (!GITPtrLoReg || !TRI->isSubRegisterEq(Reg, GITPtrLoReg))) { in getEntryFunctionReservedScratchRsrcReg()
581 MFI->setScratchRSrcReg(Reg); in getEntryFunctionReservedScratchRsrcReg()
596 assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported"); in emitEntryFunctionPrologue()
612 const SIRegisterInfo *TRI = &TII->getRegisterInfo(); in emitEntryFunctionPrologue()
617 assert(MFI->isEntryFunction()); in emitEntryFunctionPrologue()
619 Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg( in emitEntryFunctionPrologue()
646 MFI->getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER); in emitEntryFunctionPrologue()
648 // We added live-ins during argument lowering, but since they were not in emitEntryFunctionPrologue()
662 // the scratch wave offset, which may be in a fixed SGPR or a free SGPR in emitEntryFunctionPrologue()
664 // wave offset to a free SGPR. in emitEntryFunctionPrologue()
667 TRI->isSubRegisterEq(ScratchRsrcReg, PreloadedScratchWaveOffsetReg)) { in emitEntryFunctionPrologue()
668 ArrayRef<MCPhysReg> AllSGPRs = TRI->getAllSGPR32(MF); in emitEntryFunctionPrologue()
669 unsigned NumPreloaded = MFI->getNumPreloadedSGPRs(); in emitEntryFunctionPrologue()
672 Register GITPtrLoReg = MFI->getGITPtrLoReg(MF); in emitEntryFunctionPrologue()
675 !TRI->isSubRegisterEq(ScratchRsrcReg, Reg) && GITPtrLoReg != Reg) { in emitEntryFunctionPrologue()
677 BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg) in emitEntryFunctionPrologue()
687 "could not find temporary scratch offset register in prolog"); in emitEntryFunctionPrologue()
694 Register FPReg = MFI->getFrameOffsetReg(); in emitEntryFunctionPrologue()
696 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0); in emitEntryFunctionPrologue()
700 Register SPReg = MFI->getStackPtrOffsetReg(); in emitEntryFunctionPrologue()
702 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg) in emitEntryFunctionPrologue()
707 MFI->getUserSGPRInfo().hasFlatScratchInit() && in emitEntryFunctionPrologue()
736 const SIRegisterInfo *TRI = &TII->getRegisterInfo(); in emitEntryFunctionScratchRsrcRegSetup()
741 // The pointer to the GIT is formed from the offset passed in and either in emitEntryFunctionScratchRsrcRegSetup()
742 // the amdgpu-git-ptr-high function attribute or the top part of the PC in emitEntryFunctionScratchRsrcRegSetup()
743 Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1); in emitEntryFunctionScratchRsrcRegSetup()
744 Register Rsrc03 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3); in emitEntryFunctionScratchRsrcRegSetup()
748 // We now have the GIT ptr - now get the scratch descriptor from the entry in emitEntryFunctionScratchRsrcRegSetup()
749 // at offset 0 (or offset 16 for a compute shader). in emitEntryFunctionScratchRsrcRegSetup()
751 const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM); in emitEntryFunctionScratchRsrcRegSetup()
757 unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0; in emitEntryFunctionScratchRsrcRegSetup()
759 unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset); in emitEntryFunctionScratchRsrcRegSetup()
762 .addImm(EncodedOffset) // offset in emitEntryFunctionScratchRsrcRegSetup()
768 // descriptor / bits 22:21 of third sub-reg will be 0b11) in emitEntryFunctionScratchRsrcRegSetup()
770 // field of the descriptor 3rd sub-reg (bits 22:21) to 0b10 (stride=32). The in emitEntryFunctionScratchRsrcRegSetup()
775 const MCInstrDesc &SBitsetB32 = TII->get(AMDGPU::S_BITSET0_B32); in emitEntryFunctionScratchRsrcRegSetup()
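Per the comments above, a GIT-provided descriptor carries 0b11 (stride 64) in bits 22:21 of its third dword; for wave32 the stride must be 32, and clearing bit 21 with the single S_BITSET0_B32 turns 0b11 into 0b10. Equivalent bit manipulation:

#include <cstdint>

// Rewrite the stride field (bits 22:21) of rsrc word 3 from 0b11 to 0b10
// by clearing bit 21 -- what the one S_BITSET0_B32 achieves.
uint32_t fixWave32Stride(uint32_t rsrc3) {
  return rsrc3 & ~(1u << 21);
}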
782 const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32); in emitEntryFunctionScratchRsrcRegSetup()
784 Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2); in emitEntryFunctionScratchRsrcRegSetup()
785 Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3); in emitEntryFunctionScratchRsrcRegSetup()
788 uint64_t Rsrc23 = TII->getScratchRsrcWords23(); in emitEntryFunctionScratchRsrcRegSetup()
790 if (MFI->getUserSGPRInfo().hasImplicitBufferPtr()) { in emitEntryFunctionScratchRsrcRegSetup()
791 Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1); in emitEntryFunctionScratchRsrcRegSetup()
794 const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64); in emitEntryFunctionScratchRsrcRegSetup()
797 .addReg(MFI->getImplicitBufferPtrUserSGPR()) in emitEntryFunctionScratchRsrcRegSetup()
800 const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM); in emitEntryFunctionScratchRsrcRegSetup()
809 .addReg(MFI->getImplicitBufferPtrUserSGPR()) in emitEntryFunctionScratchRsrcRegSetup()
810 .addImm(0) // offset in emitEntryFunctionScratchRsrcRegSetup()
815 MF.getRegInfo().addLiveIn(MFI->getImplicitBufferPtrUserSGPR()); in emitEntryFunctionScratchRsrcRegSetup()
816 MBB.addLiveIn(MFI->getImplicitBufferPtrUserSGPR()); in emitEntryFunctionScratchRsrcRegSetup()
819 Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0); in emitEntryFunctionScratchRsrcRegSetup()
820 Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1); in emitEntryFunctionScratchRsrcRegSetup()
842 BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg) in emitEntryFunctionScratchRsrcRegSetup()
847 // Add the scratch wave offset into the scratch RSRC. in emitEntryFunctionScratchRsrcRegSetup()
851 // cannot carry-out from bit 47, otherwise the scratch allocation would be in emitEntryFunctionScratchRsrcRegSetup()
852 // impossible to fit in the 48-bit global address space. in emitEntryFunctionScratchRsrcRegSetup()
856 Register ScratchRsrcSub0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0); in emitEntryFunctionScratchRsrcRegSetup()
857 Register ScratchRsrcSub1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1); in emitEntryFunctionScratchRsrcRegSetup()
861 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), ScratchRsrcSub0) in emitEntryFunctionScratchRsrcRegSetup()
865 auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1) in emitEntryFunctionScratchRsrcRegSetup()
869 Addc->getOperand(3).setIsDead(); // Mark SCC as dead. in emitEntryFunctionScratchRsrcRegSetup()
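The wave's byte offset is folded into the 48-bit base in rsrc words 0-1 with the same add/addc pairing; as the comment notes, the sum cannot carry out of bit 47, or the allocation could not fit the 48-bit address space at all. A checked model:

#include <cassert>
#include <cstdint>

// Fold the scratch wave offset into the 48-bit descriptor base.
uint64_t addWaveOffset(uint64_t base48, uint32_t waveOffsetBytes) {
  uint64_t sum = base48 + waveOffsetBytes;
  assert((sum >> 48) == 0 && "scratch allocation must fit in 48-bit space");
  return sum;
}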
897 const SIRegisterInfo &TRI = TII->getRegisterInfo(); in buildScratchExecCopy()
915 BuildMI(MBB, MBBI, DL, TII->get(SaveExecOpc), ScratchExecCopy).addImm(-1); in buildScratchExecCopy()
916 SaveExec->getOperand(3).setIsDead(); // Mark SCC as dead. in buildScratchExecCopy()
928 const SIRegisterInfo &TRI = TII->getRegisterInfo(); in emitCSRSpillStores()
930 // Spill Whole-Wave Mode VGPRs. Save only the inactive lanes of the scratch in emitCSRSpillStores()
931 // registers. However, save all lanes of callee-saved VGPRs. Due to this, we in emitCSRSpillStores()
935 FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs); in emitCSRSpillStores()
955 BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec()).addImm(-1); in emitCSRSpillStores()
967 BuildMI(MBB, MBBI, DL, TII->get(ExecMov), TRI.getExec()) in emitCSRSpillStores()
972 Register FramePtrReg = FuncInfo->getFrameOffsetReg(); in emitCSRSpillStores()
974 for (const auto &Spill : FuncInfo->getPrologEpilogSGPRSpills()) { in emitCSRSpillStores()
992 FuncInfo->getAllScratchSGPRCopyDstRegs(ScratchSGPRs); in emitCSRSpillStores()
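emitCSRSpillStores() briefly widens EXEC so the stores cover the right lanes: callee-saved VGPRs are written with every lane active (exec = -1), while WWM scratch registers only need their inactive lanes preserved. A toy wave32 model of the save-all-lanes pattern (not LLVM code; the types are invented):

#include <cstdint>

// Toy model: exec is a 32-lane mask; a store writes active lanes only.
struct Wave {
  uint32_t exec;
};

void saveAllLanes(Wave &w /*, VGPR to spill */) {
  uint32_t savedExec = w.exec;  // buildScratchExecCopy()
  w.exec = ~0u;                 // s_mov exec, -1: activate every lane
  // ... store the VGPR: all lanes, active and inactive, are written ...
  w.exec = savedExec;           // restore the original mask
}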
1014 const SIRegisterInfo &TRI = TII->getRegisterInfo(); in emitCSRSpillRestores()
1015 Register FramePtrReg = FuncInfo->getFrameOffsetReg(); in emitCSRSpillRestores()
1017 for (const auto &Spill : FuncInfo->getPrologEpilogSGPRSpills()) { in emitCSRSpillRestores()
1033 // Restore Whole-Wave Mode VGPRs. Restore only the inactive lanes of the in emitCSRSpillRestores()
1034 // scratch registers. However, restore all lanes of callee-saved VGPRs. Due to in emitCSRSpillRestores()
1038 FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs); in emitCSRSpillRestores()
1058 BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec()).addImm(-1); in emitCSRSpillRestores()
1070 BuildMI(MBB, MBBI, DL, TII->get(ExecMov), TRI.getExec()) in emitCSRSpillRestores()
1078 if (FuncInfo->isEntryFunction()) { in emitPrologue()
1086 const SIRegisterInfo &TRI = TII->getRegisterInfo(); in emitPrologue()
1089 Register StackPtrReg = FuncInfo->getStackPtrOffsetReg(); in emitPrologue()
1090 Register FramePtrReg = FuncInfo->getFrameOffsetReg(); in emitPrologue()
1100 if (FuncInfo->isChainFunction()) { in emitPrologue()
1107 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_MOV_B32), StackPtrReg) in emitPrologue()
1122 // Emit the CSR spill stores with SP base register. in emitPrologue()
1124 FuncInfo->isChainFunction() ? Register() : StackPtrReg, in emitPrologue()
1127 // CSR spill stores will use FP as base register. in emitPrologue()
1129 FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg); in emitPrologue()
1138 FuncInfo->getPrologEpilogSGPRSaveRestoreInfo(FramePtrReg), MBB, MBBI, in emitPrologue()
1151 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrRegScratchCopy) in emitPrologue()
1167 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), FramePtrReg) in emitPrologue()
1169 .addImm((Alignment - 1) * getScratchScaleFactor(ST)) in emitPrologue()
1171 auto And = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg) in emitPrologue()
1173 .addImm(-Alignment * getScratchScaleFactor(ST)) in emitPrologue()
1175 And->getOperand(3).setIsDead(); // Mark SCC as dead. in emitPrologue()
1176 FuncInfo->setIsStackRealigned(true); in emitPrologue()
1178 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg) in emitPrologue()
1183 // If FP is used, emit the CSR spills with FP base register. in emitPrologue()
1196 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg) in emitPrologue()
1202 auto Add = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg) in emitPrologue()
1206 Add->getOperand(3).setIsDead(); // Mark SCC as dead. in emitPrologue()
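Both the realignment and the SP bump work in units scaled by getScratchScaleFactor() (the swizzle factor for scratch, typically the wavefront size on MUBUF targets). The S_ADD_I32 / S_AND_B32 pair is a scaled align-up; a plain version, assuming SP is already a multiple of the scale (it always moves in scaled steps):

#include <cstdint>

// Mirrors the prologue pair: S_ADD_I32 by (Align - 1) * scale, then
// S_AND_B32 with -(Align * scale).
uint32_t realignSP(uint32_t sp, uint32_t alignment, uint32_t scale) {
  sp += (alignment - 1) * scale;
  sp &= ~(alignment * scale - 1);  // == -(alignment * scale) as a mask
  return sp;
}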
1209 bool FPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(FramePtrReg); in emitPrologue()
1219 bool BPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(BasePtrReg); in emitPrologue()
1230 if (FuncInfo->isEntryFunction()) in emitEpilogue()
1235 const SIRegisterInfo &TRI = TII->getRegisterInfo(); in emitEpilogue()
1245 DL = MBBI->getDebugLoc(); in emitEpilogue()
1252 uint32_t RoundedSize = FuncInfo->isStackRealigned() in emitEpilogue()
1255 const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg(); in emitEpilogue()
1256 Register FramePtrReg = FuncInfo->getFrameOffsetReg(); in emitEpilogue()
1257 bool FPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(FramePtrReg); in emitEpilogue()
1261 FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg); in emitEpilogue()
1263 // CSR spill restores should use FP as base register. If in emitEpilogue()
1284 auto Add = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg) in emitEpilogue()
1286 .addImm(-static_cast<int64_t>(RoundedSize * getScratchScaleFactor(ST))) in emitEpilogue()
1288 Add->getOperand(3).setIsDead(); // Mark SCC as dead. in emitEpilogue()
1296 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg) in emitEpilogue()
1301 // Insert the CSR spill restores with SP as the base register. in emitEpilogue()
1315 !FuncInfo->checkIndexInPrologEpilogSGPRSpills(I)) { in allSGPRSpillsAreDead()
1329 FrameReg = RI->getFrameRegister(MF); in getFrameIndexReference()
1348 FuncInfo->isChainFunction() && !MF.getFrameInfo().hasTailCall(); in processFunctionBeforeFrameFinalized()
1349 if (!FuncInfo->isEntryFunction() && !IsChainWithoutCalls) { in processFunctionBeforeFrameFinalized()
1350 for (Register Reg : FuncInfo->getWWMReservedRegs()) { in processFunctionBeforeFrameFinalized()
1351 const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg); in processFunctionBeforeFrameFinalized()
1352 FuncInfo->allocateWWMSpill(MF, Reg, TRI->getSpillSize(*RC), in processFunctionBeforeFrameFinalized()
1353 TRI->getSpillAlign(*RC)); in processFunctionBeforeFrameFinalized()
1357 const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs() in processFunctionBeforeFrameFinalized()
1373 if (TII->isVGPRSpill(MI)) { in processFunctionBeforeFrameFinalized()
1380 TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg(); in processFunctionBeforeFrameFinalized()
1381 if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI, in processFunctionBeforeFrameFinalized()
1382 TRI->isAGPR(MRI, VReg))) { in processFunctionBeforeFrameFinalized()
1384 RS->enterBasicBlockEnd(MBB); in processFunctionBeforeFrameFinalized()
1385 RS->backward(std::next(MI.getIterator())); in processFunctionBeforeFrameFinalized()
1386 TRI->eliminateFrameIndex(MI, 0, FIOp, RS); in processFunctionBeforeFrameFinalized()
1390 } else if (TII->isStoreToStackSlot(MI, FrameIndex) || in processFunctionBeforeFrameFinalized()
1391 TII->isLoadFromStackSlot(MI, FrameIndex)) in processFunctionBeforeFrameFinalized()
1401 FuncInfo->setVGPRToAGPRSpillDead(FI); in processFunctionBeforeFrameFinalized()
1404 for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs()) in processFunctionBeforeFrameFinalized()
1407 for (MCPhysReg Reg : FuncInfo->getAGPRSpillVGPRs()) in processFunctionBeforeFrameFinalized()
1431 FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ true); in processFunctionBeforeFrameFinalized()
1442 RS->addScavengingFrameIndex(FuncInfo->getScavengeFI(MFI, *TRI)); in processFunctionBeforeFrameFinalized()
1448 RS->addScavengingFrameIndex(MFI.CreateStackObject(4, Align(4), false)); in processFunctionBeforeFrameFinalized()
1465 Register VGPRForAGPRCopy = FuncInfo->getVGPRForAGPRCopy(); in processFunctionBeforeFrameIndicesReplaced()
1467 TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF); in processFunctionBeforeFrameIndicesReplaced()
1468 if (UnusedLowVGPR && (TRI->getHWRegIndex(UnusedLowVGPR) < in processFunctionBeforeFrameIndicesReplaced()
1469 TRI->getHWRegIndex(VGPRForAGPRCopy))) { in processFunctionBeforeFrameIndicesReplaced()
1474 FuncInfo->setVGPRForAGPRCopy(UnusedLowVGPR); in processFunctionBeforeFrameIndicesReplaced()
1480 Register LongBranchReservedReg = FuncInfo->getLongBranchReservedReg(); in processFunctionBeforeFrameIndicesReplaced()
1482 TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_64RegClass, MF); in processFunctionBeforeFrameIndicesReplaced()
1488 FuncInfo->setLongBranchReservedReg(UnusedLowSGPR); in processFunctionBeforeFrameIndicesReplaced()
1511 const TargetRegisterClass &RC = *TRI->getWaveMaskRegClass(); in determinePrologEpilogSGPRSaves()
1513 Register ReservedRegForExecCopy = MFI->getSGPRForEXECCopy(); in determinePrologEpilogSGPRSaves()
1522 MFI->setSGPRForEXECCopy(UnusedScratchReg); in determinePrologEpilogSGPRSaves()
1527 assert(!MFI->hasPrologEpilogSGPRSpillEntry(ReservedRegForExecCopy) && in determinePrologEpilogSGPRSaves()
1528 "Re-reserving spill slot for EXEC copy register"); in determinePrologEpilogSGPRSaves()
1533 // Reset it at this point. There are no whole-wave copies and spills in determinePrologEpilogSGPRSaves()
1535 MFI->setSGPRForEXECCopy(AMDGPU::NoRegister); in determinePrologEpilogSGPRSaves()
1542 // Note a new VGPR CSR may be introduced if one is used for the spill, but we in determinePrologEpilogSGPRSaves()
1551 Register FramePtrReg = MFI->getFrameOffsetReg(); in determinePrologEpilogSGPRSaves()
1552 assert(!MFI->hasPrologEpilogSGPRSpillEntry(FramePtrReg) && in determinePrologEpilogSGPRSaves()
1553 "Re-reserving spill slot for FP"); in determinePrologEpilogSGPRSaves()
1557 if (TRI->hasBasePointer(MF)) { in determinePrologEpilogSGPRSaves()
1558 Register BasePtrReg = TRI->getBaseRegister(); in determinePrologEpilogSGPRSaves()
1559 assert(!MFI->hasPrologEpilogSGPRSpillEntry(BasePtrReg) && in determinePrologEpilogSGPRSaves()
1560 "Re-reserving spill slot for BP"); in determinePrologEpilogSGPRSaves()
1574 if (MFI->isChainFunction() && !MF.getFrameInfo().hasTailCall()) in determineCalleeSaves()
1577 MFI->shiftSpillPhysVGPRsToLowestRange(MF); in determineCalleeSaves()
1580 if (MFI->isEntryFunction()) in determineCalleeSaves()
1593 // marked Caller-saved. in determineCalleeSaves()
1600 MFI->allocateWWMSpill(MF, MI.getOperand(0).getReg()); in determineCalleeSaves()
1602 MFI->allocateWWMSpill(MF, MI.getOperand(1).getReg()); in determineCalleeSaves()
1603 else if (TII->isWWMRegSpillOpcode(MI.getOpcode())) in determineCalleeSaves()
1607 (MFI->isChainFunction() && in determineCalleeSaves()
1608 TII->isChainCallOpcode(MI.getOpcode()))) { in determineCalleeSaves()
1612 count_if(ReturnMI->operands(), [](auto Op) { return Op.isReg(); }))); in determineCalleeSaves()
1619 // This prevents CSR restore from clobbering return VGPRs. in determineCalleeSaves()
1621 for (auto &Op : ReturnMI->operands()) { in determineCalleeSaves()
1628 SavedVGPRs.clearBitsNotInMask(TRI->getAllVectorRegMask()); in determineCalleeSaves()
1634 SavedVGPRs.clearBitsInMask(TRI->getAllAGPRRegMask()); in determineCalleeSaves()
1638 // The Whole-Wave VGPRs need to be specially inserted in the prolog, so don't in determineCalleeSaves()
1640 for (auto &Reg : MFI->getWWMSpills()) in determineCalleeSaves()
1645 for (auto &Reg : MFI->getWWMSpills()) in determineCalleeSaves()
1657 if (MFI->isEntryFunction()) in determineCalleeSavesSGPR()
1664 SavedRegs.reset(MFI->getStackPtrOffsetReg()); in determineCalleeSavesSGPR()
1667 SavedRegs.clearBitsInMask(TRI->getAllVectorRegMask()); in determineCalleeSavesSGPR()
1669 // We have to anticipate introducing CSR VGPR spills or spill of caller in determineCalleeSavesSGPR()
1673 // there are any SGPR spills. Whether they are CSR spills or otherwise. in determineCalleeSavesSGPR()
1676 FrameInfo.hasCalls() && (AllSavedRegs.any() || MFI->hasSpilledSGPRs()); in determineCalleeSavesSGPR()
1680 SavedRegs.reset(MFI->getFrameOffsetReg()); in determineCalleeSavesSGPR()
1684 // does not use CSR list, the clobbering of return address by function calls in determineCalleeSavesSGPR()
1689 Register RetAddrReg = TRI->getReturnAddressReg(MF); in determineCalleeSavesSGPR()
1690 if (!MFI->isEntryFunction() && in determineCalleeSavesSGPR()
1692 SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub0)); in determineCalleeSavesSGPR()
1693 SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub1)); in determineCalleeSavesSGPR()
1706 Register FramePtrReg = FuncInfo->getFrameOffsetReg(); in assignCalleeSavedSpillSlots()
1707 Register BasePtrReg = RI->getBaseRegister(); in assignCalleeSavedSpillSlots()
1709 FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg); in assignCalleeSavedSpillSlots()
1711 FuncInfo->getScratchSGPRCopyDstReg(BasePtrReg); in assignCalleeSavedSpillSlots()
1725 if (--NumModifiedRegs) in assignCalleeSavedSpillSlots()
1729 if (--NumModifiedRegs) in assignCalleeSavedSpillSlots()
1744 uint64_t MaxOffset = EstStackSize - 1; in allocateScavengingFrameIndexesNearIncomingSP()
1747 // MUBUF/flat scratch immediate offset from the base register, so assign these in allocateScavengingFrameIndexesNearIncomingSP()
1754 if (TII->isLegalFLATOffset(MaxOffset, AMDGPUAS::PRIVATE_ADDRESS, in allocateScavengingFrameIndexesNearIncomingSP()
1758 if (TII->isLegalMUBUFImmOffset(MaxOffset)) in allocateScavengingFrameIndexesNearIncomingSP()
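The two checks above ask whether the largest conceivable frame offset (EstStackSize - 1) still fits the instruction's immediate field; only then is it safe to park the scavenging slot near the incoming SP. A sketch of the MUBUF side, using an illustrative 12-bit unsigned range (the FLAT range differs per generation):

#include <cstdint>

// Illustrative stand-in for TII->isLegalMUBUFImmOffset(): 12-bit unsigned
// immediates on most targets.
bool fitsMUBUFImm(int64_t off) { return off >= 0 && off < 4096; }

// Keep the scavenging frame index near the incoming SP only when every
// offset up to (estimated stack size - 1) is directly encodable.
bool placeNearIncomingSP(uint64_t estStackSize) {
  return fitsMUBUFImm(int64_t(estStackSize) - 1);
}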
1769 int64_t Amount = I->getOperand(0).getImm(); in eliminateCallFramePseudoInstr()
1775 const DebugLoc &DL = I->getDebugLoc(); in eliminateCallFramePseudoInstr()
1776 unsigned Opc = I->getOpcode(); in eliminateCallFramePseudoInstr()
1777 bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode(); in eliminateCallFramePseudoInstr()
1778 uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0; in eliminateCallFramePseudoInstr()
1784 Register SPReg = MFI->getStackPtrOffsetReg(); in eliminateCallFramePseudoInstr()
1788 Amount = -Amount; in eliminateCallFramePseudoInstr()
1789 auto Add = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), SPReg) in eliminateCallFramePseudoInstr()
1792 Add->getOperand(3).setIsDead(); // Mark SCC as dead. in eliminateCallFramePseudoInstr()
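eliminateCallFramePseudoInstr() in miniature: the ADJCALLSTACKUP/DOWN amount is scaled and added to SP, with the destroy form negating it first. A sketch under the same scaling assumption as above:

#include <cstdint>

// Scale the byte amount by the scratch scale factor and add it to SP;
// the call-frame-destroy pseudo subtracts instead.
uint32_t adjustSP(uint32_t sp, int64_t amountBytes, bool isDestroy,
                  uint32_t scale) {
  if (isDestroy)
    amountBytes = -amountBytes;
  return sp + uint32_t(amountBytes * scale);
}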
1817 // For entry & chain functions we can use an immediate offset in most cases, in hasFP()
1820 !MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction() && in hasFP()
1821 !MF.getInfo<SIMachineFunctionInfo>()->isChainFunction()) { in hasFP()
1826 // frame layout is determined or CSR spills are inserted. in hasFP()
1831 MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->hasStackRealignment( in hasFP()
1846 assert((MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction() || in requiresStackPointerReference()
1847 MF.getInfo<SIMachineFunctionInfo>()->isChainFunction()) && in requiresStackPointerReference()