//===----------------------- SIFrameLowering.cpp --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//==-----------------------------------------------------------------------===//

90b57cec5SDimitry Andric #include "SIFrameLowering.h"
10e8d8bef9SDimitry Andric #include "AMDGPU.h"
11e8d8bef9SDimitry Andric #include "GCNSubtarget.h"
120b57cec5SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
13e8d8bef9SDimitry Andric #include "SIMachineFunctionInfo.h"
145f757f3fSDimitry Andric #include "llvm/CodeGen/LiveRegUnits.h"
150b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h"
160b57cec5SDimitry Andric #include "llvm/CodeGen/RegisterScavenging.h"
17e8d8bef9SDimitry Andric #include "llvm/Target/TargetMachine.h"
180b57cec5SDimitry Andric
190b57cec5SDimitry Andric using namespace llvm;
200b57cec5SDimitry Andric
210b57cec5SDimitry Andric #define DEBUG_TYPE "frame-info"
220b57cec5SDimitry Andric
23fe6060f1SDimitry Andric static cl::opt<bool> EnableSpillVGPRToAGPR(
24fe6060f1SDimitry Andric "amdgpu-spill-vgpr-to-agpr",
25fe6060f1SDimitry Andric cl::desc("Enable spilling VGPRs to AGPRs"),
26fe6060f1SDimitry Andric cl::ReallyHidden,
27fe6060f1SDimitry Andric cl::init(true));
280b57cec5SDimitry Andric
295f757f3fSDimitry Andric // Find a register matching \p RC from \p LiveUnits which is unused and
305f757f3fSDimitry Andric // available throughout the function. On failure, returns AMDGPU::NoRegister.
315f757f3fSDimitry Andric // TODO: Rewrite the loop here to iterate over MCRegUnits instead of
325f757f3fSDimitry Andric // MCRegisters. This should reduce the number of iterations and avoid redundant
335f757f3fSDimitry Andric // checking.
findUnusedRegister(MachineRegisterInfo & MRI,const LiveRegUnits & LiveUnits,const TargetRegisterClass & RC)34bdd1243dSDimitry Andric static MCRegister findUnusedRegister(MachineRegisterInfo &MRI,
355f757f3fSDimitry Andric const LiveRegUnits &LiveUnits,
36bdd1243dSDimitry Andric const TargetRegisterClass &RC) {
37bdd1243dSDimitry Andric for (MCRegister Reg : RC) {
385f757f3fSDimitry Andric if (!MRI.isPhysRegUsed(Reg) && LiveUnits.available(Reg) &&
395f757f3fSDimitry Andric !MRI.isReserved(Reg))
40bdd1243dSDimitry Andric return Reg;
41bdd1243dSDimitry Andric }
42bdd1243dSDimitry Andric return MCRegister();
43bdd1243dSDimitry Andric }
44bdd1243dSDimitry Andric
45fe6060f1SDimitry Andric // Find a scratch register that we can use in the prologue. We avoid using
46fe6060f1SDimitry Andric // callee-save registers since they may appear to be free when this is called
47fe6060f1SDimitry Andric // from canUseAsPrologue (during shrink wrapping), but then no longer be free
48fe6060f1SDimitry Andric // when this is called from emitPrologue.
findScratchNonCalleeSaveRegister(MachineRegisterInfo & MRI,LiveRegUnits & LiveUnits,const TargetRegisterClass & RC,bool Unused=false)495f757f3fSDimitry Andric static MCRegister findScratchNonCalleeSaveRegister(
505f757f3fSDimitry Andric MachineRegisterInfo &MRI, LiveRegUnits &LiveUnits,
515f757f3fSDimitry Andric const TargetRegisterClass &RC, bool Unused = false) {
520b57cec5SDimitry Andric // Mark callee saved registers as used so we will not choose them.
530b57cec5SDimitry Andric const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
540b57cec5SDimitry Andric for (unsigned i = 0; CSRegs[i]; ++i)
555f757f3fSDimitry Andric LiveUnits.addReg(CSRegs[i]);
560b57cec5SDimitry Andric
570b57cec5SDimitry Andric // We are looking for a register that can be used throughout the entire
580b57cec5SDimitry Andric // function, so any use is unacceptable.
59bdd1243dSDimitry Andric if (Unused)
605f757f3fSDimitry Andric return findUnusedRegister(MRI, LiveUnits, RC);
61bdd1243dSDimitry Andric
625ffd83dbSDimitry Andric for (MCRegister Reg : RC) {
635f757f3fSDimitry Andric if (LiveUnits.available(Reg) && !MRI.isReserved(Reg))
640b57cec5SDimitry Andric return Reg;
650b57cec5SDimitry Andric }
660b57cec5SDimitry Andric
675ffd83dbSDimitry Andric return MCRegister();
680b57cec5SDimitry Andric }
690b57cec5SDimitry Andric
7006c3fb27SDimitry Andric /// Query target location for spilling SGPRs
7106c3fb27SDimitry Andric /// \p IncludeScratchCopy : Also look for free scratch SGPRs
getVGPRSpillLaneOrTempRegister(MachineFunction & MF,LiveRegUnits & LiveUnits,Register SGPR,const TargetRegisterClass & RC=AMDGPU::SReg_32_XM0_XEXECRegClass,bool IncludeScratchCopy=true)72bdd1243dSDimitry Andric static void getVGPRSpillLaneOrTempRegister(
735f757f3fSDimitry Andric MachineFunction &MF, LiveRegUnits &LiveUnits, Register SGPR,
7406c3fb27SDimitry Andric const TargetRegisterClass &RC = AMDGPU::SReg_32_XM0_XEXECRegClass,
7506c3fb27SDimitry Andric bool IncludeScratchCopy = true) {
765ffd83dbSDimitry Andric SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
775ffd83dbSDimitry Andric MachineFrameInfo &FrameInfo = MF.getFrameInfo();
785ffd83dbSDimitry Andric
795ffd83dbSDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
805ffd83dbSDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo();
81bdd1243dSDimitry Andric unsigned Size = TRI->getSpillSize(RC);
82bdd1243dSDimitry Andric Align Alignment = TRI->getSpillAlign(RC);
835ffd83dbSDimitry Andric
84bdd1243dSDimitry Andric // We need to save and restore the given SGPR.
855ffd83dbSDimitry Andric
8606c3fb27SDimitry Andric Register ScratchSGPR;
875f757f3fSDimitry Andric // 1: Try to save the given register into an unused scratch SGPR. The
885f757f3fSDimitry Andric // LiveUnits should have all the callee saved registers marked as used. For
895f757f3fSDimitry Andric // certain cases we skip copy to scratch SGPR.
9006c3fb27SDimitry Andric if (IncludeScratchCopy)
915f757f3fSDimitry Andric ScratchSGPR = findUnusedRegister(MF.getRegInfo(), LiveUnits, RC);
92bdd1243dSDimitry Andric
93bdd1243dSDimitry Andric if (!ScratchSGPR) {
94bdd1243dSDimitry Andric int FI = FrameInfo.CreateStackObject(Size, Alignment, true, nullptr,
955ffd83dbSDimitry Andric TargetStackID::SGPRSpill);
965ffd83dbSDimitry Andric
97bdd1243dSDimitry Andric if (TRI->spillSGPRToVGPR() &&
987a6dacacSDimitry Andric MFI->allocateSGPRSpillToVGPRLane(MF, FI, /*SpillToPhysVGPRLane=*/true,
997a6dacacSDimitry Andric /*IsPrologEpilog=*/true)) {
100bdd1243dSDimitry Andric // 2: There's no free lane to spill, and no free register to save the
101bdd1243dSDimitry Andric // SGPR, so we're forced to take another VGPR to use for the spill.
102bdd1243dSDimitry Andric MFI->addToPrologEpilogSGPRSpills(
103bdd1243dSDimitry Andric SGPR, PrologEpilogSGPRSaveRestoreInfo(
104bdd1243dSDimitry Andric SGPRSaveKind::SPILL_TO_VGPR_LANE, FI));
105e8d8bef9SDimitry Andric
1065f757f3fSDimitry Andric LLVM_DEBUG(auto Spill = MFI->getSGPRSpillToPhysicalVGPRLanes(FI).front();
107bdd1243dSDimitry Andric dbgs() << printReg(SGPR, TRI) << " requires fallback spill to "
1085f757f3fSDimitry Andric << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane
1095f757f3fSDimitry Andric << '\n';);
1105ffd83dbSDimitry Andric } else {
111bdd1243dSDimitry Andric // Remove dead <FI> index
112bdd1243dSDimitry Andric MF.getFrameInfo().RemoveStackObject(FI);
113bdd1243dSDimitry Andric // 3: If all else fails, spill the register to memory.
114bdd1243dSDimitry Andric FI = FrameInfo.CreateSpillStackObject(Size, Alignment);
115bdd1243dSDimitry Andric MFI->addToPrologEpilogSGPRSpills(
116bdd1243dSDimitry Andric SGPR,
117bdd1243dSDimitry Andric PrologEpilogSGPRSaveRestoreInfo(SGPRSaveKind::SPILL_TO_MEM, FI));
118bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << "Reserved FI " << FI << " for spilling "
119bdd1243dSDimitry Andric << printReg(SGPR, TRI) << '\n');
1205ffd83dbSDimitry Andric }
1215ffd83dbSDimitry Andric } else {
122bdd1243dSDimitry Andric MFI->addToPrologEpilogSGPRSpills(
123bdd1243dSDimitry Andric SGPR, PrologEpilogSGPRSaveRestoreInfo(
124bdd1243dSDimitry Andric SGPRSaveKind::COPY_TO_SCRATCH_SGPR, ScratchSGPR));
1255f757f3fSDimitry Andric LiveUnits.addReg(ScratchSGPR);
126bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << "Saving " << printReg(SGPR, TRI) << " with copy to "
127bdd1243dSDimitry Andric << printReg(ScratchSGPR, TRI) << '\n');
1285ffd83dbSDimitry Andric }
1290b57cec5SDimitry Andric }
1300b57cec5SDimitry Andric
1310b57cec5SDimitry Andric // We need to specially emit stack operations here because a different frame
1320b57cec5SDimitry Andric // register is used than in the rest of the function, as getFrameRegister would
1330b57cec5SDimitry Andric // use.
buildPrologSpill(const GCNSubtarget & ST,const SIRegisterInfo & TRI,const SIMachineFunctionInfo & FuncInfo,LiveRegUnits & LiveUnits,MachineFunction & MF,MachineBasicBlock & MBB,MachineBasicBlock::iterator I,const DebugLoc & DL,Register SpillReg,int FI,Register FrameReg,int64_t DwordOff=0)134fe6060f1SDimitry Andric static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
135fe6060f1SDimitry Andric const SIMachineFunctionInfo &FuncInfo,
1365f757f3fSDimitry Andric LiveRegUnits &LiveUnits, MachineFunction &MF,
137e8d8bef9SDimitry Andric MachineBasicBlock &MBB,
138349cc55cSDimitry Andric MachineBasicBlock::iterator I, const DebugLoc &DL,
139bdd1243dSDimitry Andric Register SpillReg, int FI, Register FrameReg,
140bdd1243dSDimitry Andric int64_t DwordOff = 0) {
141fe6060f1SDimitry Andric unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
142fe6060f1SDimitry Andric : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
1430b57cec5SDimitry Andric
144fe6060f1SDimitry Andric MachineFrameInfo &FrameInfo = MF.getFrameInfo();
145fe6060f1SDimitry Andric MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
146fe6060f1SDimitry Andric MachineMemOperand *MMO = MF.getMachineMemOperand(
147fe6060f1SDimitry Andric PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FI),
148fe6060f1SDimitry Andric FrameInfo.getObjectAlign(FI));
1495f757f3fSDimitry Andric LiveUnits.addReg(SpillReg);
150bdd1243dSDimitry Andric bool IsKill = !MBB.isLiveIn(SpillReg);
151bdd1243dSDimitry Andric TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, IsKill, FrameReg,
1525f757f3fSDimitry Andric DwordOff, MMO, nullptr, &LiveUnits);
153bdd1243dSDimitry Andric if (IsKill)
1545f757f3fSDimitry Andric LiveUnits.removeReg(SpillReg);
155e8d8bef9SDimitry Andric }
156e8d8bef9SDimitry Andric
buildEpilogRestore(const GCNSubtarget & ST,const SIRegisterInfo & TRI,const SIMachineFunctionInfo & FuncInfo,LiveRegUnits & LiveUnits,MachineFunction & MF,MachineBasicBlock & MBB,MachineBasicBlock::iterator I,const DebugLoc & DL,Register SpillReg,int FI,Register FrameReg,int64_t DwordOff=0)157fe6060f1SDimitry Andric static void buildEpilogRestore(const GCNSubtarget &ST,
158fe6060f1SDimitry Andric const SIRegisterInfo &TRI,
159fe6060f1SDimitry Andric const SIMachineFunctionInfo &FuncInfo,
1605f757f3fSDimitry Andric LiveRegUnits &LiveUnits, MachineFunction &MF,
161e8d8bef9SDimitry Andric MachineBasicBlock &MBB,
162349cc55cSDimitry Andric MachineBasicBlock::iterator I,
163bdd1243dSDimitry Andric const DebugLoc &DL, Register SpillReg, int FI,
164bdd1243dSDimitry Andric Register FrameReg, int64_t DwordOff = 0) {
165fe6060f1SDimitry Andric unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
166fe6060f1SDimitry Andric : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
1670b57cec5SDimitry Andric
168fe6060f1SDimitry Andric MachineFrameInfo &FrameInfo = MF.getFrameInfo();
169fe6060f1SDimitry Andric MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
170fe6060f1SDimitry Andric MachineMemOperand *MMO = MF.getMachineMemOperand(
171fe6060f1SDimitry Andric PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI),
172fe6060f1SDimitry Andric FrameInfo.getObjectAlign(FI));
173bdd1243dSDimitry Andric TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, false, FrameReg,
1745f757f3fSDimitry Andric DwordOff, MMO, nullptr, &LiveUnits);
1750b57cec5SDimitry Andric }
1760b57cec5SDimitry Andric
buildGitPtr(MachineBasicBlock & MBB,MachineBasicBlock::iterator I,const DebugLoc & DL,const SIInstrInfo * TII,Register TargetReg)177e8d8bef9SDimitry Andric static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
178e8d8bef9SDimitry Andric const DebugLoc &DL, const SIInstrInfo *TII,
179e8d8bef9SDimitry Andric Register TargetReg) {
180e8d8bef9SDimitry Andric MachineFunction *MF = MBB.getParent();
181e8d8bef9SDimitry Andric const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
182e8d8bef9SDimitry Andric const SIRegisterInfo *TRI = &TII->getRegisterInfo();
183e8d8bef9SDimitry Andric const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
184e8d8bef9SDimitry Andric Register TargetLo = TRI->getSubReg(TargetReg, AMDGPU::sub0);
185e8d8bef9SDimitry Andric Register TargetHi = TRI->getSubReg(TargetReg, AMDGPU::sub1);
186e8d8bef9SDimitry Andric
187e8d8bef9SDimitry Andric if (MFI->getGITPtrHigh() != 0xffffffff) {
188e8d8bef9SDimitry Andric BuildMI(MBB, I, DL, SMovB32, TargetHi)
189e8d8bef9SDimitry Andric .addImm(MFI->getGITPtrHigh())
190e8d8bef9SDimitry Andric .addReg(TargetReg, RegState::ImplicitDefine);
191e8d8bef9SDimitry Andric } else {
1927a6dacacSDimitry Andric const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64_pseudo);
193e8d8bef9SDimitry Andric BuildMI(MBB, I, DL, GetPC64, TargetReg);
194e8d8bef9SDimitry Andric }
195e8d8bef9SDimitry Andric Register GitPtrLo = MFI->getGITPtrLoReg(*MF);
196e8d8bef9SDimitry Andric MF->getRegInfo().addLiveIn(GitPtrLo);
197e8d8bef9SDimitry Andric MBB.addLiveIn(GitPtrLo);
198e8d8bef9SDimitry Andric BuildMI(MBB, I, DL, SMovB32, TargetLo)
199e8d8bef9SDimitry Andric .addReg(GitPtrLo);
200e8d8bef9SDimitry Andric }
201e8d8bef9SDimitry Andric
initLiveUnits(LiveRegUnits & LiveUnits,const SIRegisterInfo & TRI,const SIMachineFunctionInfo * FuncInfo,MachineFunction & MF,MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,bool IsProlog)2025f757f3fSDimitry Andric static void initLiveUnits(LiveRegUnits &LiveUnits, const SIRegisterInfo &TRI,
203bdd1243dSDimitry Andric const SIMachineFunctionInfo *FuncInfo,
204bdd1243dSDimitry Andric MachineFunction &MF, MachineBasicBlock &MBB,
205bdd1243dSDimitry Andric MachineBasicBlock::iterator MBBI, bool IsProlog) {
2065f757f3fSDimitry Andric if (LiveUnits.empty()) {
2075f757f3fSDimitry Andric LiveUnits.init(TRI);
208bdd1243dSDimitry Andric if (IsProlog) {
2095f757f3fSDimitry Andric LiveUnits.addLiveIns(MBB);
210bdd1243dSDimitry Andric } else {
211bdd1243dSDimitry Andric // In epilog.
2125f757f3fSDimitry Andric LiveUnits.addLiveOuts(MBB);
2135f757f3fSDimitry Andric LiveUnits.stepBackward(*MBBI);
214bdd1243dSDimitry Andric }
215bdd1243dSDimitry Andric }
216bdd1243dSDimitry Andric }
217bdd1243dSDimitry Andric
218bdd1243dSDimitry Andric namespace llvm {
219bdd1243dSDimitry Andric
220bdd1243dSDimitry Andric // SpillBuilder to save/restore special SGPR spills like the one needed for FP,
221bdd1243dSDimitry Andric // BP, etc. These spills are delayed until the current function's frame is
222bdd1243dSDimitry Andric // finalized. For a given register, the builder uses the
223bdd1243dSDimitry Andric // PrologEpilogSGPRSaveRestoreInfo to decide the spill method.
224bdd1243dSDimitry Andric class PrologEpilogSGPRSpillBuilder {
225bdd1243dSDimitry Andric MachineBasicBlock::iterator MI;
226bdd1243dSDimitry Andric MachineBasicBlock &MBB;
227bdd1243dSDimitry Andric MachineFunction &MF;
228bdd1243dSDimitry Andric const GCNSubtarget &ST;
229bdd1243dSDimitry Andric MachineFrameInfo &MFI;
230bdd1243dSDimitry Andric SIMachineFunctionInfo *FuncInfo;
231bdd1243dSDimitry Andric const SIInstrInfo *TII;
232bdd1243dSDimitry Andric const SIRegisterInfo &TRI;
233bdd1243dSDimitry Andric Register SuperReg;
234bdd1243dSDimitry Andric const PrologEpilogSGPRSaveRestoreInfo SI;
2355f757f3fSDimitry Andric LiveRegUnits &LiveUnits;
236bdd1243dSDimitry Andric const DebugLoc &DL;
237bdd1243dSDimitry Andric Register FrameReg;
238bdd1243dSDimitry Andric ArrayRef<int16_t> SplitParts;
239bdd1243dSDimitry Andric unsigned NumSubRegs;
240bdd1243dSDimitry Andric unsigned EltSize = 4;
241bdd1243dSDimitry Andric
saveToMemory(const int FI) const242bdd1243dSDimitry Andric void saveToMemory(const int FI) const {
243bdd1243dSDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo();
244bdd1243dSDimitry Andric assert(!MFI.isDeadObjectIndex(FI));
245bdd1243dSDimitry Andric
2465f757f3fSDimitry Andric initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ true);
247bdd1243dSDimitry Andric
248bdd1243dSDimitry Andric MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
2495f757f3fSDimitry Andric MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
250bdd1243dSDimitry Andric if (!TmpVGPR)
251bdd1243dSDimitry Andric report_fatal_error("failed to find free scratch register");
252bdd1243dSDimitry Andric
253bdd1243dSDimitry Andric for (unsigned I = 0, DwordOff = 0; I < NumSubRegs; ++I) {
254bdd1243dSDimitry Andric Register SubReg = NumSubRegs == 1
255bdd1243dSDimitry Andric ? SuperReg
256bdd1243dSDimitry Andric : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
257bdd1243dSDimitry Andric BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
258bdd1243dSDimitry Andric .addReg(SubReg);
259bdd1243dSDimitry Andric
2605f757f3fSDimitry Andric buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL, TmpVGPR,
261bdd1243dSDimitry Andric FI, FrameReg, DwordOff);
262bdd1243dSDimitry Andric DwordOff += 4;
263bdd1243dSDimitry Andric }
264bdd1243dSDimitry Andric }
265bdd1243dSDimitry Andric
saveToVGPRLane(const int FI) const266bdd1243dSDimitry Andric void saveToVGPRLane(const int FI) const {
267bdd1243dSDimitry Andric assert(!MFI.isDeadObjectIndex(FI));
268bdd1243dSDimitry Andric
269bdd1243dSDimitry Andric assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
270bdd1243dSDimitry Andric ArrayRef<SIRegisterInfo::SpilledReg> Spill =
2715f757f3fSDimitry Andric FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI);
272bdd1243dSDimitry Andric assert(Spill.size() == NumSubRegs);
273bdd1243dSDimitry Andric
274bdd1243dSDimitry Andric for (unsigned I = 0; I < NumSubRegs; ++I) {
275bdd1243dSDimitry Andric Register SubReg = NumSubRegs == 1
276bdd1243dSDimitry Andric ? SuperReg
277bdd1243dSDimitry Andric : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
2785f757f3fSDimitry Andric BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_S32_TO_VGPR),
2795f757f3fSDimitry Andric Spill[I].VGPR)
280bdd1243dSDimitry Andric .addReg(SubReg)
281bdd1243dSDimitry Andric .addImm(Spill[I].Lane)
282bdd1243dSDimitry Andric .addReg(Spill[I].VGPR, RegState::Undef);
283bdd1243dSDimitry Andric }
284bdd1243dSDimitry Andric }
285bdd1243dSDimitry Andric
copyToScratchSGPR(Register DstReg) const286bdd1243dSDimitry Andric void copyToScratchSGPR(Register DstReg) const {
287bdd1243dSDimitry Andric BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), DstReg)
288bdd1243dSDimitry Andric .addReg(SuperReg)
289bdd1243dSDimitry Andric .setMIFlag(MachineInstr::FrameSetup);
290bdd1243dSDimitry Andric }
291bdd1243dSDimitry Andric
restoreFromMemory(const int FI)292bdd1243dSDimitry Andric void restoreFromMemory(const int FI) {
293bdd1243dSDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo();
294bdd1243dSDimitry Andric
2955f757f3fSDimitry Andric initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ false);
296bdd1243dSDimitry Andric MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
2975f757f3fSDimitry Andric MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
298bdd1243dSDimitry Andric if (!TmpVGPR)
299bdd1243dSDimitry Andric report_fatal_error("failed to find free scratch register");
300bdd1243dSDimitry Andric
301bdd1243dSDimitry Andric for (unsigned I = 0, DwordOff = 0; I < NumSubRegs; ++I) {
302bdd1243dSDimitry Andric Register SubReg = NumSubRegs == 1
303bdd1243dSDimitry Andric ? SuperReg
304bdd1243dSDimitry Andric : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
305bdd1243dSDimitry Andric
3065f757f3fSDimitry Andric buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL,
3075f757f3fSDimitry Andric TmpVGPR, FI, FrameReg, DwordOff);
308bdd1243dSDimitry Andric BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
309bdd1243dSDimitry Andric .addReg(TmpVGPR, RegState::Kill);
310bdd1243dSDimitry Andric DwordOff += 4;
311bdd1243dSDimitry Andric }
312bdd1243dSDimitry Andric }
313bdd1243dSDimitry Andric
restoreFromVGPRLane(const int FI)314bdd1243dSDimitry Andric void restoreFromVGPRLane(const int FI) {
315bdd1243dSDimitry Andric assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
316bdd1243dSDimitry Andric ArrayRef<SIRegisterInfo::SpilledReg> Spill =
3175f757f3fSDimitry Andric FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI);
318bdd1243dSDimitry Andric assert(Spill.size() == NumSubRegs);
319bdd1243dSDimitry Andric
320bdd1243dSDimitry Andric for (unsigned I = 0; I < NumSubRegs; ++I) {
321bdd1243dSDimitry Andric Register SubReg = NumSubRegs == 1
322bdd1243dSDimitry Andric ? SuperReg
323bdd1243dSDimitry Andric : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
3245f757f3fSDimitry Andric BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
325bdd1243dSDimitry Andric .addReg(Spill[I].VGPR)
326bdd1243dSDimitry Andric .addImm(Spill[I].Lane);
327bdd1243dSDimitry Andric }
328bdd1243dSDimitry Andric }
329bdd1243dSDimitry Andric
copyFromScratchSGPR(Register SrcReg) const330bdd1243dSDimitry Andric void copyFromScratchSGPR(Register SrcReg) const {
331bdd1243dSDimitry Andric BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), SuperReg)
332bdd1243dSDimitry Andric .addReg(SrcReg)
333bdd1243dSDimitry Andric .setMIFlag(MachineInstr::FrameDestroy);
334bdd1243dSDimitry Andric }
335bdd1243dSDimitry Andric
336bdd1243dSDimitry Andric public:
PrologEpilogSGPRSpillBuilder(Register Reg,const PrologEpilogSGPRSaveRestoreInfo SI,MachineBasicBlock & MBB,MachineBasicBlock::iterator MI,const DebugLoc & DL,const SIInstrInfo * TII,const SIRegisterInfo & TRI,LiveRegUnits & LiveUnits,Register FrameReg)337bdd1243dSDimitry Andric PrologEpilogSGPRSpillBuilder(Register Reg,
338bdd1243dSDimitry Andric const PrologEpilogSGPRSaveRestoreInfo SI,
339bdd1243dSDimitry Andric MachineBasicBlock &MBB,
340bdd1243dSDimitry Andric MachineBasicBlock::iterator MI,
341bdd1243dSDimitry Andric const DebugLoc &DL, const SIInstrInfo *TII,
342bdd1243dSDimitry Andric const SIRegisterInfo &TRI,
3435f757f3fSDimitry Andric LiveRegUnits &LiveUnits, Register FrameReg)
344bdd1243dSDimitry Andric : MI(MI), MBB(MBB), MF(*MBB.getParent()),
345bdd1243dSDimitry Andric ST(MF.getSubtarget<GCNSubtarget>()), MFI(MF.getFrameInfo()),
346bdd1243dSDimitry Andric FuncInfo(MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI),
3475f757f3fSDimitry Andric SuperReg(Reg), SI(SI), LiveUnits(LiveUnits), DL(DL),
3485f757f3fSDimitry Andric FrameReg(FrameReg) {
349bdd1243dSDimitry Andric const TargetRegisterClass *RC = TRI.getPhysRegBaseClass(SuperReg);
350bdd1243dSDimitry Andric SplitParts = TRI.getRegSplitParts(RC, EltSize);
351bdd1243dSDimitry Andric NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
352bdd1243dSDimitry Andric
353bdd1243dSDimitry Andric assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
354bdd1243dSDimitry Andric }
355bdd1243dSDimitry Andric
save()356bdd1243dSDimitry Andric void save() {
357bdd1243dSDimitry Andric switch (SI.getKind()) {
358bdd1243dSDimitry Andric case SGPRSaveKind::SPILL_TO_MEM:
359bdd1243dSDimitry Andric return saveToMemory(SI.getIndex());
360bdd1243dSDimitry Andric case SGPRSaveKind::SPILL_TO_VGPR_LANE:
361bdd1243dSDimitry Andric return saveToVGPRLane(SI.getIndex());
362bdd1243dSDimitry Andric case SGPRSaveKind::COPY_TO_SCRATCH_SGPR:
363bdd1243dSDimitry Andric return copyToScratchSGPR(SI.getReg());
364bdd1243dSDimitry Andric }
365bdd1243dSDimitry Andric }
366bdd1243dSDimitry Andric
restore()367bdd1243dSDimitry Andric void restore() {
368bdd1243dSDimitry Andric switch (SI.getKind()) {
369bdd1243dSDimitry Andric case SGPRSaveKind::SPILL_TO_MEM:
370bdd1243dSDimitry Andric return restoreFromMemory(SI.getIndex());
371bdd1243dSDimitry Andric case SGPRSaveKind::SPILL_TO_VGPR_LANE:
372bdd1243dSDimitry Andric return restoreFromVGPRLane(SI.getIndex());
373bdd1243dSDimitry Andric case SGPRSaveKind::COPY_TO_SCRATCH_SGPR:
374bdd1243dSDimitry Andric return copyFromScratchSGPR(SI.getReg());
375bdd1243dSDimitry Andric }
376bdd1243dSDimitry Andric }
377bdd1243dSDimitry Andric };
378bdd1243dSDimitry Andric
379bdd1243dSDimitry Andric } // namespace llvm
380bdd1243dSDimitry Andric
3815ffd83dbSDimitry Andric // Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()`
emitEntryFunctionFlatScratchInit(MachineFunction & MF,MachineBasicBlock & MBB,MachineBasicBlock::iterator I,const DebugLoc & DL,Register ScratchWaveOffsetReg) const3825ffd83dbSDimitry Andric void SIFrameLowering::emitEntryFunctionFlatScratchInit(
3835ffd83dbSDimitry Andric MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
3845ffd83dbSDimitry Andric const DebugLoc &DL, Register ScratchWaveOffsetReg) const {
3855ffd83dbSDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
3860b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo();
3870b57cec5SDimitry Andric const SIRegisterInfo *TRI = &TII->getRegisterInfo();
3880b57cec5SDimitry Andric const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
3890b57cec5SDimitry Andric
3900b57cec5SDimitry Andric // We don't need this if we only have spills since there is no user facing
3910b57cec5SDimitry Andric // scratch.
3920b57cec5SDimitry Andric
3930b57cec5SDimitry Andric // TODO: If we know we don't have flat instructions earlier, we can omit
3940b57cec5SDimitry Andric // this from the input registers.
3950b57cec5SDimitry Andric //
3960b57cec5SDimitry Andric // TODO: We only need to know if we access scratch space through a flat
3970b57cec5SDimitry Andric // pointer. Because we only detect if flat instructions are used at all,
3980b57cec5SDimitry Andric // this will be used more often than necessary on VI.
3990b57cec5SDimitry Andric
400e8d8bef9SDimitry Andric Register FlatScrInitLo;
401e8d8bef9SDimitry Andric Register FlatScrInitHi;
402e8d8bef9SDimitry Andric
403e8d8bef9SDimitry Andric if (ST.isAmdPalOS()) {
404e8d8bef9SDimitry Andric // Extract the scratch offset from the descriptor in the GIT
4055f757f3fSDimitry Andric LiveRegUnits LiveUnits;
4065f757f3fSDimitry Andric LiveUnits.init(*TRI);
4075f757f3fSDimitry Andric LiveUnits.addLiveIns(MBB);
408e8d8bef9SDimitry Andric
409e8d8bef9SDimitry Andric // Find unused reg to load flat scratch init into
410e8d8bef9SDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo();
411e8d8bef9SDimitry Andric Register FlatScrInit = AMDGPU::NoRegister;
412e8d8bef9SDimitry Andric ArrayRef<MCPhysReg> AllSGPR64s = TRI->getAllSGPR64(MF);
413e8d8bef9SDimitry Andric unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 1) / 2;
414e8d8bef9SDimitry Andric AllSGPR64s = AllSGPR64s.slice(
415e8d8bef9SDimitry Andric std::min(static_cast<unsigned>(AllSGPR64s.size()), NumPreloaded));
416e8d8bef9SDimitry Andric Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
417e8d8bef9SDimitry Andric for (MCPhysReg Reg : AllSGPR64s) {
4185f757f3fSDimitry Andric if (LiveUnits.available(Reg) && !MRI.isReserved(Reg) &&
4195f757f3fSDimitry Andric MRI.isAllocatable(Reg) && !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
420e8d8bef9SDimitry Andric FlatScrInit = Reg;
421e8d8bef9SDimitry Andric break;
422e8d8bef9SDimitry Andric }
423e8d8bef9SDimitry Andric }
424e8d8bef9SDimitry Andric assert(FlatScrInit && "Failed to find free register for scratch init");
425e8d8bef9SDimitry Andric
426e8d8bef9SDimitry Andric FlatScrInitLo = TRI->getSubReg(FlatScrInit, AMDGPU::sub0);
427e8d8bef9SDimitry Andric FlatScrInitHi = TRI->getSubReg(FlatScrInit, AMDGPU::sub1);
428e8d8bef9SDimitry Andric
429e8d8bef9SDimitry Andric buildGitPtr(MBB, I, DL, TII, FlatScrInit);
430e8d8bef9SDimitry Andric
431e8d8bef9SDimitry Andric // We now have the GIT ptr - now get the scratch descriptor from the entry
432e8d8bef9SDimitry Andric // at offset 0 (or offset 16 for a compute shader).
433e8d8bef9SDimitry Andric MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
434e8d8bef9SDimitry Andric const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
435e8d8bef9SDimitry Andric auto *MMO = MF.getMachineMemOperand(
436e8d8bef9SDimitry Andric PtrInfo,
437e8d8bef9SDimitry Andric MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
438e8d8bef9SDimitry Andric MachineMemOperand::MODereferenceable,
439e8d8bef9SDimitry Andric 8, Align(4));
440e8d8bef9SDimitry Andric unsigned Offset =
441e8d8bef9SDimitry Andric MF.getFunction().getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
442e8d8bef9SDimitry Andric const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
443e8d8bef9SDimitry Andric unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
444e8d8bef9SDimitry Andric BuildMI(MBB, I, DL, LoadDwordX2, FlatScrInit)
445e8d8bef9SDimitry Andric .addReg(FlatScrInit)
446e8d8bef9SDimitry Andric .addImm(EncodedOffset) // offset
447fe6060f1SDimitry Andric .addImm(0) // cpol
448e8d8bef9SDimitry Andric .addMemOperand(MMO);
449e8d8bef9SDimitry Andric
450e8d8bef9SDimitry Andric // Mask the offset in [47:0] of the descriptor
451e8d8bef9SDimitry Andric const MCInstrDesc &SAndB32 = TII->get(AMDGPU::S_AND_B32);
452349cc55cSDimitry Andric auto And = BuildMI(MBB, I, DL, SAndB32, FlatScrInitHi)
453e8d8bef9SDimitry Andric .addReg(FlatScrInitHi)
454e8d8bef9SDimitry Andric .addImm(0xffff);
455349cc55cSDimitry Andric And->getOperand(3).setIsDead(); // Mark SCC as dead.
456e8d8bef9SDimitry Andric } else {
4578bcb0991SDimitry Andric Register FlatScratchInitReg =
4588bcb0991SDimitry Andric MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT);
459e8d8bef9SDimitry Andric assert(FlatScratchInitReg);
4600b57cec5SDimitry Andric
4610b57cec5SDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo();
4620b57cec5SDimitry Andric MRI.addLiveIn(FlatScratchInitReg);
4630b57cec5SDimitry Andric MBB.addLiveIn(FlatScratchInitReg);
4640b57cec5SDimitry Andric
465e8d8bef9SDimitry Andric FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
466e8d8bef9SDimitry Andric FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
467e8d8bef9SDimitry Andric }
4680b57cec5SDimitry Andric
4690b57cec5SDimitry Andric // Do a 64-bit pointer add.
4700b57cec5SDimitry Andric if (ST.flatScratchIsPointer()) {
4710b57cec5SDimitry Andric if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
4720b57cec5SDimitry Andric BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
4730b57cec5SDimitry Andric .addReg(FlatScrInitLo)
4740b57cec5SDimitry Andric .addReg(ScratchWaveOffsetReg);
475349cc55cSDimitry Andric auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32),
476349cc55cSDimitry Andric FlatScrInitHi)
4770b57cec5SDimitry Andric .addReg(FlatScrInitHi)
4780b57cec5SDimitry Andric .addImm(0);
479349cc55cSDimitry Andric Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
480349cc55cSDimitry Andric
481*0fca6ea1SDimitry Andric using namespace AMDGPU::Hwreg;
482*0fca6ea1SDimitry Andric BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32))
483*0fca6ea1SDimitry Andric .addReg(FlatScrInitLo)
484*0fca6ea1SDimitry Andric .addImm(int16_t(HwregEncoding::encode(ID_FLAT_SCR_LO, 0, 32)));
485*0fca6ea1SDimitry Andric BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32))
486*0fca6ea1SDimitry Andric .addReg(FlatScrInitHi)
487*0fca6ea1SDimitry Andric .addImm(int16_t(HwregEncoding::encode(ID_FLAT_SCR_HI, 0, 32)));
4880b57cec5SDimitry Andric return;
4890b57cec5SDimitry Andric }
4900b57cec5SDimitry Andric
491e8d8bef9SDimitry Andric // For GFX9.
4920b57cec5SDimitry Andric BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
4930b57cec5SDimitry Andric .addReg(FlatScrInitLo)
4940b57cec5SDimitry Andric .addReg(ScratchWaveOffsetReg);
495349cc55cSDimitry Andric auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32),
496349cc55cSDimitry Andric AMDGPU::FLAT_SCR_HI)
4970b57cec5SDimitry Andric .addReg(FlatScrInitHi)
4980b57cec5SDimitry Andric .addImm(0);
499349cc55cSDimitry Andric Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
5000b57cec5SDimitry Andric
5010b57cec5SDimitry Andric return;
5020b57cec5SDimitry Andric }
5030b57cec5SDimitry Andric
504e8d8bef9SDimitry Andric assert(ST.getGeneration() < AMDGPUSubtarget::GFX9);
5050b57cec5SDimitry Andric
5060b57cec5SDimitry Andric // Copy the size in bytes.
5070b57cec5SDimitry Andric BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
5080b57cec5SDimitry Andric .addReg(FlatScrInitHi, RegState::Kill);
5090b57cec5SDimitry Andric
5100b57cec5SDimitry Andric // Add wave offset in bytes to private base offset.
5110b57cec5SDimitry Andric // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
512fe6060f1SDimitry Andric BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), FlatScrInitLo)
5130b57cec5SDimitry Andric .addReg(FlatScrInitLo)
5140b57cec5SDimitry Andric .addReg(ScratchWaveOffsetReg);
5150b57cec5SDimitry Andric
5160b57cec5SDimitry Andric // Convert offset to 256-byte units.
517349cc55cSDimitry Andric auto LShr = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32),
518349cc55cSDimitry Andric AMDGPU::FLAT_SCR_HI)
5190b57cec5SDimitry Andric .addReg(FlatScrInitLo, RegState::Kill)
5200b57cec5SDimitry Andric .addImm(8);
521bdd1243dSDimitry Andric LShr->getOperand(3).setIsDead(); // Mark SCC as dead.
5220b57cec5SDimitry Andric }
5230b57cec5SDimitry Andric
524e8d8bef9SDimitry Andric // Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
525e8d8bef9SDimitry Andric // memory. They should have been removed by now.
allStackObjectsAreDead(const MachineFrameInfo & MFI)526e8d8bef9SDimitry Andric static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
527e8d8bef9SDimitry Andric for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
528e8d8bef9SDimitry Andric I != E; ++I) {
529e8d8bef9SDimitry Andric if (!MFI.isDeadObjectIndex(I))
530e8d8bef9SDimitry Andric return false;
531e8d8bef9SDimitry Andric }
532e8d8bef9SDimitry Andric
533e8d8bef9SDimitry Andric return true;
534e8d8bef9SDimitry Andric }
535e8d8bef9SDimitry Andric
// Shift down registers reserved for the scratch RSRC.
//
// The scratch resource descriptor is initially reserved at the high end of the
// SGPR file (see TRI->reservedPrivateSegmentBufferReg). When possible, move it
// down to the first free, aligned SGPR128 tuple after the preloaded argument
// SGPRs so the high registers become available. Returns the (possibly
// re-assigned) scratch RSRC register, or Register() when no scratch RSRC is
// actually needed.
Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
    MachineFunction &MF) const {

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  assert(MFI->isEntryFunction());

  Register ScratchRsrcReg = MFI->getScratchRSrcReg();

  // Nothing to do if there is no RSRC, or it is never referenced and every
  // stack object is dead (no scratch memory is required).
  if (!ScratchRsrcReg || (!MRI.isPhysRegUsed(ScratchRsrcReg) &&
                          allStackObjectsAreDead(MF.getFrameInfo())))
    return Register();

  // Keep the current assignment when shifting is not applicable: either the
  // subtarget has the SGPR init bug, or the RSRC is not in its default
  // reserved location.
  if (ST.hasSGPRInitBug() ||
      ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
    return ScratchRsrcReg;

  // We reserved the last registers for this. Shift it down to the end of those
  // which were actually used.
  //
  // FIXME: It might be safer to use a pseudoregister before replacement.

  // FIXME: We should be able to eliminate unused input registers. We only
  // cannot do this for the resources required for scratch access. For now we
  // skip over user SGPRs and may leave unused holes.

  // Round the preloaded SGPR count up to a multiple of 4 so we only consider
  // SGPR128 tuples that start past the preloaded arguments.
  unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
  ArrayRef<MCPhysReg> AllSGPR128s = TRI->getAllSGPR128(MF);
  AllSGPR128s = AllSGPR128s.slice(std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));

  // Skip the last N reserved elements because they should have already been
  // reserved for VCC etc.
  Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
  for (MCPhysReg Reg : AllSGPR128s) {
    // Pick the first unallocated one. Make sure we don't clobber the other
    // reserved input we needed. Also for PAL, make sure we don't clobber
    // the GIT pointer passed in SGPR0 or SGPR8.
    if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
        (!GITPtrLoReg || !TRI->isSubRegisterEq(Reg, GITPtrLoReg))) {
      // Rewrite all existing uses to the new tuple, record it in the function
      // info, and reserve it so the allocator will not hand it out.
      MRI.replaceRegWith(ScratchRsrcReg, Reg);
      MFI->setScratchRSrcReg(Reg);
      MRI.reserveReg(Reg, TRI);
      return Reg;
    }
  }

  // No free tuple was found; keep the original reservation.
  return ScratchRsrcReg;
}
5890b57cec5SDimitry Andric
getScratchScaleFactor(const GCNSubtarget & ST)590e8d8bef9SDimitry Andric static unsigned getScratchScaleFactor(const GCNSubtarget &ST) {
591e8d8bef9SDimitry Andric return ST.enableFlatScratch() ? 1 : ST.getWavefrontSize();
592e8d8bef9SDimitry Andric }
593e8d8bef9SDimitry Andric
// Emit the kernel entry-function prologue: select and initialize the scratch
// RSRC register, locate (or copy) the scratch wave offset, initialize FP/SP
// when required, and emit flat scratch initialization when it is used.
void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
                                                MachineBasicBlock &MBB) const {
  assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");

  // FIXME: If we only have SGPR spills, we won't actually be using scratch
  // memory since these spill to VGPRs. We should be cleaning up these unused
  // SGPR spill frame indices somewhere.

  // FIXME: We still have implicit uses on SGPR spill instructions in case they
  // need to spill to vector memory. It's likely that will not happen, but at
  // this point it appears we need the setup. This part of the prolog should be
  // emitted after frame indices are eliminated.

  // FIXME: Remove all of the isPhysRegUsed checks

  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();

  assert(MFI->isEntryFunction());

  Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
      AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);

  // We need to do the replacement of the private segment buffer register even
  // if there are no stack objects. There could be stores to undef or a
  // constant without an associated object.
  //
  // This will return `Register()` in cases where there are no actual
  // uses of the SRSRC.
  Register ScratchRsrcReg;
  if (!ST.enableFlatScratch())
    ScratchRsrcReg = getEntryFunctionReservedScratchRsrcReg(MF);

  // Make the selected register live throughout the function.
  if (ScratchRsrcReg) {
    for (MachineBasicBlock &OtherBB : MF) {
      if (&OtherBB != &MBB) {
        OtherBB.addLiveIn(ScratchRsrcReg);
      }
    }
  }

  // Now that we have fixed the reserved SRSRC we need to locate the
  // (potentially) preloaded SRSRC.
  Register PreloadedScratchRsrcReg;
  if (ST.isAmdHsaOrMesa(F)) {
    PreloadedScratchRsrcReg =
        MFI->getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
    if (ScratchRsrcReg && PreloadedScratchRsrcReg) {
      // We added live-ins during argument lowering, but since they were not
      // used they were deleted. We're adding the uses now, so add them back.
      MRI.addLiveIn(PreloadedScratchRsrcReg);
      MBB.addLiveIn(PreloadedScratchRsrcReg);
    }
  }

  // Debug location must be unknown since the first debug location is used to
  // determine the end of the prologue.
  DebugLoc DL;
  MachineBasicBlock::iterator I = MBB.begin();

  // We found the SRSRC first because it needs four registers and has an
  // alignment requirement. If the SRSRC that we found is clobbering with
  // the scratch wave offset, which may be in a fixed SGPR or a free SGPR
  // chosen by SITargetLowering::allocateSystemSGPRs, COPY the scratch
  // wave offset to a free SGPR.
  Register ScratchWaveOffsetReg;
  if (PreloadedScratchWaveOffsetReg &&
      TRI->isSubRegisterEq(ScratchRsrcReg, PreloadedScratchWaveOffsetReg)) {
    // Search the SGPRs after the preloaded arguments for a free register that
    // does not overlap the SRSRC or the GIT pointer low register.
    ArrayRef<MCPhysReg> AllSGPRs = TRI->getAllSGPR32(MF);
    unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
    AllSGPRs = AllSGPRs.slice(
        std::min(static_cast<unsigned>(AllSGPRs.size()), NumPreloaded));
    Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
    for (MCPhysReg Reg : AllSGPRs) {
      if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
          !TRI->isSubRegisterEq(ScratchRsrcReg, Reg) && GITPtrLoReg != Reg) {
        ScratchWaveOffsetReg = Reg;
        BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
            .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill);
        break;
      }
    }

    // FIXME: We can spill incoming arguments and restore at the end of the
    // prolog.
    if (!ScratchWaveOffsetReg)
      report_fatal_error(
          "could not find temporary scratch offset register in prolog");
  } else {
    ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg;
  }
  assert(ScratchWaveOffsetReg || !PreloadedScratchWaveOffsetReg);

  // Initialize the frame pointer to zero at function entry.
  if (hasFP(MF)) {
    Register FPReg = MFI->getFrameOffsetReg();
    assert(FPReg != AMDGPU::FP_REG);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0);
  }

  // Initialize SP to the static stack size, scaled per addressing mode.
  if (requiresStackPointerReference(MF)) {
    Register SPReg = MFI->getStackPtrOffsetReg();
    assert(SPReg != AMDGPU::SP_REG);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg)
        .addImm(FrameInfo.getStackSize() * getScratchScaleFactor(ST));
  }

  // Flat scratch init is required only when the flat scratch init user SGPRs
  // were allocated AND flat scratch is actually used: FLAT_SCR is referenced,
  // there are calls, or (with flat scratch addressing) live stack objects.
  bool NeedsFlatScratchInit =
      MFI->getUserSGPRInfo().hasFlatScratchInit() &&
      (MRI.isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.hasCalls() ||
       (!allStackObjectsAreDead(FrameInfo) && ST.enableFlatScratch()));

  // The preloaded wave offset is consumed below; mark it live-in when any
  // consumer exists and flat scratch is not architected.
  if ((NeedsFlatScratchInit || ScratchRsrcReg) &&
      PreloadedScratchWaveOffsetReg && !ST.flatScratchIsArchitected()) {
    MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
    MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
  }

  if (NeedsFlatScratchInit) {
    emitEntryFunctionFlatScratchInit(MF, MBB, I, DL, ScratchWaveOffsetReg);
  }

  if (ScratchRsrcReg) {
    emitEntryFunctionScratchRsrcRegSetup(MF, MBB, I, DL,
                                         PreloadedScratchRsrcReg,
                                         ScratchRsrcReg, ScratchWaveOffsetReg);
  }
}
7270b57cec5SDimitry Andric
// Emit scratch RSRC setup code, assuming `ScratchRsrcReg != AMDGPU::NoReg`
//
// Materializes the scratch resource descriptor in \p ScratchRsrcReg, using one
// of three strategies depending on the ABI: load it from the GIT (PAL),
// construct it from relocations plus constant words (Mesa gfx shader or no
// preloaded SRSRC), or copy the preloaded descriptor (HSA/Mesa). Finally the
// scratch wave offset is added into the descriptor's base address.
void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
    MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
    const DebugLoc &DL, Register PreloadedScratchRsrcReg,
    Register ScratchRsrcReg, Register ScratchWaveOffsetReg) const {

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const Function &Fn = MF.getFunction();

  if (ST.isAmdPalOS()) {
    // The pointer to the GIT is formed from the offset passed in and either
    // the amdgpu-git-ptr-high function attribute or the top part of the PC
    Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
    Register Rsrc03 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);

    buildGitPtr(MBB, I, DL, TII, Rsrc01);

    // We now have the GIT ptr - now get the scratch descriptor from the entry
    // at offset 0 (or offset 16 for a compute shader).
    MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
    const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
    auto MMO = MF.getMachineMemOperand(PtrInfo,
                                       MachineMemOperand::MOLoad |
                                           MachineMemOperand::MOInvariant |
                                           MachineMemOperand::MODereferenceable,
                                       16, Align(4));
    unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
    const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
    unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
    BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
        .addReg(Rsrc01)
        .addImm(EncodedOffset) // offset
        .addImm(0) // cpol
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
        .addMemOperand(MMO);

    // The driver will always set the SRD for wave 64 (bits 118:117 of
    // descriptor / bits 22:21 of third sub-reg will be 0b11)
    // If the shader is actually wave32 we have to modify the const_index_stride
    // field of the descriptor 3rd sub-reg (bits 22:21) to 0b10 (stride=32). The
    // reason the driver does this is that there can be cases where it presents
    // 2 shaders with different wave size (e.g. VsFs).
    // TODO: convert to using SCRATCH instructions or multiple SRD buffers
    if (ST.isWave32()) {
      const MCInstrDesc &SBitsetB32 = TII->get(AMDGPU::S_BITSET0_B32);
      BuildMI(MBB, I, DL, SBitsetB32, Rsrc03)
          .addImm(21)
          .addReg(Rsrc03);
    }
  } else if (ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) {
    // No preloaded descriptor: build one from relocated symbols (or the
    // implicit buffer pointer) plus constant high words.
    assert(!ST.isAmdHsaOrMesa(Fn));
    const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);

    Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
    Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);

    // Use relocations to get the pointer, and setup the other bits manually.
    uint64_t Rsrc23 = TII->getScratchRsrcWords23();

    if (MFI->getUserSGPRInfo().hasImplicitBufferPtr()) {
      Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);

      if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
        // Compute: the implicit buffer pointer itself is the low 64 bits.
        const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);

        BuildMI(MBB, I, DL, Mov64, Rsrc01)
            .addReg(MFI->getImplicitBufferPtrUserSGPR())
            .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
      } else {
        // Graphics: load the low 64 bits through the implicit buffer pointer.
        const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);

        MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
        auto MMO = MF.getMachineMemOperand(
            PtrInfo,
            MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
                MachineMemOperand::MODereferenceable,
            8, Align(4));
        BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
            .addReg(MFI->getImplicitBufferPtrUserSGPR())
            .addImm(0) // offset
            .addImm(0) // cpol
            .addMemOperand(MMO)
            .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

        // Add the pointer register as a use so it stays live into the block.
        MF.getRegInfo().addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
        MBB.addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
      }
    } else {
      // No implicit buffer pointer: the low 64 bits come from external
      // relocation symbols resolved at link time.
      Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
      Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);

      BuildMI(MBB, I, DL, SMovB32, Rsrc0)
          .addExternalSymbol("SCRATCH_RSRC_DWORD0")
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

      BuildMI(MBB, I, DL, SMovB32, Rsrc1)
          .addExternalSymbol("SCRATCH_RSRC_DWORD1")
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
    }

    // High 64 bits of the descriptor are target-specific constants.
    BuildMI(MBB, I, DL, SMovB32, Rsrc2)
        .addImm(Rsrc23 & 0xffffffff)
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

    BuildMI(MBB, I, DL, SMovB32, Rsrc3)
        .addImm(Rsrc23 >> 32)
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  } else if (ST.isAmdHsaOrMesa(Fn)) {
    assert(PreloadedScratchRsrcReg);

    // Descriptor was preloaded; just move it into the selected register.
    if (ScratchRsrcReg != PreloadedScratchRsrcReg) {
      BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
          .addReg(PreloadedScratchRsrcReg, RegState::Kill);
    }
  }

  // Add the scratch wave offset into the scratch RSRC.
  //
  // We only want to update the first 48 bits, which is the base address
  // pointer, without touching the adjacent 16 bits of flags. We know this add
  // cannot carry-out from bit 47, otherwise the scratch allocation would be
  // impossible to fit in the 48-bit global address space.
  //
  // TODO: Evaluate if it is better to just construct an SRD using the flat
  // scratch init and some constants rather than update the one we are passed.
  Register ScratchRsrcSub0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
  Register ScratchRsrcSub1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);

  // We cannot Kill ScratchWaveOffsetReg here because we allow it to be used in
  // the kernel body via inreg arguments.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), ScratchRsrcSub0)
      .addReg(ScratchRsrcSub0)
      .addReg(ScratchWaveOffsetReg)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1)
                  .addReg(ScratchRsrcSub1)
                  .addImm(0)
                  .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
}
8715ffd83dbSDimitry Andric
isSupportedStackID(TargetStackID::Value ID) const8720b57cec5SDimitry Andric bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
8730b57cec5SDimitry Andric switch (ID) {
8740b57cec5SDimitry Andric case TargetStackID::Default:
8750b57cec5SDimitry Andric case TargetStackID::NoAlloc:
8760b57cec5SDimitry Andric case TargetStackID::SGPRSpill:
8770b57cec5SDimitry Andric return true;
878e8d8bef9SDimitry Andric case TargetStackID::ScalableVector:
879fe6060f1SDimitry Andric case TargetStackID::WasmLocal:
8808bcb0991SDimitry Andric return false;
8810b57cec5SDimitry Andric }
8820b57cec5SDimitry Andric llvm_unreachable("Invalid TargetStackID::Value");
8830b57cec5SDimitry Andric }
8840b57cec5SDimitry Andric
// Activate only the inactive lanes when \p EnableInactiveLanes is true.
// Otherwise, activate all lanes. It returns the saved exec.
static Register buildScratchExecCopy(LiveRegUnits &LiveUnits,
                                     MachineFunction &MF,
                                     MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     const DebugLoc &DL, bool IsProlog,
                                     bool EnableInactiveLanes) {
  Register ScratchExecCopy;
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, IsProlog);

  // Find a free register of the wave mask class to hold the saved EXEC value.
  ScratchExecCopy = findScratchNonCalleeSaveRegister(
      MRI, LiveUnits, *TRI.getWaveMaskRegClass());
  if (!ScratchExecCopy)
    report_fatal_error("failed to find free scratch register");

  LiveUnits.addReg(ScratchExecCopy);

  // XOR_SAVEEXEC with -1 leaves only the previously-inactive lanes enabled;
  // OR_SAVEEXEC with -1 enables all lanes. Both save the old EXEC first.
  const unsigned SaveExecOpc =
      ST.isWave32() ? (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B32
                                           : AMDGPU::S_OR_SAVEEXEC_B32)
                    : (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B64
                                           : AMDGPU::S_OR_SAVEEXEC_B64);
  auto SaveExec =
      BuildMI(MBB, MBBI, DL, TII->get(SaveExecOpc), ScratchExecCopy).addImm(-1);
  SaveExec->getOperand(3).setIsDead(); // Mark SCC as dead.

  return ScratchExecCopy;
}
9205ffd83dbSDimitry Andric
// Emit the prologue stores for callee-saved registers: WWM VGPR spills first
// (inactive lanes for scratch registers, all lanes for callee-saved VGPRs,
// toggling EXEC as required), followed by the prolog/epilog SGPR spills.
void SIFrameLowering::emitCSRSpillStores(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits,
    Register FrameReg, Register FramePtrRegScratchCopy) const {
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  // Spill Whole-Wave Mode VGPRs. Save only the inactive lanes of the scratch
  // registers. However, save all lanes of callee-saved VGPRs. Due to this, we
  // might end up flipping the EXEC bits twice.
  Register ScratchExecCopy;
  SmallVector<std::pair<Register, int>, 2> WWMCalleeSavedRegs, WWMScratchRegs;
  FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs);
  if (!WWMScratchRegs.empty())
    ScratchExecCopy =
        buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                             /*IsProlog*/ true, /*EnableInactiveLanes*/ true);

  // Store each (VGPR, frame index) pair relative to the frame register.
  auto StoreWWMRegisters =
      [&](SmallVectorImpl<std::pair<Register, int>> &WWMRegs) {
        for (const auto &Reg : WWMRegs) {
          Register VGPR = Reg.first;
          int FI = Reg.second;
          buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL,
                           VGPR, FI, FrameReg);
        }
      };

  StoreWWMRegisters(WWMScratchRegs);
  if (!WWMCalleeSavedRegs.empty()) {
    if (ScratchExecCopy) {
      // EXEC was already saved above; switch all lanes on for the CSR VGPRs.
      unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
      BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec()).addImm(-1);
    } else {
      ScratchExecCopy = buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                                             /*IsProlog*/ true,
                                             /*EnableInactiveLanes*/ false);
    }
  }

  StoreWWMRegisters(WWMCalleeSavedRegs);
  if (ScratchExecCopy) {
    // FIXME: Split block and make terminator.
    // Restore the original EXEC mask.
    unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    BuildMI(MBB, MBBI, DL, TII->get(ExecMov), TRI.getExec())
        .addReg(ScratchExecCopy, RegState::Kill);
    LiveUnits.addReg(ScratchExecCopy);
  }

  Register FramePtrReg = FuncInfo->getFrameOffsetReg();

  for (const auto &Spill : FuncInfo->getPrologEpilogSGPRSpills()) {
    // Special handle FP spill:
    // Skip if FP is saved to a scratch SGPR, the save has already been emitted.
    // Otherwise, FP has been moved to a temporary register and spill it
    // instead.
    Register Reg =
        Spill.first == FramePtrReg ? FramePtrRegScratchCopy : Spill.first;
    if (!Reg)
      continue;

    PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII, TRI,
                                    LiveUnits, FrameReg);
    SB.save();
  }

  // If a copy to scratch SGPR has been chosen for any of the SGPR spills, make
  // such scratch registers live throughout the function.
  SmallVector<Register, 1> ScratchSGPRs;
  FuncInfo->getAllScratchSGPRCopyDstRegs(ScratchSGPRs);
  if (!ScratchSGPRs.empty()) {
    for (MachineBasicBlock &MBB : MF) {
      for (MCPhysReg Reg : ScratchSGPRs)
        MBB.addLiveIn(Reg);

      MBB.sortUniqueLiveIns();
    }
    if (!LiveUnits.empty()) {
      for (MCPhysReg Reg : ScratchSGPRs)
        LiveUnits.addReg(Reg);
    }
  }
}
1006bdd1243dSDimitry Andric
// Emit the epilogue-side restores for all prolog/epilog spills: first the
// SGPR spills (FP handled specially via a scratch copy), then the WWM VGPR
// spills, toggling EXEC as needed so inactive lanes are restored correctly.
// \p FramePtrRegScratchCopy, when valid, is the temporary SGPR that receives
// the saved FP value; the caller copies it into FP once all restores that
// are addressed relative to the current frame have been emitted.
void SIFrameLowering::emitCSRSpillRestores(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits,
    Register FrameReg, Register FramePtrRegScratchCopy) const {
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  Register FramePtrReg = FuncInfo->getFrameOffsetReg();

  for (const auto &Spill : FuncInfo->getPrologEpilogSGPRSpills()) {
    // Special handle FP restore:
    // Skip if FP needs to be restored from the scratch SGPR. Otherwise, restore
    // the FP value to a temporary register. The frame pointer should be
    // overwritten only at the end when all other spills are restored from
    // current frame.
    Register Reg =
        Spill.first == FramePtrReg ? FramePtrRegScratchCopy : Spill.first;
    if (!Reg)
      continue;

    PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII, TRI,
                                    LiveUnits, FrameReg);
    SB.restore();
  }

  // Restore Whole-Wave Mode VGPRs. Restore only the inactive lanes of the
  // scratch registers. However, restore all lanes of callee-saved VGPRs. Due to
  // this, we might end up flipping the EXEC bits twice.
  Register ScratchExecCopy;
  SmallVector<std::pair<Register, int>, 2> WWMCalleeSavedRegs, WWMScratchRegs;
  FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs);
  if (!WWMScratchRegs.empty())
    ScratchExecCopy =
        buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                             /*IsProlog*/ false, /*EnableInactiveLanes*/ true);

  // Helper: reload each (VGPR, frame-index) pair from its stack slot.
  auto RestoreWWMRegisters =
      [&](SmallVectorImpl<std::pair<Register, int>> &WWMRegs) {
        for (const auto &Reg : WWMRegs) {
          Register VGPR = Reg.first;
          int FI = Reg.second;
          buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL,
                             VGPR, FI, FrameReg);
        }
      };

  RestoreWWMRegisters(WWMScratchRegs);
  if (!WWMCalleeSavedRegs.empty()) {
    if (ScratchExecCopy) {
      // EXEC was already saved above; just enable all lanes so callee-saved
      // VGPRs are restored in full.
      unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
      BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec()).addImm(-1);
    } else {
      ScratchExecCopy = buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                                             /*IsProlog*/ false,
                                             /*EnableInactiveLanes*/ false);
    }
  }

  RestoreWWMRegisters(WWMCalleeSavedRegs);
  if (ScratchExecCopy) {
    // Restore the original EXEC mask from the scratch copy.
    // FIXME: Split block and make terminator.
    unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    BuildMI(MBB, MBBI, DL, TII->get(ExecMov), TRI.getExec())
        .addReg(ScratchExecCopy, RegState::Kill);
  }
}
1074fe6060f1SDimitry Andric
// Emit the prologue for a non-entry function: optionally materialize SP for
// chain functions, emit CSR spill stores (SP- or FP-relative depending on
// whether a frame pointer is required), set up FP (realigning the stack when
// needed), set up the base pointer, and finally bump SP by the frame size.
void SIFrameLowering::emitPrologue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  // Entry functions (kernels) use a separate, dedicated prologue path.
  if (FuncInfo->isEntryFunction()) {
    emitEntryFunctionPrologue(MF, MBB);
    return;
  }

  MachineFrameInfo &MFI = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
  Register BasePtrReg =
      TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
  LiveRegUnits LiveUnits;

  MachineBasicBlock::iterator MBBI = MBB.begin();
  // DebugLoc must be unknown since the first instruction with DebugLoc is used
  // to determine the end of the prologue.
  DebugLoc DL;

  if (FuncInfo->isChainFunction()) {
    // Functions with the amdgpu_cs_chain[_preserve] CC don't receive a SP, but
    // are free to set one up if they need it.
    bool UseSP = requiresStackPointerReference(MF);
    if (UseSP) {
      assert(StackPtrReg != AMDGPU::SP_REG);

      // SP counts in scratch units, hence the scale factor.
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_MOV_B32), StackPtrReg)
          .addImm(MFI.getStackSize() * getScratchScaleFactor(ST));
    }
  }

  bool HasFP = false;
  bool HasBP = false;
  uint32_t NumBytes = MFI.getStackSize();
  uint32_t RoundedSize = NumBytes;

  // Stack realignment always requires a frame pointer.
  if (TRI.hasStackRealignment(MF))
    HasFP = true;

  Register FramePtrRegScratchCopy;
  if (!HasFP && !hasFP(MF)) {
    // Emit the CSR spill stores with SP base register.
    emitCSRSpillStores(MF, MBB, MBBI, DL, LiveUnits,
                       FuncInfo->isChainFunction() ? Register() : StackPtrReg,
                       FramePtrRegScratchCopy);
  } else {
    // CSR spill stores will use FP as base register.
    Register SGPRForFPSaveRestoreCopy =
        FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);

    initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true);
    if (SGPRForFPSaveRestoreCopy) {
      // Copy FP to the scratch register now and emit the CFI entry. It avoids
      // the extra FP copy needed in the other two cases when FP is spilled to
      // memory or to a VGPR lane.
      PrologEpilogSGPRSpillBuilder SB(
          FramePtrReg,
          FuncInfo->getPrologEpilogSGPRSaveRestoreInfo(FramePtrReg), MBB, MBBI,
          DL, TII, TRI, LiveUnits, FramePtrReg);
      SB.save();
      LiveUnits.addReg(SGPRForFPSaveRestoreCopy);
    } else {
      // Copy FP into a new scratch register so that its previous value can be
      // spilled after setting up the new frame.
      FramePtrRegScratchCopy = findScratchNonCalleeSaveRegister(
          MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
      if (!FramePtrRegScratchCopy)
        report_fatal_error("failed to find free scratch register");

      LiveUnits.addReg(FramePtrRegScratchCopy);
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrRegScratchCopy)
          .addReg(FramePtrReg);
    }
  }

  if (HasFP) {
    // Realign: FP = (SP + Align - 1) & -Align, and grow the frame by the
    // alignment padding.
    const unsigned Alignment = MFI.getMaxAlign().value();

    RoundedSize += Alignment;
    if (LiveUnits.empty()) {
      LiveUnits.init(TRI);
      LiveUnits.addLiveIns(MBB);
    }

    // s_add_i32 s33, s32, NumBytes
    // s_and_b32 s33, s33, 0b111...0000
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), FramePtrReg)
        .addReg(StackPtrReg)
        .addImm((Alignment - 1) * getScratchScaleFactor(ST))
        .setMIFlag(MachineInstr::FrameSetup);
    auto And = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
                   .addReg(FramePtrReg, RegState::Kill)
                   .addImm(-Alignment * getScratchScaleFactor(ST))
                   .setMIFlag(MachineInstr::FrameSetup);
    And->getOperand(3).setIsDead(); // Mark SCC as dead.
    FuncInfo->setIsStackRealigned(true);
  } else if ((HasFP = hasFP(MF))) {
    // No realignment needed: FP is simply the incoming SP.
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
        .addReg(StackPtrReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // If FP is used, emit the CSR spills with FP base register.
  if (HasFP) {
    emitCSRSpillStores(MF, MBB, MBBI, DL, LiveUnits, FramePtrReg,
                       FramePtrRegScratchCopy);
    if (FramePtrRegScratchCopy)
      LiveUnits.removeReg(FramePtrRegScratchCopy);
  }

  // If we need a base pointer, set it up here. It's whatever the value of
  // the stack pointer is at this point. Any variable size objects will be
  // allocated after this, so we can still use the base pointer to reference
  // the incoming arguments.
  if ((HasBP = TRI.hasBasePointer(MF))) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
        .addReg(StackPtrReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // Allocate the frame: bump SP by the (possibly realignment-padded) size.
  if (HasFP && RoundedSize != 0) {
    auto Add = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
                   .addReg(StackPtrReg)
                   .addImm(RoundedSize * getScratchScaleFactor(ST))
                   .setMIFlag(MachineInstr::FrameSetup);
    Add->getOperand(3).setIsDead(); // Mark SCC as dead.
  }

  bool FPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(FramePtrReg);
  (void)FPSaved;
  assert((!HasFP || FPSaved) &&
         "Needed to save FP but didn't save it anywhere");

  // If we allow spilling to AGPRs we may have saved FP but then spill
  // everything into AGPRs instead of the stack.
  assert((HasFP || !FPSaved || EnableSpillVGPRToAGPR) &&
         "Saved FP but didn't need it");

  bool BPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(BasePtrReg);
  (void)BPSaved;
  assert((!HasBP || BPSaved) &&
         "Needed to save BP but didn't save it anywhere");

  assert((HasBP || !BPSaved) && "Saved BP but didn't need it");
}
12260b57cec5SDimitry Andric
// Emit the epilogue for a non-entry function: restore the CSR spills
// (FP-relative if FP was saved, otherwise SP-relative), shrink SP back by the
// frame size, and restore the frame pointer last.
void SIFrameLowering::emitEpilogue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  // Entry functions have no epilogue work here.
  if (FuncInfo->isEntryFunction())
    return;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  LiveRegUnits LiveUnits;
  // Get the insert location for the epilogue. If there were no terminators in
  // the block, get the last instruction.
  MachineBasicBlock::iterator MBBI = MBB.end();
  DebugLoc DL;
  if (!MBB.empty()) {
    MBBI = MBB.getLastNonDebugInstr();
    if (MBBI != MBB.end())
      DL = MBBI->getDebugLoc();

    MBBI = MBB.getFirstTerminator();
  }

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  uint32_t NumBytes = MFI.getStackSize();
  // Account for the alignment padding added by the prologue's realignment.
  uint32_t RoundedSize = FuncInfo->isStackRealigned()
                             ? NumBytes + MFI.getMaxAlign().value()
                             : NumBytes;
  const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
  bool FPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(FramePtrReg);

  Register FramePtrRegScratchCopy;
  Register SGPRForFPSaveRestoreCopy =
      FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
  if (FPSaved) {
    // CSR spill restores should use FP as base register. If
    // SGPRForFPSaveRestoreCopy is not true, restore the previous value of FP
    // into a new scratch register and copy to FP later when other registers are
    // restored from the current stack frame.
    initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);
    if (SGPRForFPSaveRestoreCopy) {
      LiveUnits.addReg(SGPRForFPSaveRestoreCopy);
    } else {
      FramePtrRegScratchCopy = findScratchNonCalleeSaveRegister(
          MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
      if (!FramePtrRegScratchCopy)
        report_fatal_error("failed to find free scratch register");

      LiveUnits.addReg(FramePtrRegScratchCopy);
    }

    emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveUnits, FramePtrReg,
                         FramePtrRegScratchCopy);
  }

  // Deallocate the frame before FP is overwritten below.
  if (RoundedSize != 0 && hasFP(MF)) {
    auto Add =
        BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
            .addReg(StackPtrReg)
            .addImm(-static_cast<int64_t>(RoundedSize *
                                          getScratchScaleFactor(ST)))
            .setMIFlag(MachineInstr::FrameDestroy);
    Add->getOperand(3).setIsDead(); // Mark SCC as dead.
  }

  if (FPSaved) {
    // Insert the copy to restore FP.
    Register SrcReg = SGPRForFPSaveRestoreCopy ? SGPRForFPSaveRestoreCopy
                                               : FramePtrRegScratchCopy;
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
            .addReg(SrcReg);
    if (SGPRForFPSaveRestoreCopy)
      MIB.setMIFlag(MachineInstr::FrameDestroy);
  } else {
    // Insert the CSR spill restores with SP as the base register.
    emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveUnits, StackPtrReg,
                         FramePtrRegScratchCopy);
  }
}
13060b57cec5SDimitry Andric
13070b57cec5SDimitry Andric #ifndef NDEBUG
allSGPRSpillsAreDead(const MachineFunction & MF)1308e8d8bef9SDimitry Andric static bool allSGPRSpillsAreDead(const MachineFunction &MF) {
1309e8d8bef9SDimitry Andric const MachineFrameInfo &MFI = MF.getFrameInfo();
1310e8d8bef9SDimitry Andric const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
13110b57cec5SDimitry Andric for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
13120b57cec5SDimitry Andric I != E; ++I) {
13130b57cec5SDimitry Andric if (!MFI.isDeadObjectIndex(I) &&
13140b57cec5SDimitry Andric MFI.getStackID(I) == TargetStackID::SGPRSpill &&
1315bdd1243dSDimitry Andric !FuncInfo->checkIndexInPrologEpilogSGPRSpills(I)) {
13160b57cec5SDimitry Andric return false;
13170b57cec5SDimitry Andric }
13180b57cec5SDimitry Andric }
13190b57cec5SDimitry Andric
13200b57cec5SDimitry Andric return true;
13210b57cec5SDimitry Andric }
13220b57cec5SDimitry Andric #endif
13230b57cec5SDimitry Andric
getFrameIndexReference(const MachineFunction & MF,int FI,Register & FrameReg) const1324e8d8bef9SDimitry Andric StackOffset SIFrameLowering::getFrameIndexReference(const MachineFunction &MF,
1325e8d8bef9SDimitry Andric int FI,
13265ffd83dbSDimitry Andric Register &FrameReg) const {
13270b57cec5SDimitry Andric const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
13280b57cec5SDimitry Andric
13290b57cec5SDimitry Andric FrameReg = RI->getFrameRegister(MF);
1330e8d8bef9SDimitry Andric return StackOffset::getFixed(MF.getFrameInfo().getObjectOffset(FI));
13310b57cec5SDimitry Andric }
13320b57cec5SDimitry Andric
// Hook run before stack frame layout is finalized: allocate WWM VGPR spill
// slots, opportunistically fold VGPR spills into AGPRs (freeing their stack
// slots), reset any remaining SGPR-spill slots to the default stack, and set
// up emergency scavenging slots when real stack objects remain.
void SIFrameLowering::processFunctionBeforeFrameFinalized(
    MachineFunction &MF,
    RegScavenger *RS) const {
  MachineFrameInfo &MFI = MF.getFrameInfo();

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  // Allocate spill slots for WWM reserved VGPRs.
  // For chain functions, we only need to do this if we have calls to
  // llvm.amdgcn.cs.chain.
  bool IsChainWithoutCalls =
      FuncInfo->isChainFunction() && !MF.getFrameInfo().hasTailCall();
  if (!FuncInfo->isEntryFunction() && !IsChainWithoutCalls) {
    for (Register Reg : FuncInfo->getWWMReservedRegs()) {
      const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg);
      FuncInfo->allocateWWMSpill(MF, Reg, TRI->getSpillSize(*RC),
                                 TRI->getSpillAlign(*RC));
    }
  }

  const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs()
    && EnableSpillVGPRToAGPR;

  if (SpillVGPRToAGPR) {
    // To track the spill frame indices handled in this pass.
    BitVector SpillFIs(MFI.getObjectIndexEnd(), false);
    // Frame indices also used by non-VGPR spills (set by stack slot
    // coloring); those slots must stay alive.
    BitVector NonVGPRSpillFIs(MFI.getObjectIndexEnd(), false);

    bool SeenDbgInstr = false;

    for (MachineBasicBlock &MBB : MF) {
      // early_inc_range: eliminateFrameIndex may rewrite/erase MI.
      for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
        int FrameIndex;
        if (MI.isDebugInstr())
          SeenDbgInstr = true;

        if (TII->isVGPRSpill(MI)) {
          // Try to eliminate stack used by VGPR spills before frame
          // finalization.
          unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                                     AMDGPU::OpName::vaddr);
          int FI = MI.getOperand(FIOp).getIndex();
          Register VReg =
            TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
          if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI,
                                                TRI->isAGPR(MRI, VReg))) {
            assert(RS != nullptr);
            // Position the scavenger just after MI so register state is
            // accurate when the frame index is eliminated.
            RS->enterBasicBlockEnd(MBB);
            RS->backward(std::next(MI.getIterator()));
            TRI->eliminateFrameIndex(MI, 0, FIOp, RS);
            SpillFIs.set(FI);
            continue;
          }
        } else if (TII->isStoreToStackSlot(MI, FrameIndex) ||
                   TII->isLoadFromStackSlot(MI, FrameIndex))
          if (!MFI.isFixedObjectIndex(FrameIndex))
            NonVGPRSpillFIs.set(FrameIndex);
      }
    }

    // Stack slot coloring may assign different objects to the same stack slot.
    // If not, then the VGPR to AGPR spill slot is dead.
    for (unsigned FI : SpillFIs.set_bits())
      if (!NonVGPRSpillFIs.test(FI))
        FuncInfo->setVGPRToAGPRSpillDead(FI);

    // The AGPRs (and their paired copy VGPRs) now carry live values across
    // blocks; record them as live-ins everywhere.
    for (MachineBasicBlock &MBB : MF) {
      for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs())
        MBB.addLiveIn(Reg);

      for (MCPhysReg Reg : FuncInfo->getAGPRSpillVGPRs())
        MBB.addLiveIn(Reg);

      MBB.sortUniqueLiveIns();

      if (!SpillFIs.empty() && SeenDbgInstr) {
        // FIXME: The dead frame indices are replaced with a null register from
        // the debug value instructions. We should instead, update it with the
        // correct register value. But not sure the register value alone is
        // enough to describe the variable location — TODO confirm and fix.
        for (MachineInstr &MI : MBB) {
          if (MI.isDebugValue() && MI.getOperand(0).isFI() &&
              !MFI.isFixedObjectIndex(MI.getOperand(0).getIndex()) &&
              SpillFIs[MI.getOperand(0).getIndex()]) {
            MI.getOperand(0).ChangeToRegister(Register(), false /*isDef*/);
          }
        }
      }
    }
  }

  // At this point we've already allocated all spilled SGPRs to VGPRs if we
  // can. Any remaining SGPR spills will go to memory, so move them back to the
  // default stack.
  bool HaveSGPRToVMemSpill =
      FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ true);
  assert(allSGPRSpillsAreDead(MF) &&
         "SGPR spill should have been removed in SILowerSGPRSpills");

  // FIXME: The other checks should be redundant with allStackObjectsAreDead,
  // but currently hasNonSpillStackObjects is set only from source
  // allocas. Stack temps produced from legalization are not counted currently.
  if (!allStackObjectsAreDead(MFI)) {
    assert(RS && "RegScavenger required if spilling");

    // Add an emergency spill slot
    RS->addScavengingFrameIndex(FuncInfo->getScavengeFI(MFI, *TRI));

    // If we are spilling SGPRs to memory with a large frame, we may need a
    // second VGPR emergency frame index.
    if (HaveSGPRToVMemSpill &&
        allocateScavengingFrameIndexesNearIncomingSP(MF)) {
      RS->addScavengingFrameIndex(MFI.CreateStackObject(4, Align(4), false));
    }
  }
}
145281ad6265SDimitry Andric
processFunctionBeforeFrameIndicesReplaced(MachineFunction & MF,RegScavenger * RS) const145381ad6265SDimitry Andric void SIFrameLowering::processFunctionBeforeFrameIndicesReplaced(
145481ad6265SDimitry Andric MachineFunction &MF, RegScavenger *RS) const {
145581ad6265SDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
145681ad6265SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo();
145781ad6265SDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo();
145881ad6265SDimitry Andric SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
145981ad6265SDimitry Andric
146081ad6265SDimitry Andric if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
146181ad6265SDimitry Andric // On gfx908, we had initially reserved highest available VGPR for AGPR
146281ad6265SDimitry Andric // copy. Now since we are done with RA, check if there exist an unused VGPR
146381ad6265SDimitry Andric // which is lower than the eariler reserved VGPR before RA. If one exist,
146481ad6265SDimitry Andric // use it for AGPR copy instead of one reserved before RA.
146581ad6265SDimitry Andric Register VGPRForAGPRCopy = FuncInfo->getVGPRForAGPRCopy();
146681ad6265SDimitry Andric Register UnusedLowVGPR =
146781ad6265SDimitry Andric TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
146881ad6265SDimitry Andric if (UnusedLowVGPR && (TRI->getHWRegIndex(UnusedLowVGPR) <
146981ad6265SDimitry Andric TRI->getHWRegIndex(VGPRForAGPRCopy))) {
147006c3fb27SDimitry Andric // Reserve this newly identified VGPR (for AGPR copy)
147106c3fb27SDimitry Andric // reserved registers should already be frozen at this point
147206c3fb27SDimitry Andric // so we can avoid calling MRI.freezeReservedRegs and just use
147306c3fb27SDimitry Andric // MRI.reserveReg
147481ad6265SDimitry Andric FuncInfo->setVGPRForAGPRCopy(UnusedLowVGPR);
147506c3fb27SDimitry Andric MRI.reserveReg(UnusedLowVGPR, TRI);
147681ad6265SDimitry Andric }
14770b57cec5SDimitry Andric }
147806c3fb27SDimitry Andric // We initally reserved the highest available SGPR pair for long branches
147906c3fb27SDimitry Andric // now, after RA, we shift down to a lower unused one if one exists
148006c3fb27SDimitry Andric Register LongBranchReservedReg = FuncInfo->getLongBranchReservedReg();
148106c3fb27SDimitry Andric Register UnusedLowSGPR =
148206c3fb27SDimitry Andric TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_64RegClass, MF);
148306c3fb27SDimitry Andric   // If LongBranchReservedReg is null then we didn't find a long branch
148406c3fb27SDimitry Andric   // and never reserved a register to begin with, so there is nothing to
148506c3fb27SDimitry Andric   // shift down. Then if UnusedLowSGPR is null, there is no available lower
148606c3fb27SDimitry Andric   // register to use, so just keep the original one we set.
148706c3fb27SDimitry Andric if (LongBranchReservedReg && UnusedLowSGPR) {
148806c3fb27SDimitry Andric FuncInfo->setLongBranchReservedReg(UnusedLowSGPR);
148906c3fb27SDimitry Andric MRI.reserveReg(UnusedLowSGPR, TRI);
149006c3fb27SDimitry Andric }
14910b57cec5SDimitry Andric }
14920b57cec5SDimitry Andric
// The special SGPR spills like the one needed for FP, BP or any reserved
// registers delayed until frame lowering.
//
// Runs after register allocation. For each special prolog/epilog SGPR — the
// SGPR reserved for EXEC copies, the frame pointer, and the base pointer —
// either find a free scratch SGPR for it to live in, or record a spill
// (VGPR lane or stack) via getVGPRSpillLaneOrTempRegister().
void SIFrameLowering::determinePrologEpilogSGPRSaves(
    MachineFunction &MF, BitVector &SavedVGPRs,
    bool NeedExecCopyReservedReg) const {
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  // Units in LiveUnits are considered unavailable when searching for a
  // scratch register below.
  LiveRegUnits LiveUnits;
  LiveUnits.init(*TRI);
  // Initially mark callee saved registers as used so we will not choose them
  // while looking for scratch SGPRs.
  const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
  for (unsigned I = 0; CSRegs[I]; ++I)
    LiveUnits.addReg(CSRegs[I]);

  // Register class used for EXEC copies (the wave mask register class).
  const TargetRegisterClass &RC = *TRI->getWaveMaskRegClass();

  // Handle the register reserved for copies of EXEC. It is needed either when
  // the caller saw a WWM reg spill opcode (NeedExecCopyReservedReg) or when
  // the previously reserved register is actually used somewhere.
  Register ReservedRegForExecCopy = MFI->getSGPRForEXECCopy();
  if (NeedExecCopyReservedReg ||
      (ReservedRegForExecCopy &&
       MRI.isPhysRegUsed(ReservedRegForExecCopy, /*SkipRegMaskTest=*/true))) {
    MRI.reserveReg(ReservedRegForExecCopy, TRI);
    Register UnusedScratchReg = findUnusedRegister(MRI, LiveUnits, RC);
    if (UnusedScratchReg) {
      // If found any unused scratch SGPR, reserve the register itself for Exec
      // copy and there is no need for any spills in that case.
      MFI->setSGPRForEXECCopy(UnusedScratchReg);
      MRI.replaceRegWith(ReservedRegForExecCopy, UnusedScratchReg);
      LiveUnits.addReg(UnusedScratchReg);
    } else {
      // Needs spill.
      assert(!MFI->hasPrologEpilogSGPRSpillEntry(ReservedRegForExecCopy) &&
             "Re-reserving spill slot for EXEC copy register");
      getVGPRSpillLaneOrTempRegister(MF, LiveUnits, ReservedRegForExecCopy, RC,
                                     /*IncludeScratchCopy=*/false);
    }
  } else if (ReservedRegForExecCopy) {
    // Reset it at this point. There are no whole-wave copies and spills
    // encountered.
    MFI->setSGPRForEXECCopy(AMDGPU::NoRegister);
  }

  // hasFP only knows about stack objects that already exist. We're now
  // determining the stack slots that will be created, so we have to predict
  // them. Stack objects force FP usage with calls.
  //
  // Note a new VGPR CSR may be introduced if one is used for the spill, but we
  // don't want to report it here.
  //
  // FIXME: Is this really hasReservedCallFrame?
  const bool WillHaveFP =
      FrameInfo.hasCalls() &&
      (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));

  // Arrange a save location (scratch SGPR or spill) for the frame pointer.
  if (WillHaveFP || hasFP(MF)) {
    Register FramePtrReg = MFI->getFrameOffsetReg();
    assert(!MFI->hasPrologEpilogSGPRSpillEntry(FramePtrReg) &&
           "Re-reserving spill slot for FP");
    getVGPRSpillLaneOrTempRegister(MF, LiveUnits, FramePtrReg);
  }

  // Likewise for the base pointer, when one is required.
  if (TRI->hasBasePointer(MF)) {
    Register BasePtrReg = TRI->getBaseRegister();
    assert(!MFI->hasPrologEpilogSGPRSpillEntry(BasePtrReg) &&
           "Re-reserving spill slot for BP");
    getVGPRSpillLaneOrTempRegister(MF, LiveUnits, BasePtrReg);
  }
}
1564e8d8bef9SDimitry Andric
// Only report VGPRs to generic code.
//
// Scans the whole function once to (a) allocate WWM spill slots for VGPRs
// used by SGPR-spill pseudo instructions, (b) detect whether an EXEC-copy
// register is needed for WWM reg spills, and (c) find the return
// instruction so return-value VGPRs can be excluded from the CSR set.
void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                           BitVector &SavedVGPRs,
                                           RegScavenger *RS) const {
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  // If this is a function with the amdgpu_cs_chain[_preserve] calling
  // convention and it doesn't contain any calls to llvm.amdgcn.cs.chain, then
  // we don't need to save and restore anything.
  if (MFI->isChainFunction() && !MF.getFrameInfo().hasTailCall())
    return;

  MFI->shiftSpillPhysVGPRsToLowestRange(MF);

  TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS);
  // Entry functions have no callers, so nothing needs to be saved for them.
  if (MFI->isEntryFunction())
    return;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const SIInstrInfo *TII = ST.getInstrInfo();
  bool NeedExecCopyReservedReg = false;

  MachineInstr *ReturnMI = nullptr;
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      // WRITELANE instructions used for SGPR spills can overwrite the inactive
      // lanes of VGPRs and callee must spill and restore them even if they are
      // marked Caller-saved.

      // TODO: Handle this elsewhere at an early point. Walking through all MBBs
      // here would be a bad heuristic. A better way should be by calling
      // allocateWWMSpill during the regalloc pipeline whenever a physical
      // register is allocated for the intended virtual registers.
      if (MI.getOpcode() == AMDGPU::SI_SPILL_S32_TO_VGPR)
        MFI->allocateWWMSpill(MF, MI.getOperand(0).getReg());
      else if (MI.getOpcode() == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
        MFI->allocateWWMSpill(MF, MI.getOperand(1).getReg());
      else if (TII->isWWMRegSpillOpcode(MI.getOpcode()))
        NeedExecCopyReservedReg = true;
      else if (MI.getOpcode() == AMDGPU::SI_RETURN ||
               MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
               (MFI->isChainFunction() &&
                TII->isChainCallOpcode(MI.getOpcode()))) {
        // We expect all return to be the same size.
        assert(!ReturnMI ||
               (count_if(MI.operands(), [](auto Op) { return Op.isReg(); }) ==
                count_if(ReturnMI->operands(), [](auto Op) { return Op.isReg(); })));
        ReturnMI = &MI;
      }
    }
  }

  // Remove any VGPRs used in the return value because these do not need to be saved.
  // This prevents CSR restore from clobbering return VGPRs.
  if (ReturnMI) {
    for (auto &Op : ReturnMI->operands()) {
      if (Op.isReg())
        SavedVGPRs.reset(Op.getReg());
    }
  }

  // Ignore the SGPRs the default implementation found.
  SavedVGPRs.clearBitsNotInMask(TRI->getAllVectorRegMask());

  // Do not save AGPRs prior to GFX90A because there was no easy way to do so.
  // gfx908 had no AGPR loads and stores, so spilling them would also
  // require a temporary VGPR.
  if (!ST.hasGFX90AInsts())
    SavedVGPRs.clearBitsInMask(TRI->getAllAGPRRegMask());

  determinePrologEpilogSGPRSaves(MF, SavedVGPRs, NeedExecCopyReservedReg);

  // The Whole-Wave VGPRs need to be specially inserted in the prolog, so don't
  // allow the default insertion to handle them.
  for (auto &Reg : MFI->getWWMSpills())
    SavedVGPRs.reset(Reg.first);

  // Mark all lane VGPRs as BB LiveIns.
  for (MachineBasicBlock &MBB : MF) {
    for (auto &Reg : MFI->getWWMSpills())
      MBB.addLiveIn(Reg.first);

    MBB.sortUniqueLiveIns();
  }
}
16510b57cec5SDimitry Andric
determineCalleeSavesSGPR(MachineFunction & MF,BitVector & SavedRegs,RegScavenger * RS) const16520b57cec5SDimitry Andric void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
16530b57cec5SDimitry Andric BitVector &SavedRegs,
16540b57cec5SDimitry Andric RegScavenger *RS) const {
16550b57cec5SDimitry Andric TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
16560b57cec5SDimitry Andric const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
16570b57cec5SDimitry Andric if (MFI->isEntryFunction())
16580b57cec5SDimitry Andric return;
16590b57cec5SDimitry Andric
16600b57cec5SDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
16610b57cec5SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo();
16620b57cec5SDimitry Andric
16630b57cec5SDimitry Andric // The SP is specifically managed and we don't want extra spills of it.
16640b57cec5SDimitry Andric SavedRegs.reset(MFI->getStackPtrOffsetReg());
1665e8d8bef9SDimitry Andric
1666e8d8bef9SDimitry Andric const BitVector AllSavedRegs = SavedRegs;
1667fe6060f1SDimitry Andric SavedRegs.clearBitsInMask(TRI->getAllVectorRegMask());
1668e8d8bef9SDimitry Andric
1669349cc55cSDimitry Andric // We have to anticipate introducing CSR VGPR spills or spill of caller
1670349cc55cSDimitry Andric // save VGPR reserved for SGPR spills as we now always create stack entry
167104eeddc0SDimitry Andric // for it, if we don't have any stack objects already, since we require a FP
167204eeddc0SDimitry Andric // if there is a call and stack. We will allocate a VGPR for SGPR spills if
167304eeddc0SDimitry Andric // there are any SGPR spills. Whether they are CSR spills or otherwise.
1674e8d8bef9SDimitry Andric MachineFrameInfo &FrameInfo = MF.getFrameInfo();
1675349cc55cSDimitry Andric const bool WillHaveFP =
167604eeddc0SDimitry Andric FrameInfo.hasCalls() && (AllSavedRegs.any() || MFI->hasSpilledSGPRs());
1677e8d8bef9SDimitry Andric
1678e8d8bef9SDimitry Andric // FP will be specially managed like SP.
1679e8d8bef9SDimitry Andric if (WillHaveFP || hasFP(MF))
1680e8d8bef9SDimitry Andric SavedRegs.reset(MFI->getFrameOffsetReg());
168181ad6265SDimitry Andric
168281ad6265SDimitry Andric // Return address use with return instruction is hidden through the SI_RETURN
168381ad6265SDimitry Andric // pseudo. Given that and since the IPRA computes actual register usage and
168481ad6265SDimitry Andric // does not use CSR list, the clobbering of return address by function calls
168581ad6265SDimitry Andric // (D117243) or otherwise (D120922) is ignored/not seen by the IPRA's register
168681ad6265SDimitry Andric // usage collection. This will ensure save/restore of return address happens
168781ad6265SDimitry Andric // in those scenarios.
168881ad6265SDimitry Andric const MachineRegisterInfo &MRI = MF.getRegInfo();
168981ad6265SDimitry Andric Register RetAddrReg = TRI->getReturnAddressReg(MF);
169081ad6265SDimitry Andric if (!MFI->isEntryFunction() &&
169181ad6265SDimitry Andric (FrameInfo.hasCalls() || MRI.isPhysRegModified(RetAddrReg))) {
169281ad6265SDimitry Andric SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub0));
169381ad6265SDimitry Andric SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub1));
169481ad6265SDimitry Andric }
16950b57cec5SDimitry Andric }
16960b57cec5SDimitry Andric
assignCalleeSavedSpillSlots(MachineFunction & MF,const TargetRegisterInfo * TRI,std::vector<CalleeSavedInfo> & CSI) const16970b57cec5SDimitry Andric bool SIFrameLowering::assignCalleeSavedSpillSlots(
16980b57cec5SDimitry Andric MachineFunction &MF, const TargetRegisterInfo *TRI,
16990b57cec5SDimitry Andric std::vector<CalleeSavedInfo> &CSI) const {
17000b57cec5SDimitry Andric if (CSI.empty())
17010b57cec5SDimitry Andric return true; // Early exit if no callee saved registers are modified!
17020b57cec5SDimitry Andric
17030b57cec5SDimitry Andric const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
17045ffd83dbSDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
17055ffd83dbSDimitry Andric const SIRegisterInfo *RI = ST.getRegisterInfo();
17065ffd83dbSDimitry Andric Register FramePtrReg = FuncInfo->getFrameOffsetReg();
17075ffd83dbSDimitry Andric Register BasePtrReg = RI->getBaseRegister();
1708bdd1243dSDimitry Andric Register SGPRForFPSaveRestoreCopy =
1709bdd1243dSDimitry Andric FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
1710bdd1243dSDimitry Andric Register SGPRForBPSaveRestoreCopy =
1711bdd1243dSDimitry Andric FuncInfo->getScratchSGPRCopyDstReg(BasePtrReg);
1712bdd1243dSDimitry Andric if (!SGPRForFPSaveRestoreCopy && !SGPRForBPSaveRestoreCopy)
1713bdd1243dSDimitry Andric return false;
1714bdd1243dSDimitry Andric
17155ffd83dbSDimitry Andric unsigned NumModifiedRegs = 0;
17165ffd83dbSDimitry Andric
1717bdd1243dSDimitry Andric if (SGPRForFPSaveRestoreCopy)
17185ffd83dbSDimitry Andric NumModifiedRegs++;
1719bdd1243dSDimitry Andric if (SGPRForBPSaveRestoreCopy)
17205ffd83dbSDimitry Andric NumModifiedRegs++;
17215ffd83dbSDimitry Andric
17220b57cec5SDimitry Andric for (auto &CS : CSI) {
1723bdd1243dSDimitry Andric if (CS.getReg() == FramePtrReg && SGPRForFPSaveRestoreCopy) {
1724bdd1243dSDimitry Andric CS.setDstReg(SGPRForFPSaveRestoreCopy);
17255ffd83dbSDimitry Andric if (--NumModifiedRegs)
17265ffd83dbSDimitry Andric break;
1727bdd1243dSDimitry Andric } else if (CS.getReg() == BasePtrReg && SGPRForBPSaveRestoreCopy) {
1728bdd1243dSDimitry Andric CS.setDstReg(SGPRForBPSaveRestoreCopy);
17295ffd83dbSDimitry Andric if (--NumModifiedRegs)
17300b57cec5SDimitry Andric break;
17310b57cec5SDimitry Andric }
17320b57cec5SDimitry Andric }
17330b57cec5SDimitry Andric
17340b57cec5SDimitry Andric return false;
17350b57cec5SDimitry Andric }
17360b57cec5SDimitry Andric
allocateScavengingFrameIndexesNearIncomingSP(const MachineFunction & MF) const17374824e7fdSDimitry Andric bool SIFrameLowering::allocateScavengingFrameIndexesNearIncomingSP(
17384824e7fdSDimitry Andric const MachineFunction &MF) const {
17394824e7fdSDimitry Andric
17404824e7fdSDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
17414824e7fdSDimitry Andric const MachineFrameInfo &MFI = MF.getFrameInfo();
17425f757f3fSDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo();
17434824e7fdSDimitry Andric uint64_t EstStackSize = MFI.estimateStackSize(MF);
17444824e7fdSDimitry Andric uint64_t MaxOffset = EstStackSize - 1;
17454824e7fdSDimitry Andric
17464824e7fdSDimitry Andric // We need the emergency stack slots to be allocated in range of the
17474824e7fdSDimitry Andric // MUBUF/flat scratch immediate offset from the base register, so assign these
17484824e7fdSDimitry Andric // first at the incoming SP position.
17494824e7fdSDimitry Andric //
17504824e7fdSDimitry Andric // TODO: We could try sorting the objects to find a hole in the first bytes
17514824e7fdSDimitry Andric // rather than allocating as close to possible. This could save a lot of space
17524824e7fdSDimitry Andric // on frames with alignment requirements.
17534824e7fdSDimitry Andric if (ST.enableFlatScratch()) {
17544824e7fdSDimitry Andric if (TII->isLegalFLATOffset(MaxOffset, AMDGPUAS::PRIVATE_ADDRESS,
17554824e7fdSDimitry Andric SIInstrFlags::FlatScratch))
17564824e7fdSDimitry Andric return false;
17574824e7fdSDimitry Andric } else {
17585f757f3fSDimitry Andric if (TII->isLegalMUBUFImmOffset(MaxOffset))
17594824e7fdSDimitry Andric return false;
17604824e7fdSDimitry Andric }
17614824e7fdSDimitry Andric
17624824e7fdSDimitry Andric return true;
17634824e7fdSDimitry Andric }
17644824e7fdSDimitry Andric
eliminateCallFramePseudoInstr(MachineFunction & MF,MachineBasicBlock & MBB,MachineBasicBlock::iterator I) const17650b57cec5SDimitry Andric MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
17660b57cec5SDimitry Andric MachineFunction &MF,
17670b57cec5SDimitry Andric MachineBasicBlock &MBB,
17680b57cec5SDimitry Andric MachineBasicBlock::iterator I) const {
17690b57cec5SDimitry Andric int64_t Amount = I->getOperand(0).getImm();
17700b57cec5SDimitry Andric if (Amount == 0)
17710b57cec5SDimitry Andric return MBB.erase(I);
17720b57cec5SDimitry Andric
17730b57cec5SDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
17740b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo();
17750b57cec5SDimitry Andric const DebugLoc &DL = I->getDebugLoc();
17760b57cec5SDimitry Andric unsigned Opc = I->getOpcode();
17770b57cec5SDimitry Andric bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
17780b57cec5SDimitry Andric uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
17790b57cec5SDimitry Andric
17800b57cec5SDimitry Andric if (!hasReservedCallFrame(MF)) {
17815ffd83dbSDimitry Andric Amount = alignTo(Amount, getStackAlign());
17820b57cec5SDimitry Andric assert(isUInt<32>(Amount) && "exceeded stack address space size");
17830b57cec5SDimitry Andric const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
17845ffd83dbSDimitry Andric Register SPReg = MFI->getStackPtrOffsetReg();
17850b57cec5SDimitry Andric
1786fe6060f1SDimitry Andric Amount *= getScratchScaleFactor(ST);
1787fe6060f1SDimitry Andric if (IsDestroy)
1788fe6060f1SDimitry Andric Amount = -Amount;
1789349cc55cSDimitry Andric auto Add = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), SPReg)
17900b57cec5SDimitry Andric .addReg(SPReg)
1791fe6060f1SDimitry Andric .addImm(Amount);
1792349cc55cSDimitry Andric Add->getOperand(3).setIsDead(); // Mark SCC as dead.
17930b57cec5SDimitry Andric } else if (CalleePopAmount != 0) {
17940b57cec5SDimitry Andric llvm_unreachable("is this used?");
17950b57cec5SDimitry Andric }
17960b57cec5SDimitry Andric
17970b57cec5SDimitry Andric return MBB.erase(I);
17980b57cec5SDimitry Andric }
17990b57cec5SDimitry Andric
1800e8d8bef9SDimitry Andric /// Returns true if the frame will require a reference to the stack pointer.
1801e8d8bef9SDimitry Andric ///
1802e8d8bef9SDimitry Andric /// This is the set of conditions common to setting up the stack pointer in a
1803e8d8bef9SDimitry Andric /// kernel, and for using a frame pointer in a callable function.
1804e8d8bef9SDimitry Andric ///
1805e8d8bef9SDimitry Andric /// FIXME: Should also check hasOpaqueSPAdjustment and if any inline asm
1806e8d8bef9SDimitry Andric /// references SP.
frameTriviallyRequiresSP(const MachineFrameInfo & MFI)1807e8d8bef9SDimitry Andric static bool frameTriviallyRequiresSP(const MachineFrameInfo &MFI) {
1808e8d8bef9SDimitry Andric return MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint();
1809e8d8bef9SDimitry Andric }
1810e8d8bef9SDimitry Andric
1811e8d8bef9SDimitry Andric // The FP for kernels is always known 0, so we never really need to setup an
1812e8d8bef9SDimitry Andric // explicit register for it. However, DisableFramePointerElim will force us to
1813e8d8bef9SDimitry Andric // use a register for it.
hasFP(const MachineFunction & MF) const18140b57cec5SDimitry Andric bool SIFrameLowering::hasFP(const MachineFunction &MF) const {
18150b57cec5SDimitry Andric const MachineFrameInfo &MFI = MF.getFrameInfo();
18165ffd83dbSDimitry Andric
18175f757f3fSDimitry Andric // For entry & chain functions we can use an immediate offset in most cases,
18185f757f3fSDimitry Andric // so the presence of calls doesn't imply we need a distinct frame pointer.
18195ffd83dbSDimitry Andric if (MFI.hasCalls() &&
18205f757f3fSDimitry Andric !MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction() &&
18215f757f3fSDimitry Andric !MF.getInfo<SIMachineFunctionInfo>()->isChainFunction()) {
18220b57cec5SDimitry Andric // All offsets are unsigned, so need to be addressed in the same direction
18230b57cec5SDimitry Andric // as stack growth.
18240b57cec5SDimitry Andric
18250b57cec5SDimitry Andric // FIXME: This function is pretty broken, since it can be called before the
18260b57cec5SDimitry Andric // frame layout is determined or CSR spills are inserted.
18275ffd83dbSDimitry Andric return MFI.getStackSize() != 0;
18280b57cec5SDimitry Andric }
18290b57cec5SDimitry Andric
1830e8d8bef9SDimitry Andric return frameTriviallyRequiresSP(MFI) || MFI.isFrameAddressTaken() ||
1831fe6060f1SDimitry Andric MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->hasStackRealignment(
1832fe6060f1SDimitry Andric MF) ||
18330b57cec5SDimitry Andric MF.getTarget().Options.DisableFramePointerElim(MF);
18340b57cec5SDimitry Andric }
1835e8d8bef9SDimitry Andric
1836e8d8bef9SDimitry Andric // This is essentially a reduced version of hasFP for entry functions. Since the
1837e8d8bef9SDimitry Andric // stack pointer is known 0 on entry to kernels, we never really need an FP
1838e8d8bef9SDimitry Andric // register. We may need to initialize the stack pointer depending on the frame
1839e8d8bef9SDimitry Andric // properties, which logically overlaps many of the cases where an ordinary
1840e8d8bef9SDimitry Andric // function would require an FP.
18415f757f3fSDimitry Andric // Also used for chain functions. While not technically entry functions, chain
18425f757f3fSDimitry Andric // functions may need to set up a stack pointer in some situations.
requiresStackPointerReference(const MachineFunction & MF) const1843e8d8bef9SDimitry Andric bool SIFrameLowering::requiresStackPointerReference(
1844e8d8bef9SDimitry Andric const MachineFunction &MF) const {
1845e8d8bef9SDimitry Andric // Callable functions always require a stack pointer reference.
18465f757f3fSDimitry Andric assert((MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction() ||
18475f757f3fSDimitry Andric MF.getInfo<SIMachineFunctionInfo>()->isChainFunction()) &&
18485f757f3fSDimitry Andric "only expected to call this for entry points and chain functions");
1849e8d8bef9SDimitry Andric
1850e8d8bef9SDimitry Andric const MachineFrameInfo &MFI = MF.getFrameInfo();
1851e8d8bef9SDimitry Andric
1852e8d8bef9SDimitry Andric // Entry points ordinarily don't need to initialize SP. We have to set it up
1853e8d8bef9SDimitry Andric // for callees if there are any. Also note tail calls are impossible/don't
1854e8d8bef9SDimitry Andric // make any sense for kernels.
1855e8d8bef9SDimitry Andric if (MFI.hasCalls())
1856e8d8bef9SDimitry Andric return true;
1857e8d8bef9SDimitry Andric
1858e8d8bef9SDimitry Andric // We still need to initialize the SP if we're doing anything weird that
1859e8d8bef9SDimitry Andric // references the SP, like variable sized stack objects.
1860e8d8bef9SDimitry Andric return frameTriviallyRequiresSP(MFI);
1861e8d8bef9SDimitry Andric }
1862