//===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUTargetMachine.h"
#include "AMDGPUSubtarget.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include <cassert>
#include <optional>
#include <vector>

#define MAX_LANES 64

using namespace llvm;

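// Helper to recover the GCNTargetMachine from a subtarget, going through its
// target lowering; used below to construct the GWS resource pseudo source
// value.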
const GCNTargetMachine &getTM(const GCNSubtarget *STI) {
  const SITargetLowering *TLI = STI->getTargetLowering();
  return static_cast<const GCNTargetMachine &>(TLI->getTargetMachine());
}

SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
                                             const GCNSubtarget *STI)
  : AMDGPUMachineFunction(F, *STI),
    Mode(F),
    GWSResourcePSV(getTM(STI)),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    LDSKernelId(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    ImplicitBufferPtr(false),
    ImplicitArgPtr(false),
    GITPtrHigh(0xffffffff),
    HighBitsOf32BitAddress(0) {
  const GCNSubtarget &ST = *static_cast<const GCNSubtarget *>(STI);
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  WavesPerEU = ST.getWavesPerEU(F);

  Occupancy = ST.computeOccupancy(F, getLDSSize());
  CallingConv::ID CC = F.getCallingConv();

  VRegFlags.reserve(1024);

  // FIXME: Should have analysis or something rather than attribute to detect
  // calls.
  const bool HasCalls = F.hasFnAttribute("amdgpu-calls");

  const bool IsKernel = CC == CallingConv::AMDGPU_KERNEL ||
                        CC == CallingConv::SPIR_KERNEL;

  if (IsKernel) {
    if (!F.arg_empty() || ST.getImplicitArgNumBytes(F) != 0)
      KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  }

  MayNeedAGPRs = ST.hasMAIInsts();

  if (!isEntryFunction()) {
    if (CC != CallingConv::AMDGPU_Gfx)
      ArgInfo = AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;

    // TODO: Pick a high register, and shift down, similar to a kernel.
    FrameOffsetReg = AMDGPU::SGPR33;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    if (!ST.enableFlatScratch()) {
      // Non-entry functions have no special inputs for now; other registers
      // are still required for scratch access.
      ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;

      ArgInfo.PrivateSegmentBuffer =
        ArgDescriptor::createRegister(ScratchRSrcReg);
    }

    if (!F.hasFnAttribute("amdgpu-no-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    ImplicitArgPtr = false;
    MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
                               MaxKernArgAlign);

    if (ST.hasGFX90AInsts() &&
        ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
        !mayUseAGPRs(F))
      MayNeedAGPRs = false; // We will select all MAI with VGPR operands.
  }

  bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
  if (isAmdHsaOrMesa && !ST.enableFlatScratch())
    PrivateSegmentBuffer = true;
  else if (ST.isMesaGfxShader(F))
    ImplicitBufferPtr = true;

  if (!AMDGPU::isGraphics(CC) ||
      (CC == CallingConv::AMDGPU_CS && ST.hasArchitectedSGPRs())) {
    if (IsKernel || !F.hasFnAttribute("amdgpu-no-workgroup-id-x"))
      WorkGroupIDX = true;

    if (!F.hasFnAttribute("amdgpu-no-workgroup-id-y"))
      WorkGroupIDY = true;

    if (!F.hasFnAttribute("amdgpu-no-workgroup-id-z"))
      WorkGroupIDZ = true;
  }

  if (!AMDGPU::isGraphics(CC)) {
    if (IsKernel || !F.hasFnAttribute("amdgpu-no-workitem-id-x"))
      WorkItemIDX = true;

    if (!F.hasFnAttribute("amdgpu-no-workitem-id-y") &&
        ST.getMaxWorkitemID(F, 1) != 0)
      WorkItemIDY = true;

    if (!F.hasFnAttribute("amdgpu-no-workitem-id-z") &&
        ST.getMaxWorkitemID(F, 2) != 0)
      WorkItemIDZ = true;

    if (!F.hasFnAttribute("amdgpu-no-dispatch-ptr"))
      DispatchPtr = true;

    if (!F.hasFnAttribute("amdgpu-no-queue-ptr"))
      QueuePtr = true;

    if (!F.hasFnAttribute("amdgpu-no-dispatch-id"))
      DispatchID = true;

    if (!IsKernel && !F.hasFnAttribute("amdgpu-no-lds-kernel-id"))
      LDSKernelId = true;
  }

  // FIXME: This attribute is a hack; we just need an analysis on the function
  // to look for allocas.
  bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects");

  // TODO: This could be refined a lot. The attribute is a poor way of
  // detecting calls or stack objects that may require it before argument
  // lowering.
  if (ST.hasFlatAddressSpace() && isEntryFunction() &&
      (isAmdHsaOrMesa || ST.enableFlatScratch()) &&
      (HasCalls || HasStackObjects || ST.enableFlatScratch()) &&
      !ST.flatScratchIsArchitected()) {
    FlatScratchInit = true;
  }

  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    if (!ST.flatScratchIsArchitected()) {
      PrivateSegmentWaveByteOffset = true;

      // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
      if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
          (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
        ArgInfo.PrivateSegmentWaveByteOffset =
            ArgDescriptor::createRegister(AMDGPU::SGPR5);
    }
  }

  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
  StringRef S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GITPtrHigh);

  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
  S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, HighBitsOf32BitAddress);

  // On GFX908, in order to guarantee copying between AGPRs, we need a scratch
  // VGPR available at all times. For now, reserve the highest available VGPR.
  // After RA, shift it to the lowest available unused VGPR if one exists.
  if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
    VGPRForAGPRCopy =
        AMDGPU::VGPR_32RegClass.getRegister(ST.getMaxNumVGPRs(F) - 1);
  }
}

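// Clone this function info into DestMF when a machine function is copied.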
MachineFunctionInfo *SIMachineFunctionInfo::clone(
    BumpPtrAllocator &Allocator, MachineFunction &DestMF,
    const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
    const {
  return DestMF.cloneInfo<SIMachineFunctionInfo>(*this);
}

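// Clamp the tracked occupancy to both the waves-per-EU limit and the
// occupancy achievable with this function's LDS usage.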
void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
  limitOccupancy(getMaxWavesPerEU());
  const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();
  limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
                 MF.getFunction()));
}

Register SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  ArgInfo.PrivateSegmentBuffer =
    ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));
  NumUserSGPRs += 4;
  return ArgInfo.PrivateSegmentBuffer.getRegister();
}

Register SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchPtr.getRegister();
}

Register SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.QueuePtr.getRegister();
}

Register SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  ArgInfo.KernargSegmentPtr
    = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.KernargSegmentPtr.getRegister();
}

Register SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchID.getRegister();
}

Register SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.FlatScratchInit.getRegister();
}

Register SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
  ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.ImplicitBufferPtr.getRegister();
}

Register SIMachineFunctionInfo::addLDSKernelId() {
  ArgInfo.LDSKernelId = ArgDescriptor::createRegister(getNextUserSGPR());
  NumUserSGPRs += 1;
  return ArgInfo.LDSKernelId.getRegister();
}

void SIMachineFunctionInfo::allocateWWMSpill(MachineFunction &MF, Register VGPR,
                                             uint64_t Size, Align Alignment) {
  // Skip if it is an entry function or the register is already added.
  if (isEntryFunction() || WWMSpills.count(VGPR))
    return;

  WWMSpills.insert(std::make_pair(
      VGPR, MF.getFrameInfo().CreateSpillStackObject(Size, Alignment)));
}

// Separate out the callee-saved and scratch registers.
void SIMachineFunctionInfo::splitWWMSpillRegisters(
    MachineFunction &MF,
    SmallVectorImpl<std::pair<Register, int>> &CalleeSavedRegs,
    SmallVectorImpl<std::pair<Register, int>> &ScratchRegs) const {
  const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
  for (auto &Reg : WWMSpills) {
    if (isCalleeSavedReg(CSRegs, Reg.first))
      CalleeSavedRegs.push_back(Reg);
    else
      ScratchRegs.push_back(Reg);
  }
}

bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs,
                                             MCPhysReg Reg) const {
  for (unsigned I = 0; CSRegs[I]; ++I) {
    if (CSRegs[I] == Reg)
      return true;
  }

  return false;
}

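// Assign one lane of a virtual VGPR to the SGPR spill at frame index FI; a
// fresh virtual VGPR is created whenever the previous one has been filled.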
bool SIMachineFunctionInfo::allocateVirtualVGPRForSGPRSpills(
    MachineFunction &MF, int FI, unsigned LaneIndex) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  Register LaneVGPR;
  if (!LaneIndex) {
    LaneVGPR = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    SpillVGPRs.push_back(LaneVGPR);
  } else {
    LaneVGPR = SpillVGPRs.back();
  }

  SGPRSpillsToVirtualVGPRLanes[FI].push_back(
      SIRegisterInfo::SpilledReg(LaneVGPR, LaneIndex));
  return true;
}

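// Assign one lane of a physical VGPR to the prolog/epilog SGPR spill at frame
// index FI. A new unused VGPR is reserved as a WWM register when the previous
// one is full; returns false if no VGPR is left.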
bool SIMachineFunctionInfo::allocatePhysicalVGPRForSGPRSpills(
    MachineFunction &MF, int FI, unsigned LaneIndex) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  Register LaneVGPR;
  if (!LaneIndex) {
    LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
    if (LaneVGPR == AMDGPU::NoRegister) {
      // We have no VGPRs left for spilling SGPRs. Reset because we will not
      // partially spill the SGPR to VGPRs.
      SGPRSpillsToPhysicalVGPRLanes.erase(FI);
      return false;
    }

    allocateWWMSpill(MF, LaneVGPR);
    reserveWWMRegister(LaneVGPR);
    for (MachineBasicBlock &MBB : MF) {
      MBB.addLiveIn(LaneVGPR);
      MBB.sortUniqueLiveIns();
    }
  } else {
    LaneVGPR = WWMReservedRegs.back();
  }

  SGPRSpillsToPhysicalVGPRLanes[FI].push_back(
      SIRegisterInfo::SpilledReg(LaneVGPR, LaneIndex));
  return true;
}

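// Reserve VGPR lanes for the SGPR spill at frame index FI, using physical
// VGPRs for prolog/epilog spills and virtual VGPRs otherwise. Returns false if
// the spill cannot be fully covered by VGPR lanes.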
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPRLane(MachineFunction &MF,
                                                        int FI,
                                                        bool IsPrologEpilog) {
  std::vector<SIRegisterInfo::SpilledReg> &SpillLanes =
      IsPrologEpilog ? SGPRSpillsToPhysicalVGPRLanes[FI]
                     : SGPRSpillsToVirtualVGPRLanes[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  unsigned WaveSize = ST.getWavefrontSize();

  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;

  if (NumLanes > WaveSize)
    return false;

  assert(Size >= 4 && "invalid sgpr spill size");
  assert(ST.getRegisterInfo()->spillSGPRToVGPR() &&
         "not spilling SGPRs to VGPRs");

  unsigned &NumSpillLanes =
      IsPrologEpilog ? NumPhysicalVGPRSpillLanes : NumVirtualVGPRSpillLanes;

  for (unsigned I = 0; I < NumLanes; ++I, ++NumSpillLanes) {
    unsigned LaneIndex = (NumSpillLanes % WaveSize);

    bool Allocated = IsPrologEpilog
                         ? allocatePhysicalVGPRForSGPRSpills(MF, FI, LaneIndex)
                         : allocateVirtualVGPRForSGPRSpills(MF, FI, LaneIndex);
    if (!Allocated) {
      NumSpillLanes -= I;
      return false;
    }
  }

  return true;
}

/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
/// Either an AGPR is spilled to a VGPR or vice versa.
/// Returns true if \p FI can be eliminated completely.
bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
                                                    int FI,
                                                    bool isAGPRtoVGPR) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));

  auto &Spill = VGPRToAGPRSpills[FI];

  // This has already been allocated.
  if (!Spill.Lanes.empty())
    return Spill.FullyAllocated;

  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;
  Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);

  const TargetRegisterClass &RC =
      isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
  auto Regs = RC.getRegisters();

  auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  Spill.FullyAllocated = true;

  // FIXME: Move allocation logic out of MachineFunctionInfo and initialize
  // once.
  BitVector OtherUsedRegs;
  OtherUsedRegs.resize(TRI->getNumRegs());

  const uint32_t *CSRMask =
      TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
  if (CSRMask)
    OtherUsedRegs.setBitsInMask(CSRMask);

  // TODO: Should include register tuples, but doesn't matter with current
  // usage.
  for (MCPhysReg Reg : SpillAGPR)
    OtherUsedRegs.set(Reg);
  for (MCPhysReg Reg : SpillVGPR)
    OtherUsedRegs.set(Reg);

  SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
  for (int I = NumLanes - 1; I >= 0; --I) {
    NextSpillReg = std::find_if(
        NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
          return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
                 !OtherUsedRegs[Reg];
        });

    if (NextSpillReg == Regs.end()) { // Registers exhausted
      Spill.FullyAllocated = false;
      break;
    }

    OtherUsedRegs.set(*NextSpillReg);
    SpillRegs.push_back(*NextSpillReg);
    MRI.reserveReg(*NextSpillReg, TRI);
    Spill.Lanes[I] = *NextSpillReg++;
  }

  return Spill.FullyAllocated;
}

bool SIMachineFunctionInfo::removeDeadFrameIndices(
    MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs) {
  // Remove dead frame indices from the function frame, but keep FP & BP since
  // their spills haven't been inserted yet. Also erase the frame indices from
  // the `SGPRSpillsToVirtualVGPRLanes` map; otherwise, later passes such as
  // "stack slot coloring" could re-map the freed frame indices and introduce
  // subtle bugs.
  for (auto &R : make_early_inc_range(SGPRSpillsToVirtualVGPRLanes)) {
    MFI.RemoveStackObject(R.first);
    SGPRSpillsToVirtualVGPRLanes.erase(R.first);
  }

  // Remove the dead frame indices of CSR SGPRs which are spilled to physical
  // VGPR lanes during the SILowerSGPRSpills pass.
  if (!ResetSGPRSpillStackIDs) {
    for (auto &R : make_early_inc_range(SGPRSpillsToPhysicalVGPRLanes)) {
      MFI.RemoveStackObject(R.first);
      SGPRSpillsToPhysicalVGPRLanes.erase(R.first);
    }
  }
  bool HaveSGPRToMemory = false;

  if (ResetSGPRSpillStackIDs) {
    // All other SGPRs must be allocated on the default stack, so reset the
    // stack ID.
    for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd(); I != E;
         ++I) {
      if (!checkIndexInPrologEpilogSGPRSpills(I)) {
        if (MFI.getStackID(I) == TargetStackID::SGPRSpill) {
          MFI.setStackID(I, TargetStackID::Default);
          HaveSGPRToMemory = true;
        }
      }
    }
  }

  for (auto &R : VGPRToAGPRSpills) {
    if (R.second.IsDead)
      MFI.RemoveStackObject(R.first);
  }

  return HaveSGPRToMemory;
}

int SIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI,
                                         const SIRegisterInfo &TRI) {
  if (ScavengeFI)
    return *ScavengeFI;
  if (isEntryFunction()) {
    ScavengeFI = MFI.CreateFixedObject(
        TRI.getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
  } else {
    ScavengeFI = MFI.CreateStackObject(
        TRI.getSpillSize(AMDGPU::SGPR_32RegClass),
        TRI.getSpillAlign(AMDGPU::SGPR_32RegClass), false);
  }
  return *ScavengeFI;
}

MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
  assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
  return AMDGPU::SGPR0 + NumUserSGPRs;
}

MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
  return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
}

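// MachineRegisterInfo delegate callbacks: keep the per-virtual-register flag
// table sized and initialized as registers are created or cloned.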
void SIMachineFunctionInfo::MRI_NoteNewVirtualRegister(Register Reg) {
  VRegFlags.grow(Reg);
}

void SIMachineFunctionInfo::MRI_NoteCloneVirtualRegister(Register NewReg,
                                                         Register SrcReg) {
  VRegFlags.grow(NewReg);
  VRegFlags[NewReg] = VRegFlags[SrcReg];
}

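// Return the SGPR carrying the low half of the GIT (global information table)
// pointer on amdpal, or no register on other OSes.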
Register
SIMachineFunctionInfo::getGITPtrLoReg(const MachineFunction &MF) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  if (!ST.isAmdPalOS())
    return Register();
  Register GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in
  if (ST.hasMergedShaders()) {
    switch (MF.getFunction().getCallingConv()) {
    case CallingConv::AMDGPU_HS:
    case CallingConv::AMDGPU_GS:
      // Low GIT address is passed in s8 rather than s0 for an LS+HS or
      // ES+GS merged shader on gfx9+.
      GitPtrLo = AMDGPU::SGPR8;
      return GitPtrLo;
    default:
      return GitPtrLo;
    }
  }
  return GitPtrLo;
}

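// Print a register in the textual form used by the MIR serializer.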
static yaml::StringValue regToString(Register Reg,
                                     const TargetRegisterInfo &TRI) {
  yaml::StringValue Dest;
  {
    raw_string_ostream OS(Dest.Value);
    OS << printReg(Reg, &TRI);
  }
  return Dest;
}

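// Convert the in-memory argument descriptors to their YAML form; returns
// std::nullopt when no argument is set.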
static std::optional<yaml::SIArgumentInfo>
convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
                    const TargetRegisterInfo &TRI) {
  yaml::SIArgumentInfo AI;

  auto convertArg = [&](std::optional<yaml::SIArgument> &A,
                        const ArgDescriptor &Arg) {
    if (!Arg)
      return false;

    // Create a register or stack argument.
    yaml::SIArgument SA = yaml::SIArgument::createArgument(Arg.isRegister());
    if (Arg.isRegister()) {
      raw_string_ostream OS(SA.RegisterName.Value);
      OS << printReg(Arg.getRegister(), &TRI);
    } else
      SA.StackOffset = Arg.getStackOffset();
    // Check and update the optional mask.
    if (Arg.isMasked())
      SA.Mask = Arg.getMask();

    A = SA;
    return true;
  };

  bool Any = false;
  Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
  Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
  Any |= convertArg(AI.QueuePtr, ArgInfo.QueuePtr);
  Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr);
  Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID);
  Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit);
  Any |= convertArg(AI.LDSKernelId, ArgInfo.LDSKernelId);
  Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize);
  Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX);
  Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY);
  Any |= convertArg(AI.WorkGroupIDZ, ArgInfo.WorkGroupIDZ);
  Any |= convertArg(AI.WorkGroupInfo, ArgInfo.WorkGroupInfo);
  Any |= convertArg(AI.PrivateSegmentWaveByteOffset,
                    ArgInfo.PrivateSegmentWaveByteOffset);
  Any |= convertArg(AI.ImplicitArgPtr, ArgInfo.ImplicitArgPtr);
  Any |= convertArg(AI.ImplicitBufferPtr, ArgInfo.ImplicitBufferPtr);
  Any |= convertArg(AI.WorkItemIDX, ArgInfo.WorkItemIDX);
  Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY);
  Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ);

  if (Any)
    return AI;

  return std::nullopt;
}

yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
    const llvm::SIMachineFunctionInfo &MFI, const TargetRegisterInfo &TRI,
    const llvm::MachineFunction &MF)
    : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
      MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()),
      GDSSize(MFI.getGDSSize()),
      DynLDSAlign(MFI.getDynLDSAlign()), IsEntryFunction(MFI.isEntryFunction()),
      NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
      MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()),
      HasSpilledSGPRs(MFI.hasSpilledSGPRs()),
      HasSpilledVGPRs(MFI.hasSpilledVGPRs()),
      HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
      Occupancy(MFI.getOccupancy()),
      ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
      FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
      StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
      BytesInStackArgArea(MFI.getBytesInStackArgArea()),
      ReturnsVoid(MFI.returnsVoid()),
      ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)),
      PSInputAddr(MFI.getPSInputAddr()),
      PSInputEnable(MFI.getPSInputEnable()),
      Mode(MFI.getMode()) {
  for (Register Reg : MFI.getWWMReservedRegs())
    WWMReservedRegs.push_back(regToString(Reg, TRI));

  if (MFI.getLongBranchReservedReg())
    LongBranchReservedReg = regToString(MFI.getLongBranchReservedReg(), TRI);
  if (MFI.getVGPRForAGPRCopy())
    VGPRForAGPRCopy = regToString(MFI.getVGPRForAGPRCopy(), TRI);

  if (MFI.getSGPRForEXECCopy())
    SGPRForEXECCopy = regToString(MFI.getSGPRForEXECCopy(), TRI);

  auto SFI = MFI.getOptionalScavengeFI();
  if (SFI)
    ScavengeFI = yaml::FrameIndex(*SFI, MF.getFrameInfo());
}

void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
  MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
}

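// Initialize this function info from parsed MIR YAML. Returns true (and fills
// Error/SourceRange) on failure.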
bool SIMachineFunctionInfo::initializeBaseYamlFields(
    const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF,
    PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) {
  ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
  MaxKernArgAlign = YamlMFI.MaxKernArgAlign;
  LDSSize = YamlMFI.LDSSize;
  GDSSize = YamlMFI.GDSSize;
  DynLDSAlign = YamlMFI.DynLDSAlign;
  PSInputAddr = YamlMFI.PSInputAddr;
  PSInputEnable = YamlMFI.PSInputEnable;
  HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
  Occupancy = YamlMFI.Occupancy;
  IsEntryFunction = YamlMFI.IsEntryFunction;
  NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
  MemoryBound = YamlMFI.MemoryBound;
  WaveLimiter = YamlMFI.WaveLimiter;
  HasSpilledSGPRs = YamlMFI.HasSpilledSGPRs;
  HasSpilledVGPRs = YamlMFI.HasSpilledVGPRs;
  BytesInStackArgArea = YamlMFI.BytesInStackArgArea;
  ReturnsVoid = YamlMFI.ReturnsVoid;

  if (YamlMFI.ScavengeFI) {
    auto FIOrErr = YamlMFI.ScavengeFI->getFI(MF.getFrameInfo());
    if (!FIOrErr) {
      // Create a diagnostic for the frame index.
      const MemoryBuffer &Buffer =
          *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID());

      Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1, 1,
                           SourceMgr::DK_Error, toString(FIOrErr.takeError()),
                           "", std::nullopt, std::nullopt);
      SourceRange = YamlMFI.ScavengeFI->SourceRange;
      return true;
    }
    ScavengeFI = *FIOrErr;
  } else {
    ScavengeFI = std::nullopt;
  }
  return false;
}

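// Conservatively determine from the IR whether the function might use AGPRs:
// any inline asm with an 'a' register constraint or any call to a
// non-intrinsic callee counts as a potential use.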
bool SIMachineFunctionInfo::mayUseAGPRs(const Function &F) const {
  for (const BasicBlock &BB : F) {
    for (const Instruction &I : BB) {
      const auto *CB = dyn_cast<CallBase>(&I);
      if (!CB)
        continue;

      if (CB->isInlineAsm()) {
        const InlineAsm *IA = dyn_cast<InlineAsm>(CB->getCalledOperand());
        for (const auto &CI : IA->ParseConstraints()) {
          for (StringRef Code : CI.Codes) {
            Code.consume_front("{");
            if (Code.startswith("a"))
              return true;
          }
        }
        continue;
      }

      const Function *Callee =
          dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
      if (!Callee)
        return true;

      if (Callee->getIntrinsicID() == Intrinsic::not_intrinsic)
        return true;
    }
  }

  return false;
}

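// Determine whether the machine function actually uses AGPRs, caching the
// result in UsesAGPRs where it is safe to do so.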
bool SIMachineFunctionInfo::usesAGPRs(const MachineFunction &MF) const {
  if (UsesAGPRs)
    return *UsesAGPRs;

  if (!mayNeedAGPRs()) {
    UsesAGPRs = false;
    return false;
  }

  if (!AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv()) ||
      MF.getFrameInfo().hasCalls()) {
    UsesAGPRs = true;
    return true;
  }

  const MachineRegisterInfo &MRI = MF.getRegInfo();

  for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
    const Register Reg = Register::index2VirtReg(I);
    const TargetRegisterClass *RC = MRI.getRegClassOrNull(Reg);
    if (RC && SIRegisterInfo::isAGPRClass(RC)) {
      UsesAGPRs = true;
      return true;
    } else if (!RC && !MRI.use_empty(Reg) && MRI.getType(Reg).isValid()) {
      // Defer caching UsesAGPRs; the function may not be regbank selected yet.
      return true;
    }
  }

  for (MCRegister Reg : AMDGPU::AGPR_32RegClass) {
    if (MRI.isPhysRegUsed(Reg)) {
      UsesAGPRs = true;
      return true;
    }
  }

  UsesAGPRs = false;
  return false;
}