xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp (revision bdd1243df58e60e85101c09001d9812a789b6bc4)
10b57cec5SDimitry Andric //===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric 
90b57cec5SDimitry Andric #include "SIMachineFunctionInfo.h"
105ffd83dbSDimitry Andric #include "AMDGPUTargetMachine.h"
11fe6060f1SDimitry Andric #include "AMDGPUSubtarget.h"
12fe6060f1SDimitry Andric #include "SIRegisterInfo.h"
13fe6060f1SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
14fe6060f1SDimitry Andric #include "Utils/AMDGPUBaseInfo.h"
15fe6060f1SDimitry Andric #include "llvm/CodeGen/LiveIntervals.h"
16fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h"
17fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h"
18fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineFunction.h"
19fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
20fe6060f1SDimitry Andric #include "llvm/CodeGen/MIRParser/MIParser.h"
21fe6060f1SDimitry Andric #include "llvm/IR/CallingConv.h"
22fe6060f1SDimitry Andric #include "llvm/IR/DiagnosticInfo.h"
23fe6060f1SDimitry Andric #include "llvm/IR/Function.h"
24fe6060f1SDimitry Andric #include <cassert>
25*bdd1243dSDimitry Andric #include <optional>
26fe6060f1SDimitry Andric #include <vector>
270b57cec5SDimitry Andric 
280b57cec5SDimitry Andric #define MAX_LANES 64
290b57cec5SDimitry Andric 
300b57cec5SDimitry Andric using namespace llvm;
310b57cec5SDimitry Andric 
32*bdd1243dSDimitry Andric const GCNTargetMachine &getTM(const GCNSubtarget *STI) {
33*bdd1243dSDimitry Andric   const SITargetLowering *TLI = STI->getTargetLowering();
34*bdd1243dSDimitry Andric   return static_cast<const GCNTargetMachine &>(TLI->getTargetMachine());
35*bdd1243dSDimitry Andric }
36*bdd1243dSDimitry Andric 
/// Initialize per-function AMDGPU state from the IR function's calling
/// convention, attributes, and subtarget features. This decides which
/// preloaded kernel arguments (user/system SGPRs) and work-item/work-group
/// IDs the function requires before ISel runs.
SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
                                             const GCNSubtarget *STI)
  : AMDGPUMachineFunction(F, *STI),
    Mode(F),
    GWSResourcePSV(getTM(STI)),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    LDSKernelId(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    ImplicitBufferPtr(false),
    ImplicitArgPtr(false),
    GITPtrHigh(0xffffffff),
    HighBitsOf32BitAddress(0) {
  const GCNSubtarget &ST = *static_cast<const GCNSubtarget *>(STI);
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  WavesPerEU = ST.getWavesPerEU(F);

  // Initial occupancy estimate only accounts for LDS usage; it is refined
  // later via limitOccupancy().
  Occupancy = ST.computeOccupancy(F, getLDSSize());
  CallingConv::ID CC = F.getCallingConv();

  // FIXME: Should have analysis or something rather than attribute to detect
  // calls.
  const bool HasCalls = F.hasFnAttribute("amdgpu-calls");

  const bool IsKernel = CC == CallingConv::AMDGPU_KERNEL ||
                        CC == CallingConv::SPIR_KERNEL;

  if (IsKernel) {
    // Kernels need the kernarg segment pointer if they have any explicit
    // arguments or any implicit (hidden) arguments.
    if (!F.arg_empty() || ST.getImplicitArgNumBytes(F) != 0)
      KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  }

  // Assume AGPRs may be needed on any subtarget with MAI instructions;
  // possibly refined below for entry functions.
  MayNeedAGPRs = ST.hasMAIInsts();

  if (!isEntryFunction()) {
    if (CC != CallingConv::AMDGPU_Gfx)
      ArgInfo = AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;

    // TODO: Pick a high register, and shift down, similar to a kernel.
    FrameOffsetReg = AMDGPU::SGPR33;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    if (!ST.enableFlatScratch()) {
      // Non-entry functions have no special inputs for now, other registers
      // required for scratch access.
      ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;

      ArgInfo.PrivateSegmentBuffer =
        ArgDescriptor::createRegister(ScratchRSrcReg);
    }

    if (!F.hasFnAttribute("amdgpu-no-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    ImplicitArgPtr = false;
    MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
                               MaxKernArgAlign);

    if (ST.hasGFX90AInsts() &&
        ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
        !mayUseAGPRs(F))
      MayNeedAGPRs = false; // We will select all MAI with VGPR operands.
  }

  bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
  if (isAmdHsaOrMesa && !ST.enableFlatScratch())
    PrivateSegmentBuffer = true;
  else if (ST.isMesaGfxShader(F))
    ImplicitBufferPtr = true;

  // Compute (non-graphics) functions get their system-SGPR inputs unless the
  // corresponding "amdgpu-no-*" attribute proves the input is unused.
  if (!AMDGPU::isGraphics(CC)) {
    if (IsKernel || !F.hasFnAttribute("amdgpu-no-workgroup-id-x"))
      WorkGroupIDX = true;

    if (!F.hasFnAttribute("amdgpu-no-workgroup-id-y"))
      WorkGroupIDY = true;

    if (!F.hasFnAttribute("amdgpu-no-workgroup-id-z"))
      WorkGroupIDZ = true;

    if (IsKernel || !F.hasFnAttribute("amdgpu-no-workitem-id-x"))
      WorkItemIDX = true;

    // Y/Z work-item IDs are also dropped when the launch bounds prove the
    // dimension is degenerate (max ID 0).
    if (!F.hasFnAttribute("amdgpu-no-workitem-id-y") &&
        ST.getMaxWorkitemID(F, 1) != 0)
      WorkItemIDY = true;

    if (!F.hasFnAttribute("amdgpu-no-workitem-id-z") &&
        ST.getMaxWorkitemID(F, 2) != 0)
      WorkItemIDZ = true;

    if (!F.hasFnAttribute("amdgpu-no-dispatch-ptr"))
      DispatchPtr = true;

    if (!F.hasFnAttribute("amdgpu-no-queue-ptr"))
      QueuePtr = true;

    if (!F.hasFnAttribute("amdgpu-no-dispatch-id"))
      DispatchID = true;

    if (!IsKernel && !F.hasFnAttribute("amdgpu-no-lds-kernel-id"))
      LDSKernelId = true;
  }

  // FIXME: This attribute is a hack, we just need an analysis on the function
  // to look for allocas.
  bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects");

  // TODO: This could be refined a lot. The attribute is a poor way of
  // detecting calls or stack objects that may require it before argument
  // lowering.
  if (ST.hasFlatAddressSpace() && isEntryFunction() &&
      (isAmdHsaOrMesa || ST.enableFlatScratch()) &&
      (HasCalls || HasStackObjects || ST.enableFlatScratch()) &&
      !ST.flatScratchIsArchitected()) {
    FlatScratchInit = true;
  }

  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    if (!ST.flatScratchIsArchitected()) {
      PrivateSegmentWaveByteOffset = true;

      // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
      if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
          (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
        ArgInfo.PrivateSegmentWaveByteOffset =
            ArgDescriptor::createRegister(AMDGPU::SGPR5);
    }
  }

  // Optional attribute overrides, parsed as integers; GITPtrHigh retains its
  // 0xffffffff default when the attribute is absent.
  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
  StringRef S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GITPtrHigh);

  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
  S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, HighBitsOf32BitAddress);

  // On GFX908, in order to guarantee copying between AGPRs, we need a scratch
  // VGPR available at all times. For now, reserve highest available VGPR. After
  // RA, shift it to the lowest available unused VGPR if the one exist.
  if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
    VGPRForAGPRCopy =
        AMDGPU::VGPR_32RegClass.getRegister(ST.getMaxNumVGPRs(F) - 1);
  }
}
20581ad6265SDimitry Andric 
/// Copy this function info into \p DestMF's allocator for use by the
/// MIRCanonicalizer/copy-MF machinery. A plain member-wise copy suffices;
/// Src2DstMBB is unused because no basic-block pointers need remapping here.
MachineFunctionInfo *SIMachineFunctionInfo::clone(
    BumpPtrAllocator &Allocator, MachineFunction &DestMF,
    const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
    const {
  return DestMF.cloneInfo<SIMachineFunctionInfo>(*this);
}
2120b57cec5SDimitry Andric 
2130b57cec5SDimitry Andric void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
2140b57cec5SDimitry Andric   limitOccupancy(getMaxWavesPerEU());
2150b57cec5SDimitry Andric   const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();
2160b57cec5SDimitry Andric   limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
2170b57cec5SDimitry Andric                  MF.getFunction()));
2180b57cec5SDimitry Andric }
2190b57cec5SDimitry Andric 
2205ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addPrivateSegmentBuffer(
2210b57cec5SDimitry Andric   const SIRegisterInfo &TRI) {
2220b57cec5SDimitry Andric   ArgInfo.PrivateSegmentBuffer =
2230b57cec5SDimitry Andric     ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2248bcb0991SDimitry Andric     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));
2250b57cec5SDimitry Andric   NumUserSGPRs += 4;
2260b57cec5SDimitry Andric   return ArgInfo.PrivateSegmentBuffer.getRegister();
2270b57cec5SDimitry Andric }
2280b57cec5SDimitry Andric 
2295ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
2300b57cec5SDimitry Andric   ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2310b57cec5SDimitry Andric     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
2320b57cec5SDimitry Andric   NumUserSGPRs += 2;
2330b57cec5SDimitry Andric   return ArgInfo.DispatchPtr.getRegister();
2340b57cec5SDimitry Andric }
2350b57cec5SDimitry Andric 
2365ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
2370b57cec5SDimitry Andric   ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2380b57cec5SDimitry Andric     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
2390b57cec5SDimitry Andric   NumUserSGPRs += 2;
2400b57cec5SDimitry Andric   return ArgInfo.QueuePtr.getRegister();
2410b57cec5SDimitry Andric }
2420b57cec5SDimitry Andric 
2435ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
2440b57cec5SDimitry Andric   ArgInfo.KernargSegmentPtr
2450b57cec5SDimitry Andric     = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2460b57cec5SDimitry Andric     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
2470b57cec5SDimitry Andric   NumUserSGPRs += 2;
2480b57cec5SDimitry Andric   return ArgInfo.KernargSegmentPtr.getRegister();
2490b57cec5SDimitry Andric }
2500b57cec5SDimitry Andric 
2515ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
2520b57cec5SDimitry Andric   ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2530b57cec5SDimitry Andric     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
2540b57cec5SDimitry Andric   NumUserSGPRs += 2;
2550b57cec5SDimitry Andric   return ArgInfo.DispatchID.getRegister();
2560b57cec5SDimitry Andric }
2570b57cec5SDimitry Andric 
2585ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
2590b57cec5SDimitry Andric   ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2600b57cec5SDimitry Andric     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
2610b57cec5SDimitry Andric   NumUserSGPRs += 2;
2620b57cec5SDimitry Andric   return ArgInfo.FlatScratchInit.getRegister();
2630b57cec5SDimitry Andric }
2640b57cec5SDimitry Andric 
2655ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
2660b57cec5SDimitry Andric   ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2670b57cec5SDimitry Andric     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
2680b57cec5SDimitry Andric   NumUserSGPRs += 2;
2690b57cec5SDimitry Andric   return ArgInfo.ImplicitBufferPtr.getRegister();
2700b57cec5SDimitry Andric }
2710b57cec5SDimitry Andric 
272fcaf7f86SDimitry Andric Register SIMachineFunctionInfo::addLDSKernelId() {
273fcaf7f86SDimitry Andric   ArgInfo.LDSKernelId = ArgDescriptor::createRegister(getNextUserSGPR());
274fcaf7f86SDimitry Andric   NumUserSGPRs += 1;
275fcaf7f86SDimitry Andric   return ArgInfo.LDSKernelId.getRegister();
276fcaf7f86SDimitry Andric }
277fcaf7f86SDimitry Andric 
278*bdd1243dSDimitry Andric void SIMachineFunctionInfo::allocateWWMSpill(MachineFunction &MF, Register VGPR,
279*bdd1243dSDimitry Andric                                              uint64_t Size, Align Alignment) {
280*bdd1243dSDimitry Andric   // Skip if it is an entry function or the register is already added.
281*bdd1243dSDimitry Andric   if (isEntryFunction() || WWMSpills.count(VGPR))
282*bdd1243dSDimitry Andric     return;
283*bdd1243dSDimitry Andric 
284*bdd1243dSDimitry Andric   WWMSpills.insert(std::make_pair(
285*bdd1243dSDimitry Andric       VGPR, MF.getFrameInfo().CreateSpillStackObject(Size, Alignment)));
286*bdd1243dSDimitry Andric }
287*bdd1243dSDimitry Andric 
288*bdd1243dSDimitry Andric // Separate out the callee-saved and scratch registers.
289*bdd1243dSDimitry Andric void SIMachineFunctionInfo::splitWWMSpillRegisters(
290*bdd1243dSDimitry Andric     MachineFunction &MF,
291*bdd1243dSDimitry Andric     SmallVectorImpl<std::pair<Register, int>> &CalleeSavedRegs,
292*bdd1243dSDimitry Andric     SmallVectorImpl<std::pair<Register, int>> &ScratchRegs) const {
293*bdd1243dSDimitry Andric   const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
294*bdd1243dSDimitry Andric   for (auto &Reg : WWMSpills) {
295*bdd1243dSDimitry Andric     if (isCalleeSavedReg(CSRegs, Reg.first))
296*bdd1243dSDimitry Andric       CalleeSavedRegs.push_back(Reg);
297*bdd1243dSDimitry Andric     else
298*bdd1243dSDimitry Andric       ScratchRegs.push_back(Reg);
299*bdd1243dSDimitry Andric   }
300*bdd1243dSDimitry Andric }
301*bdd1243dSDimitry Andric 
3025ffd83dbSDimitry Andric bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs,
303*bdd1243dSDimitry Andric                                              MCPhysReg Reg) const {
3040b57cec5SDimitry Andric   for (unsigned I = 0; CSRegs[I]; ++I) {
3050b57cec5SDimitry Andric     if (CSRegs[I] == Reg)
3060b57cec5SDimitry Andric       return true;
3070b57cec5SDimitry Andric   }
3080b57cec5SDimitry Andric 
3090b57cec5SDimitry Andric   return false;
3100b57cec5SDimitry Andric }
3110b57cec5SDimitry Andric 
/// Allocate one VGPR lane for spilling a 32-bit piece of the SGPR mapped to
/// frame index \p FI. When \p LaneIndex is 0 a fresh unused VGPR is claimed
/// (and made live-in to every block); otherwise the most recently claimed
/// spill VGPR is reused. Returns false if no VGPR is available.
bool SIMachineFunctionInfo::allocateVGPRForSGPRSpills(MachineFunction &MF,
                                                      int FI,
                                                      unsigned LaneIndex) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  Register LaneVGPR;
  if (!LaneIndex) {
    LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
    if (LaneVGPR == AMDGPU::NoRegister) {
      // We have no VGPRs left for spilling SGPRs. Reset because we will not
      // partially spill the SGPR to VGPRs.
      SGPRSpillToVGPRLanes.erase(FI);
      return false;
    }

    SpillVGPRs.push_back(LaneVGPR);
    // Add this register as live-in to all blocks to avoid machine verifier
    // complaining about use of an undefined physical register.
    for (MachineBasicBlock &BB : MF)
      BB.addLiveIn(LaneVGPR);
  } else {
    // Continue filling lanes of the last VGPR claimed for SGPR spilling.
    LaneVGPR = SpillVGPRs.back();
  }

  SGPRSpillToVGPRLanes[FI].push_back(
      SIRegisterInfo::SpilledReg(LaneVGPR, LaneIndex));
  return true;
}
341*bdd1243dSDimitry Andric 
/// Prolog/epilog variant of allocateVGPRForSGPRSpills: allocate one VGPR
/// lane for frame index \p FI. A freshly claimed VGPR gets a WWM spill slot
/// (via allocateWWMSpill) instead of block live-ins; later lanes reuse the
/// most recently added WWM spill register. Returns false if no VGPR is free.
bool SIMachineFunctionInfo::allocateVGPRForPrologEpilogSGPRSpills(
    MachineFunction &MF, int FI, unsigned LaneIndex) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  Register LaneVGPR;
  if (!LaneIndex) {
    LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
    if (LaneVGPR == AMDGPU::NoRegister) {
      // We have no VGPRs left for spilling SGPRs. Reset because we will not
      // partially spill the SGPR to VGPRs.
      PrologEpilogSGPRSpillToVGPRLanes.erase(FI);
      return false;
    }

    allocateWWMSpill(MF, LaneVGPR);
  } else {
    // Continue filling lanes of the last WWM spill register.
    LaneVGPR = WWMSpills.back().first;
  }

  PrologEpilogSGPRSpillToVGPRLanes[FI].push_back(
      SIRegisterInfo::SpilledReg(LaneVGPR, LaneIndex));
  return true;
}
366*bdd1243dSDimitry Andric 
/// Allocate VGPR lanes to hold the SGPR spill at frame index \p FI, one lane
/// per 32 bits of the spill. Returns true if all lanes were allocated (or
/// already were); on failure the partially advanced lane counter is rolled
/// back so the SGPR is not partially spilled to VGPRs.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPRLane(MachineFunction &MF,
                                                        int FI,
                                                        bool IsPrologEpilog) {
  std::vector<SIRegisterInfo::SpilledReg> &SpillLanes =
      IsPrologEpilog ? PrologEpilogSGPRSpillToVGPRLanes[FI]
                     : SGPRSpillToVGPRLanes[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  unsigned WaveSize = ST.getWavefrontSize();

  unsigned Size = FrameInfo.getObjectSize(FI);
  // One VGPR lane holds one 32-bit piece of the spilled SGPR value.
  unsigned NumLanes = Size / 4;

  if (NumLanes > WaveSize)
    return false;

  assert(Size >= 4 && "invalid sgpr spill size");
  assert(ST.getRegisterInfo()->spillSGPRToVGPR() &&
         "not spilling SGPRs to VGPRs");

  // Lane counter shared across all spills of the same category; wraps to a
  // new VGPR every WaveSize lanes.
  unsigned &NumSpillLanes =
      IsPrologEpilog ? NumVGPRPrologEpilogSpillLanes : NumVGPRSpillLanes;

  for (unsigned I = 0; I < NumLanes; ++I, ++NumSpillLanes) {
    unsigned LaneIndex = (NumSpillLanes % WaveSize);

    bool Allocated =
        IsPrologEpilog
            ? allocateVGPRForPrologEpilogSGPRSpills(MF, FI, LaneIndex)
            : allocateVGPRForSGPRSpills(MF, FI, LaneIndex);
    if (!Allocated) {
      // Roll back the lanes consumed so far for this frame index.
      NumSpillLanes -= I;
      return false;
    }
  }

  return true;
}
4100b57cec5SDimitry Andric 
/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
/// Either AGPR is spilled to VGPR to vice versa.
/// Returns true if a \p FI can be eliminated completely.
bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
                                                    int FI,
                                                    bool isAGPRtoVGPR) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST =  MF.getSubtarget<GCNSubtarget>();

  assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));

  auto &Spill = VGPRToAGPRSpills[FI];

  // This has already been allocated.
  if (!Spill.Lanes.empty())
    return Spill.FullyAllocated;

  // One register per 32 bits of the spill slot.
  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;
  Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);

  // Spill into the opposite register file of the source.
  const TargetRegisterClass &RC =
      isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
  auto Regs = RC.getRegisters();

  auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  Spill.FullyAllocated = true;

  // FIXME: Move allocation logic out of MachineFunctionInfo and initialize
  // once.
  BitVector OtherUsedRegs;
  OtherUsedRegs.resize(TRI->getNumRegs());

  const uint32_t *CSRMask =
      TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
  if (CSRMask)
    OtherUsedRegs.setBitsInMask(CSRMask);

  // TODO: Should include register tuples, but doesn't matter with current
  // usage.
  for (MCPhysReg Reg : SpillAGPR)
    OtherUsedRegs.set(Reg);
  for (MCPhysReg Reg : SpillVGPR)
    OtherUsedRegs.set(Reg);

  // Scan forward through the class's registers, assigning the first free,
  // unallocated, non-callee-saved register to each lane.
  SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
  for (int I = NumLanes - 1; I >= 0; --I) {
    NextSpillReg = std::find_if(
        NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
          return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
                 !OtherUsedRegs[Reg];
        });

    if (NextSpillReg == Regs.end()) { // Registers exhausted
      Spill.FullyAllocated = false;
      break;
    }

    OtherUsedRegs.set(*NextSpillReg);
    SpillRegs.push_back(*NextSpillReg);
    MRI.reserveReg(*NextSpillReg, TRI);
    Spill.Lanes[I] = *NextSpillReg++;
  }

  return Spill.FullyAllocated;
}
4790b57cec5SDimitry Andric 
/// Drop frame objects whose contents now live in registers: all SGPR-to-VGPR
/// spill slots and any dead VGPR-to-AGPR spill slots. When
/// \p ResetSGPRSpillStackIDs is set, remaining SGPR-spill-ID objects (except
/// prolog/epilog spills) are moved back to the default stack. Returns true
/// if some SGPR spill still ends up in memory.
bool SIMachineFunctionInfo::removeDeadFrameIndices(
    MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs) {
  // Remove dead frame indices from function frame. And also make sure to remove
  // the frame indices from `SGPRSpillToVGPRLanes` data structure, otherwise, it
  // could result in an unexpected side effect and bug, in case of any
  // re-mapping of freed frame indices by later pass(es) like "stack slot
  // coloring".
  for (auto &R : make_early_inc_range(SGPRSpillToVGPRLanes)) {
    MFI.RemoveStackObject(R.first);
    SGPRSpillToVGPRLanes.erase(R.first);
  }

  bool HaveSGPRToMemory = false;

  if (ResetSGPRSpillStackIDs) {
    // All other SGPRs must be allocated on the default stack, so reset the
    // stack ID.
    for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd(); I != E;
         ++I) {
      // Prolog/epilog SGPR spills keep their SGPRSpill stack ID.
      if (!checkIndexInPrologEpilogSGPRSpills(I)) {
        if (MFI.getStackID(I) == TargetStackID::SGPRSpill) {
          MFI.setStackID(I, TargetStackID::Default);
          HaveSGPRToMemory = true;
        }
      }
    }
  }

  // Dead VGPR-to-AGPR spill slots no longer need stack storage.
  for (auto &R : VGPRToAGPRSpills) {
    if (R.second.IsDead)
      MFI.RemoveStackObject(R.first);
  }

  return HaveSGPRToMemory;
}
51581ad6265SDimitry Andric 
516fe6060f1SDimitry Andric int SIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI,
517fe6060f1SDimitry Andric                                          const SIRegisterInfo &TRI) {
518fe6060f1SDimitry Andric   if (ScavengeFI)
519fe6060f1SDimitry Andric     return *ScavengeFI;
520fe6060f1SDimitry Andric   if (isEntryFunction()) {
521fe6060f1SDimitry Andric     ScavengeFI = MFI.CreateFixedObject(
522fe6060f1SDimitry Andric         TRI.getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
523fe6060f1SDimitry Andric   } else {
524fe6060f1SDimitry Andric     ScavengeFI = MFI.CreateStackObject(
525fe6060f1SDimitry Andric         TRI.getSpillSize(AMDGPU::SGPR_32RegClass),
526fe6060f1SDimitry Andric         TRI.getSpillAlign(AMDGPU::SGPR_32RegClass), false);
527fe6060f1SDimitry Andric   }
528fe6060f1SDimitry Andric   return *ScavengeFI;
529fe6060f1SDimitry Andric }
530fe6060f1SDimitry Andric 
5310b57cec5SDimitry Andric MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
5320b57cec5SDimitry Andric   assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
5330b57cec5SDimitry Andric   return AMDGPU::SGPR0 + NumUserSGPRs;
5340b57cec5SDimitry Andric }
5350b57cec5SDimitry Andric 
5360b57cec5SDimitry Andric MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
5370b57cec5SDimitry Andric   return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
5380b57cec5SDimitry Andric }
5390b57cec5SDimitry Andric 
5405ffd83dbSDimitry Andric Register
5415ffd83dbSDimitry Andric SIMachineFunctionInfo::getGITPtrLoReg(const MachineFunction &MF) const {
5425ffd83dbSDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
5435ffd83dbSDimitry Andric   if (!ST.isAmdPalOS())
5445ffd83dbSDimitry Andric     return Register();
5455ffd83dbSDimitry Andric   Register GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in
5465ffd83dbSDimitry Andric   if (ST.hasMergedShaders()) {
5475ffd83dbSDimitry Andric     switch (MF.getFunction().getCallingConv()) {
5485ffd83dbSDimitry Andric     case CallingConv::AMDGPU_HS:
5495ffd83dbSDimitry Andric     case CallingConv::AMDGPU_GS:
5505ffd83dbSDimitry Andric       // Low GIT address is passed in s8 rather than s0 for an LS+HS or
5515ffd83dbSDimitry Andric       // ES+GS merged shader on gfx9+.
5525ffd83dbSDimitry Andric       GitPtrLo = AMDGPU::SGPR8;
5535ffd83dbSDimitry Andric       return GitPtrLo;
5545ffd83dbSDimitry Andric     default:
5555ffd83dbSDimitry Andric       return GitPtrLo;
5565ffd83dbSDimitry Andric     }
5575ffd83dbSDimitry Andric   }
5585ffd83dbSDimitry Andric   return GitPtrLo;
5595ffd83dbSDimitry Andric }
5605ffd83dbSDimitry Andric 
5615ffd83dbSDimitry Andric static yaml::StringValue regToString(Register Reg,
5620b57cec5SDimitry Andric                                      const TargetRegisterInfo &TRI) {
5630b57cec5SDimitry Andric   yaml::StringValue Dest;
5640b57cec5SDimitry Andric   {
5650b57cec5SDimitry Andric     raw_string_ostream OS(Dest.Value);
5660b57cec5SDimitry Andric     OS << printReg(Reg, &TRI);
5670b57cec5SDimitry Andric   }
5680b57cec5SDimitry Andric   return Dest;
5690b57cec5SDimitry Andric }
5700b57cec5SDimitry Andric 
571*bdd1243dSDimitry Andric static std::optional<yaml::SIArgumentInfo>
5720b57cec5SDimitry Andric convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
5730b57cec5SDimitry Andric                     const TargetRegisterInfo &TRI) {
5740b57cec5SDimitry Andric   yaml::SIArgumentInfo AI;
5750b57cec5SDimitry Andric 
576*bdd1243dSDimitry Andric   auto convertArg = [&](std::optional<yaml::SIArgument> &A,
5770b57cec5SDimitry Andric                         const ArgDescriptor &Arg) {
5780b57cec5SDimitry Andric     if (!Arg)
5790b57cec5SDimitry Andric       return false;
5800b57cec5SDimitry Andric 
5810b57cec5SDimitry Andric     // Create a register or stack argument.
5820b57cec5SDimitry Andric     yaml::SIArgument SA = yaml::SIArgument::createArgument(Arg.isRegister());
5830b57cec5SDimitry Andric     if (Arg.isRegister()) {
5840b57cec5SDimitry Andric       raw_string_ostream OS(SA.RegisterName.Value);
5850b57cec5SDimitry Andric       OS << printReg(Arg.getRegister(), &TRI);
5860b57cec5SDimitry Andric     } else
5870b57cec5SDimitry Andric       SA.StackOffset = Arg.getStackOffset();
5880b57cec5SDimitry Andric     // Check and update the optional mask.
5890b57cec5SDimitry Andric     if (Arg.isMasked())
5900b57cec5SDimitry Andric       SA.Mask = Arg.getMask();
5910b57cec5SDimitry Andric 
5920b57cec5SDimitry Andric     A = SA;
5930b57cec5SDimitry Andric     return true;
5940b57cec5SDimitry Andric   };
5950b57cec5SDimitry Andric 
5960b57cec5SDimitry Andric   bool Any = false;
5970b57cec5SDimitry Andric   Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
5980b57cec5SDimitry Andric   Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
5990b57cec5SDimitry Andric   Any |= convertArg(AI.QueuePtr, ArgInfo.QueuePtr);
6000b57cec5SDimitry Andric   Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr);
6010b57cec5SDimitry Andric   Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID);
6020b57cec5SDimitry Andric   Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit);
603fcaf7f86SDimitry Andric   Any |= convertArg(AI.LDSKernelId, ArgInfo.LDSKernelId);
6040b57cec5SDimitry Andric   Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize);
6050b57cec5SDimitry Andric   Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX);
6060b57cec5SDimitry Andric   Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY);
6070b57cec5SDimitry Andric   Any |= convertArg(AI.WorkGroupIDZ, ArgInfo.WorkGroupIDZ);
6080b57cec5SDimitry Andric   Any |= convertArg(AI.WorkGroupInfo, ArgInfo.WorkGroupInfo);
6090b57cec5SDimitry Andric   Any |= convertArg(AI.PrivateSegmentWaveByteOffset,
6100b57cec5SDimitry Andric                     ArgInfo.PrivateSegmentWaveByteOffset);
6110b57cec5SDimitry Andric   Any |= convertArg(AI.ImplicitArgPtr, ArgInfo.ImplicitArgPtr);
6120b57cec5SDimitry Andric   Any |= convertArg(AI.ImplicitBufferPtr, ArgInfo.ImplicitBufferPtr);
6130b57cec5SDimitry Andric   Any |= convertArg(AI.WorkItemIDX, ArgInfo.WorkItemIDX);
6140b57cec5SDimitry Andric   Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY);
6150b57cec5SDimitry Andric   Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ);
6160b57cec5SDimitry Andric 
6170b57cec5SDimitry Andric   if (Any)
6180b57cec5SDimitry Andric     return AI;
6190b57cec5SDimitry Andric 
620*bdd1243dSDimitry Andric   return std::nullopt;
6210b57cec5SDimitry Andric }
6220b57cec5SDimitry Andric 
// Build the YAML mirror of an in-memory SIMachineFunctionInfo for MIR
// serialization. Scalar state is copied directly; registers are rendered to
// strings, and the optional scavenge frame index is translated through
// MF's frame info.
yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
    const llvm::SIMachineFunctionInfo &MFI, const TargetRegisterInfo &TRI,
    const llvm::MachineFunction &MF)
    : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
      MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()),
      GDSSize(MFI.getGDSSize()),
      DynLDSAlign(MFI.getDynLDSAlign()), IsEntryFunction(MFI.isEntryFunction()),
      NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
      MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()),
      HasSpilledSGPRs(MFI.hasSpilledSGPRs()),
      HasSpilledVGPRs(MFI.hasSpilledVGPRs()),
      HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
      Occupancy(MFI.getOccupancy()),
      ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
      FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
      StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
      BytesInStackArgArea(MFI.getBytesInStackArgArea()),
      ReturnsVoid(MFI.returnsVoid()),
      ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)), Mode(MFI.getMode()) {
  // Reserved WWM registers are serialized as printable register names.
  for (Register Reg : MFI.getWWMReservedRegs())
    WWMReservedRegs.push_back(regToString(Reg, TRI));

  // Only emit the AGPR-copy VGPR when one has actually been designated.
  if (MFI.getVGPRForAGPRCopy())
    VGPRForAGPRCopy = regToString(MFI.getVGPRForAGPRCopy(), TRI);
  auto SFI = MFI.getOptionalScavengeFI();
  if (SFI)
    ScavengeFI = yaml::FrameIndex(*SFI, MF.getFrameInfo());
}
6510b57cec5SDimitry Andric 
// YAML I/O hook: delegate to the MappingTraits specialization that describes
// this struct's fields to the MIR parser/printer.
void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
  MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
}
6550b57cec5SDimitry Andric 
6560b57cec5SDimitry Andric bool SIMachineFunctionInfo::initializeBaseYamlFields(
657fe6060f1SDimitry Andric     const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF,
658fe6060f1SDimitry Andric     PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) {
6590b57cec5SDimitry Andric   ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
66081ad6265SDimitry Andric   MaxKernArgAlign = YamlMFI.MaxKernArgAlign;
6610b57cec5SDimitry Andric   LDSSize = YamlMFI.LDSSize;
66281ad6265SDimitry Andric   GDSSize = YamlMFI.GDSSize;
663e8d8bef9SDimitry Andric   DynLDSAlign = YamlMFI.DynLDSAlign;
6648bcb0991SDimitry Andric   HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
665e8d8bef9SDimitry Andric   Occupancy = YamlMFI.Occupancy;
6660b57cec5SDimitry Andric   IsEntryFunction = YamlMFI.IsEntryFunction;
6670b57cec5SDimitry Andric   NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
6680b57cec5SDimitry Andric   MemoryBound = YamlMFI.MemoryBound;
6690b57cec5SDimitry Andric   WaveLimiter = YamlMFI.WaveLimiter;
670e8d8bef9SDimitry Andric   HasSpilledSGPRs = YamlMFI.HasSpilledSGPRs;
671e8d8bef9SDimitry Andric   HasSpilledVGPRs = YamlMFI.HasSpilledVGPRs;
67281ad6265SDimitry Andric   BytesInStackArgArea = YamlMFI.BytesInStackArgArea;
67381ad6265SDimitry Andric   ReturnsVoid = YamlMFI.ReturnsVoid;
674fe6060f1SDimitry Andric 
675fe6060f1SDimitry Andric   if (YamlMFI.ScavengeFI) {
676fe6060f1SDimitry Andric     auto FIOrErr = YamlMFI.ScavengeFI->getFI(MF.getFrameInfo());
677fe6060f1SDimitry Andric     if (!FIOrErr) {
678fe6060f1SDimitry Andric       // Create a diagnostic for a the frame index.
679fe6060f1SDimitry Andric       const MemoryBuffer &Buffer =
680fe6060f1SDimitry Andric           *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID());
681fe6060f1SDimitry Andric 
682fe6060f1SDimitry Andric       Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1, 1,
683fe6060f1SDimitry Andric                            SourceMgr::DK_Error, toString(FIOrErr.takeError()),
684*bdd1243dSDimitry Andric                            "", std::nullopt, std::nullopt);
685fe6060f1SDimitry Andric       SourceRange = YamlMFI.ScavengeFI->SourceRange;
686fe6060f1SDimitry Andric       return true;
687fe6060f1SDimitry Andric     }
688fe6060f1SDimitry Andric     ScavengeFI = *FIOrErr;
689fe6060f1SDimitry Andric   } else {
690*bdd1243dSDimitry Andric     ScavengeFI = std::nullopt;
691fe6060f1SDimitry Andric   }
6920b57cec5SDimitry Andric   return false;
6930b57cec5SDimitry Andric }
6945ffd83dbSDimitry Andric 
695*bdd1243dSDimitry Andric bool SIMachineFunctionInfo::mayUseAGPRs(const Function &F) const {
696*bdd1243dSDimitry Andric   for (const BasicBlock &BB : F) {
69781ad6265SDimitry Andric     for (const Instruction &I : BB) {
69881ad6265SDimitry Andric       const auto *CB = dyn_cast<CallBase>(&I);
69981ad6265SDimitry Andric       if (!CB)
70081ad6265SDimitry Andric         continue;
70181ad6265SDimitry Andric 
70281ad6265SDimitry Andric       if (CB->isInlineAsm()) {
70381ad6265SDimitry Andric         const InlineAsm *IA = dyn_cast<InlineAsm>(CB->getCalledOperand());
70481ad6265SDimitry Andric         for (const auto &CI : IA->ParseConstraints()) {
70581ad6265SDimitry Andric           for (StringRef Code : CI.Codes) {
70681ad6265SDimitry Andric             Code.consume_front("{");
70781ad6265SDimitry Andric             if (Code.startswith("a"))
70881ad6265SDimitry Andric               return true;
70981ad6265SDimitry Andric           }
71081ad6265SDimitry Andric         }
71181ad6265SDimitry Andric         continue;
71281ad6265SDimitry Andric       }
71381ad6265SDimitry Andric 
71481ad6265SDimitry Andric       const Function *Callee =
71581ad6265SDimitry Andric           dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
71681ad6265SDimitry Andric       if (!Callee)
71781ad6265SDimitry Andric         return true;
71881ad6265SDimitry Andric 
71981ad6265SDimitry Andric       if (Callee->getIntrinsicID() == Intrinsic::not_intrinsic)
72081ad6265SDimitry Andric         return true;
72181ad6265SDimitry Andric     }
72281ad6265SDimitry Andric   }
72381ad6265SDimitry Andric 
72481ad6265SDimitry Andric   return false;
72581ad6265SDimitry Andric }
72681ad6265SDimitry Andric 
// Determine whether \p MF actually uses AGPRs, caching the answer in
// UsesAGPRs whenever it is safe to do so.
bool SIMachineFunctionInfo::usesAGPRs(const MachineFunction &MF) const {
  // Return the previously computed answer, if any.
  if (UsesAGPRs)
    return *UsesAGPRs;

  if (!mayNeedAGPRs()) {
    UsesAGPRs = false;
    return false;
  }

  // Non-entry functions and functions that make calls are conservatively
  // assumed to use AGPRs, since their full register usage is unknown here.
  if (!AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv()) ||
      MF.getFrameInfo().hasCalls()) {
    UsesAGPRs = true;
    return true;
  }

  const MachineRegisterInfo &MRI = MF.getRegInfo();

  // Scan all virtual registers for any constrained to an AGPR class.
  for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
    const Register Reg = Register::index2VirtReg(I);
    const TargetRegisterClass *RC = MRI.getRegClassOrNull(Reg);
    if (RC && SIRegisterInfo::isAGPRClass(RC)) {
      UsesAGPRs = true;
      return true;
    } else if (!RC && !MRI.use_empty(Reg) && MRI.getType(Reg).isValid()) {
      // Defer caching UsesAGPRs, function might not yet been regbank selected.
      return true;
    }
  }

  // Also check for direct uses of physical AGPRs.
  for (MCRegister Reg : AMDGPU::AGPR_32RegClass) {
    if (MRI.isPhysRegUsed(Reg)) {
      UsesAGPRs = true;
      return true;
    }
  }

  UsesAGPRs = false;
  return false;
}
766