//===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIRegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include <cassert>
#include <optional>
#include <vector>

#define MAX_LANES 64

using namespace llvm;

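// Convenience helper: recover the GCNTargetMachine that owns this subtarget
// by way of its SITargetLowering.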
const GCNTargetMachine &getTM(const GCNSubtarget *STI) {
  const SITargetLowering *TLI = STI->getTargetLowering();
  return static_cast<const GCNTargetMachine &>(TLI->getTargetMachine());
}

SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
                                             const GCNSubtarget *STI)
    : AMDGPUMachineFunction(F, *STI), Mode(F, *STI), GWSResourcePSV(getTM(STI)),
      UserSGPRInfo(F, *STI), WorkGroupIDX(false), WorkGroupIDY(false),
      WorkGroupIDZ(false), WorkGroupInfo(false), LDSKernelId(false),
      PrivateSegmentWaveByteOffset(false), WorkItemIDX(false),
      WorkItemIDY(false), WorkItemIDZ(false), ImplicitArgPtr(false),
      GITPtrHigh(0xffffffff), HighBitsOf32BitAddress(0) {
  const GCNSubtarget &ST = *static_cast<const GCNSubtarget *>(STI);
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  WavesPerEU = ST.getWavesPerEU(F);

  Occupancy = ST.computeOccupancy(F, getLDSSize());
  CallingConv::ID CC = F.getCallingConv();

  VRegFlags.reserve(1024);

  const bool IsKernel = CC == CallingConv::AMDGPU_KERNEL ||
                        CC == CallingConv::SPIR_KERNEL;

  if (IsKernel) {
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  }

  MayNeedAGPRs = ST.hasMAIInsts();

  if (AMDGPU::isChainCC(CC)) {
    // Chain functions don't receive an SP from their caller, but are free to
    // set one up. For now, we can use s32 to match what amdgpu_gfx functions
    // would use if called, but this can be revisited.
    // FIXME: Only reserve this if we actually need it.
    StackPtrOffsetReg = AMDGPU::SGPR32;

    ScratchRSrcReg = AMDGPU::SGPR48_SGPR49_SGPR50_SGPR51;

    ArgInfo.PrivateSegmentBuffer =
        ArgDescriptor::createRegister(ScratchRSrcReg);

    ImplicitArgPtr = false;
  } else if (!isEntryFunction()) {
    if (CC != CallingConv::AMDGPU_Gfx)
      ArgInfo = AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;

    // TODO: Pick a high register, and shift down, similar to a kernel.
    FrameOffsetReg = AMDGPU::SGPR33;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    if (!ST.enableFlatScratch()) {
      // Non-entry functions have no special inputs for now, other than the
      // registers required for scratch access.
      ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;

      ArgInfo.PrivateSegmentBuffer =
        ArgDescriptor::createRegister(ScratchRSrcReg);
    }

    if (!F.hasFnAttribute("amdgpu-no-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    ImplicitArgPtr = false;
    MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
                               MaxKernArgAlign);

    if (ST.hasGFX90AInsts() &&
        ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
        !mayUseAGPRs(F))
      MayNeedAGPRs = false; // We will select all MAI with VGPR operands.
  }

  if (!AMDGPU::isGraphics(CC) ||
      (CC == CallingConv::AMDGPU_CS && ST.hasArchitectedSGPRs())) {
    if (IsKernel || !F.hasFnAttribute("amdgpu-no-workgroup-id-x"))
      WorkGroupIDX = true;

    if (!F.hasFnAttribute("amdgpu-no-workgroup-id-y"))
      WorkGroupIDY = true;

    if (!F.hasFnAttribute("amdgpu-no-workgroup-id-z"))
      WorkGroupIDZ = true;
  }

  if (!AMDGPU::isGraphics(CC)) {
    if (IsKernel || !F.hasFnAttribute("amdgpu-no-workitem-id-x"))
      WorkItemIDX = true;

    if (!F.hasFnAttribute("amdgpu-no-workitem-id-y") &&
        ST.getMaxWorkitemID(F, 1) != 0)
      WorkItemIDY = true;

    if (!F.hasFnAttribute("amdgpu-no-workitem-id-z") &&
        ST.getMaxWorkitemID(F, 2) != 0)
      WorkItemIDZ = true;

    if (!IsKernel && !F.hasFnAttribute("amdgpu-no-lds-kernel-id"))
      LDSKernelId = true;
  }

  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    if (!ST.flatScratchIsArchitected()) {
      PrivateSegmentWaveByteOffset = true;

      // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
      if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
          (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
        ArgInfo.PrivateSegmentWaveByteOffset =
            ArgDescriptor::createRegister(AMDGPU::SGPR5);
    }
  }

  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
  StringRef S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GITPtrHigh);

  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
  S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, HighBitsOf32BitAddress);

  // On GFX908, in order to guarantee copying between AGPRs, we need a scratch
  // VGPR available at all times. For now, reserve the highest available VGPR.
  // After RA, shift it to the lowest available unused VGPR if one exists.
  if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
    VGPRForAGPRCopy =
        AMDGPU::VGPR_32RegClass.getRegister(ST.getMaxNumVGPRs(F) - 1);
  }
}

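// Copy this function info into a cloned MachineFunction (DestMF).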
MachineFunctionInfo *SIMachineFunctionInfo::clone(
    BumpPtrAllocator &Allocator, MachineFunction &DestMF,
    const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
    const {
  return DestMF.cloneInfo<SIMachineFunctionInfo>(*this);
}

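// Clamp the cached occupancy to both the wave limit and the occupancy allowed
// by this function's LDS usage.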
void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
  limitOccupancy(getMaxWavesPerEU());
  const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();
  limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
                 MF.getFunction()));
}

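// The add* helpers below assign the next free user SGPRs (starting at
// getNextUserSGPR()) to the corresponding ABI argument and advance
// NumUserSGPRs by the width of that argument.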
Register SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  ArgInfo.PrivateSegmentBuffer =
    ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));
  NumUserSGPRs += 4;
  return ArgInfo.PrivateSegmentBuffer.getRegister();
}

Register SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchPtr.getRegister();
}

Register SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.QueuePtr.getRegister();
}

Register SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  ArgInfo.KernargSegmentPtr
    = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.KernargSegmentPtr.getRegister();
}

Register SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchID.getRegister();
}

Register SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.FlatScratchInit.getRegister();
}

Register SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
  ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.ImplicitBufferPtr.getRegister();
}

Register SIMachineFunctionInfo::addLDSKernelId() {
  ArgInfo.LDSKernelId = ArgDescriptor::createRegister(getNextUserSGPR());
  NumUserSGPRs += 1;
  return ArgInfo.LDSKernelId.getRegister();
}

SmallVectorImpl<MCRegister> *SIMachineFunctionInfo::addPreloadedKernArg(
    const SIRegisterInfo &TRI, const TargetRegisterClass *RC,
    unsigned AllocSizeDWord, int KernArgIdx, int PaddingSGPRs) {
  assert(!ArgInfo.PreloadKernArgs.count(KernArgIdx) &&
         "Preload kernel argument allocated twice.");
  NumUserSGPRs += PaddingSGPRs;
  // If the available register tuples are aligned with the kernarg to be
  // preloaded, use that register; otherwise we need to use a set of SGPRs
  // and merge them.
  Register PreloadReg =
      TRI.getMatchingSuperReg(getNextUserSGPR(), AMDGPU::sub0, RC);
  if (PreloadReg &&
      (RC == &AMDGPU::SReg_32RegClass || RC == &AMDGPU::SReg_64RegClass)) {
    ArgInfo.PreloadKernArgs[KernArgIdx].Regs.push_back(PreloadReg);
    NumUserSGPRs += AllocSizeDWord;
  } else {
    for (unsigned I = 0; I < AllocSizeDWord; ++I) {
      ArgInfo.PreloadKernArgs[KernArgIdx].Regs.push_back(getNextUserSGPR());
      NumUserSGPRs++;
    }
  }

  // Track the actual number of SGPRs that HW will preload to.
  UserSGPRInfo.allocKernargPreloadSGPRs(AllocSizeDWord + PaddingSGPRs);
  return &ArgInfo.PreloadKernArgs[KernArgIdx].Regs;
}

void SIMachineFunctionInfo::allocateWWMSpill(MachineFunction &MF, Register VGPR,
                                             uint64_t Size, Align Alignment) {
  // Skip if it is an entry function or the register is already added.
  if (isEntryFunction() || WWMSpills.count(VGPR))
    return;

  // Skip if this is a function with the amdgpu_cs_chain or
  // amdgpu_cs_chain_preserve calling convention and this is a scratch register.
  // We never need to allocate a spill for these because we don't even need to
  // restore the inactive lanes for them (they're scratchier than the usual
  // scratch registers).
  if (isChainFunction() && SIRegisterInfo::isChainScratchRegister(VGPR))
    return;

  WWMSpills.insert(std::make_pair(
      VGPR, MF.getFrameInfo().CreateSpillStackObject(Size, Alignment)));
}

// Separate out the callee-saved and scratch registers.
void SIMachineFunctionInfo::splitWWMSpillRegisters(
    MachineFunction &MF,
    SmallVectorImpl<std::pair<Register, int>> &CalleeSavedRegs,
    SmallVectorImpl<std::pair<Register, int>> &ScratchRegs) const {
  const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
  for (auto &Reg : WWMSpills) {
    if (isCalleeSavedReg(CSRegs, Reg.first))
      CalleeSavedRegs.push_back(Reg);
    else
      ScratchRegs.push_back(Reg);
  }
}

bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs,
                                             MCPhysReg Reg) const {
  for (unsigned I = 0; CSRegs[I]; ++I) {
    if (CSRegs[I] == Reg)
      return true;
  }

  return false;
}

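// Record an SGPR spill lane in a virtual VGPR for frame index FI. A fresh
// virtual VGPR is created whenever LaneIndex wraps back to zero; otherwise the
// most recently created one is reused.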
bool SIMachineFunctionInfo::allocateVirtualVGPRForSGPRSpills(
    MachineFunction &MF, int FI, unsigned LaneIndex) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  Register LaneVGPR;
  if (!LaneIndex) {
    LaneVGPR = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    SpillVGPRs.push_back(LaneVGPR);
  } else {
    LaneVGPR = SpillVGPRs.back();
  }

  SGPRSpillsToVirtualVGPRLanes[FI].push_back(
      SIRegisterInfo::SpilledReg(LaneVGPR, LaneIndex));
  return true;
}

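// Same as above, but allocates the lane in an unused physical VGPR that is
// reserved as a WWM register and added as a live-in to every block. Returns
// false if no unused VGPR is available.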
bool SIMachineFunctionInfo::allocatePhysicalVGPRForSGPRSpills(
    MachineFunction &MF, int FI, unsigned LaneIndex) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  Register LaneVGPR;
  if (!LaneIndex) {
    LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
    if (LaneVGPR == AMDGPU::NoRegister) {
      // We have no VGPRs left for spilling SGPRs. Reset because we will not
      // partially spill the SGPR to VGPRs.
      SGPRSpillsToPhysicalVGPRLanes.erase(FI);
      return false;
    }

    allocateWWMSpill(MF, LaneVGPR);
    reserveWWMRegister(LaneVGPR);
    for (MachineBasicBlock &MBB : MF) {
      MBB.addLiveIn(LaneVGPR);
      MBB.sortUniqueLiveIns();
    }
    SpillPhysVGPRs.push_back(LaneVGPR);
  } else {
    LaneVGPR = SpillPhysVGPRs.back();
  }

  SGPRSpillsToPhysicalVGPRLanes[FI].push_back(
      SIRegisterInfo::SpilledReg(LaneVGPR, LaneIndex));
  return true;
}

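// Allocate enough VGPR lanes to hold the whole SGPR spill at frame index FI:
// physical (WWM) VGPR lanes when IsPrologEpilog is set, virtual VGPR lanes
// otherwise.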
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPRLane(MachineFunction &MF,
                                                        int FI,
                                                        bool IsPrologEpilog) {
  std::vector<SIRegisterInfo::SpilledReg> &SpillLanes =
      IsPrologEpilog ? SGPRSpillsToPhysicalVGPRLanes[FI]
                     : SGPRSpillsToVirtualVGPRLanes[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  unsigned WaveSize = ST.getWavefrontSize();

  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;

  if (NumLanes > WaveSize)
    return false;

  assert(Size >= 4 && "invalid sgpr spill size");
  assert(ST.getRegisterInfo()->spillSGPRToVGPR() &&
         "not spilling SGPRs to VGPRs");

  unsigned &NumSpillLanes =
      IsPrologEpilog ? NumPhysicalVGPRSpillLanes : NumVirtualVGPRSpillLanes;

  for (unsigned I = 0; I < NumLanes; ++I, ++NumSpillLanes) {
    unsigned LaneIndex = (NumSpillLanes % WaveSize);

    bool Allocated = IsPrologEpilog
                         ? allocatePhysicalVGPRForSGPRSpills(MF, FI, LaneIndex)
                         : allocateVirtualVGPRForSGPRSpills(MF, FI, LaneIndex);
    if (!Allocated) {
      NumSpillLanes -= I;
      return false;
    }
  }

  return true;
}

/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
/// Either AGPR is spilled to VGPR or vice versa.
/// Returns true if \p FI can be eliminated completely.
bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
                                                    int FI,
                                                    bool isAGPRtoVGPR) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST =  MF.getSubtarget<GCNSubtarget>();

  assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));

  auto &Spill = VGPRToAGPRSpills[FI];

  // This has already been allocated.
  if (!Spill.Lanes.empty())
    return Spill.FullyAllocated;

  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;
  Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);

  const TargetRegisterClass &RC =
      isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
  auto Regs = RC.getRegisters();

  auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  Spill.FullyAllocated = true;

  // FIXME: Move allocation logic out of MachineFunctionInfo and initialize
  // once.
  BitVector OtherUsedRegs;
  OtherUsedRegs.resize(TRI->getNumRegs());

  const uint32_t *CSRMask =
      TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
  if (CSRMask)
    OtherUsedRegs.setBitsInMask(CSRMask);

  // TODO: Should include register tuples, but doesn't matter with current
  // usage.
  for (MCPhysReg Reg : SpillAGPR)
    OtherUsedRegs.set(Reg);
  for (MCPhysReg Reg : SpillVGPR)
    OtherUsedRegs.set(Reg);

  SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
  for (int I = NumLanes - 1; I >= 0; --I) {
    NextSpillReg = std::find_if(
        NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
          return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
                 !OtherUsedRegs[Reg];
        });

    if (NextSpillReg == Regs.end()) { // Registers exhausted
      Spill.FullyAllocated = false;
      break;
    }

    OtherUsedRegs.set(*NextSpillReg);
    SpillRegs.push_back(*NextSpillReg);
    MRI.reserveReg(*NextSpillReg, TRI);
    Spill.Lanes[I] = *NextSpillReg++;
  }

  return Spill.FullyAllocated;
}

bool SIMachineFunctionInfo::removeDeadFrameIndices(
    MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs) {
  // Remove dead frame indices from the function frame, but keep FP & BP since
  // spills for them haven't been inserted yet. Also remove the frame indices
  // from the `SGPRSpillsToVirtualVGPRLanes` map; otherwise a later pass such
  // as "stack slot coloring" could re-map the freed frame indices and cause
  // unexpected side effects.
  for (auto &R : make_early_inc_range(SGPRSpillsToVirtualVGPRLanes)) {
    MFI.RemoveStackObject(R.first);
    SGPRSpillsToVirtualVGPRLanes.erase(R.first);
  }

  // Remove the dead frame indices of CSR SGPRs which are spilled to physical
  // VGPR lanes during the SILowerSGPRSpills pass.
  if (!ResetSGPRSpillStackIDs) {
    for (auto &R : make_early_inc_range(SGPRSpillsToPhysicalVGPRLanes)) {
      MFI.RemoveStackObject(R.first);
      SGPRSpillsToPhysicalVGPRLanes.erase(R.first);
    }
  }
  bool HaveSGPRToMemory = false;

  if (ResetSGPRSpillStackIDs) {
    // All other SGPRs must be allocated on the default stack, so reset the
    // stack ID.
    for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd(); I != E;
         ++I) {
      if (!checkIndexInPrologEpilogSGPRSpills(I)) {
        if (MFI.getStackID(I) == TargetStackID::SGPRSpill) {
          MFI.setStackID(I, TargetStackID::Default);
          HaveSGPRToMemory = true;
        }
      }
    }
  }

  for (auto &R : VGPRToAGPRSpills) {
    if (R.second.IsDead)
      MFI.RemoveStackObject(R.first);
  }

  return HaveSGPRToMemory;
}

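// Lazily create the frame index used by the register scavenger: a fixed object
// at offset 0 when this function is at the bottom of the stack, otherwise an
// ordinary stack object sized for one 32-bit SGPR.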
int SIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI,
                                         const SIRegisterInfo &TRI) {
  if (ScavengeFI)
    return *ScavengeFI;
  if (isBottomOfStack()) {
    ScavengeFI = MFI.CreateFixedObject(
        TRI.getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
  } else {
    ScavengeFI = MFI.CreateStackObject(
        TRI.getSpillSize(AMDGPU::SGPR_32RegClass),
        TRI.getSpillAlign(AMDGPU::SGPR_32RegClass), false);
  }
  return *ScavengeFI;
}

MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
  assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
  return AMDGPU::SGPR0 + NumUserSGPRs;
}

MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
  return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
}

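// MachineRegisterInfo delegate hooks: keep the per-virtual-register flag table
// in sync as new virtual registers are created or cloned.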
void SIMachineFunctionInfo::MRI_NoteNewVirtualRegister(Register Reg) {
  VRegFlags.grow(Reg);
}

void SIMachineFunctionInfo::MRI_NoteCloneVirtualRegister(Register NewReg,
                                                         Register SrcReg) {
  VRegFlags.grow(NewReg);
  VRegFlags[NewReg] = VRegFlags[SrcReg];
}

Register
SIMachineFunctionInfo::getGITPtrLoReg(const MachineFunction &MF) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  if (!ST.isAmdPalOS())
    return Register();
  Register GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in
  if (ST.hasMergedShaders()) {
    switch (MF.getFunction().getCallingConv()) {
    case CallingConv::AMDGPU_HS:
    case CallingConv::AMDGPU_GS:
      // Low GIT address is passed in s8 rather than s0 for an LS+HS or
      // ES+GS merged shader on gfx9+.
      GitPtrLo = AMDGPU::SGPR8;
      return GitPtrLo;
    default:
      return GitPtrLo;
    }
  }
  return GitPtrLo;
}

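// Render a register as a string (via printReg) for YAML serialization.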
static yaml::StringValue regToString(Register Reg,
                                     const TargetRegisterInfo &TRI) {
  yaml::StringValue Dest;
  {
    raw_string_ostream OS(Dest.Value);
    OS << printReg(Reg, &TRI);
  }
  return Dest;
}

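// Translate the in-memory argument descriptors into their YAML counterparts,
// returning std::nullopt when no argument has been assigned at all.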
static std::optional<yaml::SIArgumentInfo>
convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
                    const TargetRegisterInfo &TRI) {
  yaml::SIArgumentInfo AI;

  auto convertArg = [&](std::optional<yaml::SIArgument> &A,
                        const ArgDescriptor &Arg) {
    if (!Arg)
      return false;

    // Create a register or stack argument.
    yaml::SIArgument SA = yaml::SIArgument::createArgument(Arg.isRegister());
    if (Arg.isRegister()) {
      raw_string_ostream OS(SA.RegisterName.Value);
      OS << printReg(Arg.getRegister(), &TRI);
    } else
      SA.StackOffset = Arg.getStackOffset();
    // Check and update the optional mask.
    if (Arg.isMasked())
      SA.Mask = Arg.getMask();

    A = SA;
    return true;
  };

  // TODO: Need to serialize kernarg preloads.
  bool Any = false;
  Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
  Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
  Any |= convertArg(AI.QueuePtr, ArgInfo.QueuePtr);
  Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr);
  Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID);
  Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit);
  Any |= convertArg(AI.LDSKernelId, ArgInfo.LDSKernelId);
  Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize);
  Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX);
  Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY);
  Any |= convertArg(AI.WorkGroupIDZ, ArgInfo.WorkGroupIDZ);
  Any |= convertArg(AI.WorkGroupInfo, ArgInfo.WorkGroupInfo);
  Any |= convertArg(AI.PrivateSegmentWaveByteOffset,
                    ArgInfo.PrivateSegmentWaveByteOffset);
  Any |= convertArg(AI.ImplicitArgPtr, ArgInfo.ImplicitArgPtr);
  Any |= convertArg(AI.ImplicitBufferPtr, ArgInfo.ImplicitBufferPtr);
  Any |= convertArg(AI.WorkItemIDX, ArgInfo.WorkItemIDX);
  Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY);
  Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ);

  if (Any)
    return AI;

  return std::nullopt;
}

yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
    const llvm::SIMachineFunctionInfo &MFI, const TargetRegisterInfo &TRI,
    const llvm::MachineFunction &MF)
    : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
      MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()),
      GDSSize(MFI.getGDSSize()),
      DynLDSAlign(MFI.getDynLDSAlign()), IsEntryFunction(MFI.isEntryFunction()),
      NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
      MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()),
      HasSpilledSGPRs(MFI.hasSpilledSGPRs()),
      HasSpilledVGPRs(MFI.hasSpilledVGPRs()),
      HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
      Occupancy(MFI.getOccupancy()),
      ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
      FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
      StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
      BytesInStackArgArea(MFI.getBytesInStackArgArea()),
      ReturnsVoid(MFI.returnsVoid()),
      ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)),
      PSInputAddr(MFI.getPSInputAddr()),
      PSInputEnable(MFI.getPSInputEnable()),
      Mode(MFI.getMode()) {
  for (Register Reg : MFI.getWWMReservedRegs())
    WWMReservedRegs.push_back(regToString(Reg, TRI));

  if (MFI.getLongBranchReservedReg())
    LongBranchReservedReg = regToString(MFI.getLongBranchReservedReg(), TRI);
  if (MFI.getVGPRForAGPRCopy())
    VGPRForAGPRCopy = regToString(MFI.getVGPRForAGPRCopy(), TRI);

  if (MFI.getSGPRForEXECCopy())
    SGPRForEXECCopy = regToString(MFI.getSGPRForEXECCopy(), TRI);

  auto SFI = MFI.getOptionalScavengeFI();
  if (SFI)
    ScavengeFI = yaml::FrameIndex(*SFI, MF.getFrameInfo());
}

void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
  MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
}

bool SIMachineFunctionInfo::initializeBaseYamlFields(
    const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF,
    PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) {
  ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
  MaxKernArgAlign = YamlMFI.MaxKernArgAlign;
  LDSSize = YamlMFI.LDSSize;
  GDSSize = YamlMFI.GDSSize;
  DynLDSAlign = YamlMFI.DynLDSAlign;
  PSInputAddr = YamlMFI.PSInputAddr;
  PSInputEnable = YamlMFI.PSInputEnable;
  HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
  Occupancy = YamlMFI.Occupancy;
  IsEntryFunction = YamlMFI.IsEntryFunction;
  NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
  MemoryBound = YamlMFI.MemoryBound;
  WaveLimiter = YamlMFI.WaveLimiter;
  HasSpilledSGPRs = YamlMFI.HasSpilledSGPRs;
  HasSpilledVGPRs = YamlMFI.HasSpilledVGPRs;
  BytesInStackArgArea = YamlMFI.BytesInStackArgArea;
  ReturnsVoid = YamlMFI.ReturnsVoid;

  if (YamlMFI.ScavengeFI) {
    auto FIOrErr = YamlMFI.ScavengeFI->getFI(MF.getFrameInfo());
    if (!FIOrErr) {
      // Create a diagnostic for the frame index.
      const MemoryBuffer &Buffer =
          *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID());

      Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1, 1,
                           SourceMgr::DK_Error, toString(FIOrErr.takeError()),
                           "", std::nullopt, std::nullopt);
      SourceRange = YamlMFI.ScavengeFI->SourceRange;
      return true;
    }
    ScavengeFI = *FIOrErr;
  } else {
    ScavengeFI = std::nullopt;
  }
  return false;
}

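// Conservatively decide from the IR whether this function might need AGPRs:
// inline asm with an 'a' register constraint, any indirect call, or any call
// to a non-intrinsic function forces us to assume AGPRs may be used.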
bool SIMachineFunctionInfo::mayUseAGPRs(const Function &F) const {
  for (const BasicBlock &BB : F) {
    for (const Instruction &I : BB) {
      const auto *CB = dyn_cast<CallBase>(&I);
      if (!CB)
        continue;

      if (CB->isInlineAsm()) {
        const InlineAsm *IA = dyn_cast<InlineAsm>(CB->getCalledOperand());
        for (const auto &CI : IA->ParseConstraints()) {
          for (StringRef Code : CI.Codes) {
            Code.consume_front("{");
            if (Code.starts_with("a"))
              return true;
          }
        }
        continue;
      }

      const Function *Callee =
          dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
      if (!Callee)
        return true;

      if (Callee->getIntrinsicID() == Intrinsic::not_intrinsic)
        return true;
    }
  }

  return false;
}

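// Determine whether the machine function actually uses AGPRs by checking
// virtual register classes and physical AGPR usage; the result is cached
// except when it cannot yet be decided.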
bool SIMachineFunctionInfo::usesAGPRs(const MachineFunction &MF) const {
  if (UsesAGPRs)
    return *UsesAGPRs;

  if (!mayNeedAGPRs()) {
    UsesAGPRs = false;
    return false;
  }

  if (!AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv()) ||
      MF.getFrameInfo().hasCalls()) {
    UsesAGPRs = true;
    return true;
  }

  const MachineRegisterInfo &MRI = MF.getRegInfo();

  for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
    const Register Reg = Register::index2VirtReg(I);
    const TargetRegisterClass *RC = MRI.getRegClassOrNull(Reg);
    if (RC && SIRegisterInfo::isAGPRClass(RC)) {
      UsesAGPRs = true;
      return true;
    } else if (!RC && !MRI.use_empty(Reg) && MRI.getType(Reg).isValid()) {
      // Defer caching UsesAGPRs; the function might not have been regbank
      // selected yet.
      return true;
    }
  }

  for (MCRegister Reg : AMDGPU::AGPR_32RegClass) {
    if (MRI.isPhysRegUsed(Reg)) {
      UsesAGPRs = true;
      return true;
    }
  }

  UsesAGPRs = false;
  return false;
}