xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp (revision fcaf7f8644a9988098ac6be2165bce3ea4786e91)
10b57cec5SDimitry Andric //===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric 
90b57cec5SDimitry Andric #include "SIMachineFunctionInfo.h"
105ffd83dbSDimitry Andric #include "AMDGPUTargetMachine.h"
11fe6060f1SDimitry Andric #include "AMDGPUSubtarget.h"
12fe6060f1SDimitry Andric #include "SIRegisterInfo.h"
13fe6060f1SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
14fe6060f1SDimitry Andric #include "Utils/AMDGPUBaseInfo.h"
15fe6060f1SDimitry Andric #include "llvm/ADT/Optional.h"
16fe6060f1SDimitry Andric #include "llvm/CodeGen/LiveIntervals.h"
17fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h"
18fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h"
19fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineFunction.h"
20fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
21fe6060f1SDimitry Andric #include "llvm/CodeGen/MIRParser/MIParser.h"
22fe6060f1SDimitry Andric #include "llvm/IR/CallingConv.h"
23fe6060f1SDimitry Andric #include "llvm/IR/DiagnosticInfo.h"
24fe6060f1SDimitry Andric #include "llvm/IR/Function.h"
25fe6060f1SDimitry Andric #include <cassert>
26fe6060f1SDimitry Andric #include <vector>
270b57cec5SDimitry Andric 
280b57cec5SDimitry Andric #define MAX_LANES 64
290b57cec5SDimitry Andric 
300b57cec5SDimitry Andric using namespace llvm;
310b57cec5SDimitry Andric 
// Construct the SI machine function info for \p MF. Decides, from the
// function's calling convention, attributes and subtarget, which implicit
// inputs (dispatch ptr, queue ptr, workgroup/workitem IDs, ...) must be made
// available before argument lowering runs, and picks the initial stack/frame
// register assignments for non-entry functions.
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    BufferPSV(static_cast<const AMDGPUTargetMachine &>(MF.getTarget())),
    ImagePSV(static_cast<const AMDGPUTargetMachine &>(MF.getTarget())),
    GWSResourcePSV(static_cast<const AMDGPUTargetMachine &>(MF.getTarget())),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    LDSKernelId(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    ImplicitBufferPtr(false),
    ImplicitArgPtr(false),
    GITPtrHigh(0xffffffff),
    HighBitsOf32BitAddress(0) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const Function &F = MF.getFunction();
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  WavesPerEU = ST.getWavesPerEU(F);

  Occupancy = ST.computeOccupancy(F, getLDSSize());
  CallingConv::ID CC = F.getCallingConv();

  // FIXME: Should have analysis or something rather than attribute to detect
  // calls.
  const bool HasCalls = F.hasFnAttribute("amdgpu-calls");

  const bool IsKernel = CC == CallingConv::AMDGPU_KERNEL ||
                        CC == CallingConv::SPIR_KERNEL;

  if (IsKernel) {
    // Kernels only need the kernarg segment pointer if there is something to
    // load from it (explicit arguments or target-defined implicit arguments).
    if (!F.arg_empty() || ST.getImplicitArgNumBytes(F) != 0)
      KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  }

  // Assume AGPRs may be needed whenever the subtarget has MAI instructions;
  // refined below for entry functions on gfx90a+.
  MayNeedAGPRs = ST.hasMAIInsts();

  if (!isEntryFunction()) {
    if (CC != CallingConv::AMDGPU_Gfx)
      ArgInfo = AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;

    // TODO: Pick a high register, and shift down, similar to a kernel.
    FrameOffsetReg = AMDGPU::SGPR33;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    if (!ST.enableFlatScratch()) {
      // Non-entry functions have no special inputs for now, other registers
      // required for scratch access.
      ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;

      ArgInfo.PrivateSegmentBuffer =
        ArgDescriptor::createRegister(ScratchRSrcReg);
    }

    if (!F.hasFnAttribute("amdgpu-no-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    ImplicitArgPtr = false;
    MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
                               MaxKernArgAlign);

    if (ST.hasGFX90AInsts() &&
        ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
        !mayUseAGPRs(MF))
      MayNeedAGPRs = false; // We will select all MAI with VGPR operands.
  }

  bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
  if (isAmdHsaOrMesa && !ST.enableFlatScratch())
    PrivateSegmentBuffer = true;
  else if (ST.isMesaGfxShader(F))
    ImplicitBufferPtr = true;

  // For compute, each implicit input is enabled unless the corresponding
  // "amdgpu-no-*" attribute proves it unused (kernels always get X IDs).
  if (!AMDGPU::isGraphics(CC)) {
    if (IsKernel || !F.hasFnAttribute("amdgpu-no-workgroup-id-x"))
      WorkGroupIDX = true;

    if (!F.hasFnAttribute("amdgpu-no-workgroup-id-y"))
      WorkGroupIDY = true;

    if (!F.hasFnAttribute("amdgpu-no-workgroup-id-z"))
      WorkGroupIDZ = true;

    if (IsKernel || !F.hasFnAttribute("amdgpu-no-workitem-id-x"))
      WorkItemIDX = true;

    // Y/Z workitem IDs are also skipped when the launch bounds prove the
    // dimension is always zero.
    if (!F.hasFnAttribute("amdgpu-no-workitem-id-y") &&
        ST.getMaxWorkitemID(F, 1) != 0)
      WorkItemIDY = true;

    if (!F.hasFnAttribute("amdgpu-no-workitem-id-z") &&
        ST.getMaxWorkitemID(F, 2) != 0)
      WorkItemIDZ = true;

    if (!F.hasFnAttribute("amdgpu-no-dispatch-ptr"))
      DispatchPtr = true;

    if (!F.hasFnAttribute("amdgpu-no-queue-ptr"))
      QueuePtr = true;

    if (!F.hasFnAttribute("amdgpu-no-dispatch-id"))
      DispatchID = true;

    if (!IsKernel && !F.hasFnAttribute("amdgpu-no-lds-kernel-id"))
      LDSKernelId = true;
  }

  // FIXME: This attribute is a hack, we just need an analysis on the function
  // to look for allocas.
  bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects");

  // TODO: This could be refined a lot. The attribute is a poor way of
  // detecting calls or stack objects that may require it before argument
  // lowering.
  if (ST.hasFlatAddressSpace() && isEntryFunction() &&
      (isAmdHsaOrMesa || ST.enableFlatScratch()) &&
      (HasCalls || HasStackObjects || ST.enableFlatScratch()) &&
      !ST.flatScratchIsArchitected()) {
    FlatScratchInit = true;
  }

  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    if (!ST.flatScratchIsArchitected()) {
      PrivateSegmentWaveByteOffset = true;

      // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
      if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
          (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
        ArgInfo.PrivateSegmentWaveByteOffset =
            ArgDescriptor::createRegister(AMDGPU::SGPR5);
    }
  }

  // Attribute values are parsed as integers; on failure the initializers
  // above (0xffffffff / 0) remain in effect.
  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
  StringRef S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GITPtrHigh);

  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
  S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, HighBitsOf32BitAddress);

  // On GFX908, in order to guarantee copying between AGPRs, we need a scratch
  // VGPR available at all times. For now, reserve highest available VGPR. After
  // RA, shift it to the lowest available unused VGPR if the one exist.
  if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
    VGPRForAGPRCopy =
        AMDGPU::VGPR_32RegClass.getRegister(ST.getMaxNumVGPRs(F) - 1);
  }
}
20181ad6265SDimitry Andric 
// Clone this function info into \p DestMF via the generic cloneInfo helper
// (member-wise copy). The MBB remapping table is unused because no members
// here reference basic blocks directly.
MachineFunctionInfo *SIMachineFunctionInfo::clone(
    BumpPtrAllocator &Allocator, MachineFunction &DestMF,
    const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
    const {
  return DestMF.cloneInfo<SIMachineFunctionInfo>(*this);
}
2080b57cec5SDimitry Andric 
2090b57cec5SDimitry Andric void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
2100b57cec5SDimitry Andric   limitOccupancy(getMaxWavesPerEU());
2110b57cec5SDimitry Andric   const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();
2120b57cec5SDimitry Andric   limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
2130b57cec5SDimitry Andric                  MF.getFunction()));
2140b57cec5SDimitry Andric }
2150b57cec5SDimitry Andric 
2165ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addPrivateSegmentBuffer(
2170b57cec5SDimitry Andric   const SIRegisterInfo &TRI) {
2180b57cec5SDimitry Andric   ArgInfo.PrivateSegmentBuffer =
2190b57cec5SDimitry Andric     ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2208bcb0991SDimitry Andric     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));
2210b57cec5SDimitry Andric   NumUserSGPRs += 4;
2220b57cec5SDimitry Andric   return ArgInfo.PrivateSegmentBuffer.getRegister();
2230b57cec5SDimitry Andric }
2240b57cec5SDimitry Andric 
2255ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
2260b57cec5SDimitry Andric   ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2270b57cec5SDimitry Andric     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
2280b57cec5SDimitry Andric   NumUserSGPRs += 2;
2290b57cec5SDimitry Andric   return ArgInfo.DispatchPtr.getRegister();
2300b57cec5SDimitry Andric }
2310b57cec5SDimitry Andric 
2325ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
2330b57cec5SDimitry Andric   ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2340b57cec5SDimitry Andric     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
2350b57cec5SDimitry Andric   NumUserSGPRs += 2;
2360b57cec5SDimitry Andric   return ArgInfo.QueuePtr.getRegister();
2370b57cec5SDimitry Andric }
2380b57cec5SDimitry Andric 
2395ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
2400b57cec5SDimitry Andric   ArgInfo.KernargSegmentPtr
2410b57cec5SDimitry Andric     = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2420b57cec5SDimitry Andric     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
2430b57cec5SDimitry Andric   NumUserSGPRs += 2;
2440b57cec5SDimitry Andric   return ArgInfo.KernargSegmentPtr.getRegister();
2450b57cec5SDimitry Andric }
2460b57cec5SDimitry Andric 
2475ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
2480b57cec5SDimitry Andric   ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2490b57cec5SDimitry Andric     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
2500b57cec5SDimitry Andric   NumUserSGPRs += 2;
2510b57cec5SDimitry Andric   return ArgInfo.DispatchID.getRegister();
2520b57cec5SDimitry Andric }
2530b57cec5SDimitry Andric 
2545ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
2550b57cec5SDimitry Andric   ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2560b57cec5SDimitry Andric     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
2570b57cec5SDimitry Andric   NumUserSGPRs += 2;
2580b57cec5SDimitry Andric   return ArgInfo.FlatScratchInit.getRegister();
2590b57cec5SDimitry Andric }
2600b57cec5SDimitry Andric 
2615ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
2620b57cec5SDimitry Andric   ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2630b57cec5SDimitry Andric     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
2640b57cec5SDimitry Andric   NumUserSGPRs += 2;
2650b57cec5SDimitry Andric   return ArgInfo.ImplicitBufferPtr.getRegister();
2660b57cec5SDimitry Andric }
2670b57cec5SDimitry Andric 
268*fcaf7f86SDimitry Andric Register SIMachineFunctionInfo::addLDSKernelId() {
269*fcaf7f86SDimitry Andric   ArgInfo.LDSKernelId = ArgDescriptor::createRegister(getNextUserSGPR());
270*fcaf7f86SDimitry Andric   NumUserSGPRs += 1;
271*fcaf7f86SDimitry Andric   return ArgInfo.LDSKernelId.getRegister();
272*fcaf7f86SDimitry Andric }
273*fcaf7f86SDimitry Andric 
2745ffd83dbSDimitry Andric bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs,
2755ffd83dbSDimitry Andric                                              MCPhysReg Reg) {
2760b57cec5SDimitry Andric   for (unsigned I = 0; CSRegs[I]; ++I) {
2770b57cec5SDimitry Andric     if (CSRegs[I] == Reg)
2780b57cec5SDimitry Andric       return true;
2790b57cec5SDimitry Andric   }
2800b57cec5SDimitry Andric 
2810b57cec5SDimitry Andric   return false;
2820b57cec5SDimitry Andric }
2830b57cec5SDimitry Andric 
2840b57cec5SDimitry Andric /// \p returns true if \p NumLanes slots are available in VGPRs already used for
2850b57cec5SDimitry Andric /// SGPR spilling.
2860b57cec5SDimitry Andric //
2870b57cec5SDimitry Andric // FIXME: This only works after processFunctionBeforeFrameFinalized
2880b57cec5SDimitry Andric bool SIMachineFunctionInfo::haveFreeLanesForSGPRSpill(const MachineFunction &MF,
2890b57cec5SDimitry Andric                                                       unsigned NumNeed) const {
2900b57cec5SDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
2910b57cec5SDimitry Andric   unsigned WaveSize = ST.getWavefrontSize();
2920b57cec5SDimitry Andric   return NumVGPRSpillLanes + NumNeed <= WaveSize * SpillVGPRs.size();
2930b57cec5SDimitry Andric }
2940b57cec5SDimitry Andric 
/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
/// Returns true if all lanes for the spill object could be reserved (or were
/// already reserved); returns false and rolls back on VGPR exhaustion.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  // operator[] creates an empty lane list on the first request for this FI.
  std::vector<SIRegisterInfo::SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();

  // Each 4-byte piece of the spilled SGPR object occupies one VGPR lane.
  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;

  if (NumLanes > WaveSize)
    return false;

  assert(Size >= 4 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

  // Make sure to handle the case where a wide SGPR spill may span between two
  // VGPRs.
  for (unsigned I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    Register LaneVGPR;
    // NumVGPRSpillLanes is a running count across all FIs; lane 0 means the
    // previous spill VGPR is full and a new one must be found.
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

    if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we will not
        // partially spill the SGPR to VGPRs.
        SGPRToVGPRSpills.erase(FI);
        // Undo the lane count advanced by the loop header for this FI.
        NumVGPRSpillLanes -= I;

        // FIXME: We can run out of free registers with split allocation if
        // IPRA is enabled and a called function already uses every VGPR.
#if 0
        DiagnosticInfoResourceLimit DiagOutOfRegs(MF.getFunction(),
                                                  "VGPRs for SGPR spilling",
                                                  0, DS_Error);
        MF.getFunction().getContext().diagnose(DiagOutOfRegs);
#endif
        return false;
      }

      Optional<int> SpillFI;
      // We need to preserve inactive lanes, so always save, even caller-save
      // registers.
      if (!isEntryFunction()) {
        SpillFI = FrameInfo.CreateSpillStackObject(4, Align(4));
      }

      SpillVGPRs.push_back(SGPRSpillVGPR(LaneVGPR, SpillFI));

      // Add this register as live-in to all blocks to avoid machine verifier
      // complaining about use of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      // Continue filling lanes of the most recently allocated spill VGPR.
      LaneVGPR = SpillVGPRs.back().VGPR;
    }

    SpillLanes.push_back(SIRegisterInfo::SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}
3660b57cec5SDimitry Andric 
/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
/// Either AGPR is spilled to VGPR to vice versa.
/// Returns true if a \p FI can be eliminated completely.
bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
                                                    int FI,
                                                    bool isAGPRtoVGPR) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST =  MF.getSubtarget<GCNSubtarget>();

  assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));

  auto &Spill = VGPRToAGPRSpills[FI];

  // This has already been allocated.
  if (!Spill.Lanes.empty())
    return Spill.FullyAllocated;

  // One destination register per 4-byte piece of the spilled object.
  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;
  Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);

  // Direction of the spill selects the destination class: AGPR->VGPR spills
  // into VGPR_32, VGPR->AGPR spills into AGPR_32.
  const TargetRegisterClass &RC =
      isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
  auto Regs = RC.getRegisters();

  auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  Spill.FullyAllocated = true;

  // FIXME: Move allocation logic out of MachineFunctionInfo and initialize
  // once.
  BitVector OtherUsedRegs;
  OtherUsedRegs.resize(TRI->getNumRegs());

  // Registers preserved across calls must not be clobbered by spills.
  const uint32_t *CSRMask =
      TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
  if (CSRMask)
    OtherUsedRegs.setBitsInMask(CSRMask);

  // TODO: Should include register tuples, but doesn't matter with current
  // usage.
  for (MCPhysReg Reg : SpillAGPR)
    OtherUsedRegs.set(Reg);
  for (MCPhysReg Reg : SpillVGPR)
    OtherUsedRegs.set(Reg);

  // Scan the class's register list once, assigning lanes from the highest
  // index downward; NextSpillReg only ever advances.
  SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
  for (int I = NumLanes - 1; I >= 0; --I) {
    NextSpillReg = std::find_if(
        NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
          return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
                 !OtherUsedRegs[Reg];
        });

    if (NextSpillReg == Regs.end()) { // Registers exhausted
      Spill.FullyAllocated = false;
      break;
    }

    OtherUsedRegs.set(*NextSpillReg);
    SpillRegs.push_back(*NextSpillReg);
    Spill.Lanes[I] = *NextSpillReg++;
  }

  return Spill.FullyAllocated;
}
4340b57cec5SDimitry Andric 
// Remove stack objects made redundant by SGPR->VGPR / VGPR->AGPR spilling.
// Returns true if any SGPR spill slot had to be moved back to the default
// stack (i.e. some SGPRs still spill to memory).
bool SIMachineFunctionInfo::removeDeadFrameIndices(
    MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs) {
  // Remove dead frame indices from function frame, however keep FP & BP since
  // spills for them haven't been inserted yet. And also make sure to remove the
  // frame indices from `SGPRToVGPRSpills` data structure, otherwise, it could
  // result in an unexpected side effect and bug, in case of any re-mapping of
  // freed frame indices by later pass(es) like "stack slot coloring".
  for (auto &R : make_early_inc_range(SGPRToVGPRSpills)) {
    if (R.first != FramePointerSaveIndex && R.first != BasePointerSaveIndex) {
      MFI.RemoveStackObject(R.first);
      SGPRToVGPRSpills.erase(R.first);
    }
  }

  bool HaveSGPRToMemory = false;

  if (ResetSGPRSpillStackIDs) {
    // All other SPGRs must be allocated on the default stack, so reset the
    // stack ID.
    for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e;
         ++i) {
      if (i != FramePointerSaveIndex && i != BasePointerSaveIndex) {
        if (MFI.getStackID(i) == TargetStackID::SGPRSpill) {
          MFI.setStackID(i, TargetStackID::Default);
          HaveSGPRToMemory = true;
        }
      }
    }
  }

  // VGPR->AGPR spill entries marked dead no longer need their stack slot.
  for (auto &R : VGPRToAGPRSpills) {
    if (R.second.IsDead)
      MFI.RemoveStackObject(R.first);
  }

  return HaveSGPRToMemory;
}
47281ad6265SDimitry Andric 
47381ad6265SDimitry Andric void SIMachineFunctionInfo::allocateWWMReservedSpillSlots(
47481ad6265SDimitry Andric     MachineFrameInfo &MFI, const SIRegisterInfo &TRI) {
47581ad6265SDimitry Andric   assert(WWMReservedFrameIndexes.empty());
47681ad6265SDimitry Andric 
47781ad6265SDimitry Andric   WWMReservedFrameIndexes.resize(WWMReservedRegs.size());
47881ad6265SDimitry Andric 
47981ad6265SDimitry Andric   int I = 0;
48081ad6265SDimitry Andric   for (Register VGPR : WWMReservedRegs) {
48181ad6265SDimitry Andric     const TargetRegisterClass *RC = TRI.getPhysRegClass(VGPR);
48281ad6265SDimitry Andric     WWMReservedFrameIndexes[I++] = MFI.CreateSpillStackObject(
48381ad6265SDimitry Andric         TRI.getSpillSize(*RC), TRI.getSpillAlign(*RC));
48481ad6265SDimitry Andric   }
4850b57cec5SDimitry Andric }
4860b57cec5SDimitry Andric 
487fe6060f1SDimitry Andric int SIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI,
488fe6060f1SDimitry Andric                                          const SIRegisterInfo &TRI) {
489fe6060f1SDimitry Andric   if (ScavengeFI)
490fe6060f1SDimitry Andric     return *ScavengeFI;
491fe6060f1SDimitry Andric   if (isEntryFunction()) {
492fe6060f1SDimitry Andric     ScavengeFI = MFI.CreateFixedObject(
493fe6060f1SDimitry Andric         TRI.getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
494fe6060f1SDimitry Andric   } else {
495fe6060f1SDimitry Andric     ScavengeFI = MFI.CreateStackObject(
496fe6060f1SDimitry Andric         TRI.getSpillSize(AMDGPU::SGPR_32RegClass),
497fe6060f1SDimitry Andric         TRI.getSpillAlign(AMDGPU::SGPR_32RegClass), false);
498fe6060f1SDimitry Andric   }
499fe6060f1SDimitry Andric   return *ScavengeFI;
500fe6060f1SDimitry Andric }
501fe6060f1SDimitry Andric 
// Next unallocated user SGPR; user SGPRs are assigned contiguously from
// SGPR0 and must all precede system SGPRs.
MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
  assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
  return AMDGPU::SGPR0 + NumUserSGPRs;
}
5060b57cec5SDimitry Andric 
// Next unallocated system SGPR; system SGPRs follow directly after all user
// SGPRs.
MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
  return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
}
5100b57cec5SDimitry Andric 
5115ffd83dbSDimitry Andric Register
5125ffd83dbSDimitry Andric SIMachineFunctionInfo::getGITPtrLoReg(const MachineFunction &MF) const {
5135ffd83dbSDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
5145ffd83dbSDimitry Andric   if (!ST.isAmdPalOS())
5155ffd83dbSDimitry Andric     return Register();
5165ffd83dbSDimitry Andric   Register GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in
5175ffd83dbSDimitry Andric   if (ST.hasMergedShaders()) {
5185ffd83dbSDimitry Andric     switch (MF.getFunction().getCallingConv()) {
5195ffd83dbSDimitry Andric     case CallingConv::AMDGPU_HS:
5205ffd83dbSDimitry Andric     case CallingConv::AMDGPU_GS:
5215ffd83dbSDimitry Andric       // Low GIT address is passed in s8 rather than s0 for an LS+HS or
5225ffd83dbSDimitry Andric       // ES+GS merged shader on gfx9+.
5235ffd83dbSDimitry Andric       GitPtrLo = AMDGPU::SGPR8;
5245ffd83dbSDimitry Andric       return GitPtrLo;
5255ffd83dbSDimitry Andric     default:
5265ffd83dbSDimitry Andric       return GitPtrLo;
5275ffd83dbSDimitry Andric     }
5285ffd83dbSDimitry Andric   }
5295ffd83dbSDimitry Andric   return GitPtrLo;
5305ffd83dbSDimitry Andric }
5315ffd83dbSDimitry Andric 
5325ffd83dbSDimitry Andric static yaml::StringValue regToString(Register Reg,
5330b57cec5SDimitry Andric                                      const TargetRegisterInfo &TRI) {
5340b57cec5SDimitry Andric   yaml::StringValue Dest;
5350b57cec5SDimitry Andric   {
5360b57cec5SDimitry Andric     raw_string_ostream OS(Dest.Value);
5370b57cec5SDimitry Andric     OS << printReg(Reg, &TRI);
5380b57cec5SDimitry Andric   }
5390b57cec5SDimitry Andric   return Dest;
5400b57cec5SDimitry Andric }
5410b57cec5SDimitry Andric 
5420b57cec5SDimitry Andric static Optional<yaml::SIArgumentInfo>
5430b57cec5SDimitry Andric convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
5440b57cec5SDimitry Andric                     const TargetRegisterInfo &TRI) {
5450b57cec5SDimitry Andric   yaml::SIArgumentInfo AI;
5460b57cec5SDimitry Andric 
5470b57cec5SDimitry Andric   auto convertArg = [&](Optional<yaml::SIArgument> &A,
5480b57cec5SDimitry Andric                         const ArgDescriptor &Arg) {
5490b57cec5SDimitry Andric     if (!Arg)
5500b57cec5SDimitry Andric       return false;
5510b57cec5SDimitry Andric 
5520b57cec5SDimitry Andric     // Create a register or stack argument.
5530b57cec5SDimitry Andric     yaml::SIArgument SA = yaml::SIArgument::createArgument(Arg.isRegister());
5540b57cec5SDimitry Andric     if (Arg.isRegister()) {
5550b57cec5SDimitry Andric       raw_string_ostream OS(SA.RegisterName.Value);
5560b57cec5SDimitry Andric       OS << printReg(Arg.getRegister(), &TRI);
5570b57cec5SDimitry Andric     } else
5580b57cec5SDimitry Andric       SA.StackOffset = Arg.getStackOffset();
5590b57cec5SDimitry Andric     // Check and update the optional mask.
5600b57cec5SDimitry Andric     if (Arg.isMasked())
5610b57cec5SDimitry Andric       SA.Mask = Arg.getMask();
5620b57cec5SDimitry Andric 
5630b57cec5SDimitry Andric     A = SA;
5640b57cec5SDimitry Andric     return true;
5650b57cec5SDimitry Andric   };
5660b57cec5SDimitry Andric 
5670b57cec5SDimitry Andric   bool Any = false;
5680b57cec5SDimitry Andric   Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
5690b57cec5SDimitry Andric   Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
5700b57cec5SDimitry Andric   Any |= convertArg(AI.QueuePtr, ArgInfo.QueuePtr);
5710b57cec5SDimitry Andric   Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr);
5720b57cec5SDimitry Andric   Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID);
5730b57cec5SDimitry Andric   Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit);
574*fcaf7f86SDimitry Andric   Any |= convertArg(AI.LDSKernelId, ArgInfo.LDSKernelId);
5750b57cec5SDimitry Andric   Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize);
5760b57cec5SDimitry Andric   Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX);
5770b57cec5SDimitry Andric   Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY);
5780b57cec5SDimitry Andric   Any |= convertArg(AI.WorkGroupIDZ, ArgInfo.WorkGroupIDZ);
5790b57cec5SDimitry Andric   Any |= convertArg(AI.WorkGroupInfo, ArgInfo.WorkGroupInfo);
5800b57cec5SDimitry Andric   Any |= convertArg(AI.PrivateSegmentWaveByteOffset,
5810b57cec5SDimitry Andric                     ArgInfo.PrivateSegmentWaveByteOffset);
5820b57cec5SDimitry Andric   Any |= convertArg(AI.ImplicitArgPtr, ArgInfo.ImplicitArgPtr);
5830b57cec5SDimitry Andric   Any |= convertArg(AI.ImplicitBufferPtr, ArgInfo.ImplicitBufferPtr);
5840b57cec5SDimitry Andric   Any |= convertArg(AI.WorkItemIDX, ArgInfo.WorkItemIDX);
5850b57cec5SDimitry Andric   Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY);
5860b57cec5SDimitry Andric   Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ);
5870b57cec5SDimitry Andric 
5880b57cec5SDimitry Andric   if (Any)
5890b57cec5SDimitry Andric     return AI;
5900b57cec5SDimitry Andric 
5910b57cec5SDimitry Andric   return None;
5920b57cec5SDimitry Andric }
5930b57cec5SDimitry Andric 
// Build the YAML mirror of an SIMachineFunctionInfo for MIR serialization.
// Scalar fields are copied directly; registers are rendered by name and the
// optional scavenge frame index is converted to a yaml::FrameIndex.
yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
    const llvm::SIMachineFunctionInfo &MFI, const TargetRegisterInfo &TRI,
    const llvm::MachineFunction &MF)
    : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
      MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()),
      GDSSize(MFI.getGDSSize()),
      DynLDSAlign(MFI.getDynLDSAlign()), IsEntryFunction(MFI.isEntryFunction()),
      NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
      MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()),
      HasSpilledSGPRs(MFI.hasSpilledSGPRs()),
      HasSpilledVGPRs(MFI.hasSpilledVGPRs()),
      HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
      Occupancy(MFI.getOccupancy()),
      ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
      FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
      StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
      BytesInStackArgArea(MFI.getBytesInStackArgArea()),
      ReturnsVoid(MFI.returnsVoid()),
      ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)), Mode(MFI.getMode()) {
  // WWM-reserved registers are serialized by name, one entry per register.
  for (Register Reg : MFI.WWMReservedRegs)
    WWMReservedRegs.push_back(regToString(Reg, TRI));

  // Only emit the AGPR-copy VGPR and scavenge FI when they were assigned.
  if (MFI.getVGPRForAGPRCopy())
    VGPRForAGPRCopy = regToString(MFI.getVGPRForAGPRCopy(), TRI);
  auto SFI = MFI.getOptionalScavengeFI();
  if (SFI)
    ScavengeFI = yaml::FrameIndex(*SFI, MF.getFrameInfo());
}
6220b57cec5SDimitry Andric 
// Entry point called by the YAML I/O machinery; delegates to the static
// MappingTraits specialization that maps each field.
void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
  MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
}
6260b57cec5SDimitry Andric 
// Populate this SIMachineFunctionInfo from its parsed YAML mirror when
// reading MIR. Returns true (with Error/SourceRange filled in) on failure,
// false on success.
bool SIMachineFunctionInfo::initializeBaseYamlFields(
    const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF,
    PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) {
  ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
  MaxKernArgAlign = YamlMFI.MaxKernArgAlign;
  LDSSize = YamlMFI.LDSSize;
  GDSSize = YamlMFI.GDSSize;
  DynLDSAlign = YamlMFI.DynLDSAlign;
  HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
  Occupancy = YamlMFI.Occupancy;
  IsEntryFunction = YamlMFI.IsEntryFunction;
  NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
  MemoryBound = YamlMFI.MemoryBound;
  WaveLimiter = YamlMFI.WaveLimiter;
  HasSpilledSGPRs = YamlMFI.HasSpilledSGPRs;
  HasSpilledVGPRs = YamlMFI.HasSpilledVGPRs;
  BytesInStackArgArea = YamlMFI.BytesInStackArgArea;
  ReturnsVoid = YamlMFI.ReturnsVoid;

  // The scavenge frame index in the YAML names an object in the parsed
  // frame info; resolving it can fail for a malformed reference.
  if (YamlMFI.ScavengeFI) {
    auto FIOrErr = YamlMFI.ScavengeFI->getFI(MF.getFrameInfo());
    if (!FIOrErr) {
      // Create a diagnostic for the frame index.
      const MemoryBuffer &Buffer =
          *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID());

      Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1, 1,
                           SourceMgr::DK_Error, toString(FIOrErr.takeError()),
                           "", None, None);
      SourceRange = YamlMFI.ScavengeFI->SourceRange;
      return true;
    }
    ScavengeFI = *FIOrErr;
  } else {
    ScavengeFI = None;
  }
  return false;
}
6655ffd83dbSDimitry Andric 
66681ad6265SDimitry Andric bool SIMachineFunctionInfo::mayUseAGPRs(const MachineFunction &MF) const {
66781ad6265SDimitry Andric   for (const BasicBlock &BB : MF.getFunction()) {
66881ad6265SDimitry Andric     for (const Instruction &I : BB) {
66981ad6265SDimitry Andric       const auto *CB = dyn_cast<CallBase>(&I);
67081ad6265SDimitry Andric       if (!CB)
67181ad6265SDimitry Andric         continue;
67281ad6265SDimitry Andric 
67381ad6265SDimitry Andric       if (CB->isInlineAsm()) {
67481ad6265SDimitry Andric         const InlineAsm *IA = dyn_cast<InlineAsm>(CB->getCalledOperand());
67581ad6265SDimitry Andric         for (const auto &CI : IA->ParseConstraints()) {
67681ad6265SDimitry Andric           for (StringRef Code : CI.Codes) {
67781ad6265SDimitry Andric             Code.consume_front("{");
67881ad6265SDimitry Andric             if (Code.startswith("a"))
67981ad6265SDimitry Andric               return true;
68081ad6265SDimitry Andric           }
68181ad6265SDimitry Andric         }
68281ad6265SDimitry Andric         continue;
68381ad6265SDimitry Andric       }
68481ad6265SDimitry Andric 
68581ad6265SDimitry Andric       const Function *Callee =
68681ad6265SDimitry Andric           dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
68781ad6265SDimitry Andric       if (!Callee)
68881ad6265SDimitry Andric         return true;
68981ad6265SDimitry Andric 
69081ad6265SDimitry Andric       if (Callee->getIntrinsicID() == Intrinsic::not_intrinsic)
69181ad6265SDimitry Andric         return true;
69281ad6265SDimitry Andric     }
69381ad6265SDimitry Andric   }
69481ad6265SDimitry Andric 
69581ad6265SDimitry Andric   return false;
69681ad6265SDimitry Andric }
69781ad6265SDimitry Andric 
698349cc55cSDimitry Andric bool SIMachineFunctionInfo::usesAGPRs(const MachineFunction &MF) const {
699349cc55cSDimitry Andric   if (UsesAGPRs)
700349cc55cSDimitry Andric     return *UsesAGPRs;
701349cc55cSDimitry Andric 
70281ad6265SDimitry Andric   if (!mayNeedAGPRs()) {
70381ad6265SDimitry Andric     UsesAGPRs = false;
70481ad6265SDimitry Andric     return false;
70581ad6265SDimitry Andric   }
70681ad6265SDimitry Andric 
707349cc55cSDimitry Andric   if (!AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv()) ||
708349cc55cSDimitry Andric       MF.getFrameInfo().hasCalls()) {
709349cc55cSDimitry Andric     UsesAGPRs = true;
710349cc55cSDimitry Andric     return true;
711349cc55cSDimitry Andric   }
712349cc55cSDimitry Andric 
713349cc55cSDimitry Andric   const MachineRegisterInfo &MRI = MF.getRegInfo();
714349cc55cSDimitry Andric 
715349cc55cSDimitry Andric   for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
716349cc55cSDimitry Andric     const Register Reg = Register::index2VirtReg(I);
717349cc55cSDimitry Andric     const TargetRegisterClass *RC = MRI.getRegClassOrNull(Reg);
718349cc55cSDimitry Andric     if (RC && SIRegisterInfo::isAGPRClass(RC)) {
719349cc55cSDimitry Andric       UsesAGPRs = true;
720349cc55cSDimitry Andric       return true;
721349cc55cSDimitry Andric     } else if (!RC && !MRI.use_empty(Reg) && MRI.getType(Reg).isValid()) {
722349cc55cSDimitry Andric       // Defer caching UsesAGPRs, function might not yet been regbank selected.
723349cc55cSDimitry Andric       return true;
724349cc55cSDimitry Andric     }
725349cc55cSDimitry Andric   }
726349cc55cSDimitry Andric 
727349cc55cSDimitry Andric   for (MCRegister Reg : AMDGPU::AGPR_32RegClass) {
728349cc55cSDimitry Andric     if (MRI.isPhysRegUsed(Reg)) {
729349cc55cSDimitry Andric       UsesAGPRs = true;
730349cc55cSDimitry Andric       return true;
731349cc55cSDimitry Andric     }
732349cc55cSDimitry Andric   }
733349cc55cSDimitry Andric 
734349cc55cSDimitry Andric   UsesAGPRs = false;
735349cc55cSDimitry Andric   return false;
736349cc55cSDimitry Andric }
737