xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp (revision 81ad626541db97eb356e2c1d4a20eb2a26a766ab)
10b57cec5SDimitry Andric //===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric 
90b57cec5SDimitry Andric #include "SIMachineFunctionInfo.h"
105ffd83dbSDimitry Andric #include "AMDGPUTargetMachine.h"
11fe6060f1SDimitry Andric #include "AMDGPUSubtarget.h"
12fe6060f1SDimitry Andric #include "SIRegisterInfo.h"
13fe6060f1SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
14fe6060f1SDimitry Andric #include "Utils/AMDGPUBaseInfo.h"
15fe6060f1SDimitry Andric #include "llvm/ADT/Optional.h"
16fe6060f1SDimitry Andric #include "llvm/CodeGen/LiveIntervals.h"
17fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h"
18fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h"
19fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineFunction.h"
20fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
21fe6060f1SDimitry Andric #include "llvm/CodeGen/MIRParser/MIParser.h"
22fe6060f1SDimitry Andric #include "llvm/IR/CallingConv.h"
23fe6060f1SDimitry Andric #include "llvm/IR/DiagnosticInfo.h"
24fe6060f1SDimitry Andric #include "llvm/IR/Function.h"
25fe6060f1SDimitry Andric #include <cassert>
26fe6060f1SDimitry Andric #include <vector>
270b57cec5SDimitry Andric 
280b57cec5SDimitry Andric #define MAX_LANES 64
290b57cec5SDimitry Andric 
300b57cec5SDimitry Andric using namespace llvm;
310b57cec5SDimitry Andric 
// Initialize per-function SI state from the calling convention, subtarget
// features, and function attributes of \p MF. Most of the boolean fields
// record which special preloaded inputs (dispatch ptr, queue ptr, workgroup
// IDs, work item IDs, etc.) this function requires; they are refined below
// based on "amdgpu-no-*" attributes.
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    BufferPSV(static_cast<const AMDGPUTargetMachine &>(MF.getTarget())),
    ImagePSV(static_cast<const AMDGPUTargetMachine &>(MF.getTarget())),
    GWSResourcePSV(static_cast<const AMDGPUTargetMachine &>(MF.getTarget())),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    ImplicitBufferPtr(false),
    ImplicitArgPtr(false),
    GITPtrHigh(0xffffffff),
    HighBitsOf32BitAddress(0) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const Function &F = MF.getFunction();
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  WavesPerEU = ST.getWavesPerEU(F);

  Occupancy = ST.computeOccupancy(F, getLDSSize());
  CallingConv::ID CC = F.getCallingConv();

  // FIXME: Should have analysis or something rather than attribute to detect
  // calls.
  const bool HasCalls = F.hasFnAttribute("amdgpu-calls");

  const bool IsKernel = CC == CallingConv::AMDGPU_KERNEL ||
                        CC == CallingConv::SPIR_KERNEL;

  if (IsKernel) {
    // Kernels always need X dimensions; the kernarg pointer is only needed
    // if there are explicit arguments or implicit (hidden) kernargs.
    if (!F.arg_empty() || ST.getImplicitArgNumBytes(F) != 0)
      KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  }

  // Assume AGPRs may be needed whenever the subtarget has MAI instructions;
  // refined below for entry functions on gfx90a+.
  MayNeedAGPRs = ST.hasMAIInsts();

  if (!isEntryFunction()) {
    if (CC != CallingConv::AMDGPU_Gfx)
      ArgInfo = AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;

    // TODO: Pick a high register, and shift down, similar to a kernel.
    FrameOffsetReg = AMDGPU::SGPR33;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    if (!ST.enableFlatScratch()) {
      // Non-entry functions have no special inputs for now, other registers
      // required for scratch access.
      ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;

      ArgInfo.PrivateSegmentBuffer =
        ArgDescriptor::createRegister(ScratchRSrcReg);
    }

    if (!F.hasFnAttribute("amdgpu-no-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    ImplicitArgPtr = false;
    MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
                               MaxKernArgAlign);

    if (ST.hasGFX90AInsts() &&
        ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
        !mayUseAGPRs(MF))
      MayNeedAGPRs = false; // We will select all MAI with VGPR operands.
  }

  bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
  if (isAmdHsaOrMesa && !ST.enableFlatScratch())
    PrivateSegmentBuffer = true;
  else if (ST.isMesaGfxShader(F))
    ImplicitBufferPtr = true;

  if (!AMDGPU::isGraphics(CC)) {
    // Compute-like conventions: enable each system input unless the
    // corresponding "amdgpu-no-*" attribute proves it unused.
    if (IsKernel || !F.hasFnAttribute("amdgpu-no-workgroup-id-x"))
      WorkGroupIDX = true;

    if (!F.hasFnAttribute("amdgpu-no-workgroup-id-y"))
      WorkGroupIDY = true;

    if (!F.hasFnAttribute("amdgpu-no-workgroup-id-z"))
      WorkGroupIDZ = true;

    if (IsKernel || !F.hasFnAttribute("amdgpu-no-workitem-id-x"))
      WorkItemIDX = true;

    if (!F.hasFnAttribute("amdgpu-no-workitem-id-y") &&
        ST.getMaxWorkitemID(F, 1) != 0)
      WorkItemIDY = true;

    if (!F.hasFnAttribute("amdgpu-no-workitem-id-z") &&
        ST.getMaxWorkitemID(F, 2) != 0)
      WorkItemIDZ = true;

    if (!F.hasFnAttribute("amdgpu-no-dispatch-ptr"))
      DispatchPtr = true;

    if (!F.hasFnAttribute("amdgpu-no-queue-ptr"))
      QueuePtr = true;

    if (!F.hasFnAttribute("amdgpu-no-dispatch-id"))
      DispatchID = true;
  }

  // FIXME: This attribute is a hack, we just need an analysis on the function
  // to look for allocas.
  bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects");

  // TODO: This could be refined a lot. The attribute is a poor way of
  // detecting calls or stack objects that may require it before argument
  // lowering.
  if (ST.hasFlatAddressSpace() && isEntryFunction() &&
      (isAmdHsaOrMesa || ST.enableFlatScratch()) &&
      (HasCalls || HasStackObjects || ST.enableFlatScratch()) &&
      !ST.flatScratchIsArchitected()) {
    FlatScratchInit = true;
  }

  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    if (!ST.flatScratchIsArchitected()) {
      PrivateSegmentWaveByteOffset = true;

      // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
      if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
          (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
        ArgInfo.PrivateSegmentWaveByteOffset =
            ArgDescriptor::createRegister(AMDGPU::SGPR5);
    }
  }

  // Optional overrides supplied via attributes; parse failures leave the
  // defaults (0xffffffff / 0) in place.
  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
  StringRef S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GITPtrHigh);

  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
  S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, HighBitsOf32BitAddress);

  // On GFX908, in order to guarantee copying between AGPRs, we need a scratch
  // VGPR available at all times. For now, reserve highest available VGPR. After
  // RA, shift it to the lowest available unused VGPR if the one exist.
  if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
    VGPRForAGPRCopy =
        AMDGPU::VGPR_32RegClass.getRegister(ST.getMaxNumVGPRs(F) - 1);
  }
}
197*81ad6265SDimitry Andric 
// Create a copy of this function info for \p DestMF, as used by the
// MachineFunction cloning infrastructure. The allocator and the
// block-remapping table are unused by this implementation; the copy is a
// straight field-wise clone via MachineFunction::cloneInfo.
MachineFunctionInfo *SIMachineFunctionInfo::clone(
    BumpPtrAllocator &Allocator, MachineFunction &DestMF,
    const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
    const {
  return DestMF.cloneInfo<SIMachineFunctionInfo>(*this);
}
2040b57cec5SDimitry Andric 
2050b57cec5SDimitry Andric void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
2060b57cec5SDimitry Andric   limitOccupancy(getMaxWavesPerEU());
2070b57cec5SDimitry Andric   const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();
2080b57cec5SDimitry Andric   limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
2090b57cec5SDimitry Andric                  MF.getFunction()));
2100b57cec5SDimitry Andric }
2110b57cec5SDimitry Andric 
2125ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addPrivateSegmentBuffer(
2130b57cec5SDimitry Andric   const SIRegisterInfo &TRI) {
2140b57cec5SDimitry Andric   ArgInfo.PrivateSegmentBuffer =
2150b57cec5SDimitry Andric     ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2168bcb0991SDimitry Andric     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));
2170b57cec5SDimitry Andric   NumUserSGPRs += 4;
2180b57cec5SDimitry Andric   return ArgInfo.PrivateSegmentBuffer.getRegister();
2190b57cec5SDimitry Andric }
2200b57cec5SDimitry Andric 
2215ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
2220b57cec5SDimitry Andric   ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2230b57cec5SDimitry Andric     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
2240b57cec5SDimitry Andric   NumUserSGPRs += 2;
2250b57cec5SDimitry Andric   return ArgInfo.DispatchPtr.getRegister();
2260b57cec5SDimitry Andric }
2270b57cec5SDimitry Andric 
2285ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
2290b57cec5SDimitry Andric   ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2300b57cec5SDimitry Andric     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
2310b57cec5SDimitry Andric   NumUserSGPRs += 2;
2320b57cec5SDimitry Andric   return ArgInfo.QueuePtr.getRegister();
2330b57cec5SDimitry Andric }
2340b57cec5SDimitry Andric 
2355ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
2360b57cec5SDimitry Andric   ArgInfo.KernargSegmentPtr
2370b57cec5SDimitry Andric     = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2380b57cec5SDimitry Andric     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
2390b57cec5SDimitry Andric   NumUserSGPRs += 2;
2400b57cec5SDimitry Andric   return ArgInfo.KernargSegmentPtr.getRegister();
2410b57cec5SDimitry Andric }
2420b57cec5SDimitry Andric 
2435ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
2440b57cec5SDimitry Andric   ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2450b57cec5SDimitry Andric     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
2460b57cec5SDimitry Andric   NumUserSGPRs += 2;
2470b57cec5SDimitry Andric   return ArgInfo.DispatchID.getRegister();
2480b57cec5SDimitry Andric }
2490b57cec5SDimitry Andric 
2505ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
2510b57cec5SDimitry Andric   ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2520b57cec5SDimitry Andric     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
2530b57cec5SDimitry Andric   NumUserSGPRs += 2;
2540b57cec5SDimitry Andric   return ArgInfo.FlatScratchInit.getRegister();
2550b57cec5SDimitry Andric }
2560b57cec5SDimitry Andric 
2575ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
2580b57cec5SDimitry Andric   ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2590b57cec5SDimitry Andric     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
2600b57cec5SDimitry Andric   NumUserSGPRs += 2;
2610b57cec5SDimitry Andric   return ArgInfo.ImplicitBufferPtr.getRegister();
2620b57cec5SDimitry Andric }
2630b57cec5SDimitry Andric 
2645ffd83dbSDimitry Andric bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs,
2655ffd83dbSDimitry Andric                                              MCPhysReg Reg) {
2660b57cec5SDimitry Andric   for (unsigned I = 0; CSRegs[I]; ++I) {
2670b57cec5SDimitry Andric     if (CSRegs[I] == Reg)
2680b57cec5SDimitry Andric       return true;
2690b57cec5SDimitry Andric   }
2700b57cec5SDimitry Andric 
2710b57cec5SDimitry Andric   return false;
2720b57cec5SDimitry Andric }
2730b57cec5SDimitry Andric 
2740b57cec5SDimitry Andric /// \p returns true if \p NumLanes slots are available in VGPRs already used for
2750b57cec5SDimitry Andric /// SGPR spilling.
2760b57cec5SDimitry Andric //
2770b57cec5SDimitry Andric // FIXME: This only works after processFunctionBeforeFrameFinalized
2780b57cec5SDimitry Andric bool SIMachineFunctionInfo::haveFreeLanesForSGPRSpill(const MachineFunction &MF,
2790b57cec5SDimitry Andric                                                       unsigned NumNeed) const {
2800b57cec5SDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
2810b57cec5SDimitry Andric   unsigned WaveSize = ST.getWavefrontSize();
2820b57cec5SDimitry Andric   return NumVGPRSpillLanes + NumNeed <= WaveSize * SpillVGPRs.size();
2830b57cec5SDimitry Andric }
2840b57cec5SDimitry Andric 
/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
/// Returns true if all \p FI's lanes were assigned to VGPR lanes, false if
/// no (more) free VGPRs could be found — in that case any partial allocation
/// for \p FI is rolled back.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  // operator[] creates the (empty) entry on first use for this FI.
  std::vector<SIRegisterInfo::SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();

  // One 32-bit lane per 4 bytes of the spilled object.
  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;

  if (NumLanes > WaveSize)
    return false;

  assert(Size >= 4 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

  // Make sure to handle the case where a wide SGPR spill may span between two
  // VGPRs.
  for (unsigned I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    Register LaneVGPR;
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

    // Lane index 0 means the previous VGPR (if any) is full: grab a new one.
    if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we will not
        // partially spill the SGPR to VGPRs.
        SGPRToVGPRSpills.erase(FI);
        NumVGPRSpillLanes -= I;

        // FIXME: We can run out of free registers with split allocation if
        // IPRA is enabled and a called function already uses every VGPR.
#if 0
        DiagnosticInfoResourceLimit DiagOutOfRegs(MF.getFunction(),
                                                  "VGPRs for SGPR spilling",
                                                  0, DS_Error);
        MF.getFunction().getContext().diagnose(DiagOutOfRegs);
#endif
        return false;
      }

      Optional<int> SpillFI;
      // We need to preserve inactive lanes, so always save, even caller-save
      // registers.
      if (!isEntryFunction()) {
        SpillFI = FrameInfo.CreateSpillStackObject(4, Align(4));
      }

      SpillVGPRs.push_back(SGPRSpillVGPR(LaneVGPR, SpillFI));

      // Add this register as live-in to all blocks to avoid machine verifier
      // complaining about use of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      // Continue filling lanes of the most recently reserved VGPR.
      LaneVGPR = SpillVGPRs.back().VGPR;
    }

    SpillLanes.push_back(SIRegisterInfo::SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}
3560b57cec5SDimitry Andric 
/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
/// Either an AGPR is spilled to a VGPR, or vice versa (direction selected by
/// \p isAGPRtoVGPR).
/// Returns true if a \p FI can be eliminated completely.
bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
                                                    int FI,
                                                    bool isAGPRtoVGPR) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST =  MF.getSubtarget<GCNSubtarget>();

  assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));

  // operator[] creates the entry on first use for this FI.
  auto &Spill = VGPRToAGPRSpills[FI];

  // This has already been allocated.
  if (!Spill.Lanes.empty())
    return Spill.FullyAllocated;

  // One 32-bit lane per 4 bytes of the spilled object.
  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;
  Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);

  // Destination class is the opposite bank of the spilled register.
  const TargetRegisterClass &RC =
      isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
  auto Regs = RC.getRegisters();

  auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  Spill.FullyAllocated = true;

  // FIXME: Move allocation logic out of MachineFunctionInfo and initialize
  // once.
  BitVector OtherUsedRegs;
  OtherUsedRegs.resize(TRI->getNumRegs());

  // Registers preserved across calls must not be clobbered by spills.
  const uint32_t *CSRMask =
      TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
  if (CSRMask)
    OtherUsedRegs.setBitsInMask(CSRMask);

  // TODO: Should include register tuples, but doesn't matter with current
  // usage.
  for (MCPhysReg Reg : SpillAGPR)
    OtherUsedRegs.set(Reg);
  for (MCPhysReg Reg : SpillVGPR)
    OtherUsedRegs.set(Reg);

  // Scan the class's registers once, assigning lanes from the highest lane
  // index downward; NextSpillReg never moves backwards.
  SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
  for (int I = NumLanes - 1; I >= 0; --I) {
    NextSpillReg = std::find_if(
        NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
          return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
                 !OtherUsedRegs[Reg];
        });

    if (NextSpillReg == Regs.end()) { // Registers exhausted
      Spill.FullyAllocated = false;
      break;
    }

    OtherUsedRegs.set(*NextSpillReg);
    SpillRegs.push_back(*NextSpillReg);
    Spill.Lanes[I] = *NextSpillReg++;
  }

  return Spill.FullyAllocated;
}
4240b57cec5SDimitry Andric 
// Release frame objects that became dead once SGPRs were spilled to VGPR
// lanes. Returns true if any SGPR spill slot had to stay in memory (i.e. its
// stack ID was reset to the default stack).
bool SIMachineFunctionInfo::removeDeadFrameIndices(
    MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs) {
  // Remove dead frame indices from function frame, however keep FP & BP since
  // spills for them haven't been inserted yet. And also make sure to remove the
  // frame indices from `SGPRToVGPRSpills` data structure, otherwise, it could
  // result in an unexpected side effect and bug, in case of any re-mapping of
  // freed frame indices by later pass(es) like "stack slot coloring".
  // make_early_inc_range allows erasing the current entry while iterating.
  for (auto &R : make_early_inc_range(SGPRToVGPRSpills)) {
    if (R.first != FramePointerSaveIndex && R.first != BasePointerSaveIndex) {
      MFI.RemoveStackObject(R.first);
      SGPRToVGPRSpills.erase(R.first);
    }
  }

  bool HaveSGPRToMemory = false;

  if (ResetSGPRSpillStackIDs) {
    // All other SGPRs must be allocated on the default stack, so reset the
    // stack ID.
    for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e;
         ++i) {
      if (i != FramePointerSaveIndex && i != BasePointerSaveIndex) {
        if (MFI.getStackID(i) == TargetStackID::SGPRSpill) {
          MFI.setStackID(i, TargetStackID::Default);
          HaveSGPRToMemory = true;
        }
      }
    }
  }

  // Drop stack objects for VGPR<->AGPR spills whose slots are no longer
  // needed.
  for (auto &R : VGPRToAGPRSpills) {
    if (R.second.IsDead)
      MFI.RemoveStackObject(R.first);
  }

  return HaveSGPRToMemory;
}
462*81ad6265SDimitry Andric 
463*81ad6265SDimitry Andric void SIMachineFunctionInfo::allocateWWMReservedSpillSlots(
464*81ad6265SDimitry Andric     MachineFrameInfo &MFI, const SIRegisterInfo &TRI) {
465*81ad6265SDimitry Andric   assert(WWMReservedFrameIndexes.empty());
466*81ad6265SDimitry Andric 
467*81ad6265SDimitry Andric   WWMReservedFrameIndexes.resize(WWMReservedRegs.size());
468*81ad6265SDimitry Andric 
469*81ad6265SDimitry Andric   int I = 0;
470*81ad6265SDimitry Andric   for (Register VGPR : WWMReservedRegs) {
471*81ad6265SDimitry Andric     const TargetRegisterClass *RC = TRI.getPhysRegClass(VGPR);
472*81ad6265SDimitry Andric     WWMReservedFrameIndexes[I++] = MFI.CreateSpillStackObject(
473*81ad6265SDimitry Andric         TRI.getSpillSize(*RC), TRI.getSpillAlign(*RC));
474*81ad6265SDimitry Andric   }
4750b57cec5SDimitry Andric }
4760b57cec5SDimitry Andric 
477fe6060f1SDimitry Andric int SIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI,
478fe6060f1SDimitry Andric                                          const SIRegisterInfo &TRI) {
479fe6060f1SDimitry Andric   if (ScavengeFI)
480fe6060f1SDimitry Andric     return *ScavengeFI;
481fe6060f1SDimitry Andric   if (isEntryFunction()) {
482fe6060f1SDimitry Andric     ScavengeFI = MFI.CreateFixedObject(
483fe6060f1SDimitry Andric         TRI.getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
484fe6060f1SDimitry Andric   } else {
485fe6060f1SDimitry Andric     ScavengeFI = MFI.CreateStackObject(
486fe6060f1SDimitry Andric         TRI.getSpillSize(AMDGPU::SGPR_32RegClass),
487fe6060f1SDimitry Andric         TRI.getSpillAlign(AMDGPU::SGPR_32RegClass), false);
488fe6060f1SDimitry Andric   }
489fe6060f1SDimitry Andric   return *ScavengeFI;
490fe6060f1SDimitry Andric }
491fe6060f1SDimitry Andric 
// Next free user SGPR. User SGPRs are allocated contiguously from SGPR0
// upward; all of them must be claimed before any system SGPR is added.
MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
  assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
  return AMDGPU::SGPR0 + NumUserSGPRs;
}
4960b57cec5SDimitry Andric 
// Next free system SGPR, which follows immediately after all user SGPRs.
MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
  return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
}
5000b57cec5SDimitry Andric 
5015ffd83dbSDimitry Andric Register
5025ffd83dbSDimitry Andric SIMachineFunctionInfo::getGITPtrLoReg(const MachineFunction &MF) const {
5035ffd83dbSDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
5045ffd83dbSDimitry Andric   if (!ST.isAmdPalOS())
5055ffd83dbSDimitry Andric     return Register();
5065ffd83dbSDimitry Andric   Register GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in
5075ffd83dbSDimitry Andric   if (ST.hasMergedShaders()) {
5085ffd83dbSDimitry Andric     switch (MF.getFunction().getCallingConv()) {
5095ffd83dbSDimitry Andric     case CallingConv::AMDGPU_HS:
5105ffd83dbSDimitry Andric     case CallingConv::AMDGPU_GS:
5115ffd83dbSDimitry Andric       // Low GIT address is passed in s8 rather than s0 for an LS+HS or
5125ffd83dbSDimitry Andric       // ES+GS merged shader on gfx9+.
5135ffd83dbSDimitry Andric       GitPtrLo = AMDGPU::SGPR8;
5145ffd83dbSDimitry Andric       return GitPtrLo;
5155ffd83dbSDimitry Andric     default:
5165ffd83dbSDimitry Andric       return GitPtrLo;
5175ffd83dbSDimitry Andric     }
5185ffd83dbSDimitry Andric   }
5195ffd83dbSDimitry Andric   return GitPtrLo;
5205ffd83dbSDimitry Andric }
5215ffd83dbSDimitry Andric 
5225ffd83dbSDimitry Andric static yaml::StringValue regToString(Register Reg,
5230b57cec5SDimitry Andric                                      const TargetRegisterInfo &TRI) {
5240b57cec5SDimitry Andric   yaml::StringValue Dest;
5250b57cec5SDimitry Andric   {
5260b57cec5SDimitry Andric     raw_string_ostream OS(Dest.Value);
5270b57cec5SDimitry Andric     OS << printReg(Reg, &TRI);
5280b57cec5SDimitry Andric   }
5290b57cec5SDimitry Andric   return Dest;
5300b57cec5SDimitry Andric }
5310b57cec5SDimitry Andric 
5320b57cec5SDimitry Andric static Optional<yaml::SIArgumentInfo>
5330b57cec5SDimitry Andric convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
5340b57cec5SDimitry Andric                     const TargetRegisterInfo &TRI) {
5350b57cec5SDimitry Andric   yaml::SIArgumentInfo AI;
5360b57cec5SDimitry Andric 
5370b57cec5SDimitry Andric   auto convertArg = [&](Optional<yaml::SIArgument> &A,
5380b57cec5SDimitry Andric                         const ArgDescriptor &Arg) {
5390b57cec5SDimitry Andric     if (!Arg)
5400b57cec5SDimitry Andric       return false;
5410b57cec5SDimitry Andric 
5420b57cec5SDimitry Andric     // Create a register or stack argument.
5430b57cec5SDimitry Andric     yaml::SIArgument SA = yaml::SIArgument::createArgument(Arg.isRegister());
5440b57cec5SDimitry Andric     if (Arg.isRegister()) {
5450b57cec5SDimitry Andric       raw_string_ostream OS(SA.RegisterName.Value);
5460b57cec5SDimitry Andric       OS << printReg(Arg.getRegister(), &TRI);
5470b57cec5SDimitry Andric     } else
5480b57cec5SDimitry Andric       SA.StackOffset = Arg.getStackOffset();
5490b57cec5SDimitry Andric     // Check and update the optional mask.
5500b57cec5SDimitry Andric     if (Arg.isMasked())
5510b57cec5SDimitry Andric       SA.Mask = Arg.getMask();
5520b57cec5SDimitry Andric 
5530b57cec5SDimitry Andric     A = SA;
5540b57cec5SDimitry Andric     return true;
5550b57cec5SDimitry Andric   };
5560b57cec5SDimitry Andric 
5570b57cec5SDimitry Andric   bool Any = false;
5580b57cec5SDimitry Andric   Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
5590b57cec5SDimitry Andric   Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
5600b57cec5SDimitry Andric   Any |= convertArg(AI.QueuePtr, ArgInfo.QueuePtr);
5610b57cec5SDimitry Andric   Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr);
5620b57cec5SDimitry Andric   Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID);
5630b57cec5SDimitry Andric   Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit);
5640b57cec5SDimitry Andric   Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize);
5650b57cec5SDimitry Andric   Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX);
5660b57cec5SDimitry Andric   Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY);
5670b57cec5SDimitry Andric   Any |= convertArg(AI.WorkGroupIDZ, ArgInfo.WorkGroupIDZ);
5680b57cec5SDimitry Andric   Any |= convertArg(AI.WorkGroupInfo, ArgInfo.WorkGroupInfo);
5690b57cec5SDimitry Andric   Any |= convertArg(AI.PrivateSegmentWaveByteOffset,
5700b57cec5SDimitry Andric                     ArgInfo.PrivateSegmentWaveByteOffset);
5710b57cec5SDimitry Andric   Any |= convertArg(AI.ImplicitArgPtr, ArgInfo.ImplicitArgPtr);
5720b57cec5SDimitry Andric   Any |= convertArg(AI.ImplicitBufferPtr, ArgInfo.ImplicitBufferPtr);
5730b57cec5SDimitry Andric   Any |= convertArg(AI.WorkItemIDX, ArgInfo.WorkItemIDX);
5740b57cec5SDimitry Andric   Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY);
5750b57cec5SDimitry Andric   Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ);
5760b57cec5SDimitry Andric 
5770b57cec5SDimitry Andric   if (Any)
5780b57cec5SDimitry Andric     return AI;
5790b57cec5SDimitry Andric 
5800b57cec5SDimitry Andric   return None;
5810b57cec5SDimitry Andric }
5820b57cec5SDimitry Andric 
5830b57cec5SDimitry Andric yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
584fe6060f1SDimitry Andric     const llvm::SIMachineFunctionInfo &MFI, const TargetRegisterInfo &TRI,
585fe6060f1SDimitry Andric     const llvm::MachineFunction &MF)
5860b57cec5SDimitry Andric     : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
587e8d8bef9SDimitry Andric       MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()),
588*81ad6265SDimitry Andric       GDSSize(MFI.getGDSSize()),
589e8d8bef9SDimitry Andric       DynLDSAlign(MFI.getDynLDSAlign()), IsEntryFunction(MFI.isEntryFunction()),
5900b57cec5SDimitry Andric       NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
591e8d8bef9SDimitry Andric       MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()),
592e8d8bef9SDimitry Andric       HasSpilledSGPRs(MFI.hasSpilledSGPRs()),
593e8d8bef9SDimitry Andric       HasSpilledVGPRs(MFI.hasSpilledVGPRs()),
5948bcb0991SDimitry Andric       HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
595e8d8bef9SDimitry Andric       Occupancy(MFI.getOccupancy()),
5960b57cec5SDimitry Andric       ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
5970b57cec5SDimitry Andric       FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
5980b57cec5SDimitry Andric       StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
599*81ad6265SDimitry Andric       BytesInStackArgArea(MFI.getBytesInStackArgArea()),
600*81ad6265SDimitry Andric       ReturnsVoid(MFI.returnsVoid()),
601e8d8bef9SDimitry Andric       ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)), Mode(MFI.getMode()) {
602*81ad6265SDimitry Andric   for (Register Reg : MFI.WWMReservedRegs)
603*81ad6265SDimitry Andric     WWMReservedRegs.push_back(regToString(Reg, TRI));
604*81ad6265SDimitry Andric 
605*81ad6265SDimitry Andric   if (MFI.getVGPRForAGPRCopy())
606*81ad6265SDimitry Andric     VGPRForAGPRCopy = regToString(MFI.getVGPRForAGPRCopy(), TRI);
607fe6060f1SDimitry Andric   auto SFI = MFI.getOptionalScavengeFI();
608fe6060f1SDimitry Andric   if (SFI)
609fe6060f1SDimitry Andric     ScavengeFI = yaml::FrameIndex(*SFI, MF.getFrameInfo());
610e8d8bef9SDimitry Andric }
6110b57cec5SDimitry Andric 
6120b57cec5SDimitry Andric void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
6130b57cec5SDimitry Andric   MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
6140b57cec5SDimitry Andric }
6150b57cec5SDimitry Andric 
6160b57cec5SDimitry Andric bool SIMachineFunctionInfo::initializeBaseYamlFields(
617fe6060f1SDimitry Andric     const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF,
618fe6060f1SDimitry Andric     PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) {
6190b57cec5SDimitry Andric   ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
620*81ad6265SDimitry Andric   MaxKernArgAlign = YamlMFI.MaxKernArgAlign;
6210b57cec5SDimitry Andric   LDSSize = YamlMFI.LDSSize;
622*81ad6265SDimitry Andric   GDSSize = YamlMFI.GDSSize;
623e8d8bef9SDimitry Andric   DynLDSAlign = YamlMFI.DynLDSAlign;
6248bcb0991SDimitry Andric   HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
625e8d8bef9SDimitry Andric   Occupancy = YamlMFI.Occupancy;
6260b57cec5SDimitry Andric   IsEntryFunction = YamlMFI.IsEntryFunction;
6270b57cec5SDimitry Andric   NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
6280b57cec5SDimitry Andric   MemoryBound = YamlMFI.MemoryBound;
6290b57cec5SDimitry Andric   WaveLimiter = YamlMFI.WaveLimiter;
630e8d8bef9SDimitry Andric   HasSpilledSGPRs = YamlMFI.HasSpilledSGPRs;
631e8d8bef9SDimitry Andric   HasSpilledVGPRs = YamlMFI.HasSpilledVGPRs;
632*81ad6265SDimitry Andric   BytesInStackArgArea = YamlMFI.BytesInStackArgArea;
633*81ad6265SDimitry Andric   ReturnsVoid = YamlMFI.ReturnsVoid;
634fe6060f1SDimitry Andric 
635fe6060f1SDimitry Andric   if (YamlMFI.ScavengeFI) {
636fe6060f1SDimitry Andric     auto FIOrErr = YamlMFI.ScavengeFI->getFI(MF.getFrameInfo());
637fe6060f1SDimitry Andric     if (!FIOrErr) {
638fe6060f1SDimitry Andric       // Create a diagnostic for a the frame index.
639fe6060f1SDimitry Andric       const MemoryBuffer &Buffer =
640fe6060f1SDimitry Andric           *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID());
641fe6060f1SDimitry Andric 
642fe6060f1SDimitry Andric       Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1, 1,
643fe6060f1SDimitry Andric                            SourceMgr::DK_Error, toString(FIOrErr.takeError()),
644fe6060f1SDimitry Andric                            "", None, None);
645fe6060f1SDimitry Andric       SourceRange = YamlMFI.ScavengeFI->SourceRange;
646fe6060f1SDimitry Andric       return true;
647fe6060f1SDimitry Andric     }
648fe6060f1SDimitry Andric     ScavengeFI = *FIOrErr;
649fe6060f1SDimitry Andric   } else {
650fe6060f1SDimitry Andric     ScavengeFI = None;
651fe6060f1SDimitry Andric   }
6520b57cec5SDimitry Andric   return false;
6530b57cec5SDimitry Andric }
6545ffd83dbSDimitry Andric 
655*81ad6265SDimitry Andric bool SIMachineFunctionInfo::mayUseAGPRs(const MachineFunction &MF) const {
656*81ad6265SDimitry Andric   for (const BasicBlock &BB : MF.getFunction()) {
657*81ad6265SDimitry Andric     for (const Instruction &I : BB) {
658*81ad6265SDimitry Andric       const auto *CB = dyn_cast<CallBase>(&I);
659*81ad6265SDimitry Andric       if (!CB)
660*81ad6265SDimitry Andric         continue;
661*81ad6265SDimitry Andric 
662*81ad6265SDimitry Andric       if (CB->isInlineAsm()) {
663*81ad6265SDimitry Andric         const InlineAsm *IA = dyn_cast<InlineAsm>(CB->getCalledOperand());
664*81ad6265SDimitry Andric         for (const auto &CI : IA->ParseConstraints()) {
665*81ad6265SDimitry Andric           for (StringRef Code : CI.Codes) {
666*81ad6265SDimitry Andric             Code.consume_front("{");
667*81ad6265SDimitry Andric             if (Code.startswith("a"))
668*81ad6265SDimitry Andric               return true;
669*81ad6265SDimitry Andric           }
670*81ad6265SDimitry Andric         }
671*81ad6265SDimitry Andric         continue;
672*81ad6265SDimitry Andric       }
673*81ad6265SDimitry Andric 
674*81ad6265SDimitry Andric       const Function *Callee =
675*81ad6265SDimitry Andric           dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
676*81ad6265SDimitry Andric       if (!Callee)
677*81ad6265SDimitry Andric         return true;
678*81ad6265SDimitry Andric 
679*81ad6265SDimitry Andric       if (Callee->getIntrinsicID() == Intrinsic::not_intrinsic)
680*81ad6265SDimitry Andric         return true;
681*81ad6265SDimitry Andric     }
682*81ad6265SDimitry Andric   }
683*81ad6265SDimitry Andric 
684*81ad6265SDimitry Andric   return false;
685*81ad6265SDimitry Andric }
686*81ad6265SDimitry Andric 
687349cc55cSDimitry Andric bool SIMachineFunctionInfo::usesAGPRs(const MachineFunction &MF) const {
688349cc55cSDimitry Andric   if (UsesAGPRs)
689349cc55cSDimitry Andric     return *UsesAGPRs;
690349cc55cSDimitry Andric 
691*81ad6265SDimitry Andric   if (!mayNeedAGPRs()) {
692*81ad6265SDimitry Andric     UsesAGPRs = false;
693*81ad6265SDimitry Andric     return false;
694*81ad6265SDimitry Andric   }
695*81ad6265SDimitry Andric 
696349cc55cSDimitry Andric   if (!AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv()) ||
697349cc55cSDimitry Andric       MF.getFrameInfo().hasCalls()) {
698349cc55cSDimitry Andric     UsesAGPRs = true;
699349cc55cSDimitry Andric     return true;
700349cc55cSDimitry Andric   }
701349cc55cSDimitry Andric 
702349cc55cSDimitry Andric   const MachineRegisterInfo &MRI = MF.getRegInfo();
703349cc55cSDimitry Andric 
704349cc55cSDimitry Andric   for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
705349cc55cSDimitry Andric     const Register Reg = Register::index2VirtReg(I);
706349cc55cSDimitry Andric     const TargetRegisterClass *RC = MRI.getRegClassOrNull(Reg);
707349cc55cSDimitry Andric     if (RC && SIRegisterInfo::isAGPRClass(RC)) {
708349cc55cSDimitry Andric       UsesAGPRs = true;
709349cc55cSDimitry Andric       return true;
710349cc55cSDimitry Andric     } else if (!RC && !MRI.use_empty(Reg) && MRI.getType(Reg).isValid()) {
711349cc55cSDimitry Andric       // Defer caching UsesAGPRs, function might not yet been regbank selected.
712349cc55cSDimitry Andric       return true;
713349cc55cSDimitry Andric     }
714349cc55cSDimitry Andric   }
715349cc55cSDimitry Andric 
716349cc55cSDimitry Andric   for (MCRegister Reg : AMDGPU::AGPR_32RegClass) {
717349cc55cSDimitry Andric     if (MRI.isPhysRegUsed(Reg)) {
718349cc55cSDimitry Andric       UsesAGPRs = true;
719349cc55cSDimitry Andric       return true;
720349cc55cSDimitry Andric     }
721349cc55cSDimitry Andric   }
722349cc55cSDimitry Andric 
723349cc55cSDimitry Andric   UsesAGPRs = false;
724349cc55cSDimitry Andric   return false;
725349cc55cSDimitry Andric }
726