xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp (revision 349cc55c9796c4596a5b9904cd3281af295f878f)
10b57cec5SDimitry Andric //===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric 
90b57cec5SDimitry Andric #include "SIMachineFunctionInfo.h"
105ffd83dbSDimitry Andric #include "AMDGPUTargetMachine.h"
11fe6060f1SDimitry Andric #include "AMDGPUSubtarget.h"
12fe6060f1SDimitry Andric #include "SIRegisterInfo.h"
13fe6060f1SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
14fe6060f1SDimitry Andric #include "Utils/AMDGPUBaseInfo.h"
15fe6060f1SDimitry Andric #include "llvm/ADT/Optional.h"
16fe6060f1SDimitry Andric #include "llvm/CodeGen/LiveIntervals.h"
17fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h"
18fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h"
19fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineFunction.h"
20fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
21fe6060f1SDimitry Andric #include "llvm/CodeGen/MIRParser/MIParser.h"
22fe6060f1SDimitry Andric #include "llvm/IR/CallingConv.h"
23fe6060f1SDimitry Andric #include "llvm/IR/DiagnosticInfo.h"
24fe6060f1SDimitry Andric #include "llvm/IR/Function.h"
25fe6060f1SDimitry Andric #include <cassert>
26fe6060f1SDimitry Andric #include <vector>
270b57cec5SDimitry Andric 
// Number of lanes in a full wavefront (upper bound for spill bookkeeping).
// NOTE(review): not referenced in the visible portion of this file — confirm
// it is still used further down before removing.
#define MAX_LANES 64

using namespace llvm;
310b57cec5SDimitry Andric 
// Build the per-function AMDGPU state for \p MF. The calling convention,
// function attributes, and subtarget features together determine which
// special inputs (user/system SGPRs, workitem/workgroup IDs, scratch and
// flat-scratch setup) argument lowering must later materialize.
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    ImplicitBufferPtr(false),
    ImplicitArgPtr(false),
    GITPtrHigh(0xffffffff),
    HighBitsOf32BitAddress(0),
    GDSSize(0) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const Function &F = MF.getFunction();
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  WavesPerEU = ST.getWavesPerEU(F);

  // Initial occupancy estimate based on LDS usage; may be tightened later
  // via limitOccupancy().
  Occupancy = ST.computeOccupancy(F, getLDSSize());
  CallingConv::ID CC = F.getCallingConv();

  // FIXME: Should have analysis or something rather than attribute to detect
  // calls.
  const bool HasCalls = F.hasFnAttribute("amdgpu-calls");

  // Enable all kernel inputs if we have the fixed ABI. Don't bother if we don't
  // have any calls.
  const bool UseFixedABI = AMDGPUTargetMachine::EnableFixedFunctionABI &&
                           CC != CallingConv::AMDGPU_Gfx &&
                           (!isEntryFunction() || HasCalls);
  const bool IsKernel = CC == CallingConv::AMDGPU_KERNEL ||
                        CC == CallingConv::SPIR_KERNEL;

  if (IsKernel) {
    // Kernels with explicit arguments, or with implicit argument bytes
    // required by the subtarget, need the kernarg segment pointer.
    if (!F.arg_empty() || ST.getImplicitArgNumBytes(F) != 0)
      KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  }

  if (!isEntryFunction()) {
    if (UseFixedABI)
      ArgInfo = AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;

    // TODO: Pick a high register, and shift down, similar to a kernel.
    FrameOffsetReg = AMDGPU::SGPR33;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    if (!ST.enableFlatScratch()) {
      // Non-entry functions have no special inputs for now, other registers
      // required for scratch access.
      ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;

      ArgInfo.PrivateSegmentBuffer =
        ArgDescriptor::createRegister(ScratchRSrcReg);
    }

    if (!F.hasFnAttribute("amdgpu-no-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    ImplicitArgPtr = false;
    MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
                               MaxKernArgAlign);
  }

  bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
  if (isAmdHsaOrMesa && !ST.enableFlatScratch())
    PrivateSegmentBuffer = true;
  else if (ST.isMesaGfxShader(F))
    ImplicitBufferPtr = true;

  if (UseFixedABI) {
    // Fixed ABI: conservatively enable every special input.
    DispatchPtr = true;
    QueuePtr = true;
    ImplicitArgPtr = true;
    WorkGroupIDX = true;
    WorkGroupIDY = true;
    WorkGroupIDZ = true;
    WorkItemIDX = true;
    WorkItemIDY = true;
    WorkItemIDZ = true;

    // FIXME: We don't need this?
    DispatchID = true;
  } else if (!AMDGPU::isGraphics(CC)) {
    // Compute-like conventions: enable each input unless an "amdgpu-no-*"
    // attribute asserts it is unused. Kernels always get X IDs.
    if (IsKernel || !F.hasFnAttribute("amdgpu-no-workgroup-id-x"))
      WorkGroupIDX = true;

    if (!F.hasFnAttribute("amdgpu-no-workgroup-id-y"))
      WorkGroupIDY = true;

    if (!F.hasFnAttribute("amdgpu-no-workgroup-id-z"))
      WorkGroupIDZ = true;

    if (IsKernel || !F.hasFnAttribute("amdgpu-no-workitem-id-x"))
      WorkItemIDX = true;

    if (!F.hasFnAttribute("amdgpu-no-workitem-id-y"))
      WorkItemIDY = true;

    if (!F.hasFnAttribute("amdgpu-no-workitem-id-z"))
      WorkItemIDZ = true;

    if (!F.hasFnAttribute("amdgpu-no-dispatch-ptr"))
      DispatchPtr = true;

    if (!F.hasFnAttribute("amdgpu-no-queue-ptr"))
      QueuePtr = true;

    if (!F.hasFnAttribute("amdgpu-no-dispatch-id"))
      DispatchID = true;
  }

  // FIXME: This attribute is a hack, we just need an analysis on the function
  // to look for allocas.
  bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects");

  // TODO: This could be refined a lot. The attribute is a poor way of
  // detecting calls or stack objects that may require it before argument
  // lowering.
  if (ST.hasFlatAddressSpace() && isEntryFunction() &&
      (isAmdHsaOrMesa || ST.enableFlatScratch()) &&
      (HasCalls || HasStackObjects || ST.enableFlatScratch()) &&
      !ST.flatScratchIsArchitected()) {
    FlatScratchInit = true;
  }

  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    if (!ST.flatScratchIsArchitected()) {
      PrivateSegmentWaveByteOffset = true;

      // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
      if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
          (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
        ArgInfo.PrivateSegmentWaveByteOffset =
            ArgDescriptor::createRegister(AMDGPU::SGPR5);
    }
  }

  // Read optional integer-valued attributes; when absent, the member
  // initializers above (0xffffffff / 0 / 0) remain in effect.
  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
  StringRef S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GITPtrHigh);

  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
  S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, HighBitsOf32BitAddress);

  S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GDSSize);
}
2000b57cec5SDimitry Andric 
2010b57cec5SDimitry Andric void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
2020b57cec5SDimitry Andric   limitOccupancy(getMaxWavesPerEU());
2030b57cec5SDimitry Andric   const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();
2040b57cec5SDimitry Andric   limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
2050b57cec5SDimitry Andric                  MF.getFunction()));
2060b57cec5SDimitry Andric }
2070b57cec5SDimitry Andric 
2085ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addPrivateSegmentBuffer(
2090b57cec5SDimitry Andric   const SIRegisterInfo &TRI) {
2100b57cec5SDimitry Andric   ArgInfo.PrivateSegmentBuffer =
2110b57cec5SDimitry Andric     ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2128bcb0991SDimitry Andric     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));
2130b57cec5SDimitry Andric   NumUserSGPRs += 4;
2140b57cec5SDimitry Andric   return ArgInfo.PrivateSegmentBuffer.getRegister();
2150b57cec5SDimitry Andric }
2160b57cec5SDimitry Andric 
2175ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
2180b57cec5SDimitry Andric   ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2190b57cec5SDimitry Andric     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
2200b57cec5SDimitry Andric   NumUserSGPRs += 2;
2210b57cec5SDimitry Andric   return ArgInfo.DispatchPtr.getRegister();
2220b57cec5SDimitry Andric }
2230b57cec5SDimitry Andric 
2245ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
2250b57cec5SDimitry Andric   ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2260b57cec5SDimitry Andric     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
2270b57cec5SDimitry Andric   NumUserSGPRs += 2;
2280b57cec5SDimitry Andric   return ArgInfo.QueuePtr.getRegister();
2290b57cec5SDimitry Andric }
2300b57cec5SDimitry Andric 
2315ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
2320b57cec5SDimitry Andric   ArgInfo.KernargSegmentPtr
2330b57cec5SDimitry Andric     = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2340b57cec5SDimitry Andric     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
2350b57cec5SDimitry Andric   NumUserSGPRs += 2;
2360b57cec5SDimitry Andric   return ArgInfo.KernargSegmentPtr.getRegister();
2370b57cec5SDimitry Andric }
2380b57cec5SDimitry Andric 
2395ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
2400b57cec5SDimitry Andric   ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2410b57cec5SDimitry Andric     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
2420b57cec5SDimitry Andric   NumUserSGPRs += 2;
2430b57cec5SDimitry Andric   return ArgInfo.DispatchID.getRegister();
2440b57cec5SDimitry Andric }
2450b57cec5SDimitry Andric 
2465ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
2470b57cec5SDimitry Andric   ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2480b57cec5SDimitry Andric     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
2490b57cec5SDimitry Andric   NumUserSGPRs += 2;
2500b57cec5SDimitry Andric   return ArgInfo.FlatScratchInit.getRegister();
2510b57cec5SDimitry Andric }
2520b57cec5SDimitry Andric 
2535ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
2540b57cec5SDimitry Andric   ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2550b57cec5SDimitry Andric     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
2560b57cec5SDimitry Andric   NumUserSGPRs += 2;
2570b57cec5SDimitry Andric   return ArgInfo.ImplicitBufferPtr.getRegister();
2580b57cec5SDimitry Andric }
2590b57cec5SDimitry Andric 
2605ffd83dbSDimitry Andric bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs,
2615ffd83dbSDimitry Andric                                              MCPhysReg Reg) {
2620b57cec5SDimitry Andric   for (unsigned I = 0; CSRegs[I]; ++I) {
2630b57cec5SDimitry Andric     if (CSRegs[I] == Reg)
2640b57cec5SDimitry Andric       return true;
2650b57cec5SDimitry Andric   }
2660b57cec5SDimitry Andric 
2670b57cec5SDimitry Andric   return false;
2680b57cec5SDimitry Andric }
2690b57cec5SDimitry Andric 
2700b57cec5SDimitry Andric /// \p returns true if \p NumLanes slots are available in VGPRs already used for
2710b57cec5SDimitry Andric /// SGPR spilling.
2720b57cec5SDimitry Andric //
2730b57cec5SDimitry Andric // FIXME: This only works after processFunctionBeforeFrameFinalized
2740b57cec5SDimitry Andric bool SIMachineFunctionInfo::haveFreeLanesForSGPRSpill(const MachineFunction &MF,
2750b57cec5SDimitry Andric                                                       unsigned NumNeed) const {
2760b57cec5SDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
2770b57cec5SDimitry Andric   unsigned WaveSize = ST.getWavefrontSize();
2780b57cec5SDimitry Andric   return NumVGPRSpillLanes + NumNeed <= WaveSize * SpillVGPRs.size();
2790b57cec5SDimitry Andric }
2800b57cec5SDimitry Andric 
/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
/// Allocates one VGPR lane per 4 bytes of the spill slot; returns false (and
/// rolls back) if no VGPR can be found, so the SGPR is never partially
/// spilled to VGPRs.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  // One 32-bit lane per 4 bytes of the spilled SGPR value.
  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;

  if (NumLanes > WaveSize)
    return false;

  assert(Size >= 4 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

  // Make sure to handle the case where a wide SGPR spill may span between two
  // VGPRs.
  for (unsigned I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    Register LaneVGPR;
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

    // Reserve a VGPR (when NumVGPRSpillLanes = 0, WaveSize, 2*WaveSize, ..) and
    // when one of the two conditions is true:
    // 1. One reserved VGPR being tracked by VGPRReservedForSGPRSpill is not yet
    // reserved.
    // 2. All spill lanes of reserved VGPR(s) are full and another spill lane is
    // required.
    if (FuncInfo->VGPRReservedForSGPRSpill && NumVGPRSpillLanes < WaveSize) {
      assert(FuncInfo->VGPRReservedForSGPRSpill == SpillVGPRs.back().VGPR);
      LaneVGPR = FuncInfo->VGPRReservedForSGPRSpill;
    } else if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we will not
        // partially spill the SGPR to VGPRs.
        SGPRToVGPRSpills.erase(FI);
        NumVGPRSpillLanes -= I;

#if 0
        DiagnosticInfoResourceLimit DiagOutOfRegs(MF.getFunction(),
                                                  "VGPRs for SGPR spilling",
                                                  0, DS_Error);
        MF.getFunction().getContext().diagnose(DiagOutOfRegs);
#endif
        return false;
      }

      Optional<int> SpillFI;
      // We need to preserve inactive lanes, so always save, even caller-save
      // registers.
      if (!isEntryFunction()) {
        SpillFI = FrameInfo.CreateSpillStackObject(4, Align(4));
      }

      SpillVGPRs.push_back(SGPRSpillVGPR(LaneVGPR, SpillFI));

      // Add this register as live-in to all blocks to avoid machine verifier
      // complaining about use of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      // Continue filling lanes of the most recently reserved VGPR.
      LaneVGPR = SpillVGPRs.back().VGPR;
    }

    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}
3600b57cec5SDimitry Andric 
3615ffd83dbSDimitry Andric /// Reserve a VGPR for spilling of SGPRs
3625ffd83dbSDimitry Andric bool SIMachineFunctionInfo::reserveVGPRforSGPRSpills(MachineFunction &MF) {
3635ffd83dbSDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
3645ffd83dbSDimitry Andric   const SIRegisterInfo *TRI = ST.getRegisterInfo();
3655ffd83dbSDimitry Andric   SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
3665ffd83dbSDimitry Andric 
3675ffd83dbSDimitry Andric   Register LaneVGPR = TRI->findUnusedRegister(
3685ffd83dbSDimitry Andric       MF.getRegInfo(), &AMDGPU::VGPR_32RegClass, MF, true);
369e8d8bef9SDimitry Andric   if (LaneVGPR == Register())
370e8d8bef9SDimitry Andric     return false;
371fe6060f1SDimitry Andric   SpillVGPRs.push_back(SGPRSpillVGPR(LaneVGPR, None));
3725ffd83dbSDimitry Andric   FuncInfo->VGPRReservedForSGPRSpill = LaneVGPR;
3735ffd83dbSDimitry Andric   return true;
3745ffd83dbSDimitry Andric }
3755ffd83dbSDimitry Andric 
/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
/// Either AGPR is spilled to VGPR or vice versa.
/// Returns true if a \p FI can be eliminated completely.
bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
                                                    int FI,
                                                    bool isAGPRtoVGPR) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST =  MF.getSubtarget<GCNSubtarget>();

  assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));

  auto &Spill = VGPRToAGPRSpills[FI];

  // This has already been allocated.
  if (!Spill.Lanes.empty())
    return Spill.FullyAllocated;

  // One 32-bit lane register per 4 bytes of the spill slot.
  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;
  Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);

  // Spill into the opposite register file from the source.
  const TargetRegisterClass &RC =
      isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
  auto Regs = RC.getRegisters();

  auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  Spill.FullyAllocated = true;

  // FIXME: Move allocation logic out of MachineFunctionInfo and initialize
  // once.
  BitVector OtherUsedRegs;
  OtherUsedRegs.resize(TRI->getNumRegs());

  // Exclude call-preserved registers for this calling convention.
  const uint32_t *CSRMask =
      TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
  if (CSRMask)
    OtherUsedRegs.setBitsInMask(CSRMask);

  // TODO: Should include register tuples, but doesn't matter with current
  // usage.
  for (MCPhysReg Reg : SpillAGPR)
    OtherUsedRegs.set(Reg);
  for (MCPhysReg Reg : SpillVGPR)
    OtherUsedRegs.set(Reg);

  // Fill lanes from the highest index down; NextSpillReg only advances, so
  // each chosen register is used for exactly one lane.
  SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
  for (int I = NumLanes - 1; I >= 0; --I) {
    NextSpillReg = std::find_if(
        NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
          return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
                 !OtherUsedRegs[Reg];
        });

    if (NextSpillReg == Regs.end()) { // Registers exhausted
      Spill.FullyAllocated = false;
      break;
    }

    OtherUsedRegs.set(*NextSpillReg);
    SpillRegs.push_back(*NextSpillReg);
    Spill.Lanes[I] = *NextSpillReg++;
  }

  return Spill.FullyAllocated;
}
4430b57cec5SDimitry Andric 
4440b57cec5SDimitry Andric void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) {
445*349cc55cSDimitry Andric   // Remove dead frame indices from function frame, however keep FP & BP since
446*349cc55cSDimitry Andric   // spills for them haven't been inserted yet. And also make sure to remove the
447*349cc55cSDimitry Andric   // frame indices from `SGPRToVGPRSpills` data structure, otherwise, it could
448*349cc55cSDimitry Andric   // result in an unexpected side effect and bug, in case of any re-mapping of
449*349cc55cSDimitry Andric   // freed frame indices by later pass(es) like "stack slot coloring".
450*349cc55cSDimitry Andric   for (auto &R : make_early_inc_range(SGPRToVGPRSpills)) {
451*349cc55cSDimitry Andric     if (R.first != FramePointerSaveIndex && R.first != BasePointerSaveIndex) {
4520b57cec5SDimitry Andric       MFI.RemoveStackObject(R.first);
453*349cc55cSDimitry Andric       SGPRToVGPRSpills.erase(R.first);
454*349cc55cSDimitry Andric     }
4550b57cec5SDimitry Andric   }
4560b57cec5SDimitry Andric 
4570b57cec5SDimitry Andric   // All other SPGRs must be allocated on the default stack, so reset the stack
4580b57cec5SDimitry Andric   // ID.
4590b57cec5SDimitry Andric   for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e;
4600b57cec5SDimitry Andric        ++i)
4615ffd83dbSDimitry Andric     if (i != FramePointerSaveIndex && i != BasePointerSaveIndex)
4620b57cec5SDimitry Andric       MFI.setStackID(i, TargetStackID::Default);
4630b57cec5SDimitry Andric 
4640b57cec5SDimitry Andric   for (auto &R : VGPRToAGPRSpills) {
4650b57cec5SDimitry Andric     if (R.second.FullyAllocated)
4660b57cec5SDimitry Andric       MFI.RemoveStackObject(R.first);
4670b57cec5SDimitry Andric   }
4680b57cec5SDimitry Andric }
4690b57cec5SDimitry Andric 
470fe6060f1SDimitry Andric int SIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI,
471fe6060f1SDimitry Andric                                          const SIRegisterInfo &TRI) {
472fe6060f1SDimitry Andric   if (ScavengeFI)
473fe6060f1SDimitry Andric     return *ScavengeFI;
474fe6060f1SDimitry Andric   if (isEntryFunction()) {
475fe6060f1SDimitry Andric     ScavengeFI = MFI.CreateFixedObject(
476fe6060f1SDimitry Andric         TRI.getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
477fe6060f1SDimitry Andric   } else {
478fe6060f1SDimitry Andric     ScavengeFI = MFI.CreateStackObject(
479fe6060f1SDimitry Andric         TRI.getSpillSize(AMDGPU::SGPR_32RegClass),
480fe6060f1SDimitry Andric         TRI.getSpillAlign(AMDGPU::SGPR_32RegClass), false);
481fe6060f1SDimitry Andric   }
482fe6060f1SDimitry Andric   return *ScavengeFI;
483fe6060f1SDimitry Andric }
484fe6060f1SDimitry Andric 
4850b57cec5SDimitry Andric MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
4860b57cec5SDimitry Andric   assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
4870b57cec5SDimitry Andric   return AMDGPU::SGPR0 + NumUserSGPRs;
4880b57cec5SDimitry Andric }
4890b57cec5SDimitry Andric 
4900b57cec5SDimitry Andric MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
4910b57cec5SDimitry Andric   return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
4920b57cec5SDimitry Andric }
4930b57cec5SDimitry Andric 
4945ffd83dbSDimitry Andric Register
4955ffd83dbSDimitry Andric SIMachineFunctionInfo::getGITPtrLoReg(const MachineFunction &MF) const {
4965ffd83dbSDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
4975ffd83dbSDimitry Andric   if (!ST.isAmdPalOS())
4985ffd83dbSDimitry Andric     return Register();
4995ffd83dbSDimitry Andric   Register GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in
5005ffd83dbSDimitry Andric   if (ST.hasMergedShaders()) {
5015ffd83dbSDimitry Andric     switch (MF.getFunction().getCallingConv()) {
5025ffd83dbSDimitry Andric     case CallingConv::AMDGPU_HS:
5035ffd83dbSDimitry Andric     case CallingConv::AMDGPU_GS:
5045ffd83dbSDimitry Andric       // Low GIT address is passed in s8 rather than s0 for an LS+HS or
5055ffd83dbSDimitry Andric       // ES+GS merged shader on gfx9+.
5065ffd83dbSDimitry Andric       GitPtrLo = AMDGPU::SGPR8;
5075ffd83dbSDimitry Andric       return GitPtrLo;
5085ffd83dbSDimitry Andric     default:
5095ffd83dbSDimitry Andric       return GitPtrLo;
5105ffd83dbSDimitry Andric     }
5115ffd83dbSDimitry Andric   }
5125ffd83dbSDimitry Andric   return GitPtrLo;
5135ffd83dbSDimitry Andric }
5145ffd83dbSDimitry Andric 
5155ffd83dbSDimitry Andric static yaml::StringValue regToString(Register Reg,
5160b57cec5SDimitry Andric                                      const TargetRegisterInfo &TRI) {
5170b57cec5SDimitry Andric   yaml::StringValue Dest;
5180b57cec5SDimitry Andric   {
5190b57cec5SDimitry Andric     raw_string_ostream OS(Dest.Value);
5200b57cec5SDimitry Andric     OS << printReg(Reg, &TRI);
5210b57cec5SDimitry Andric   }
5220b57cec5SDimitry Andric   return Dest;
5230b57cec5SDimitry Andric }
5240b57cec5SDimitry Andric 
5250b57cec5SDimitry Andric static Optional<yaml::SIArgumentInfo>
5260b57cec5SDimitry Andric convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
5270b57cec5SDimitry Andric                     const TargetRegisterInfo &TRI) {
5280b57cec5SDimitry Andric   yaml::SIArgumentInfo AI;
5290b57cec5SDimitry Andric 
5300b57cec5SDimitry Andric   auto convertArg = [&](Optional<yaml::SIArgument> &A,
5310b57cec5SDimitry Andric                         const ArgDescriptor &Arg) {
5320b57cec5SDimitry Andric     if (!Arg)
5330b57cec5SDimitry Andric       return false;
5340b57cec5SDimitry Andric 
5350b57cec5SDimitry Andric     // Create a register or stack argument.
5360b57cec5SDimitry Andric     yaml::SIArgument SA = yaml::SIArgument::createArgument(Arg.isRegister());
5370b57cec5SDimitry Andric     if (Arg.isRegister()) {
5380b57cec5SDimitry Andric       raw_string_ostream OS(SA.RegisterName.Value);
5390b57cec5SDimitry Andric       OS << printReg(Arg.getRegister(), &TRI);
5400b57cec5SDimitry Andric     } else
5410b57cec5SDimitry Andric       SA.StackOffset = Arg.getStackOffset();
5420b57cec5SDimitry Andric     // Check and update the optional mask.
5430b57cec5SDimitry Andric     if (Arg.isMasked())
5440b57cec5SDimitry Andric       SA.Mask = Arg.getMask();
5450b57cec5SDimitry Andric 
5460b57cec5SDimitry Andric     A = SA;
5470b57cec5SDimitry Andric     return true;
5480b57cec5SDimitry Andric   };
5490b57cec5SDimitry Andric 
5500b57cec5SDimitry Andric   bool Any = false;
5510b57cec5SDimitry Andric   Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
5520b57cec5SDimitry Andric   Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
5530b57cec5SDimitry Andric   Any |= convertArg(AI.QueuePtr, ArgInfo.QueuePtr);
5540b57cec5SDimitry Andric   Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr);
5550b57cec5SDimitry Andric   Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID);
5560b57cec5SDimitry Andric   Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit);
5570b57cec5SDimitry Andric   Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize);
5580b57cec5SDimitry Andric   Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX);
5590b57cec5SDimitry Andric   Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY);
5600b57cec5SDimitry Andric   Any |= convertArg(AI.WorkGroupIDZ, ArgInfo.WorkGroupIDZ);
5610b57cec5SDimitry Andric   Any |= convertArg(AI.WorkGroupInfo, ArgInfo.WorkGroupInfo);
5620b57cec5SDimitry Andric   Any |= convertArg(AI.PrivateSegmentWaveByteOffset,
5630b57cec5SDimitry Andric                     ArgInfo.PrivateSegmentWaveByteOffset);
5640b57cec5SDimitry Andric   Any |= convertArg(AI.ImplicitArgPtr, ArgInfo.ImplicitArgPtr);
5650b57cec5SDimitry Andric   Any |= convertArg(AI.ImplicitBufferPtr, ArgInfo.ImplicitBufferPtr);
5660b57cec5SDimitry Andric   Any |= convertArg(AI.WorkItemIDX, ArgInfo.WorkItemIDX);
5670b57cec5SDimitry Andric   Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY);
5680b57cec5SDimitry Andric   Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ);
5690b57cec5SDimitry Andric 
5700b57cec5SDimitry Andric   if (Any)
5710b57cec5SDimitry Andric     return AI;
5720b57cec5SDimitry Andric 
5730b57cec5SDimitry Andric   return None;
5740b57cec5SDimitry Andric }
5750b57cec5SDimitry Andric 
// Builds the YAML mirror of the in-memory SIMachineFunctionInfo for MIR
// printing: scalar fields are copied, registers are converted to their
// printable names, and the argument info is translated via
// convertArgumentInfo.
yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
    const llvm::SIMachineFunctionInfo &MFI, const TargetRegisterInfo &TRI,
    const llvm::MachineFunction &MF)
    : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
      MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()),
      DynLDSAlign(MFI.getDynLDSAlign()), IsEntryFunction(MFI.isEntryFunction()),
      NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
      MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()),
      HasSpilledSGPRs(MFI.hasSpilledSGPRs()),
      HasSpilledVGPRs(MFI.hasSpilledVGPRs()),
      HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
      Occupancy(MFI.getOccupancy()),
      ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
      FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
      StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
      ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)), Mode(MFI.getMode()) {
  // The scavenger frame index is optional; only serialize it when present.
  auto SFI = MFI.getOptionalScavengeFI();
  if (SFI)
    ScavengeFI = yaml::FrameIndex(*SFI, MF.getFrameInfo());
}
5960b57cec5SDimitry Andric 
// Forwards to the YAML MappingTraits specialization so the MIR parser and
// printer can (de)serialize this structure.
void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
  MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
}
6000b57cec5SDimitry Andric 
// Populates this function info from parsed MIR YAML. Returns true on error,
// filling \p Error and \p SourceRange with a diagnostic for the offending
// YAML construct (currently only a bad scavenger frame index).
bool SIMachineFunctionInfo::initializeBaseYamlFields(
    const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF,
    PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) {
  ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
  MaxKernArgAlign = assumeAligned(YamlMFI.MaxKernArgAlign);
  LDSSize = YamlMFI.LDSSize;
  DynLDSAlign = YamlMFI.DynLDSAlign;
  HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
  Occupancy = YamlMFI.Occupancy;
  IsEntryFunction = YamlMFI.IsEntryFunction;
  NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
  MemoryBound = YamlMFI.MemoryBound;
  WaveLimiter = YamlMFI.WaveLimiter;
  HasSpilledSGPRs = YamlMFI.HasSpilledSGPRs;
  HasSpilledVGPRs = YamlMFI.HasSpilledVGPRs;

  if (YamlMFI.ScavengeFI) {
    // Resolve the named/numbered frame index against this function's frame.
    auto FIOrErr = YamlMFI.ScavengeFI->getFI(MF.getFrameInfo());
    if (!FIOrErr) {
      // Create a diagnostic for the frame index.
      const MemoryBuffer &Buffer =
          *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID());

      Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1, 1,
                           SourceMgr::DK_Error, toString(FIOrErr.takeError()),
                           "", None, None);
      SourceRange = YamlMFI.ScavengeFI->SourceRange;
      return true;
    }
    ScavengeFI = *FIOrErr;
  } else {
    ScavengeFI = None;
  }
  return false;
}
6365ffd83dbSDimitry Andric 
6375ffd83dbSDimitry Andric // Remove VGPR which was reserved for SGPR spills if there are no spilled SGPRs
6385ffd83dbSDimitry Andric bool SIMachineFunctionInfo::removeVGPRForSGPRSpill(Register ReservedVGPR,
6395ffd83dbSDimitry Andric                                                    MachineFunction &MF) {
6405ffd83dbSDimitry Andric   for (auto *i = SpillVGPRs.begin(); i < SpillVGPRs.end(); i++) {
6415ffd83dbSDimitry Andric     if (i->VGPR == ReservedVGPR) {
6425ffd83dbSDimitry Andric       SpillVGPRs.erase(i);
6435ffd83dbSDimitry Andric 
6445ffd83dbSDimitry Andric       for (MachineBasicBlock &MBB : MF) {
6455ffd83dbSDimitry Andric         MBB.removeLiveIn(ReservedVGPR);
6465ffd83dbSDimitry Andric         MBB.sortUniqueLiveIns();
6475ffd83dbSDimitry Andric       }
6485ffd83dbSDimitry Andric       this->VGPRReservedForSGPRSpill = AMDGPU::NoRegister;
6495ffd83dbSDimitry Andric       return true;
6505ffd83dbSDimitry Andric     }
6515ffd83dbSDimitry Andric   }
6525ffd83dbSDimitry Andric   return false;
6535ffd83dbSDimitry Andric }
654*349cc55cSDimitry Andric 
655*349cc55cSDimitry Andric bool SIMachineFunctionInfo::usesAGPRs(const MachineFunction &MF) const {
656*349cc55cSDimitry Andric   if (UsesAGPRs)
657*349cc55cSDimitry Andric     return *UsesAGPRs;
658*349cc55cSDimitry Andric 
659*349cc55cSDimitry Andric   if (!AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv()) ||
660*349cc55cSDimitry Andric       MF.getFrameInfo().hasCalls()) {
661*349cc55cSDimitry Andric     UsesAGPRs = true;
662*349cc55cSDimitry Andric     return true;
663*349cc55cSDimitry Andric   }
664*349cc55cSDimitry Andric 
665*349cc55cSDimitry Andric   const MachineRegisterInfo &MRI = MF.getRegInfo();
666*349cc55cSDimitry Andric 
667*349cc55cSDimitry Andric   for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
668*349cc55cSDimitry Andric     const Register Reg = Register::index2VirtReg(I);
669*349cc55cSDimitry Andric     const TargetRegisterClass *RC = MRI.getRegClassOrNull(Reg);
670*349cc55cSDimitry Andric     if (RC && SIRegisterInfo::isAGPRClass(RC)) {
671*349cc55cSDimitry Andric       UsesAGPRs = true;
672*349cc55cSDimitry Andric       return true;
673*349cc55cSDimitry Andric     } else if (!RC && !MRI.use_empty(Reg) && MRI.getType(Reg).isValid()) {
674*349cc55cSDimitry Andric       // Defer caching UsesAGPRs, function might not yet been regbank selected.
675*349cc55cSDimitry Andric       return true;
676*349cc55cSDimitry Andric     }
677*349cc55cSDimitry Andric   }
678*349cc55cSDimitry Andric 
679*349cc55cSDimitry Andric   for (MCRegister Reg : AMDGPU::AGPR_32RegClass) {
680*349cc55cSDimitry Andric     if (MRI.isPhysRegUsed(Reg)) {
681*349cc55cSDimitry Andric       UsesAGPRs = true;
682*349cc55cSDimitry Andric       return true;
683*349cc55cSDimitry Andric     }
684*349cc55cSDimitry Andric   }
685*349cc55cSDimitry Andric 
686*349cc55cSDimitry Andric   UsesAGPRs = false;
687*349cc55cSDimitry Andric   return false;
688*349cc55cSDimitry Andric }
689