xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp (revision 5ffd83dbcc34f10e07f6d3e968ae6365869615f4)
10b57cec5SDimitry Andric //===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric 
90b57cec5SDimitry Andric #include "SIMachineFunctionInfo.h"
100b57cec5SDimitry Andric #include "AMDGPUArgumentUsageInfo.h"
11*5ffd83dbSDimitry Andric #include "AMDGPUTargetMachine.h"
120b57cec5SDimitry Andric #include "AMDGPUSubtarget.h"
130b57cec5SDimitry Andric #include "SIRegisterInfo.h"
140b57cec5SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
150b57cec5SDimitry Andric #include "Utils/AMDGPUBaseInfo.h"
160b57cec5SDimitry Andric #include "llvm/ADT/Optional.h"
170b57cec5SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h"
180b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h"
190b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunction.h"
200b57cec5SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
210b57cec5SDimitry Andric #include "llvm/IR/CallingConv.h"
220b57cec5SDimitry Andric #include "llvm/IR/Function.h"
230b57cec5SDimitry Andric #include <cassert>
240b57cec5SDimitry Andric #include <vector>
250b57cec5SDimitry Andric 
260b57cec5SDimitry Andric #define MAX_LANES 64
270b57cec5SDimitry Andric 
280b57cec5SDimitry Andric using namespace llvm;
290b57cec5SDimitry Andric 
/// Construct the SI-specific machine function state for \p MF.
///
/// Decides, from the calling convention, subtarget, and "amdgpu-*" function
/// attributes, which preloaded kernel arguments (user SGPRs), system SGPRs,
/// and work-item ID inputs this function requires, and picks the scratch
/// resource / frame registers for non-entry functions.
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    ImplicitBufferPtr(false),
    ImplicitArgPtr(false),
    GITPtrHigh(0xffffffff),
    HighBitsOf32BitAddress(0),
    GDSSize(0) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const Function &F = MF.getFunction();
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  WavesPerEU = ST.getWavesPerEU(F);

  Occupancy = ST.computeOccupancy(F, getLDSSize());
  CallingConv::ID CC = F.getCallingConv();

  // FIXME: Should have analysis or something rather than attribute to detect
  // calls.
  const bool HasCalls = F.hasFnAttribute("amdgpu-calls");

  // Enable all kernel inputs if we have the fixed ABI. Don't bother if we don't
  // have any calls.
  const bool UseFixedABI = AMDGPUTargetMachine::EnableFixedFunctionABI &&
                           (!isEntryFunction() || HasCalls);

  // Kernels always get work-group/work-item ID X; the kernarg pointer is only
  // needed if the kernel actually takes arguments.
  if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
    if (!F.arg_empty())
      KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  }

  if (!isEntryFunction()) {
    // Non-entry functions have no special inputs for now, other registers
    // required for scratch access.
    ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;

    // TODO: Pick a high register, and shift down, similar to a kernel.
    FrameOffsetReg = AMDGPU::SGPR33;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    ArgInfo.PrivateSegmentBuffer =
      ArgDescriptor::createRegister(ScratchRSrcReg);

    if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) {
      // The implicit arguments live past the explicit kernargs, so requesting
      // them implies a kernarg segment pointer and may raise the required
      // kernarg alignment.
      KernargSegmentPtr = true;
      MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
                                 MaxKernArgAlign);
    }
  }

  if (UseFixedABI) {
    // Fixed ABI: unconditionally enable every ID input and the implicit
    // argument pointer rather than inspecting per-input attributes.
    WorkGroupIDX = true;
    WorkGroupIDY = true;
    WorkGroupIDZ = true;
    WorkItemIDX = true;
    WorkItemIDY = true;
    WorkItemIDZ = true;
    ImplicitArgPtr = true;
  } else {
    if (F.hasFnAttribute("amdgpu-work-group-id-x"))
      WorkGroupIDX = true;

    if (F.hasFnAttribute("amdgpu-work-group-id-y"))
      WorkGroupIDY = true;

    if (F.hasFnAttribute("amdgpu-work-group-id-z"))
      WorkGroupIDZ = true;

    if (F.hasFnAttribute("amdgpu-work-item-id-x"))
      WorkItemIDX = true;

    if (F.hasFnAttribute("amdgpu-work-item-id-y"))
      WorkItemIDY = true;

    if (F.hasFnAttribute("amdgpu-work-item-id-z"))
      WorkItemIDZ = true;
  }

  bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects");
  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    PrivateSegmentWaveByteOffset = true;

    // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
    if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
        (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
      ArgInfo.PrivateSegmentWaveByteOffset =
          ArgDescriptor::createRegister(AMDGPU::SGPR5);
  }

  bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
  if (isAmdHsaOrMesa) {
    PrivateSegmentBuffer = true;

    if (UseFixedABI) {
      DispatchPtr = true;
      QueuePtr = true;

      // FIXME: We don't need this?
      DispatchID = true;
    } else {
      if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
        DispatchPtr = true;

      if (F.hasFnAttribute("amdgpu-queue-ptr"))
        QueuePtr = true;

      if (F.hasFnAttribute("amdgpu-dispatch-id"))
        DispatchID = true;
    }
  } else if (ST.isMesaGfxShader(F)) {
    ImplicitBufferPtr = true;
  }

  if (UseFixedABI || F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
    KernargSegmentPtr = true;

  if (ST.hasFlatAddressSpace() && isEntryFunction() && isAmdHsaOrMesa) {
    // TODO: This could be refined a lot. The attribute is a poor way of
    // detecting calls or stack objects that may require it before argument
    // lowering.
    if (HasCalls || HasStackObjects)
      FlatScratchInit = true;
  }

  // Optional attribute-encoded integers; each is left at its initializer
  // value when the attribute string is absent or empty.
  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
  StringRef S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GITPtrHigh);

  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
  S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, HighBitsOf32BitAddress);

  S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GDSSize);
}
1920b57cec5SDimitry Andric 
1930b57cec5SDimitry Andric void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
1940b57cec5SDimitry Andric   limitOccupancy(getMaxWavesPerEU());
1950b57cec5SDimitry Andric   const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();
1960b57cec5SDimitry Andric   limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
1970b57cec5SDimitry Andric                  MF.getFunction()));
1980b57cec5SDimitry Andric }
1990b57cec5SDimitry Andric 
200*5ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addPrivateSegmentBuffer(
2010b57cec5SDimitry Andric   const SIRegisterInfo &TRI) {
2020b57cec5SDimitry Andric   ArgInfo.PrivateSegmentBuffer =
2030b57cec5SDimitry Andric     ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2048bcb0991SDimitry Andric     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));
2050b57cec5SDimitry Andric   NumUserSGPRs += 4;
2060b57cec5SDimitry Andric   return ArgInfo.PrivateSegmentBuffer.getRegister();
2070b57cec5SDimitry Andric }
2080b57cec5SDimitry Andric 
209*5ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
2100b57cec5SDimitry Andric   ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2110b57cec5SDimitry Andric     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
2120b57cec5SDimitry Andric   NumUserSGPRs += 2;
2130b57cec5SDimitry Andric   return ArgInfo.DispatchPtr.getRegister();
2140b57cec5SDimitry Andric }
2150b57cec5SDimitry Andric 
216*5ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
2170b57cec5SDimitry Andric   ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2180b57cec5SDimitry Andric     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
2190b57cec5SDimitry Andric   NumUserSGPRs += 2;
2200b57cec5SDimitry Andric   return ArgInfo.QueuePtr.getRegister();
2210b57cec5SDimitry Andric }
2220b57cec5SDimitry Andric 
223*5ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
2240b57cec5SDimitry Andric   ArgInfo.KernargSegmentPtr
2250b57cec5SDimitry Andric     = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2260b57cec5SDimitry Andric     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
2270b57cec5SDimitry Andric   NumUserSGPRs += 2;
2280b57cec5SDimitry Andric   return ArgInfo.KernargSegmentPtr.getRegister();
2290b57cec5SDimitry Andric }
2300b57cec5SDimitry Andric 
231*5ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
2320b57cec5SDimitry Andric   ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2330b57cec5SDimitry Andric     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
2340b57cec5SDimitry Andric   NumUserSGPRs += 2;
2350b57cec5SDimitry Andric   return ArgInfo.DispatchID.getRegister();
2360b57cec5SDimitry Andric }
2370b57cec5SDimitry Andric 
238*5ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
2390b57cec5SDimitry Andric   ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2400b57cec5SDimitry Andric     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
2410b57cec5SDimitry Andric   NumUserSGPRs += 2;
2420b57cec5SDimitry Andric   return ArgInfo.FlatScratchInit.getRegister();
2430b57cec5SDimitry Andric }
2440b57cec5SDimitry Andric 
245*5ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
2460b57cec5SDimitry Andric   ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2470b57cec5SDimitry Andric     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
2480b57cec5SDimitry Andric   NumUserSGPRs += 2;
2490b57cec5SDimitry Andric   return ArgInfo.ImplicitBufferPtr.getRegister();
2500b57cec5SDimitry Andric }
2510b57cec5SDimitry Andric 
252*5ffd83dbSDimitry Andric bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs,
253*5ffd83dbSDimitry Andric                                              MCPhysReg Reg) {
2540b57cec5SDimitry Andric   for (unsigned I = 0; CSRegs[I]; ++I) {
2550b57cec5SDimitry Andric     if (CSRegs[I] == Reg)
2560b57cec5SDimitry Andric       return true;
2570b57cec5SDimitry Andric   }
2580b57cec5SDimitry Andric 
2590b57cec5SDimitry Andric   return false;
2600b57cec5SDimitry Andric }
2610b57cec5SDimitry Andric 
2620b57cec5SDimitry Andric /// \p returns true if \p NumLanes slots are available in VGPRs already used for
2630b57cec5SDimitry Andric /// SGPR spilling.
2640b57cec5SDimitry Andric //
2650b57cec5SDimitry Andric // FIXME: This only works after processFunctionBeforeFrameFinalized
2660b57cec5SDimitry Andric bool SIMachineFunctionInfo::haveFreeLanesForSGPRSpill(const MachineFunction &MF,
2670b57cec5SDimitry Andric                                                       unsigned NumNeed) const {
2680b57cec5SDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
2690b57cec5SDimitry Andric   unsigned WaveSize = ST.getWavefrontSize();
2700b57cec5SDimitry Andric   return NumVGPRSpillLanes + NumNeed <= WaveSize * SpillVGPRs.size();
2710b57cec5SDimitry Andric }
2720b57cec5SDimitry Andric 
/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
///
/// Each 4-byte slot of the frame object gets one VGPR lane; new VGPRs are
/// claimed as lanes fill up. Returns false (and rolls back the partial
/// allocation) when the object is too wide or no VGPR is left.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  // One lane per 4-byte SGPR slot of the spilled object.
  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;

  // A single spill must fit within one VGPR's worth of lanes.
  if (NumLanes > WaveSize)
    return false;

  assert(Size >= 4 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();

  // Make sure to handle the case where a wide SGPR spill may span between two
  // VGPRs.
  for (unsigned I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    Register LaneVGPR;
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

    // Reserve a VGPR (when NumVGPRSpillLanes = 0, WaveSize, 2*WaveSize, ..) and
    // when one of the two conditions is true:
    // 1. One reserved VGPR being tracked by VGPRReservedForSGPRSpill is not yet
    // reserved.
    // 2. All spill lanes of reserved VGPR(s) are full and another spill lane is
    // required.
    if (FuncInfo->VGPRReservedForSGPRSpill && NumVGPRSpillLanes < WaveSize) {
      assert(FuncInfo->VGPRReservedForSGPRSpill == SpillVGPRs.back().VGPR);
      LaneVGPR = FuncInfo->VGPRReservedForSGPRSpill;
    } else if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we will not
        // partially spill the SGPR to VGPRs.
        SGPRToVGPRSpills.erase(FI);
        NumVGPRSpillLanes -= I;
        return false;
      }

      // If the chosen VGPR is callee-saved, create a stack slot so the
      // prolog/epilog can preserve its original value.
      Optional<int> CSRSpillFI;
      if ((FrameInfo.hasCalls() || !isEntryFunction()) && CSRegs &&
          isCalleeSavedReg(CSRegs, LaneVGPR)) {
        CSRSpillFI = FrameInfo.CreateSpillStackObject(4, Align(4));
      }

      SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI));

      // Add this register as live-in to all blocks to avoid machine verifer
      // complaining about use of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      // Continue filling lanes of the most recently reserved VGPR.
      LaneVGPR = SpillVGPRs.back().VGPR;
    }

    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}
3460b57cec5SDimitry Andric 
347*5ffd83dbSDimitry Andric /// Reserve a VGPR for spilling of SGPRs
348*5ffd83dbSDimitry Andric bool SIMachineFunctionInfo::reserveVGPRforSGPRSpills(MachineFunction &MF) {
349*5ffd83dbSDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
350*5ffd83dbSDimitry Andric   const SIRegisterInfo *TRI = ST.getRegisterInfo();
351*5ffd83dbSDimitry Andric   SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
352*5ffd83dbSDimitry Andric 
353*5ffd83dbSDimitry Andric   Register LaneVGPR = TRI->findUnusedRegister(
354*5ffd83dbSDimitry Andric       MF.getRegInfo(), &AMDGPU::VGPR_32RegClass, MF, true);
355*5ffd83dbSDimitry Andric   SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, None));
356*5ffd83dbSDimitry Andric   FuncInfo->VGPRReservedForSGPRSpill = LaneVGPR;
357*5ffd83dbSDimitry Andric   return true;
358*5ffd83dbSDimitry Andric }
359*5ffd83dbSDimitry Andric 
/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
/// Either AGPR is spilled to VGPR to vice versa.
/// Returns true if a \p FI can be eliminated completely.
bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
                                                    int FI,
                                                    bool isAGPRtoVGPR) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST =  MF.getSubtarget<GCNSubtarget>();

  assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));

  auto &Spill = VGPRToAGPRSpills[FI];

  // This has already been allocated.
  if (!Spill.Lanes.empty())
    return Spill.FullyAllocated;

  // One lane register per 4-byte slot of the spilled object.
  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;
  Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);

  // Spill into the opposite register file of the source.
  const TargetRegisterClass &RC =
      isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
  auto Regs = RC.getRegisters();

  auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  Spill.FullyAllocated = true;

  // FIXME: Move allocation logic out of MachineFunctionInfo and initialize
  // once.
  BitVector OtherUsedRegs;
  OtherUsedRegs.resize(TRI->getNumRegs());

  // Exclude callee-saved registers of the current calling convention.
  const uint32_t *CSRMask =
      TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
  if (CSRMask)
    OtherUsedRegs.setBitsInMask(CSRMask);

  // TODO: Should include register tuples, but doesn't matter with current
  // usage.
  for (MCPhysReg Reg : SpillAGPR)
    OtherUsedRegs.set(Reg);
  for (MCPhysReg Reg : SpillVGPR)
    OtherUsedRegs.set(Reg);

  // Scan the register class once, assigning the next free, unused register
  // to each lane; stop early if the class is exhausted.
  SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
  for (unsigned I = 0; I < NumLanes; ++I) {
    NextSpillReg = std::find_if(
        NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
          return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
                 !OtherUsedRegs[Reg];
        });

    if (NextSpillReg == Regs.end()) { // Registers exhausted
      Spill.FullyAllocated = false;
      break;
    }

    OtherUsedRegs.set(*NextSpillReg);
    SpillRegs.push_back(*NextSpillReg);
    Spill.Lanes[I] = *NextSpillReg++;
  }

  return Spill.FullyAllocated;
}
4270b57cec5SDimitry Andric 
4280b57cec5SDimitry Andric void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) {
429*5ffd83dbSDimitry Andric   // The FP & BP spills haven't been inserted yet, so keep them around.
4300b57cec5SDimitry Andric   for (auto &R : SGPRToVGPRSpills) {
431*5ffd83dbSDimitry Andric     if (R.first != FramePointerSaveIndex && R.first != BasePointerSaveIndex)
4320b57cec5SDimitry Andric       MFI.RemoveStackObject(R.first);
4330b57cec5SDimitry Andric   }
4340b57cec5SDimitry Andric 
4350b57cec5SDimitry Andric   // All other SPGRs must be allocated on the default stack, so reset the stack
4360b57cec5SDimitry Andric   // ID.
4370b57cec5SDimitry Andric   for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e;
4380b57cec5SDimitry Andric        ++i)
439*5ffd83dbSDimitry Andric     if (i != FramePointerSaveIndex && i != BasePointerSaveIndex)
4400b57cec5SDimitry Andric       MFI.setStackID(i, TargetStackID::Default);
4410b57cec5SDimitry Andric 
4420b57cec5SDimitry Andric   for (auto &R : VGPRToAGPRSpills) {
4430b57cec5SDimitry Andric     if (R.second.FullyAllocated)
4440b57cec5SDimitry Andric       MFI.RemoveStackObject(R.first);
4450b57cec5SDimitry Andric   }
4460b57cec5SDimitry Andric }
4470b57cec5SDimitry Andric 
/// \returns the next unallocated user SGPR; user SGPRs are assigned
/// consecutively starting from SGPR0, before any system SGPRs.
MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
  assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
  return AMDGPU::SGPR0 + NumUserSGPRs;
}
4520b57cec5SDimitry Andric 
/// \returns the next unallocated system SGPR; system SGPRs follow directly
/// after all user SGPRs.
MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
  return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
}
4560b57cec5SDimitry Andric 
457*5ffd83dbSDimitry Andric Register
458*5ffd83dbSDimitry Andric SIMachineFunctionInfo::getGITPtrLoReg(const MachineFunction &MF) const {
459*5ffd83dbSDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
460*5ffd83dbSDimitry Andric   if (!ST.isAmdPalOS())
461*5ffd83dbSDimitry Andric     return Register();
462*5ffd83dbSDimitry Andric   Register GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in
463*5ffd83dbSDimitry Andric   if (ST.hasMergedShaders()) {
464*5ffd83dbSDimitry Andric     switch (MF.getFunction().getCallingConv()) {
465*5ffd83dbSDimitry Andric     case CallingConv::AMDGPU_HS:
466*5ffd83dbSDimitry Andric     case CallingConv::AMDGPU_GS:
467*5ffd83dbSDimitry Andric       // Low GIT address is passed in s8 rather than s0 for an LS+HS or
468*5ffd83dbSDimitry Andric       // ES+GS merged shader on gfx9+.
469*5ffd83dbSDimitry Andric       GitPtrLo = AMDGPU::SGPR8;
470*5ffd83dbSDimitry Andric       return GitPtrLo;
471*5ffd83dbSDimitry Andric     default:
472*5ffd83dbSDimitry Andric       return GitPtrLo;
473*5ffd83dbSDimitry Andric     }
474*5ffd83dbSDimitry Andric   }
475*5ffd83dbSDimitry Andric   return GitPtrLo;
476*5ffd83dbSDimitry Andric }
477*5ffd83dbSDimitry Andric 
478*5ffd83dbSDimitry Andric static yaml::StringValue regToString(Register Reg,
4790b57cec5SDimitry Andric                                      const TargetRegisterInfo &TRI) {
4800b57cec5SDimitry Andric   yaml::StringValue Dest;
4810b57cec5SDimitry Andric   {
4820b57cec5SDimitry Andric     raw_string_ostream OS(Dest.Value);
4830b57cec5SDimitry Andric     OS << printReg(Reg, &TRI);
4840b57cec5SDimitry Andric   }
4850b57cec5SDimitry Andric   return Dest;
4860b57cec5SDimitry Andric }
4870b57cec5SDimitry Andric 
4880b57cec5SDimitry Andric static Optional<yaml::SIArgumentInfo>
4890b57cec5SDimitry Andric convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
4900b57cec5SDimitry Andric                     const TargetRegisterInfo &TRI) {
4910b57cec5SDimitry Andric   yaml::SIArgumentInfo AI;
4920b57cec5SDimitry Andric 
4930b57cec5SDimitry Andric   auto convertArg = [&](Optional<yaml::SIArgument> &A,
4940b57cec5SDimitry Andric                         const ArgDescriptor &Arg) {
4950b57cec5SDimitry Andric     if (!Arg)
4960b57cec5SDimitry Andric       return false;
4970b57cec5SDimitry Andric 
4980b57cec5SDimitry Andric     // Create a register or stack argument.
4990b57cec5SDimitry Andric     yaml::SIArgument SA = yaml::SIArgument::createArgument(Arg.isRegister());
5000b57cec5SDimitry Andric     if (Arg.isRegister()) {
5010b57cec5SDimitry Andric       raw_string_ostream OS(SA.RegisterName.Value);
5020b57cec5SDimitry Andric       OS << printReg(Arg.getRegister(), &TRI);
5030b57cec5SDimitry Andric     } else
5040b57cec5SDimitry Andric       SA.StackOffset = Arg.getStackOffset();
5050b57cec5SDimitry Andric     // Check and update the optional mask.
5060b57cec5SDimitry Andric     if (Arg.isMasked())
5070b57cec5SDimitry Andric       SA.Mask = Arg.getMask();
5080b57cec5SDimitry Andric 
5090b57cec5SDimitry Andric     A = SA;
5100b57cec5SDimitry Andric     return true;
5110b57cec5SDimitry Andric   };
5120b57cec5SDimitry Andric 
5130b57cec5SDimitry Andric   bool Any = false;
5140b57cec5SDimitry Andric   Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
5150b57cec5SDimitry Andric   Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
5160b57cec5SDimitry Andric   Any |= convertArg(AI.QueuePtr, ArgInfo.QueuePtr);
5170b57cec5SDimitry Andric   Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr);
5180b57cec5SDimitry Andric   Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID);
5190b57cec5SDimitry Andric   Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit);
5200b57cec5SDimitry Andric   Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize);
5210b57cec5SDimitry Andric   Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX);
5220b57cec5SDimitry Andric   Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY);
5230b57cec5SDimitry Andric   Any |= convertArg(AI.WorkGroupIDZ, ArgInfo.WorkGroupIDZ);
5240b57cec5SDimitry Andric   Any |= convertArg(AI.WorkGroupInfo, ArgInfo.WorkGroupInfo);
5250b57cec5SDimitry Andric   Any |= convertArg(AI.PrivateSegmentWaveByteOffset,
5260b57cec5SDimitry Andric                     ArgInfo.PrivateSegmentWaveByteOffset);
5270b57cec5SDimitry Andric   Any |= convertArg(AI.ImplicitArgPtr, ArgInfo.ImplicitArgPtr);
5280b57cec5SDimitry Andric   Any |= convertArg(AI.ImplicitBufferPtr, ArgInfo.ImplicitBufferPtr);
5290b57cec5SDimitry Andric   Any |= convertArg(AI.WorkItemIDX, ArgInfo.WorkItemIDX);
5300b57cec5SDimitry Andric   Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY);
5310b57cec5SDimitry Andric   Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ);
5320b57cec5SDimitry Andric 
5330b57cec5SDimitry Andric   if (Any)
5340b57cec5SDimitry Andric     return AI;
5350b57cec5SDimitry Andric 
5360b57cec5SDimitry Andric   return None;
5370b57cec5SDimitry Andric }
5380b57cec5SDimitry Andric 
// Build the YAML-serializable mirror of an in-memory SIMachineFunctionInfo.
// Scalar fields are copied directly from MFI's accessors; register fields are
// rendered to their printable names via regToString()/TRI, and the argument
// descriptors are converted through convertArgumentInfo() (which yields None
// when no arguments are set).
yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
  const llvm::SIMachineFunctionInfo& MFI,
  const TargetRegisterInfo &TRI)
  : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
    MaxKernArgAlign(MFI.getMaxKernArgAlign()),
    LDSSize(MFI.getLDSSize()),
    IsEntryFunction(MFI.isEntryFunction()),
    NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
    MemoryBound(MFI.isMemoryBound()),
    WaveLimiter(MFI.needsWaveLimiter()),
    HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
    ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
    FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
    StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
    ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)),
    Mode(MFI.getMode()) {}
5550b57cec5SDimitry Andric 
// Delegate YAML (de)serialization of this object to the
// MappingTraits<SIMachineFunctionInfo> specialization.
void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
  MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
}
5590b57cec5SDimitry Andric 
5600b57cec5SDimitry Andric bool SIMachineFunctionInfo::initializeBaseYamlFields(
5610b57cec5SDimitry Andric   const yaml::SIMachineFunctionInfo &YamlMFI) {
5620b57cec5SDimitry Andric   ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
5638bcb0991SDimitry Andric   MaxKernArgAlign = assumeAligned(YamlMFI.MaxKernArgAlign);
5640b57cec5SDimitry Andric   LDSSize = YamlMFI.LDSSize;
5658bcb0991SDimitry Andric   HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
5660b57cec5SDimitry Andric   IsEntryFunction = YamlMFI.IsEntryFunction;
5670b57cec5SDimitry Andric   NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
5680b57cec5SDimitry Andric   MemoryBound = YamlMFI.MemoryBound;
5690b57cec5SDimitry Andric   WaveLimiter = YamlMFI.WaveLimiter;
5700b57cec5SDimitry Andric   return false;
5710b57cec5SDimitry Andric }
572*5ffd83dbSDimitry Andric 
573*5ffd83dbSDimitry Andric // Remove VGPR which was reserved for SGPR spills if there are no spilled SGPRs
574*5ffd83dbSDimitry Andric bool SIMachineFunctionInfo::removeVGPRForSGPRSpill(Register ReservedVGPR,
575*5ffd83dbSDimitry Andric                                                    MachineFunction &MF) {
576*5ffd83dbSDimitry Andric   for (auto *i = SpillVGPRs.begin(); i < SpillVGPRs.end(); i++) {
577*5ffd83dbSDimitry Andric     if (i->VGPR == ReservedVGPR) {
578*5ffd83dbSDimitry Andric       SpillVGPRs.erase(i);
579*5ffd83dbSDimitry Andric 
580*5ffd83dbSDimitry Andric       for (MachineBasicBlock &MBB : MF) {
581*5ffd83dbSDimitry Andric         MBB.removeLiveIn(ReservedVGPR);
582*5ffd83dbSDimitry Andric         MBB.sortUniqueLiveIns();
583*5ffd83dbSDimitry Andric       }
584*5ffd83dbSDimitry Andric       this->VGPRReservedForSGPRSpill = AMDGPU::NoRegister;
585*5ffd83dbSDimitry Andric       return true;
586*5ffd83dbSDimitry Andric     }
587*5ffd83dbSDimitry Andric   }
588*5ffd83dbSDimitry Andric   return false;
589*5ffd83dbSDimitry Andric }
590