xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp (revision e8d8bef961a50d4dc22501cde4fb9fb0be1b2532)
10b57cec5SDimitry Andric //===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric 
90b57cec5SDimitry Andric #include "SIMachineFunctionInfo.h"
105ffd83dbSDimitry Andric #include "AMDGPUTargetMachine.h"
110b57cec5SDimitry Andric 
#define MAX_LANES 64 // NOTE(review): appears to be the max wavefront size (wave64 lanes) — confirm against later uses in this file.
130b57cec5SDimitry Andric 
140b57cec5SDimitry Andric using namespace llvm;
150b57cec5SDimitry Andric 
// Initialize per-function state from the subtarget, calling convention, and
// function attributes. All preloaded-input flags start false in the init list
// and are selectively enabled below; later sections depend on flags set by
// earlier ones (e.g. KernargSegmentPtr, HasCalls), so the order matters.
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    ImplicitBufferPtr(false),
    ImplicitArgPtr(false),
    GITPtrHigh(0xffffffff), // Sentinel default; overridden below by the "amdgpu-git-ptr-high" attribute when present.
    HighBitsOf32BitAddress(0),
    GDSSize(0) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const Function &F = MF.getFunction();
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  WavesPerEU = ST.getWavesPerEU(F);

  Occupancy = ST.computeOccupancy(F, getLDSSize());
  CallingConv::ID CC = F.getCallingConv();

  // FIXME: Should have analysis or something rather than attribute to detect
  // calls.
  const bool HasCalls = F.hasFnAttribute("amdgpu-calls");

  // Enable all kernel inputs if we have the fixed ABI. Don't bother if we don't
  // have any calls.
  const bool UseFixedABI = AMDGPUTargetMachine::EnableFixedFunctionABI &&
                           (!isEntryFunction() || HasCalls);

  // Kernels always get the X workgroup/workitem IDs, and the kernarg segment
  // pointer whenever there are arguments to read.
  if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
    if (!F.arg_empty())
      KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  }

  if (!isEntryFunction()) {
    // Non-entry functions use fixed frame/stack pointer registers.
    // TODO: Pick a high register, and shift down, similar to a kernel.
    FrameOffsetReg = AMDGPU::SGPR33;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    if (!ST.enableFlatScratch()) {
      // Non-entry functions have no special inputs for now, other registers
      // required for scratch access.
      ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;

      ArgInfo.PrivateSegmentBuffer =
        ArgDescriptor::createRegister(ScratchRSrcReg);
    }

    if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    // Entry functions that read the implicit argument area also need the
    // kernarg segment pointer, and may need stricter kernarg alignment.
    if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) {
      KernargSegmentPtr = true;
      MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
                                 MaxKernArgAlign);
    }
  }

  // With the fixed ABI, all workgroup/workitem IDs and the implicit argument
  // pointer are enabled unconditionally; otherwise each input must be
  // requested individually via attributes.
  if (UseFixedABI) {
    WorkGroupIDX = true;
    WorkGroupIDY = true;
    WorkGroupIDZ = true;
    WorkItemIDX = true;
    WorkItemIDY = true;
    WorkItemIDZ = true;
    ImplicitArgPtr = true;
  } else {
    if (F.hasFnAttribute("amdgpu-work-group-id-x"))
      WorkGroupIDX = true;

    if (F.hasFnAttribute("amdgpu-work-group-id-y"))
      WorkGroupIDY = true;

    if (F.hasFnAttribute("amdgpu-work-group-id-z"))
      WorkGroupIDZ = true;

    if (F.hasFnAttribute("amdgpu-work-item-id-x"))
      WorkItemIDX = true;

    if (F.hasFnAttribute("amdgpu-work-item-id-y"))
      WorkItemIDY = true;

    if (F.hasFnAttribute("amdgpu-work-item-id-z"))
      WorkItemIDZ = true;
  }

  bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects");
  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    PrivateSegmentWaveByteOffset = true;

    // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
    if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
        (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
      ArgInfo.PrivateSegmentWaveByteOffset =
          ArgDescriptor::createRegister(AMDGPU::SGPR5);
  }

  // HSA/Mesa3D environments preload the private segment buffer (unless flat
  // scratch is used) and may preload further pointers depending on the ABI.
  bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
  if (isAmdHsaOrMesa) {
    if (!ST.enableFlatScratch())
      PrivateSegmentBuffer = true;

    if (UseFixedABI) {
      DispatchPtr = true;
      QueuePtr = true;

      // FIXME: We don't need this?
      DispatchID = true;
    } else {
      if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
        DispatchPtr = true;

      if (F.hasFnAttribute("amdgpu-queue-ptr"))
        QueuePtr = true;

      if (F.hasFnAttribute("amdgpu-dispatch-id"))
        DispatchID = true;
    }
  } else if (ST.isMesaGfxShader(F)) {
    ImplicitBufferPtr = true;
  }

  if (UseFixedABI || F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
    KernargSegmentPtr = true;

  if (ST.hasFlatAddressSpace() && isEntryFunction() &&
      (isAmdHsaOrMesa || ST.enableFlatScratch())) {
    // TODO: This could be refined a lot. The attribute is a poor way of
    // detecting calls or stack objects that may require it before argument
    // lowering.
    if (HasCalls || HasStackObjects || ST.enableFlatScratch())
      FlatScratchInit = true;
  }

  // Optional attributes encode integers as strings; a missing/empty attribute
  // leaves the default from the initializer list untouched.
  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
  StringRef S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GITPtrHigh);

  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
  S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, HighBitsOf32BitAddress);

  S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GDSSize);
}
1820b57cec5SDimitry Andric 
1830b57cec5SDimitry Andric void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
1840b57cec5SDimitry Andric   limitOccupancy(getMaxWavesPerEU());
1850b57cec5SDimitry Andric   const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();
1860b57cec5SDimitry Andric   limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
1870b57cec5SDimitry Andric                  MF.getFunction()));
1880b57cec5SDimitry Andric }
1890b57cec5SDimitry Andric 
1905ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addPrivateSegmentBuffer(
1910b57cec5SDimitry Andric   const SIRegisterInfo &TRI) {
1920b57cec5SDimitry Andric   ArgInfo.PrivateSegmentBuffer =
1930b57cec5SDimitry Andric     ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
1948bcb0991SDimitry Andric     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));
1950b57cec5SDimitry Andric   NumUserSGPRs += 4;
1960b57cec5SDimitry Andric   return ArgInfo.PrivateSegmentBuffer.getRegister();
1970b57cec5SDimitry Andric }
1980b57cec5SDimitry Andric 
1995ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
2000b57cec5SDimitry Andric   ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2010b57cec5SDimitry Andric     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
2020b57cec5SDimitry Andric   NumUserSGPRs += 2;
2030b57cec5SDimitry Andric   return ArgInfo.DispatchPtr.getRegister();
2040b57cec5SDimitry Andric }
2050b57cec5SDimitry Andric 
2065ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
2070b57cec5SDimitry Andric   ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2080b57cec5SDimitry Andric     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
2090b57cec5SDimitry Andric   NumUserSGPRs += 2;
2100b57cec5SDimitry Andric   return ArgInfo.QueuePtr.getRegister();
2110b57cec5SDimitry Andric }
2120b57cec5SDimitry Andric 
2135ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
2140b57cec5SDimitry Andric   ArgInfo.KernargSegmentPtr
2150b57cec5SDimitry Andric     = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2160b57cec5SDimitry Andric     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
2170b57cec5SDimitry Andric   NumUserSGPRs += 2;
2180b57cec5SDimitry Andric   return ArgInfo.KernargSegmentPtr.getRegister();
2190b57cec5SDimitry Andric }
2200b57cec5SDimitry Andric 
2215ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
2220b57cec5SDimitry Andric   ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2230b57cec5SDimitry Andric     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
2240b57cec5SDimitry Andric   NumUserSGPRs += 2;
2250b57cec5SDimitry Andric   return ArgInfo.DispatchID.getRegister();
2260b57cec5SDimitry Andric }
2270b57cec5SDimitry Andric 
2285ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
2290b57cec5SDimitry Andric   ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2300b57cec5SDimitry Andric     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
2310b57cec5SDimitry Andric   NumUserSGPRs += 2;
2320b57cec5SDimitry Andric   return ArgInfo.FlatScratchInit.getRegister();
2330b57cec5SDimitry Andric }
2340b57cec5SDimitry Andric 
2355ffd83dbSDimitry Andric Register SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
2360b57cec5SDimitry Andric   ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2370b57cec5SDimitry Andric     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
2380b57cec5SDimitry Andric   NumUserSGPRs += 2;
2390b57cec5SDimitry Andric   return ArgInfo.ImplicitBufferPtr.getRegister();
2400b57cec5SDimitry Andric }
2410b57cec5SDimitry Andric 
2425ffd83dbSDimitry Andric bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs,
2435ffd83dbSDimitry Andric                                              MCPhysReg Reg) {
2440b57cec5SDimitry Andric   for (unsigned I = 0; CSRegs[I]; ++I) {
2450b57cec5SDimitry Andric     if (CSRegs[I] == Reg)
2460b57cec5SDimitry Andric       return true;
2470b57cec5SDimitry Andric   }
2480b57cec5SDimitry Andric 
2490b57cec5SDimitry Andric   return false;
2500b57cec5SDimitry Andric }
2510b57cec5SDimitry Andric 
2520b57cec5SDimitry Andric /// \p returns true if \p NumLanes slots are available in VGPRs already used for
2530b57cec5SDimitry Andric /// SGPR spilling.
2540b57cec5SDimitry Andric //
2550b57cec5SDimitry Andric // FIXME: This only works after processFunctionBeforeFrameFinalized
2560b57cec5SDimitry Andric bool SIMachineFunctionInfo::haveFreeLanesForSGPRSpill(const MachineFunction &MF,
2570b57cec5SDimitry Andric                                                       unsigned NumNeed) const {
2580b57cec5SDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
2590b57cec5SDimitry Andric   unsigned WaveSize = ST.getWavefrontSize();
2600b57cec5SDimitry Andric   return NumVGPRSpillLanes + NumNeed <= WaveSize * SpillVGPRs.size();
2610b57cec5SDimitry Andric }
2620b57cec5SDimitry Andric 
/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
/// Each 4 bytes of the spill slot consumes one lane of a spill VGPR; returns
/// false (and rolls back the bookkeeping) if no VGPR can be found.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  // Note: operator[] creates the entry if absent; it is erased below on
  // failure so a retry is possible.
  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  // One lane per 32-bit chunk of the spill slot.
  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;

  if (NumLanes > WaveSize)
    return false;

  assert(Size >= 4 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();

  // Make sure to handle the case where a wide SGPR spill may span between two
  // VGPRs.
  for (unsigned I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    Register LaneVGPR;
    // Lane index within the current spill VGPR.
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

    // Reserve a VGPR (when NumVGPRSpillLanes = 0, WaveSize, 2*WaveSize, ..) and
    // when one of the two conditions is true:
    // 1. One reserved VGPR being tracked by VGPRReservedForSGPRSpill is not yet
    // reserved.
    // 2. All spill lanes of reserved VGPR(s) are full and another spill lane is
    // required.
    if (FuncInfo->VGPRReservedForSGPRSpill && NumVGPRSpillLanes < WaveSize) {
      assert(FuncInfo->VGPRReservedForSGPRSpill == SpillVGPRs.back().VGPR);
      LaneVGPR = FuncInfo->VGPRReservedForSGPRSpill;
    } else if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we will not
        // partially spill the SGPR to VGPRs.
        SGPRToVGPRSpills.erase(FI);
        NumVGPRSpillLanes -= I;
        return false;
      }

      // If the chosen VGPR is callee-saved, it must itself be spilled; create
      // a stack slot for that CSR save.
      Optional<int> CSRSpillFI;
      if ((FrameInfo.hasCalls() || !isEntryFunction()) && CSRegs &&
          isCalleeSavedReg(CSRegs, LaneVGPR)) {
        CSRSpillFI = FrameInfo.CreateSpillStackObject(4, Align(4));
      }

      SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI));

      // Add this register as live-in to all blocks to avoid machine verifier
      // complaining about use of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      // Continue filling lanes of the most recently reserved VGPR.
      LaneVGPR = SpillVGPRs.back().VGPR;
    }

    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}
3360b57cec5SDimitry Andric 
3375ffd83dbSDimitry Andric /// Reserve a VGPR for spilling of SGPRs
3385ffd83dbSDimitry Andric bool SIMachineFunctionInfo::reserveVGPRforSGPRSpills(MachineFunction &MF) {
3395ffd83dbSDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
3405ffd83dbSDimitry Andric   const SIRegisterInfo *TRI = ST.getRegisterInfo();
3415ffd83dbSDimitry Andric   SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
3425ffd83dbSDimitry Andric 
3435ffd83dbSDimitry Andric   Register LaneVGPR = TRI->findUnusedRegister(
3445ffd83dbSDimitry Andric       MF.getRegInfo(), &AMDGPU::VGPR_32RegClass, MF, true);
345*e8d8bef9SDimitry Andric   if (LaneVGPR == Register())
346*e8d8bef9SDimitry Andric     return false;
3475ffd83dbSDimitry Andric   SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, None));
3485ffd83dbSDimitry Andric   FuncInfo->VGPRReservedForSGPRSpill = LaneVGPR;
3495ffd83dbSDimitry Andric   return true;
3505ffd83dbSDimitry Andric }
3515ffd83dbSDimitry Andric 
/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
/// Either an AGPR is spilled to a VGPR or vice versa (direction selected by
/// \p isAGPRtoVGPR). Returns true if \p FI can be eliminated completely,
/// i.e. every 32-bit lane of the slot got a register.
bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
                                                    int FI,
                                                    bool isAGPRtoVGPR) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST =  MF.getSubtarget<GCNSubtarget>();

  assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));

  auto &Spill = VGPRToAGPRSpills[FI];

  // This has already been allocated.
  if (!Spill.Lanes.empty())
    return Spill.FullyAllocated;

  // One register per 4 bytes of the spill slot.
  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;
  Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);

  // Spill into the opposite register file from the slot's contents.
  const TargetRegisterClass &RC =
      isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
  auto Regs = RC.getRegisters();

  auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  Spill.FullyAllocated = true;

  // FIXME: Move allocation logic out of MachineFunctionInfo and initialize
  // once.
  BitVector OtherUsedRegs;
  OtherUsedRegs.resize(TRI->getNumRegs());

  // Callee-saved registers are off limits for spill destinations.
  const uint32_t *CSRMask =
      TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
  if (CSRMask)
    OtherUsedRegs.setBitsInMask(CSRMask);

  // TODO: Should include register tuples, but doesn't matter with current
  // usage.
  for (MCPhysReg Reg : SpillAGPR)
    OtherUsedRegs.set(Reg);
  for (MCPhysReg Reg : SpillVGPR)
    OtherUsedRegs.set(Reg);

  // Scan forward through the class's registers, claiming the first free one
  // for each lane; the scan never restarts, so lanes get ascending registers.
  SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
  for (unsigned I = 0; I < NumLanes; ++I) {
    NextSpillReg = std::find_if(
        NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
          return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
                 !OtherUsedRegs[Reg];
        });

    if (NextSpillReg == Regs.end()) { // Registers exhausted
      Spill.FullyAllocated = false;
      break;
    }

    OtherUsedRegs.set(*NextSpillReg);
    SpillRegs.push_back(*NextSpillReg);
    Spill.Lanes[I] = *NextSpillReg++;
  }

  return Spill.FullyAllocated;
}
4190b57cec5SDimitry Andric 
4200b57cec5SDimitry Andric void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) {
4215ffd83dbSDimitry Andric   // The FP & BP spills haven't been inserted yet, so keep them around.
4220b57cec5SDimitry Andric   for (auto &R : SGPRToVGPRSpills) {
4235ffd83dbSDimitry Andric     if (R.first != FramePointerSaveIndex && R.first != BasePointerSaveIndex)
4240b57cec5SDimitry Andric       MFI.RemoveStackObject(R.first);
4250b57cec5SDimitry Andric   }
4260b57cec5SDimitry Andric 
4270b57cec5SDimitry Andric   // All other SPGRs must be allocated on the default stack, so reset the stack
4280b57cec5SDimitry Andric   // ID.
4290b57cec5SDimitry Andric   for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e;
4300b57cec5SDimitry Andric        ++i)
4315ffd83dbSDimitry Andric     if (i != FramePointerSaveIndex && i != BasePointerSaveIndex)
4320b57cec5SDimitry Andric       MFI.setStackID(i, TargetStackID::Default);
4330b57cec5SDimitry Andric 
4340b57cec5SDimitry Andric   for (auto &R : VGPRToAGPRSpills) {
4350b57cec5SDimitry Andric     if (R.second.FullyAllocated)
4360b57cec5SDimitry Andric       MFI.RemoveStackObject(R.first);
4370b57cec5SDimitry Andric   }
4380b57cec5SDimitry Andric }
4390b57cec5SDimitry Andric 
4400b57cec5SDimitry Andric MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
4410b57cec5SDimitry Andric   assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
4420b57cec5SDimitry Andric   return AMDGPU::SGPR0 + NumUserSGPRs;
4430b57cec5SDimitry Andric }
4440b57cec5SDimitry Andric 
4450b57cec5SDimitry Andric MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
4460b57cec5SDimitry Andric   return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
4470b57cec5SDimitry Andric }
4480b57cec5SDimitry Andric 
4495ffd83dbSDimitry Andric Register
4505ffd83dbSDimitry Andric SIMachineFunctionInfo::getGITPtrLoReg(const MachineFunction &MF) const {
4515ffd83dbSDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
4525ffd83dbSDimitry Andric   if (!ST.isAmdPalOS())
4535ffd83dbSDimitry Andric     return Register();
4545ffd83dbSDimitry Andric   Register GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in
4555ffd83dbSDimitry Andric   if (ST.hasMergedShaders()) {
4565ffd83dbSDimitry Andric     switch (MF.getFunction().getCallingConv()) {
4575ffd83dbSDimitry Andric     case CallingConv::AMDGPU_HS:
4585ffd83dbSDimitry Andric     case CallingConv::AMDGPU_GS:
4595ffd83dbSDimitry Andric       // Low GIT address is passed in s8 rather than s0 for an LS+HS or
4605ffd83dbSDimitry Andric       // ES+GS merged shader on gfx9+.
4615ffd83dbSDimitry Andric       GitPtrLo = AMDGPU::SGPR8;
4625ffd83dbSDimitry Andric       return GitPtrLo;
4635ffd83dbSDimitry Andric     default:
4645ffd83dbSDimitry Andric       return GitPtrLo;
4655ffd83dbSDimitry Andric     }
4665ffd83dbSDimitry Andric   }
4675ffd83dbSDimitry Andric   return GitPtrLo;
4685ffd83dbSDimitry Andric }
4695ffd83dbSDimitry Andric 
4705ffd83dbSDimitry Andric static yaml::StringValue regToString(Register Reg,
4710b57cec5SDimitry Andric                                      const TargetRegisterInfo &TRI) {
4720b57cec5SDimitry Andric   yaml::StringValue Dest;
4730b57cec5SDimitry Andric   {
4740b57cec5SDimitry Andric     raw_string_ostream OS(Dest.Value);
4750b57cec5SDimitry Andric     OS << printReg(Reg, &TRI);
4760b57cec5SDimitry Andric   }
4770b57cec5SDimitry Andric   return Dest;
4780b57cec5SDimitry Andric }
4790b57cec5SDimitry Andric 
4800b57cec5SDimitry Andric static Optional<yaml::SIArgumentInfo>
4810b57cec5SDimitry Andric convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
4820b57cec5SDimitry Andric                     const TargetRegisterInfo &TRI) {
4830b57cec5SDimitry Andric   yaml::SIArgumentInfo AI;
4840b57cec5SDimitry Andric 
4850b57cec5SDimitry Andric   auto convertArg = [&](Optional<yaml::SIArgument> &A,
4860b57cec5SDimitry Andric                         const ArgDescriptor &Arg) {
4870b57cec5SDimitry Andric     if (!Arg)
4880b57cec5SDimitry Andric       return false;
4890b57cec5SDimitry Andric 
4900b57cec5SDimitry Andric     // Create a register or stack argument.
4910b57cec5SDimitry Andric     yaml::SIArgument SA = yaml::SIArgument::createArgument(Arg.isRegister());
4920b57cec5SDimitry Andric     if (Arg.isRegister()) {
4930b57cec5SDimitry Andric       raw_string_ostream OS(SA.RegisterName.Value);
4940b57cec5SDimitry Andric       OS << printReg(Arg.getRegister(), &TRI);
4950b57cec5SDimitry Andric     } else
4960b57cec5SDimitry Andric       SA.StackOffset = Arg.getStackOffset();
4970b57cec5SDimitry Andric     // Check and update the optional mask.
4980b57cec5SDimitry Andric     if (Arg.isMasked())
4990b57cec5SDimitry Andric       SA.Mask = Arg.getMask();
5000b57cec5SDimitry Andric 
5010b57cec5SDimitry Andric     A = SA;
5020b57cec5SDimitry Andric     return true;
5030b57cec5SDimitry Andric   };
5040b57cec5SDimitry Andric 
5050b57cec5SDimitry Andric   bool Any = false;
5060b57cec5SDimitry Andric   Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
5070b57cec5SDimitry Andric   Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
5080b57cec5SDimitry Andric   Any |= convertArg(AI.QueuePtr, ArgInfo.QueuePtr);
5090b57cec5SDimitry Andric   Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr);
5100b57cec5SDimitry Andric   Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID);
5110b57cec5SDimitry Andric   Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit);
5120b57cec5SDimitry Andric   Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize);
5130b57cec5SDimitry Andric   Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX);
5140b57cec5SDimitry Andric   Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY);
5150b57cec5SDimitry Andric   Any |= convertArg(AI.WorkGroupIDZ, ArgInfo.WorkGroupIDZ);
5160b57cec5SDimitry Andric   Any |= convertArg(AI.WorkGroupInfo, ArgInfo.WorkGroupInfo);
5170b57cec5SDimitry Andric   Any |= convertArg(AI.PrivateSegmentWaveByteOffset,
5180b57cec5SDimitry Andric                     ArgInfo.PrivateSegmentWaveByteOffset);
5190b57cec5SDimitry Andric   Any |= convertArg(AI.ImplicitArgPtr, ArgInfo.ImplicitArgPtr);
5200b57cec5SDimitry Andric   Any |= convertArg(AI.ImplicitBufferPtr, ArgInfo.ImplicitBufferPtr);
5210b57cec5SDimitry Andric   Any |= convertArg(AI.WorkItemIDX, ArgInfo.WorkItemIDX);
5220b57cec5SDimitry Andric   Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY);
5230b57cec5SDimitry Andric   Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ);
5240b57cec5SDimitry Andric 
5250b57cec5SDimitry Andric   if (Any)
5260b57cec5SDimitry Andric     return AI;
5270b57cec5SDimitry Andric 
5280b57cec5SDimitry Andric   return None;
5290b57cec5SDimitry Andric }
5300b57cec5SDimitry Andric 
5310b57cec5SDimitry Andric // Build the serializable YAML mirror of an in-memory SIMachineFunctionInfo.
5310b57cec5SDimitry Andric // Scalar fields are copied directly; physical registers are converted to
5310b57cec5SDimitry Andric // their printable names via regToString/TRI, and the argument descriptors go
5310b57cec5SDimitry Andric // through convertArgumentInfo so they round-trip through MIR text.
5310b57cec5SDimitry Andric yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
532*e8d8bef9SDimitry Andric     const llvm::SIMachineFunctionInfo &MFI, const TargetRegisterInfo &TRI)
5330b57cec5SDimitry Andric     : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
534*e8d8bef9SDimitry Andric       MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()),
535*e8d8bef9SDimitry Andric       DynLDSAlign(MFI.getDynLDSAlign()), IsEntryFunction(MFI.isEntryFunction()),
5360b57cec5SDimitry Andric       NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
537*e8d8bef9SDimitry Andric       MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()),
538*e8d8bef9SDimitry Andric       HasSpilledSGPRs(MFI.hasSpilledSGPRs()),
539*e8d8bef9SDimitry Andric       HasSpilledVGPRs(MFI.hasSpilledVGPRs()),
5408bcb0991SDimitry Andric       HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
541*e8d8bef9SDimitry Andric       Occupancy(MFI.getOccupancy()),
5420b57cec5SDimitry Andric       ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
5430b57cec5SDimitry Andric       FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
5440b57cec5SDimitry Andric       StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
545*e8d8bef9SDimitry Andric       ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)), Mode(MFI.getMode()) {
546*e8d8bef9SDimitry Andric }
5470b57cec5SDimitry Andric 
5480b57cec5SDimitry Andric // Delegate YAML (de)serialization of this object to the
5480b57cec5SDimitry Andric // MappingTraits<SIMachineFunctionInfo> specialization.
5480b57cec5SDimitry Andric void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
5490b57cec5SDimitry Andric   MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
5500b57cec5SDimitry Andric }
5510b57cec5SDimitry Andric 
5520b57cec5SDimitry Andric // Populate the base scalar fields of this function info from its parsed YAML
5520b57cec5SDimitry Andric // representation. Always returns false (no parse error can occur here).
5520b57cec5SDimitry Andric bool SIMachineFunctionInfo::initializeBaseYamlFields(
5530b57cec5SDimitry Andric   const yaml::SIMachineFunctionInfo &YamlMFI) {
5540b57cec5SDimitry Andric   // Kernel-argument layout.
5540b57cec5SDimitry Andric   ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
5558bcb0991SDimitry Andric   MaxKernArgAlign = assumeAligned(YamlMFI.MaxKernArgAlign);
5560b57cec5SDimitry Andric 
5560b57cec5SDimitry Andric   // LDS usage and addressing state.
5560b57cec5SDimitry Andric   LDSSize = YamlMFI.LDSSize;
557*e8d8bef9SDimitry Andric   DynLDSAlign = YamlMFI.DynLDSAlign;
5588bcb0991SDimitry Andric   HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
5590b57cec5SDimitry Andric 
5590b57cec5SDimitry Andric   // Function-wide attributes and tuning flags.
559*e8d8bef9SDimitry Andric   Occupancy = YamlMFI.Occupancy;
5600b57cec5SDimitry Andric   IsEntryFunction = YamlMFI.IsEntryFunction;
5610b57cec5SDimitry Andric   NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
5620b57cec5SDimitry Andric   MemoryBound = YamlMFI.MemoryBound;
5630b57cec5SDimitry Andric   WaveLimiter = YamlMFI.WaveLimiter;
5630b57cec5SDimitry Andric 
5630b57cec5SDimitry Andric   // Spill bookkeeping.
564*e8d8bef9SDimitry Andric   HasSpilledSGPRs = YamlMFI.HasSpilledSGPRs;
565*e8d8bef9SDimitry Andric   HasSpilledVGPRs = YamlMFI.HasSpilledVGPRs;
5660b57cec5SDimitry Andric   return false;
5670b57cec5SDimitry Andric }
5685ffd83dbSDimitry Andric 
5695ffd83dbSDimitry Andric // Remove VGPR which was reserved for SGPR spills if there are no spilled SGPRs
5695ffd83dbSDimitry Andric //
5695ffd83dbSDimitry Andric // Returns true if \p ReservedVGPR was found in the spill list (and removed),
5695ffd83dbSDimitry Andric // false otherwise. On removal, the register is also dropped from every
5695ffd83dbSDimitry Andric // block's live-in list so the liveness information stays consistent.
5705ffd83dbSDimitry Andric bool SIMachineFunctionInfo::removeVGPRForSGPRSpill(Register ReservedVGPR,
5715ffd83dbSDimitry Andric                                                    MachineFunction &MF) {
5725ffd83dbSDimitry Andric   // Use != and pre-increment on iterators (idiomatic; post-increment copies).
5725ffd83dbSDimitry Andric   for (auto I = SpillVGPRs.begin(), E = SpillVGPRs.end(); I != E; ++I) {
5735ffd83dbSDimitry Andric     if (I->VGPR != ReservedVGPR)
5735ffd83dbSDimitry Andric       continue;
5745ffd83dbSDimitry Andric 
5745ffd83dbSDimitry Andric     // Erase invalidates I/E, but we return without touching them again.
5745ffd83dbSDimitry Andric     SpillVGPRs.erase(I);
5755ffd83dbSDimitry Andric 
5765ffd83dbSDimitry Andric     // The register is no longer written by spill code, so it must not
5765ffd83dbSDimitry Andric     // remain a live-in anywhere.
5765ffd83dbSDimitry Andric     for (MachineBasicBlock &MBB : MF) {
5775ffd83dbSDimitry Andric       MBB.removeLiveIn(ReservedVGPR);
5785ffd83dbSDimitry Andric       MBB.sortUniqueLiveIns();
5795ffd83dbSDimitry Andric     }
5805ffd83dbSDimitry Andric     VGPRReservedForSGPRSpill = AMDGPU::NoRegister;
5815ffd83dbSDimitry Andric     return true;
5825ffd83dbSDimitry Andric   }
5845ffd83dbSDimitry Andric   return false;
5855ffd83dbSDimitry Andric }
586