xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp (revision 8bcb0991864975618c09697b1aca10683346d9f0)
10b57cec5SDimitry Andric //===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric 
90b57cec5SDimitry Andric #include "SIMachineFunctionInfo.h"
100b57cec5SDimitry Andric #include "AMDGPUArgumentUsageInfo.h"
110b57cec5SDimitry Andric #include "AMDGPUSubtarget.h"
120b57cec5SDimitry Andric #include "SIRegisterInfo.h"
130b57cec5SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
140b57cec5SDimitry Andric #include "Utils/AMDGPUBaseInfo.h"
150b57cec5SDimitry Andric #include "llvm/ADT/Optional.h"
160b57cec5SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h"
170b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h"
180b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunction.h"
190b57cec5SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
200b57cec5SDimitry Andric #include "llvm/IR/CallingConv.h"
210b57cec5SDimitry Andric #include "llvm/IR/Function.h"
220b57cec5SDimitry Andric #include <cassert>
230b57cec5SDimitry Andric #include <vector>
240b57cec5SDimitry Andric 
250b57cec5SDimitry Andric #define MAX_LANES 64
260b57cec5SDimitry Andric 
270b57cec5SDimitry Andric using namespace llvm;
280b57cec5SDimitry Andric 
290b57cec5SDimitry Andric SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
300b57cec5SDimitry Andric   : AMDGPUMachineFunction(MF),
310b57cec5SDimitry Andric     Mode(MF.getFunction()),
320b57cec5SDimitry Andric     PrivateSegmentBuffer(false),
330b57cec5SDimitry Andric     DispatchPtr(false),
340b57cec5SDimitry Andric     QueuePtr(false),
350b57cec5SDimitry Andric     KernargSegmentPtr(false),
360b57cec5SDimitry Andric     DispatchID(false),
370b57cec5SDimitry Andric     FlatScratchInit(false),
380b57cec5SDimitry Andric     WorkGroupIDX(false),
390b57cec5SDimitry Andric     WorkGroupIDY(false),
400b57cec5SDimitry Andric     WorkGroupIDZ(false),
410b57cec5SDimitry Andric     WorkGroupInfo(false),
420b57cec5SDimitry Andric     PrivateSegmentWaveByteOffset(false),
430b57cec5SDimitry Andric     WorkItemIDX(false),
440b57cec5SDimitry Andric     WorkItemIDY(false),
450b57cec5SDimitry Andric     WorkItemIDZ(false),
460b57cec5SDimitry Andric     ImplicitBufferPtr(false),
470b57cec5SDimitry Andric     ImplicitArgPtr(false),
480b57cec5SDimitry Andric     GITPtrHigh(0xffffffff),
490b57cec5SDimitry Andric     HighBitsOf32BitAddress(0),
500b57cec5SDimitry Andric     GDSSize(0) {
510b57cec5SDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
520b57cec5SDimitry Andric   const Function &F = MF.getFunction();
530b57cec5SDimitry Andric   FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
540b57cec5SDimitry Andric   WavesPerEU = ST.getWavesPerEU(F);
550b57cec5SDimitry Andric 
56*8bcb0991SDimitry Andric   Occupancy = ST.computeOccupancy(MF, getLDSSize());
570b57cec5SDimitry Andric   CallingConv::ID CC = F.getCallingConv();
580b57cec5SDimitry Andric 
590b57cec5SDimitry Andric   if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
600b57cec5SDimitry Andric     if (!F.arg_empty())
610b57cec5SDimitry Andric       KernargSegmentPtr = true;
620b57cec5SDimitry Andric     WorkGroupIDX = true;
630b57cec5SDimitry Andric     WorkItemIDX = true;
640b57cec5SDimitry Andric   } else if (CC == CallingConv::AMDGPU_PS) {
650b57cec5SDimitry Andric     PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
660b57cec5SDimitry Andric   }
670b57cec5SDimitry Andric 
680b57cec5SDimitry Andric   if (!isEntryFunction()) {
690b57cec5SDimitry Andric     // Non-entry functions have no special inputs for now, other registers
700b57cec5SDimitry Andric     // required for scratch access.
710b57cec5SDimitry Andric     ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
720b57cec5SDimitry Andric     ScratchWaveOffsetReg = AMDGPU::SGPR33;
730b57cec5SDimitry Andric 
740b57cec5SDimitry Andric     // TODO: Pick a high register, and shift down, similar to a kernel.
750b57cec5SDimitry Andric     FrameOffsetReg = AMDGPU::SGPR34;
760b57cec5SDimitry Andric     StackPtrOffsetReg = AMDGPU::SGPR32;
770b57cec5SDimitry Andric 
780b57cec5SDimitry Andric     ArgInfo.PrivateSegmentBuffer =
790b57cec5SDimitry Andric       ArgDescriptor::createRegister(ScratchRSrcReg);
800b57cec5SDimitry Andric     ArgInfo.PrivateSegmentWaveByteOffset =
810b57cec5SDimitry Andric       ArgDescriptor::createRegister(ScratchWaveOffsetReg);
820b57cec5SDimitry Andric 
830b57cec5SDimitry Andric     if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
840b57cec5SDimitry Andric       ImplicitArgPtr = true;
850b57cec5SDimitry Andric   } else {
860b57cec5SDimitry Andric     if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) {
870b57cec5SDimitry Andric       KernargSegmentPtr = true;
880b57cec5SDimitry Andric       MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
890b57cec5SDimitry Andric                                  MaxKernArgAlign);
900b57cec5SDimitry Andric     }
910b57cec5SDimitry Andric   }
920b57cec5SDimitry Andric 
930b57cec5SDimitry Andric   if (F.hasFnAttribute("amdgpu-work-group-id-x"))
940b57cec5SDimitry Andric     WorkGroupIDX = true;
950b57cec5SDimitry Andric 
960b57cec5SDimitry Andric   if (F.hasFnAttribute("amdgpu-work-group-id-y"))
970b57cec5SDimitry Andric     WorkGroupIDY = true;
980b57cec5SDimitry Andric 
990b57cec5SDimitry Andric   if (F.hasFnAttribute("amdgpu-work-group-id-z"))
1000b57cec5SDimitry Andric     WorkGroupIDZ = true;
1010b57cec5SDimitry Andric 
1020b57cec5SDimitry Andric   if (F.hasFnAttribute("amdgpu-work-item-id-x"))
1030b57cec5SDimitry Andric     WorkItemIDX = true;
1040b57cec5SDimitry Andric 
1050b57cec5SDimitry Andric   if (F.hasFnAttribute("amdgpu-work-item-id-y"))
1060b57cec5SDimitry Andric     WorkItemIDY = true;
1070b57cec5SDimitry Andric 
1080b57cec5SDimitry Andric   if (F.hasFnAttribute("amdgpu-work-item-id-z"))
1090b57cec5SDimitry Andric     WorkItemIDZ = true;
1100b57cec5SDimitry Andric 
1110b57cec5SDimitry Andric   const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
1120b57cec5SDimitry Andric   bool HasStackObjects = FrameInfo.hasStackObjects();
1130b57cec5SDimitry Andric 
1140b57cec5SDimitry Andric   if (isEntryFunction()) {
1150b57cec5SDimitry Andric     // X, XY, and XYZ are the only supported combinations, so make sure Y is
1160b57cec5SDimitry Andric     // enabled if Z is.
1170b57cec5SDimitry Andric     if (WorkItemIDZ)
1180b57cec5SDimitry Andric       WorkItemIDY = true;
1190b57cec5SDimitry Andric 
1200b57cec5SDimitry Andric     PrivateSegmentWaveByteOffset = true;
1210b57cec5SDimitry Andric 
1220b57cec5SDimitry Andric     // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
1230b57cec5SDimitry Andric     if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
1240b57cec5SDimitry Andric         (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
1250b57cec5SDimitry Andric       ArgInfo.PrivateSegmentWaveByteOffset =
1260b57cec5SDimitry Andric           ArgDescriptor::createRegister(AMDGPU::SGPR5);
1270b57cec5SDimitry Andric   }
1280b57cec5SDimitry Andric 
1290b57cec5SDimitry Andric   bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
1300b57cec5SDimitry Andric   if (isAmdHsaOrMesa) {
1310b57cec5SDimitry Andric     PrivateSegmentBuffer = true;
1320b57cec5SDimitry Andric 
1330b57cec5SDimitry Andric     if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
1340b57cec5SDimitry Andric       DispatchPtr = true;
1350b57cec5SDimitry Andric 
1360b57cec5SDimitry Andric     if (F.hasFnAttribute("amdgpu-queue-ptr"))
1370b57cec5SDimitry Andric       QueuePtr = true;
1380b57cec5SDimitry Andric 
1390b57cec5SDimitry Andric     if (F.hasFnAttribute("amdgpu-dispatch-id"))
1400b57cec5SDimitry Andric       DispatchID = true;
1410b57cec5SDimitry Andric   } else if (ST.isMesaGfxShader(F)) {
1420b57cec5SDimitry Andric     ImplicitBufferPtr = true;
1430b57cec5SDimitry Andric   }
1440b57cec5SDimitry Andric 
1450b57cec5SDimitry Andric   if (F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
1460b57cec5SDimitry Andric     KernargSegmentPtr = true;
1470b57cec5SDimitry Andric 
1480b57cec5SDimitry Andric   if (ST.hasFlatAddressSpace() && isEntryFunction() && isAmdHsaOrMesa) {
1490b57cec5SDimitry Andric     auto hasNonSpillStackObjects = [&]() {
1500b57cec5SDimitry Andric       // Avoid expensive checking if there's no stack objects.
1510b57cec5SDimitry Andric       if (!HasStackObjects)
1520b57cec5SDimitry Andric         return false;
1530b57cec5SDimitry Andric       for (auto OI = FrameInfo.getObjectIndexBegin(),
1540b57cec5SDimitry Andric                 OE = FrameInfo.getObjectIndexEnd(); OI != OE; ++OI)
1550b57cec5SDimitry Andric         if (!FrameInfo.isSpillSlotObjectIndex(OI))
1560b57cec5SDimitry Andric           return true;
1570b57cec5SDimitry Andric       // All stack objects are spill slots.
1580b57cec5SDimitry Andric       return false;
1590b57cec5SDimitry Andric     };
1600b57cec5SDimitry Andric     // TODO: This could be refined a lot. The attribute is a poor way of
1610b57cec5SDimitry Andric     // detecting calls that may require it before argument lowering.
1620b57cec5SDimitry Andric     if (hasNonSpillStackObjects() || F.hasFnAttribute("amdgpu-flat-scratch"))
1630b57cec5SDimitry Andric       FlatScratchInit = true;
1640b57cec5SDimitry Andric   }
1650b57cec5SDimitry Andric 
1660b57cec5SDimitry Andric   Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
1670b57cec5SDimitry Andric   StringRef S = A.getValueAsString();
1680b57cec5SDimitry Andric   if (!S.empty())
1690b57cec5SDimitry Andric     S.consumeInteger(0, GITPtrHigh);
1700b57cec5SDimitry Andric 
1710b57cec5SDimitry Andric   A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
1720b57cec5SDimitry Andric   S = A.getValueAsString();
1730b57cec5SDimitry Andric   if (!S.empty())
1740b57cec5SDimitry Andric     S.consumeInteger(0, HighBitsOf32BitAddress);
1750b57cec5SDimitry Andric 
1760b57cec5SDimitry Andric   S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
1770b57cec5SDimitry Andric   if (!S.empty())
1780b57cec5SDimitry Andric     S.consumeInteger(0, GDSSize);
1790b57cec5SDimitry Andric }
1800b57cec5SDimitry Andric 
1810b57cec5SDimitry Andric void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
1820b57cec5SDimitry Andric   limitOccupancy(getMaxWavesPerEU());
1830b57cec5SDimitry Andric   const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();
1840b57cec5SDimitry Andric   limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
1850b57cec5SDimitry Andric                  MF.getFunction()));
1860b57cec5SDimitry Andric }
1870b57cec5SDimitry Andric 
1880b57cec5SDimitry Andric unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
1890b57cec5SDimitry Andric   const SIRegisterInfo &TRI) {
1900b57cec5SDimitry Andric   ArgInfo.PrivateSegmentBuffer =
1910b57cec5SDimitry Andric     ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
192*8bcb0991SDimitry Andric     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));
1930b57cec5SDimitry Andric   NumUserSGPRs += 4;
1940b57cec5SDimitry Andric   return ArgInfo.PrivateSegmentBuffer.getRegister();
1950b57cec5SDimitry Andric }
1960b57cec5SDimitry Andric 
1970b57cec5SDimitry Andric unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
1980b57cec5SDimitry Andric   ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
1990b57cec5SDimitry Andric     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
2000b57cec5SDimitry Andric   NumUserSGPRs += 2;
2010b57cec5SDimitry Andric   return ArgInfo.DispatchPtr.getRegister();
2020b57cec5SDimitry Andric }
2030b57cec5SDimitry Andric 
2040b57cec5SDimitry Andric unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
2050b57cec5SDimitry Andric   ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2060b57cec5SDimitry Andric     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
2070b57cec5SDimitry Andric   NumUserSGPRs += 2;
2080b57cec5SDimitry Andric   return ArgInfo.QueuePtr.getRegister();
2090b57cec5SDimitry Andric }
2100b57cec5SDimitry Andric 
2110b57cec5SDimitry Andric unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
2120b57cec5SDimitry Andric   ArgInfo.KernargSegmentPtr
2130b57cec5SDimitry Andric     = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2140b57cec5SDimitry Andric     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
2150b57cec5SDimitry Andric   NumUserSGPRs += 2;
2160b57cec5SDimitry Andric   return ArgInfo.KernargSegmentPtr.getRegister();
2170b57cec5SDimitry Andric }
2180b57cec5SDimitry Andric 
2190b57cec5SDimitry Andric unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
2200b57cec5SDimitry Andric   ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2210b57cec5SDimitry Andric     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
2220b57cec5SDimitry Andric   NumUserSGPRs += 2;
2230b57cec5SDimitry Andric   return ArgInfo.DispatchID.getRegister();
2240b57cec5SDimitry Andric }
2250b57cec5SDimitry Andric 
2260b57cec5SDimitry Andric unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
2270b57cec5SDimitry Andric   ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2280b57cec5SDimitry Andric     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
2290b57cec5SDimitry Andric   NumUserSGPRs += 2;
2300b57cec5SDimitry Andric   return ArgInfo.FlatScratchInit.getRegister();
2310b57cec5SDimitry Andric }
2320b57cec5SDimitry Andric 
2330b57cec5SDimitry Andric unsigned SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
2340b57cec5SDimitry Andric   ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
2350b57cec5SDimitry Andric     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
2360b57cec5SDimitry Andric   NumUserSGPRs += 2;
2370b57cec5SDimitry Andric   return ArgInfo.ImplicitBufferPtr.getRegister();
2380b57cec5SDimitry Andric }
2390b57cec5SDimitry Andric 
2400b57cec5SDimitry Andric static bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) {
2410b57cec5SDimitry Andric   for (unsigned I = 0; CSRegs[I]; ++I) {
2420b57cec5SDimitry Andric     if (CSRegs[I] == Reg)
2430b57cec5SDimitry Andric       return true;
2440b57cec5SDimitry Andric   }
2450b57cec5SDimitry Andric 
2460b57cec5SDimitry Andric   return false;
2470b57cec5SDimitry Andric }
2480b57cec5SDimitry Andric 
2490b57cec5SDimitry Andric /// \p returns true if \p NumLanes slots are available in VGPRs already used for
2500b57cec5SDimitry Andric /// SGPR spilling.
2510b57cec5SDimitry Andric //
2520b57cec5SDimitry Andric // FIXME: This only works after processFunctionBeforeFrameFinalized
2530b57cec5SDimitry Andric bool SIMachineFunctionInfo::haveFreeLanesForSGPRSpill(const MachineFunction &MF,
2540b57cec5SDimitry Andric                                                       unsigned NumNeed) const {
2550b57cec5SDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
2560b57cec5SDimitry Andric   unsigned WaveSize = ST.getWavefrontSize();
2570b57cec5SDimitry Andric   return NumVGPRSpillLanes + NumNeed <= WaveSize * SpillVGPRs.size();
2580b57cec5SDimitry Andric }
2590b57cec5SDimitry Andric 
2600b57cec5SDimitry Andric /// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
2610b57cec5SDimitry Andric bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
2620b57cec5SDimitry Andric                                                     int FI) {
2630b57cec5SDimitry Andric   std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];
2640b57cec5SDimitry Andric 
2650b57cec5SDimitry Andric   // This has already been allocated.
2660b57cec5SDimitry Andric   if (!SpillLanes.empty())
2670b57cec5SDimitry Andric     return true;
2680b57cec5SDimitry Andric 
2690b57cec5SDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
2700b57cec5SDimitry Andric   const SIRegisterInfo *TRI = ST.getRegisterInfo();
2710b57cec5SDimitry Andric   MachineFrameInfo &FrameInfo = MF.getFrameInfo();
2720b57cec5SDimitry Andric   MachineRegisterInfo &MRI = MF.getRegInfo();
2730b57cec5SDimitry Andric   unsigned WaveSize = ST.getWavefrontSize();
2740b57cec5SDimitry Andric 
2750b57cec5SDimitry Andric   unsigned Size = FrameInfo.getObjectSize(FI);
2760b57cec5SDimitry Andric   assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
2770b57cec5SDimitry Andric   assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");
2780b57cec5SDimitry Andric 
2790b57cec5SDimitry Andric   int NumLanes = Size / 4;
2800b57cec5SDimitry Andric 
2810b57cec5SDimitry Andric   const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
2820b57cec5SDimitry Andric 
2830b57cec5SDimitry Andric   // Make sure to handle the case where a wide SGPR spill may span between two
2840b57cec5SDimitry Andric   // VGPRs.
2850b57cec5SDimitry Andric   for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
2860b57cec5SDimitry Andric     unsigned LaneVGPR;
2870b57cec5SDimitry Andric     unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);
2880b57cec5SDimitry Andric 
2890b57cec5SDimitry Andric     if (VGPRIndex == 0) {
2900b57cec5SDimitry Andric       LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
2910b57cec5SDimitry Andric       if (LaneVGPR == AMDGPU::NoRegister) {
2920b57cec5SDimitry Andric         // We have no VGPRs left for spilling SGPRs. Reset because we will not
2930b57cec5SDimitry Andric         // partially spill the SGPR to VGPRs.
2940b57cec5SDimitry Andric         SGPRToVGPRSpills.erase(FI);
2950b57cec5SDimitry Andric         NumVGPRSpillLanes -= I;
2960b57cec5SDimitry Andric         return false;
2970b57cec5SDimitry Andric       }
2980b57cec5SDimitry Andric 
2990b57cec5SDimitry Andric       Optional<int> CSRSpillFI;
3000b57cec5SDimitry Andric       if ((FrameInfo.hasCalls() || !isEntryFunction()) && CSRegs &&
3010b57cec5SDimitry Andric           isCalleeSavedReg(CSRegs, LaneVGPR)) {
3020b57cec5SDimitry Andric         CSRSpillFI = FrameInfo.CreateSpillStackObject(4, 4);
3030b57cec5SDimitry Andric       }
3040b57cec5SDimitry Andric 
3050b57cec5SDimitry Andric       SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI));
3060b57cec5SDimitry Andric 
3070b57cec5SDimitry Andric       // Add this register as live-in to all blocks to avoid machine verifer
3080b57cec5SDimitry Andric       // complaining about use of an undefined physical register.
3090b57cec5SDimitry Andric       for (MachineBasicBlock &BB : MF)
3100b57cec5SDimitry Andric         BB.addLiveIn(LaneVGPR);
3110b57cec5SDimitry Andric     } else {
3120b57cec5SDimitry Andric       LaneVGPR = SpillVGPRs.back().VGPR;
3130b57cec5SDimitry Andric     }
3140b57cec5SDimitry Andric 
3150b57cec5SDimitry Andric     SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
3160b57cec5SDimitry Andric   }
3170b57cec5SDimitry Andric 
3180b57cec5SDimitry Andric   return true;
3190b57cec5SDimitry Andric }
3200b57cec5SDimitry Andric 
3210b57cec5SDimitry Andric /// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
3220b57cec5SDimitry Andric /// Either AGPR is spilled to VGPR to vice versa.
3230b57cec5SDimitry Andric /// Returns true if a \p FI can be eliminated completely.
3240b57cec5SDimitry Andric bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
3250b57cec5SDimitry Andric                                                     int FI,
3260b57cec5SDimitry Andric                                                     bool isAGPRtoVGPR) {
3270b57cec5SDimitry Andric   MachineRegisterInfo &MRI = MF.getRegInfo();
3280b57cec5SDimitry Andric   MachineFrameInfo &FrameInfo = MF.getFrameInfo();
3290b57cec5SDimitry Andric   const GCNSubtarget &ST =  MF.getSubtarget<GCNSubtarget>();
3300b57cec5SDimitry Andric 
3310b57cec5SDimitry Andric   assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));
3320b57cec5SDimitry Andric 
3330b57cec5SDimitry Andric   auto &Spill = VGPRToAGPRSpills[FI];
3340b57cec5SDimitry Andric 
3350b57cec5SDimitry Andric   // This has already been allocated.
3360b57cec5SDimitry Andric   if (!Spill.Lanes.empty())
3370b57cec5SDimitry Andric     return Spill.FullyAllocated;
3380b57cec5SDimitry Andric 
3390b57cec5SDimitry Andric   unsigned Size = FrameInfo.getObjectSize(FI);
3400b57cec5SDimitry Andric   unsigned NumLanes = Size / 4;
3410b57cec5SDimitry Andric   Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);
3420b57cec5SDimitry Andric 
3430b57cec5SDimitry Andric   const TargetRegisterClass &RC =
3440b57cec5SDimitry Andric       isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
3450b57cec5SDimitry Andric   auto Regs = RC.getRegisters();
3460b57cec5SDimitry Andric 
3470b57cec5SDimitry Andric   auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
3480b57cec5SDimitry Andric   const SIRegisterInfo *TRI = ST.getRegisterInfo();
3490b57cec5SDimitry Andric   Spill.FullyAllocated = true;
3500b57cec5SDimitry Andric 
3510b57cec5SDimitry Andric   // FIXME: Move allocation logic out of MachineFunctionInfo and initialize
3520b57cec5SDimitry Andric   // once.
3530b57cec5SDimitry Andric   BitVector OtherUsedRegs;
3540b57cec5SDimitry Andric   OtherUsedRegs.resize(TRI->getNumRegs());
3550b57cec5SDimitry Andric 
3560b57cec5SDimitry Andric   const uint32_t *CSRMask =
3570b57cec5SDimitry Andric       TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
3580b57cec5SDimitry Andric   if (CSRMask)
3590b57cec5SDimitry Andric     OtherUsedRegs.setBitsInMask(CSRMask);
3600b57cec5SDimitry Andric 
3610b57cec5SDimitry Andric   // TODO: Should include register tuples, but doesn't matter with current
3620b57cec5SDimitry Andric   // usage.
3630b57cec5SDimitry Andric   for (MCPhysReg Reg : SpillAGPR)
3640b57cec5SDimitry Andric     OtherUsedRegs.set(Reg);
3650b57cec5SDimitry Andric   for (MCPhysReg Reg : SpillVGPR)
3660b57cec5SDimitry Andric     OtherUsedRegs.set(Reg);
3670b57cec5SDimitry Andric 
3680b57cec5SDimitry Andric   SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
3690b57cec5SDimitry Andric   for (unsigned I = 0; I < NumLanes; ++I) {
3700b57cec5SDimitry Andric     NextSpillReg = std::find_if(
3710b57cec5SDimitry Andric         NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
3720b57cec5SDimitry Andric           return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
3730b57cec5SDimitry Andric                  !OtherUsedRegs[Reg];
3740b57cec5SDimitry Andric         });
3750b57cec5SDimitry Andric 
3760b57cec5SDimitry Andric     if (NextSpillReg == Regs.end()) { // Registers exhausted
3770b57cec5SDimitry Andric       Spill.FullyAllocated = false;
3780b57cec5SDimitry Andric       break;
3790b57cec5SDimitry Andric     }
3800b57cec5SDimitry Andric 
3810b57cec5SDimitry Andric     OtherUsedRegs.set(*NextSpillReg);
3820b57cec5SDimitry Andric     SpillRegs.push_back(*NextSpillReg);
3830b57cec5SDimitry Andric     Spill.Lanes[I] = *NextSpillReg++;
3840b57cec5SDimitry Andric   }
3850b57cec5SDimitry Andric 
3860b57cec5SDimitry Andric   return Spill.FullyAllocated;
3870b57cec5SDimitry Andric }
3880b57cec5SDimitry Andric 
3890b57cec5SDimitry Andric void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) {
3900b57cec5SDimitry Andric   // The FP spill hasn't been inserted yet, so keep it around.
3910b57cec5SDimitry Andric   for (auto &R : SGPRToVGPRSpills) {
3920b57cec5SDimitry Andric     if (R.first != FramePointerSaveIndex)
3930b57cec5SDimitry Andric       MFI.RemoveStackObject(R.first);
3940b57cec5SDimitry Andric   }
3950b57cec5SDimitry Andric 
3960b57cec5SDimitry Andric   // All other SPGRs must be allocated on the default stack, so reset the stack
3970b57cec5SDimitry Andric   // ID.
3980b57cec5SDimitry Andric   for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e;
3990b57cec5SDimitry Andric        ++i)
4000b57cec5SDimitry Andric     if (i != FramePointerSaveIndex)
4010b57cec5SDimitry Andric       MFI.setStackID(i, TargetStackID::Default);
4020b57cec5SDimitry Andric 
4030b57cec5SDimitry Andric   for (auto &R : VGPRToAGPRSpills) {
4040b57cec5SDimitry Andric     if (R.second.FullyAllocated)
4050b57cec5SDimitry Andric       MFI.RemoveStackObject(R.first);
4060b57cec5SDimitry Andric   }
4070b57cec5SDimitry Andric }
4080b57cec5SDimitry Andric 
4090b57cec5SDimitry Andric MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
4100b57cec5SDimitry Andric   assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
4110b57cec5SDimitry Andric   return AMDGPU::SGPR0 + NumUserSGPRs;
4120b57cec5SDimitry Andric }
4130b57cec5SDimitry Andric 
4140b57cec5SDimitry Andric MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
4150b57cec5SDimitry Andric   return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
4160b57cec5SDimitry Andric }
4170b57cec5SDimitry Andric 
4180b57cec5SDimitry Andric static yaml::StringValue regToString(unsigned Reg,
4190b57cec5SDimitry Andric                                      const TargetRegisterInfo &TRI) {
4200b57cec5SDimitry Andric   yaml::StringValue Dest;
4210b57cec5SDimitry Andric   {
4220b57cec5SDimitry Andric     raw_string_ostream OS(Dest.Value);
4230b57cec5SDimitry Andric     OS << printReg(Reg, &TRI);
4240b57cec5SDimitry Andric   }
4250b57cec5SDimitry Andric   return Dest;
4260b57cec5SDimitry Andric }
4270b57cec5SDimitry Andric 
4280b57cec5SDimitry Andric static Optional<yaml::SIArgumentInfo>
4290b57cec5SDimitry Andric convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
4300b57cec5SDimitry Andric                     const TargetRegisterInfo &TRI) {
4310b57cec5SDimitry Andric   yaml::SIArgumentInfo AI;
4320b57cec5SDimitry Andric 
4330b57cec5SDimitry Andric   auto convertArg = [&](Optional<yaml::SIArgument> &A,
4340b57cec5SDimitry Andric                         const ArgDescriptor &Arg) {
4350b57cec5SDimitry Andric     if (!Arg)
4360b57cec5SDimitry Andric       return false;
4370b57cec5SDimitry Andric 
4380b57cec5SDimitry Andric     // Create a register or stack argument.
4390b57cec5SDimitry Andric     yaml::SIArgument SA = yaml::SIArgument::createArgument(Arg.isRegister());
4400b57cec5SDimitry Andric     if (Arg.isRegister()) {
4410b57cec5SDimitry Andric       raw_string_ostream OS(SA.RegisterName.Value);
4420b57cec5SDimitry Andric       OS << printReg(Arg.getRegister(), &TRI);
4430b57cec5SDimitry Andric     } else
4440b57cec5SDimitry Andric       SA.StackOffset = Arg.getStackOffset();
4450b57cec5SDimitry Andric     // Check and update the optional mask.
4460b57cec5SDimitry Andric     if (Arg.isMasked())
4470b57cec5SDimitry Andric       SA.Mask = Arg.getMask();
4480b57cec5SDimitry Andric 
4490b57cec5SDimitry Andric     A = SA;
4500b57cec5SDimitry Andric     return true;
4510b57cec5SDimitry Andric   };
4520b57cec5SDimitry Andric 
4530b57cec5SDimitry Andric   bool Any = false;
4540b57cec5SDimitry Andric   Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
4550b57cec5SDimitry Andric   Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
4560b57cec5SDimitry Andric   Any |= convertArg(AI.QueuePtr, ArgInfo.QueuePtr);
4570b57cec5SDimitry Andric   Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr);
4580b57cec5SDimitry Andric   Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID);
4590b57cec5SDimitry Andric   Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit);
4600b57cec5SDimitry Andric   Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize);
4610b57cec5SDimitry Andric   Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX);
4620b57cec5SDimitry Andric   Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY);
4630b57cec5SDimitry Andric   Any |= convertArg(AI.WorkGroupIDZ, ArgInfo.WorkGroupIDZ);
4640b57cec5SDimitry Andric   Any |= convertArg(AI.WorkGroupInfo, ArgInfo.WorkGroupInfo);
4650b57cec5SDimitry Andric   Any |= convertArg(AI.PrivateSegmentWaveByteOffset,
4660b57cec5SDimitry Andric                     ArgInfo.PrivateSegmentWaveByteOffset);
4670b57cec5SDimitry Andric   Any |= convertArg(AI.ImplicitArgPtr, ArgInfo.ImplicitArgPtr);
4680b57cec5SDimitry Andric   Any |= convertArg(AI.ImplicitBufferPtr, ArgInfo.ImplicitBufferPtr);
4690b57cec5SDimitry Andric   Any |= convertArg(AI.WorkItemIDX, ArgInfo.WorkItemIDX);
4700b57cec5SDimitry Andric   Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY);
4710b57cec5SDimitry Andric   Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ);
4720b57cec5SDimitry Andric 
4730b57cec5SDimitry Andric   if (Any)
4740b57cec5SDimitry Andric     return AI;
4750b57cec5SDimitry Andric 
4760b57cec5SDimitry Andric   return None;
4770b57cec5SDimitry Andric }
4780b57cec5SDimitry Andric 
4790b57cec5SDimitry Andric yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
4800b57cec5SDimitry Andric   const llvm::SIMachineFunctionInfo& MFI,
4810b57cec5SDimitry Andric   const TargetRegisterInfo &TRI)
4820b57cec5SDimitry Andric   : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
4830b57cec5SDimitry Andric     MaxKernArgAlign(MFI.getMaxKernArgAlign()),
4840b57cec5SDimitry Andric     LDSSize(MFI.getLDSSize()),
4850b57cec5SDimitry Andric     IsEntryFunction(MFI.isEntryFunction()),
4860b57cec5SDimitry Andric     NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
4870b57cec5SDimitry Andric     MemoryBound(MFI.isMemoryBound()),
4880b57cec5SDimitry Andric     WaveLimiter(MFI.needsWaveLimiter()),
489*8bcb0991SDimitry Andric     HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
4900b57cec5SDimitry Andric     ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
4910b57cec5SDimitry Andric     ScratchWaveOffsetReg(regToString(MFI.getScratchWaveOffsetReg(), TRI)),
4920b57cec5SDimitry Andric     FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
4930b57cec5SDimitry Andric     StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
4940b57cec5SDimitry Andric     ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)),
4950b57cec5SDimitry Andric     Mode(MFI.getMode()) {}
4960b57cec5SDimitry Andric 
4970b57cec5SDimitry Andric void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
4980b57cec5SDimitry Andric   MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
4990b57cec5SDimitry Andric }
5000b57cec5SDimitry Andric 
5010b57cec5SDimitry Andric bool SIMachineFunctionInfo::initializeBaseYamlFields(
5020b57cec5SDimitry Andric   const yaml::SIMachineFunctionInfo &YamlMFI) {
5030b57cec5SDimitry Andric   ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
504*8bcb0991SDimitry Andric   MaxKernArgAlign = assumeAligned(YamlMFI.MaxKernArgAlign);
5050b57cec5SDimitry Andric   LDSSize = YamlMFI.LDSSize;
506*8bcb0991SDimitry Andric   HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
5070b57cec5SDimitry Andric   IsEntryFunction = YamlMFI.IsEntryFunction;
5080b57cec5SDimitry Andric   NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
5090b57cec5SDimitry Andric   MemoryBound = YamlMFI.MemoryBound;
5100b57cec5SDimitry Andric   WaveLimiter = YamlMFI.WaveLimiter;
5110b57cec5SDimitry Andric   return false;
5120b57cec5SDimitry Andric }
513